"""<br>
    @Author: Deven Gupta<br>
    @Date: 3-09-2024<br>
    @Last Modified by: Deven Gupta<br>
    @Last Modified time: 3-09-2024<br>
    @Title : Perform Read and Write operation in pyspark<br>
<br>
"""

In [1]:
from pyspark.sql import SparkSession
spark=SparkSession.builder.appName('R_W_pyspark').config("spark.jars.packages", "org.apache.spark:spark-avro_2.12:3.5.2").master("local[*]").getOrCreate()

In [2]:
spark

## <Center><h1 style="background:white;color:blue;font-weight:bold">Conversion From Avro to CSV</h1></Center>

In [3]:
#Reading the avro file
df_avro=spark.read.format("avro").load("file:///C:/Users/Deven/Desktop/Python Libraries/Pyspark/Files/input.avro")

In [4]:
#Printing Scahema
df_avro.printSchema()

root
 |-- registration_dttm: string (nullable = true)
 |-- id: long (nullable = true)
 |-- first_name: string (nullable = true)
 |-- last_name: string (nullable = true)
 |-- email: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- ip_address: string (nullable = true)
 |-- cc: long (nullable = true)
 |-- country: string (nullable = true)
 |-- birthdate: string (nullable = true)
 |-- salary: double (nullable = true)
 |-- title: string (nullable = true)
 |-- comments: string (nullable = true)



In [5]:
#Display the data
df_avro.show()

+--------------------+---+-----------+---------+--------------------+------+---------------+-------------------+-------------+----------+---------+--------------------+--------------------+
|   registration_dttm| id| first_name|last_name|               email|gender|     ip_address|                 cc|      country| birthdate|   salary|               title|            comments|
+--------------------+---+-----------+---------+--------------------+------+---------------+-------------------+-------------+----------+---------+--------------------+--------------------+
|2016-02-04T10:34:07Z|  1|      Kelly|    Ortiz|kortiz0@omniture.com|Female|252.115.158.159|   3537905681760845|       Russia| 4/23/1980|277302.99|               Nurse|                    |
|2016-02-04T22:54:01Z|  2|     Sharon|  Carroll|scarroll1@disqus.com|Female|  29.217.252.62|  56022458507191696|    Indonesia| 8/28/1992|209258.05|           Recruiter|         åß∂ƒ©˙∆˚¬…æ|
|2016-02-04T17:59:50Z|  3|       Ruth|     Ross|  

In [16]:
#Writing the data into CSV file(Converting to CSV File)
df_avro.write.mode("overwrite").csv("Files/csv_output")

In [17]:
#reading CSV file
df=spark.read.csv("Files/csv_output",header=True)

#Display the data of CSV file
df.show()

+--------------------+---+-----------+---------+--------------------+------+---------------+-------------------+-------------+----------+---------+--------------------+--------------------+
|2016-02-04T10:34:07Z|  1|      Kelly|    Ortiz|kortiz0@omniture.com|Female|252.115.158.159|   3537905681760845|       Russia| 4/23/1980|277302.99|               Nurse|                _c12|
+--------------------+---+-----------+---------+--------------------+------+---------------+-------------------+-------------+----------+---------+--------------------+--------------------+
|2016-02-04T22:54:01Z|  2|     Sharon|  Carroll|scarroll1@disqus.com|Female|  29.217.252.62|  56022458507191696|    Indonesia| 8/28/1992|209258.05|           Recruiter|         åß∂ƒ©˙∆˚¬…æ|
|2016-02-04T17:59:50Z|  3|       Ruth|     Ross|       rross2@cbc.ca|Female|  220.224.80.32|   3589642396435648|        Benin| 6/13/1994|  18270.7|     Design Engineer|                NULL|
|2016-02-04T16:03:17Z|  4|      Kelly|    Meyer| k

## <Center><h1 style="background:white;color:blue;font-weight:bold">Conversion CSV TO JSON</h1></Center>

In [13]:
#Reading CSV file
df_csv=spark.read.csv("Files/input.csv",header=True)
df_csv.printSchema()
df_csv.show()

root
 |-- stud_ID: string (nullable = true)
 |-- FirstName: string (nullable = true)
 |-- LastName: string (nullable = true)
 |-- Dept: string (nullable = true)
 |-- Dept_ID: string (nullable = true)

+-------+---------+---------+----+-------+
|stud_ID|FirstName| LastName|Dept|Dept_ID|
+-------+---------+---------+----+-------+
|      1|    Deven|    Gupta|  IT|      1|
|      2|   Prayag|    Bhoir|  IT|      1|
|      3|    Ayush|Prajapati|COMP|      2|
|      4|     Shiv|    Yelve|COMP|      2|
|      5|   Nikhil|    Patil|MECH|      3|
+-------+---------+---------+----+-------+



In [None]:
#Write to json(Convert the csv to json)
df_csv.write.json("Files/json_output")

In [19]:
#Reading json file
df_json=spark.read.json("Files/json_output")

#Display the json file Contents
df_json.show()

+----+-------+---------+---------+-------+
|Dept|Dept_ID|FirstName| LastName|stud_ID|
+----+-------+---------+---------+-------+
|  IT|      1|    Deven|    Gupta|      1|
|  IT|      1|   Prayag|    Bhoir|      2|
|COMP|      2|    Ayush|Prajapati|      3|
|COMP|      2|     Shiv|    Yelve|      4|
|MECH|      3|   Nikhil|    Patil|      5|
+----+-------+---------+---------+-------+



In [None]:
#stop spark session
spark.stop()