In [1]:
from pyspark.sql import SparkSession

# Obtain the notebook's Spark session: getOrCreate() hands back an already
# running session when there is one, otherwise it starts a new one under
# this application name.
spark = (
    SparkSession.builder
    .appName("Read CSV Example")
    .getOrCreate()
)

# Creating and Reading a CSV File

In [2]:
# Sample employee records as CSV text: one header row followed by five data rows.
data = """id,name,city,age,salary
1,Arjun,Hyderabad,25,45000
2,Meera,Chennai,32,52000
3,Rajesh,Bangalore,29,61000
4,Priya,Delhi,22,38000
5,Sanjay,Mumbai,35,72000
"""

# Write the sample to disk so Spark can read it back as a file.
# The encoding is given explicitly: without it open() falls back to the
# platform's locale encoding, whereas Spark's CSV reader decodes UTF-8 by
# default, so any non-ASCII value added later would be misread.
with open("employees.csv", "w", encoding="utf-8") as csv_file:
    csv_file.write(data)

In [4]:
# Load the CSV written above. header=True takes the column names from the
# first line; inferSchema=True asks Spark to derive each column's type from
# the values rather than leaving every column as a string.
df = spark.read.csv("employees.csv", header=True, inferSchema=True)

df.show()         # preview the rows
df.printSchema()  # confirm the inferred column types

+---+------+---------+---+------+
| id|  name|     city|age|salary|
+---+------+---------+---+------+
|  1| Arjun|Hyderabad| 25| 45000|
|  2| Meera|  Chennai| 32| 52000|
|  3|Rajesh|Bangalore| 29| 61000|
|  4| Priya|    Delhi| 22| 38000|
|  5|Sanjay|   Mumbai| 35| 72000|
+---+------+---------+---+------+

root
 |-- id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- city: string (nullable = true)
 |-- age: integer (nullable = true)
 |-- salary: integer (nullable = true)



# Writing and Reading JSON

In [5]:
# Three employee records; each dict becomes one row and its keys become the columns.
json_data = [
    dict(id=1, name="Arjun", dept="IT", salary=45000),
    dict(id=2, name="Meera", dept="HR", salary=52000),
    dict(id=3, name="Rajesh", dept="Finance", salary=61000),
]

# No schema is supplied, so Spark infers column names and types from the dicts.
df_json = spark.createDataFrame(json_data)

# mode="overwrite" replaces any earlier output at this path, so the cell can be re-run.
df_json.write.json("employees.json", mode="overwrite")

In [6]:
# Read back the JSON output written by the previous cell; the schema is inferred.
# NOTE: this rebinds `df`, which previously held the CSV DataFrame.
# The rows in the output below come back in a different order than they were
# written (2, 3, 1) -- presumably show() makes no ordering guarantee; confirm.
df = spark.read.format("json").load("employees.json")

df.show()
df.printSchema()

+-------+---+------+------+
|   dept| id|  name|salary|
+-------+---+------+------+
|     HR|  2| Meera| 52000|
|Finance|  3|Rajesh| 61000|
|     IT|  1| Arjun| 45000|
+-------+---+------+------+

root
 |-- dept: string (nullable = true)
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- salary: long (nullable = true)



# Writing and Reading Parquet

In [7]:
# Persist `df` (at this point the DataFrame read from employees.json) as Parquet.
# mode="overwrite" replaces any earlier output at this path so the cell can be re-run.
df.write.parquet("employees.parquet", mode="overwrite")

In [8]:
# Load the Parquet output back into a DataFrame and preview it to verify the round trip.
df_parquet = spark.read.format("parquet").load("employees.parquet")

df_parquet.show()

+-------+---+------+------+
|   dept| id|  name|salary|
+-------+---+------+------+
|     HR|  2| Meera| 52000|
|Finance|  3|Rajesh| 61000|
|     IT|  1| Arjun| 45000|
+-------+---+------+------+

