In [1]:
from pyspark.sql import SparkSession
spark = SparkSession.builder \
.appName("Read CSV Example") \
.getOrCreate()

In [2]:
data = """id, name, city, age, salary
1, Arjun, Hyderabad, 25, 45000
2, Meera, Chennai, 32, 52000
3, Rajesh, Bangalore, 29, 61000
4, Priya, Delhi, 22, 38000
5, Sanjay, Mumbai, 35, 72000
"""

with open("employees.csv", "w") as f:
  f.write(data)

In [3]:
df = spark.read \
.option("header",True) \
.option("inferSchema", True) \
.csv("employees.csv")

df.show()
df.printSchema()

+---+-------+----------+----+-------+
| id|   name|      city| age| salary|
+---+-------+----------+----+-------+
|  1|  Arjun| Hyderabad|25.0|45000.0|
|  2|  Meera|   Chennai|32.0|52000.0|
|  3| Rajesh| Bangalore|29.0|61000.0|
|  4|  Priya|     Delhi|22.0|38000.0|
|  5| Sanjay|    Mumbai|35.0|72000.0|
+---+-------+----------+----+-------+

root
 |-- id: integer (nullable = true)
 |--  name: string (nullable = true)
 |--  city: string (nullable = true)
 |--  age: double (nullable = true)
 |--  salary: double (nullable = true)



In [4]:
json_data = [
    {"id":1, "name": "Arjun", "dept": "IT", "salary": 50000},
    {"id":2, "name": "Meera", "dept": "HR", "salary": 45000},
    {"id":3, "name": "Rajesh", "dept": "Finance", "salary": 60000}
]
df_json = spark.createDataFrame(json_data)
df_json.write.mode("overwrite").json("employees.json")

In [5]:
df = spark.read.json("employees.json")
df.show()
df.printSchema()

+-------+---+------+------+
|   dept| id|  name|salary|
+-------+---+------+------+
|     HR|  2| Meera| 45000|
|Finance|  3|Rajesh| 60000|
|     IT|  1| Arjun| 50000|
+-------+---+------+------+

root
 |-- dept: string (nullable = true)
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- salary: long (nullable = true)



In [6]:
df.write.mode("overwrite").parquet("employees.parquet")

In [7]:
df_parquet = spark.read.parquet("employees.parquet")
df_parquet.show()

+-------+---+------+------+
|   dept| id|  name|salary|
+-------+---+------+------+
|     HR|  2| Meera| 45000|
|Finance|  3|Rajesh| 60000|
|     IT|  1| Arjun| 50000|
+-------+---+------+------+

