In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *

# Obtain the session for this notebook (an existing one is reused if present).
spark = (
    SparkSession.builder
    .appName("ReadFiles")
    .getOrCreate()
)

# Explicit schema applied to the schema-less text formats (CSV and JSON).
# All three columns are nullable.
schema = (
    StructType()
    .add("id", IntegerType(), nullable=True)
    .add("name", StringType(), nullable=True)
    .add("salary", DoubleType(), nullable=True)
)

# CSV: first line is a header row; column types come from `schema`.
df_csv = spark.read.load(
    "path/file.csv",
    format="csv",
    schema=schema,
    header="true",
)

# JSON: column types come from `schema` as well.
df_json = spark.read.load(
    "path/file.json",
    format="json",
    schema=schema,
)

# Parquet: no schema passed; the file carries its own.
df_parquet = spark.read.load(
    "path/file.parquet",
    format="parquet",
)

# Delta: no schema passed here either.
df_delta = spark.read.load(
    "path/file_delta",
    format="delta",
)

# The four DataFrames, in the order they should be displayed.
_frames = (df_csv, df_json, df_parquet, df_delta)

# Sample rows for every source first ...
for _frame in _frames:
    _frame.show()

# ... then the schema of every source.
for _frame in _frames:
    _frame.printSchema()