In [0]:
from pyspark.sql.types import StructField, StringType, IntegerType, StructType, BooleanType, ArrayType

In [0]:
# Explicit schema for Sample_Input.csv. Every column is declared nullable:
# 'Other' is empty for several rows of the file, and Spark reports columns
# read from CSV as nullable regardless of the flag declared here, so a
# non-nullable flag would only misdescribe the data.
csv_schema = StructType([StructField('Name', StringType(), True),
                        StructField('Age', IntegerType(), True),
                        StructField('Subject', StringType(), True),
                        StructField('Other', StringType(), True),
                        StructField('Active', BooleanType(), True)])

In [0]:
# Load the sample CSV using the explicit schema; the file's first row is a header.
sample_df = (
    spark.read
    .schema(csv_schema)
    .option("header", True)
    .csv('/FileStore/tables/Sample_Input.csv')
)

In [0]:
# Print the schema of the loaded DataFrame to check column names, types and nullability.
sample_df.printSchema()

root
 |-- Name: string (nullable = true)
 |-- Age: integer (nullable = true)
 |-- Subject: string (nullable = true)
 |-- Other: string (nullable = true)
 |-- Active: boolean (nullable = true)



In [0]:
# Preview the rows parsed with the current schema.
sample_df.show()

+-----+---+---------+--------+------+
| Name|Age|  Subject|   Other|Active|
+-----+---+---------+--------+------+
| Ross| 28|  Physics| Teacher|  true|
| Ishu| 27| Software|Engineer| false|
|Robin| 27|  Biology|    null|  true|
|  Don| 26|  Physics|    null| false|
|  Doe| 26|Chemistry|    null|  true|
| Gina| 28| Computer|    null| false|
+-----+---+---------+--------+------+



In [0]:
# Variant of the CSV schema in which 'Active' is read as a plain string, so the
# column keeps the text exactly as it appears in the file instead of being
# parsed into a boolean. All columns are declared nullable: 'Other' is empty
# for several rows, and Spark reports CSV columns as nullable regardless of
# the flag declared here.
csv_schema = StructType([StructField('Name', StringType(), True),
                        StructField('Age', IntegerType(), True),
                        StructField('Subject', StringType(), True),
                        StructField('Other', StringType(), True),
                        StructField('Active', StringType(), True)])

In [0]:
# Re-read the same CSV so the DataFrame reflects the schema currently bound to csv_schema.
sample_df = (
    spark.read
    .schema(csv_schema)
    .option("header", True)
    .csv('/FileStore/tables/Sample_Input.csv')
)

In [0]:
# Print the schema again to confirm the column types after the re-read.
sample_df.printSchema()

root
 |-- Name: string (nullable = true)
 |-- Age: integer (nullable = true)
 |-- Subject: string (nullable = true)
 |-- Other: string (nullable = true)
 |-- Active: string (nullable = true)



In [0]:
# Preview the rows again to see how the values are rendered after the re-read.
sample_df.show()

+-----+---+---------+--------+------+
| Name|Age|  Subject|   Other|Active|
+-----+---+---------+--------+------+
| Ross| 28|  Physics| Teacher|  TRUE|
| Ishu| 27| Software|Engineer| FALSE|
|Robin| 27|  Biology|    null|  TRUE|
|  Don| 26|  Physics|    null| FALSE|
|  Doe| 26|Chemistry|    null|  TRUE|
| Gina| 28| Computer|    null| FALSE|
+-----+---+---------+--------+------+



In [0]:
# Load the JSON input without supplying a schema, leaving Spark to infer it.
json_df = spark.read.format("json").load("/FileStore/tables/Input_Data.json")

In [0]:
# Inspect the schema Spark inferred for the JSON file.
json_df.printSchema()

root
 |-- Age: long (nullable = true)
 |-- Name: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- FName: string (nullable = true)
 |    |    |-- LName: string (nullable = true)
 |    |    |-- MName: string (nullable = true)



In [0]:
# Explicit schema for Input_Data.json: 'Name' is an array of structs with the
# string fields FName, MName and LName (in that order), followed by an
# integer 'Age'. All fields use the default nullability.
json_schema = (
    StructType()
    .add('Name', ArrayType(
        StructType()
        .add('FName', StringType())
        .add('MName', StringType())
        .add('LName', StringType())
    ))
    .add('Age', IntegerType())
)

In [0]:
# Re-read the JSON input, this time applying the explicit json_schema instead of inference.
json_df = (
    spark.read
    .schema(json_schema)
    .json("/FileStore/tables/Input_Data.json")
)

In [0]:
# Print the schema to confirm the explicit json_schema (field order and types) was applied.
json_df.printSchema()

root
 |-- Name: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- FName: string (nullable = true)
 |    |    |-- MName: string (nullable = true)
 |    |    |-- LName: string (nullable = true)
 |-- Age: integer (nullable = true)



In [0]:
# Preview the parsed JSON records.
json_df.show()

+----------------+---+
|            Name|Age|
+----------------+---+
|[{Ross, NA, NA}]| 28|
+----------------+---+

