In [0]:
import pyspark
from pyspark.sql import SparkSession

# Start a Spark session, or pick up the one that already exists.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Column names, listed in the same order as the values in each record below.
columns = ["first_name", "middle_name", "last_name", "dob", "gender", "salary"]

# One tuple per person; several string fields are deliberately left empty.
data = [
    ("James", "", "Smith", "36636", "M", 60000),
    ("Michael", "Rose", "", "40288", "M", 70000),
    ("Robert", "", "Williams", "42114", "", 400000),
    ("Maria", "Anne", "Jones", "39192", "F", 500000),
    ("Jen", "Mary", "Brown", "", "F", 0),
]

# Only column names are given as the schema, so the column types are inferred
# from the records themselves.
pysparkDF = spark.createDataFrame(data, columns)
pysparkDF.printSchema()
pysparkDF.show(truncate=False)  # truncate=False: do not shorten cell values

# Convert the Spark DataFrame to a pandas DataFrame and print it as text.
pandasDF = pysparkDF.toPandas()
print(pandasDF)

# Nested structure elements
from pyspark.sql.types import StructType, StructField, StringType,IntegerType
# Each record is (name, dob, gender, salary), where name is itself a
# (first, middle, last) tuple. Every leaf value is a string.
dataStruct = [
    (("James", "", "Smith"), "36636", "M", "3000"),
    (("Michael", "Rose", ""), "40288", "M", "4000"),
    (("Robert", "", "Williams"), "42114", "M", "4000"),
    (("Maria", "Anne", "Jones"), "39192", "F", "4000"),
    (("Jen", "Mary", "Brown"), "", "F", "-1"),
]

# Explicit schema for the records above: a nested 'name' struct followed by
# three flat string columns. StructType.add returns the struct it was called
# on, which is what allows the calls to be chained.
schemaStruct = (
    StructType()
    .add(
        'name',
        StructType()
        .add('firstname', StringType(), True)
        .add('middlename', StringType(), True)
        .add('lastname', StringType(), True),
    )
    .add('dob', StringType(), True)
    .add('gender', StringType(), True)
    .add('salary', StringType(), True)
)

# Build the DataFrame against the explicit schema, then show its structure
# and its rows without shortening any cell value.
df = spark.createDataFrame(dataStruct, schemaStruct)
df.printSchema()
df.show(truncate=False)

# Convert the nested Spark DataFrame to a pandas DataFrame and print it as text.
pandasDF2 = df.toPandas()
print(pandasDF2)

root
 |-- first_name: string (nullable = true)
 |-- middle_name: string (nullable = true)
 |-- last_name: string (nullable = true)
 |-- dob: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: long (nullable = true)

+----------+-----------+---------+-----+------+------+
|first_name|middle_name|last_name|dob  |gender|salary|
+----------+-----------+---------+-----+------+------+
|James     |           |Smith    |36636|M     |60000 |
|Michael   |Rose       |         |40288|M     |70000 |
|Robert    |           |Williams |42114|      |400000|
|Maria     |Anne       |Jones    |39192|F     |500000|
|Jen       |Mary       |Brown    |     |F     |0     |
+----------+-----------+---------+-----+------+------+

  first_name middle_name last_name    dob gender  salary
0      James                 Smith  36636      M   60000
1    Michael        Rose            40288      M   70000
2     Robert              Williams  42114         400000
3      Maria        Anne     Jones 

In [0]:
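#Importing Libraries: The code imports pyspark, SparkSession (from pyspark.sql), and StructType, StructField, StringType, and IntegerType (from pyspark.sql.types). Note that IntegerType is imported but never used: every field in the nested schema, including salary, is declared as StringType.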

#Creating SparkSession: A Spark session is created using SparkSession.builder.appName('SparkByExamples.com').getOrCreate().

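#Creating DataFrame: A DataFrame named pysparkDF is created from a list of tuples data and column names columns using spark.createDataFrame(data=data, schema=columns). Because only column names are supplied, Spark infers each column's type from the data; in the printed schema, salary is inferred as long and the remaining columns as string.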

#Printing DataFrame Schema and Content: The schema and content of the DataFrame pysparkDF are displayed using pysparkDF.printSchema() and pysparkDF.show(truncate=False).

#Converting DataFrame to Pandas DataFrame: The DataFrame pysparkDF is converted to a Pandas DataFrame pandasDF using the toPandas() method.

#Printing Pandas DataFrame: The Pandas DataFrame pandasDF is printed using print(pandasDF).

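#Creating DataFrame with Nested Structure: A new DataFrame named df is created from a list of tuples dataStruct and an explicitly defined schema schemaStruct. The schemaStruct specifies the fields name, dob, gender, and salary, where name is itself a nested struct containing the fields firstname, middlename, and lastname. In each record of dataStruct the name is given as an inner tuple, for example ("James","","Smith"), and every field, including salary, is a string.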

#Printing Nested DataFrame Schema and Content: The schema and content of the DataFrame df are displayed using df.printSchema() and df.show(truncate=False).

#Converting Nested DataFrame to Pandas DataFrame: The DataFrame df is converted to a Pandas DataFrame pandasDF2 using the toPandas() method.

#Printing Pandas DataFrame: The Pandas DataFrame pandasDF2 is printed using print(pandasDF2).

#The code demonstrates how to create DataFrames in PySpark, print their schemas and content, convert DataFrames to Pandas DataFrames, and handle nested structures within DataFrames.