In [1]:
import pyspark
from pyspark.sql import SparkSession

# Obtain the notebook's SparkSession: getOrCreate() hands back an already
# running session when there is one, otherwise it starts a new session
# under the application name given here.
spark = (
    SparkSession.builder
    .appName('PySparkLearning')
    .getOrCreate()
)

### Create an empty PySpark DataFrame with a schema (StructType)

In [4]:
from pyspark.sql.types import StructType, StructField, StringType

# Schema with three string columns, declared in display order. Every field
# is built the same way, so the column names are the only thing listed.
schema = StructType([
    StructField(column_name, StringType())
    for column_name in ('firstname', 'middlename', 'lastname')
])

To create an empty DataFrame, first create an empty RDD with `spark.sparkContext.emptyRDD()`. Once you have the empty RDD, pass it to the SparkSession's `createDataFrame()` along with the schema.

In [8]:
# The RDD supplies no rows; the column names and types come from `schema`.
df = spark.createDataFrame(
    spark.sparkContext.emptyRDD(),
    schema=schema,
)

# Show the column tree first, then the table itself.
df.printSchema()
df.show()

root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)

+---------+----------+--------+
|firstname|middlename|lastname|
+---------+----------+--------+
+---------+----------+--------+



### Other ways
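You can also create an empty DataFrame without using `spark.sparkContext.emptyRDD()`: either convert an empty parallelized collection with `toDF(schema)`, or pass an empty list directly to `createDataFrame()`.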

In [23]:
# Alternative 1: parallelize an empty Python list into an RDD, then convert
# that RDD to a DataFrame using the schema.
df1 = (
    spark.sparkContext
    .parallelize([])
    .toDF(schema)
)
df1.printSchema()

# Alternative 2: skip the RDD and pass an empty list straight to
# createDataFrame() together with the schema.
df2 = spark.createDataFrame([], schema)
df2.printSchema()


root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)

root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)

