In [None]:
# Build the sample RDD that the following cells convert into DataFrames.

from pyspark.sql import SparkSession

# getOrCreate() hands back an already-running session when there is one,
# otherwise it starts a new session under this application name.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample department records, one (name, id) tuple per department.
dept = [
    ("Finance", 10),
    ("Marketing", 20),
    ("Sales", 30),
    ("IT", 40),
]

# Turn the local Python list into an RDD via the session's SparkContext.
rdd = spark.sparkContext.parallelize(dept)

In [None]:
# Convert PySpark RDD to DataFrame
#
# Four ways to turn `rdd` into a DataFrame. This cell relies on `spark` and
# `rdd` already being defined by the "Create PySpark RDD" cell.
#
# Every statement starts at column 0 on purpose: comment lines do not open a
# block in Python, so statements indented beneath them fail with
# "IndentationError: unexpected indent" before anything runs.

from pyspark.sql.types import StructType, StructField, StringType

# 1. Using rdd.toDF() with no arguments.
df = rdd.toDF()
df.printSchema()
df.show(truncate=False)
# Note: by default, toDF() names the columns "_1" and "_2".

# 2. toDF() has another signature that takes a list of column names.
deptColumns = ["dept_name", "dept_id"]
df2 = rdd.toDF(deptColumns)
df2.printSchema()
df2.show(truncate=False)

# 3. Using the SparkSession createDataFrame() function with column names.
deptDF = spark.createDataFrame(rdd, schema=deptColumns)
deptDF.printSchema()
deptDF.show(truncate=False)

# 4. Using createDataFrame() with an explicit StructType schema.
# NOTE(review): dept_id is declared as StringType although the sample tuples
# hold integers -- confirm this is intended; IntegerType would keep the
# column numeric.
deptSchema = StructType([
    StructField('dept_name', StringType(), True),
    StructField('dept_id', StringType(), True),
])
deptDF1 = spark.createDataFrame(rdd, schema=deptSchema)
deptDF1.printSchema()
deptDF1.show(truncate=False)

In [None]:
# Complete Example
#
# Self-contained walk-through: build an RDD of (name, id) tuples, then
# convert it to a DataFrame four different ways, printing the schema and the
# rows after each conversion.
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType,StructField, StringType

spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Source data: one (department name, department id) tuple per row.
dept = [
    ("Finance", 10),
    ("Marketing", 20),
    ("Sales", 30),
    ("IT", 40),
]
rdd = spark.sparkContext.parallelize(dept)

# 1) No schema supplied: Spark generates the column names itself.
df = rdd.toDF()
df.printSchema()
df.show(truncate=False)

# 2) Column names supplied to toDF().
deptColumns = ["dept_name", "dept_id"]
df2 = rdd.toDF(schema=deptColumns)
df2.printSchema()
df2.show(truncate=False)

# 3) Column names supplied to createDataFrame().
deptDF = spark.createDataFrame(rdd, deptColumns)
deptDF.printSchema()
deptDF.show(truncate=False)

# 4) Explicit StructType schema: two nullable string fields.
# NOTE(review): dept_id is typed as a string here even though the tuples
# carry integers -- confirm that is intended.
deptSchema = (
    StructType()
    .add('dept_name', StringType(), True)
    .add('dept_id', StringType(), True)
)

deptDF1 = spark.createDataFrame(rdd, deptSchema)
deptDF1.printSchema()
deptDF1.show(truncate=False)

In [None]:
# Create an empty RDD from the SparkContext of the `spark` session defined in
# an earlier cell (this cell does not create a session of its own).
emptyRDD = spark.sparkContext.emptyRDD()
# print() writes the RDD object's string representation, not its elements.
print(emptyRDD)