In [None]:
# Build a single-column PySpark DataFrame from a flat Python list.
# Each scalar is first wrapped in its own one-element list so that it forms a
# one-field row, and the resulting rows are handed to spark.createDataFrame().

from pyspark.sql import SparkSession

# Start (or reuse) the Spark session for this example
spark = (
    SparkSession.builder
    .appName("ListToDataFrame")
    .getOrCreate()
)

# Flat input values
ls = [1, 2, 3, 5, 4, 7]

# One single-element row per input value
ls2 = [[value] for value in ls]

# Pass the row-shaped list as-is and name its only column
df = spark.createDataFrame(ls2, schema=["number"])

# Print the DataFrame contents
df.show()

# Shut the session down once the example is finished
spark.stop()


In [None]:
# Build a two-column PySpark DataFrame from the dictionary
# d = {1: "Tom", 2: "Brad", 3: "Joe"}: every key/value pair becomes one row.

from pyspark.sql import SparkSession

# Start (or reuse) the Spark session for this example
spark = (
    SparkSession.builder
    .appName("DictToDataFrame")
    .getOrCreate()
)

# Source mapping
d = {1: "Tom", 2: "Brad", 3: "Joe"}

# Flatten the mapping into (key, value) tuples:
# [(1, 'Tom'), (2, 'Brad'), (3, 'Joe')]
ls = [(key, name) for key, name in d.items()]

# Turn the tuples into rows, labelling the two columns
df = spark.createDataFrame(ls, schema=["ID", "Name"])

# Print the DataFrame contents
df.show()

# Shut the session down once the example is finished
spark.stop()


In [None]:
# Build a PySpark DataFrame from a list of tuples such as
# ls = [(1, 3), (1, 4), (1, 5)], supplying the column names at creation time.

from pyspark.sql import SparkSession

# Start (or reuse) the Spark session for this example
spark = (
    SparkSession.builder
    .appName("TupleListToDataFrame")
    .getOrCreate()
)

# Input rows, one tuple per row
ls = [
    (1, 3),
    (1, 4),
    (1, 5),
]

# Create the DataFrame and label its two columns
df = spark.createDataFrame(ls, schema=["col1", "col2"])

# Print the DataFrame contents
df.show()

# Shut the session down once the example is finished
spark.stop()
