### Creating a DataFrane from a List of Tuples (Static Values)

In [None]:
%pip install pyspark



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [6]:
from pyspark.sql import Row
from pyspark.sql import SparkSession
my_spark = SparkSession.builder.appName("DataFrameExample").getOrCreate()

# Defining the list of tuples
data = [
    Row(id=1, name="Alice", age=29),
    Row(id=2, name="Bob", age=31),
    Row(id=3,name="Cathy", age=25)
]

In [7]:
# Creating a dataframe from the list
df = my_spark.createDataFrame(data)
df.show()


+---+-----+---+
| id| name|age|
+---+-----+---+
|  1|Alice| 29|
|  2|  Bob| 31|
|  3|Cathy| 25|
+---+-----+---+



#### Creating a Dataframe with a List of Dictionaries

In [11]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("Data Frames").getOrCreate()
data = [
    {"id":1, "name":"Alice", "age":29},
    {"id":2, "name":"Bob", "age":31},
    {"id":3,"name":"Cathy", "age":25}
]

# Creating a DataFrame with a List of Dictionaries
df = spark.createDataFrame(data)
df.show()

+---+---+-----+
|age| id| name|
+---+---+-----+
| 29|  1|Alice|
| 31|  2|  Bob|
| 25|  3|Cathy|
+---+---+-----+



#### How to Add Column Names to DataFrame

In [17]:
data = [
    (1, "Alice", 29),
    (2, "Bob", 31),
    (3, "Cathy", 25)
]

df = spark.createDataFrame(data)
df.show()

df.printSchema()

+---+-----+---+
| _1|   _2| _3|
+---+-----+---+
|  1|Alice| 29|
|  2|  Bob| 31|
|  3|Cathy| 25|
+---+-----+---+

root
 |-- _1: long (nullable = true)
 |-- _2: string (nullable = true)
 |-- _3: long (nullable = true)



In [13]:
# the toDF() function is used to rename columns in a DataFrame.
df2 = df.toDF("id", "name", "age")
df2.show()

+---+-----+---+
| id| name|age|
+---+-----+---+
|  1|Alice| 29|
|  2|  Bob| 31|
|  3|Cathy| 25|
+---+-----+---+



In [15]:
# Combining them all together
df3 = spark.createDataFrame(data).toDF("id", "name", "age")
df3.show()


+---+-----+---+
| id| name|age|
+---+-----+---+
|  1|Alice| 29|
|  2|  Bob| 31|
|  3|Cathy| 25|
+---+-----+---+



In [16]:
# Checking the schema of the DataFrame
df3.printSchema()

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- age: long (nullable = true)



#### Creating DataFrame() with Schema 

In [21]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

# Defining the schema
my_schema = StructType([
    StructField("id", IntegerType(), False),
    StructField("name", StringType(), True),
    StructField("age", IntegerType(), True)

])


data = [
    (1, "Alice", 29),
    (2, "Bob", 31),
    (3, "Cathy", 25),
    (4, None, 25)
]

df = spark.createDataFrame(data, my_schema)
df.show()

df.printSchema()

+---+-----+---+
| id| name|age|
+---+-----+---+
|  1|Alice| 29|
|  2|  Bob| 31|
|  3|Cathy| 25|
|  4| null| 25|
+---+-----+---+

root
 |-- id: integer (nullable = false)
 |-- name: string (nullable = true)
 |-- age: integer (nullable = true)



#### How to use the show() Function

In [30]:
data = [
    (1, "Alice", 29),
    (2, "Bob", 31),
    (3, "Cathy, How are you doint today and the family is doing fine right?", 25),
    (4, None, 25)
]

df3 = spark.createDataFrame(data).toDF("id", "name", "age")
df3.show()

# Showing limited number of rows
df3.show(2)

#Truncating long strings
df.show(truncate=True, n=5)

# Avoiding truncating long strings
df3.show(truncate=False)

# Displaying data in vertical format
df3.show(vertical=True, truncate=False)

spark.stop()


+---+--------------------+---+
| id|                name|age|
+---+--------------------+---+
|  1|               Alice| 29|
|  2|                 Bob| 31|
|  3|Cathy, How are yo...| 25|
|  4|                null| 25|
+---+--------------------+---+

+---+-----+---+
| id| name|age|
+---+-----+---+
|  1|Alice| 29|
|  2|  Bob| 31|
+---+-----+---+
only showing top 2 rows

+---+-----+---+
| id| name|age|
+---+-----+---+
|  1|Alice| 29|
|  2|  Bob| 31|
|  3|Cathy| 25|
|  4| null| 25|
+---+-----+---+

+---+------------------------------------------------------------------+---+
|id |name                                                              |age|
+---+------------------------------------------------------------------+---+
|1  |Alice                                                             |29 |
|2  |Bob                                                               |31 |
|3  |Cathy, How are you doint today and the family is doing fine right?|25 |
|4  |null                              