In [13]:
from pyspark.sql import SparkSession;
from pyspark.sql.functions import to_date,col,desc

In [14]:
# 🔧 SOLUTION 1: Check if Spark session exists, create if needed
def _build_spark_session():
    """Create (or fetch via ``getOrCreate``) the notebook's local Spark session."""
    return (
        SparkSession.builder.appName("sparkDataframe")
        .master("local[*]")
        .config("spark.executor.memory", "2g")
        .config("spark.driver.memory", "2g")
        .getOrCreate()
    )


def get_spark_session():
    """Return a usable SparkSession, reusing the active one when possible.

    The active session is looked up first. If there is none, or if the
    lookup itself fails, a session is built with the notebook's standard
    settings (app name ``sparkDataframe``, master ``local[*]``, 2g executor
    and driver memory).

    Returns:
        The active SparkSession, or the one produced by ``getOrCreate()``.

    Raises:
        Whatever ``getOrCreate()`` raises if the session cannot be built;
        build failures are no longer silently retried.
    """
    try:
        spark = SparkSession.getActiveSession()
    except Exception:
        # Best effort: a failed lookup is treated the same as "no active
        # session". Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        spark = None

    if spark is None:
        # Single builder path so the lookup-failed case gets the same
        # master/memory settings as the no-active-session case.
        spark = _build_spark_session()
    return spark

In [15]:
# Bind the notebook-wide session handle: the active session if one is
# running, otherwise a newly built one.
spark = get_spark_session()

In [16]:
# Summarise the session so the reader can confirm what the notebook runs on.
_ctx = spark.sparkContext  # look the context up once for all status lines

print("✅ Spark Session Status:")
print(f"   App Name: {_ctx.appName}")
print(f"   Master: {_ctx.master}")
print(f"   Spark Version: {spark.version}")
# NOTE: `_jsc` is a private PySpark attribute (the underlying Java context);
# it is used here only to ask whether the context has been stopped.
print(f"   Active: {not _ctx._jsc.sc().isStopped()}")

✅ Spark Session Status:
   App Name: sparkDataframe
   Master: local[*]
   Spark Version: 4.0.0
   Active: True


In [17]:
# In-memory sample data: 21 order line items, one dict per line.
# An OrderID can repeat (e.g. ORD003, ORD004, ORD006, ORD011) when a single
# order contains several products, so OrderID alone is not a unique key.
# OrderDate is stored as a "yyyy-MM-dd" string here (2024-01-15 .. 2024-01-31).
retail_orders_data = [
    {"OrderID": "ORD001", "CustomerID": "CUST101", "ProductID": "PROD003", "ProductName": "Mouse XYZ", "Quantity": 2, "UnitPrice": 25.50, "OrderDate": "2024-01-15", "Country": "USA"},
    {"OrderID": "ORD002", "CustomerID": "CUST102", "ProductID": "PROD002", "ProductName": "Smartphone B", "Quantity": 1, "UnitPrice": 850.00, "OrderDate": "2024-01-16", "Country": "Canada"},
    {"OrderID": "ORD003", "CustomerID": "CUST101", "ProductID": "PROD004", "ProductName": "Keyboard Pro", "Quantity": 1, "UnitPrice": 75.00, "OrderDate": "2024-01-17", "Country": "USA"},
    {"OrderID": "ORD003", "CustomerID": "CUST101", "ProductID": "PROD005", "ProductName": "Webcam HD", "Quantity": 1, "UnitPrice": 49.99, "OrderDate": "2024-01-17", "Country": "USA"},
    {"OrderID": "ORD004", "CustomerID": "CUST103", "ProductID": "PROD001", "ProductName": "Laptop A", "Quantity": 1, "UnitPrice": 1200.00, "OrderDate": "2024-01-18", "Country": "UK"},
    {"OrderID": "ORD004", "CustomerID": "CUST103", "ProductID": "PROD006", "ProductName": "Monitor 27in", "Quantity": 1, "UnitPrice": 300.00, "OrderDate": "2024-01-18", "Country": "UK"},
    {"OrderID": "ORD005", "CustomerID": "CUST104", "ProductID": "PROD007", "ProductName": "Headphones ANC", "Quantity": 1, "UnitPrice": 199.99, "OrderDate": "2024-01-19", "Country": "Australia"},
    {"OrderID": "ORD006", "CustomerID": "CUST105", "ProductID": "PROD008", "ProductName": "External SSD 1TB", "Quantity": 1, "UnitPrice": 150.00, "OrderDate": "2024-01-20", "Country": "USA"},
    {"OrderID": "ORD006", "CustomerID": "CUST105", "ProductID": "PROD003", "ProductName": "Mouse XYZ", "Quantity": 1, "UnitPrice": 25.50, "OrderDate": "2024-01-20", "Country": "USA"},
    {"OrderID": "ORD007", "CustomerID": "CUST102", "ProductID": "PROD009", "ProductName": "Printer Laser", "Quantity": 1, "UnitPrice": 220.00, "OrderDate": "2024-01-21", "Country": "Canada"},
    {"OrderID": "ORD008", "CustomerID": "CUST106", "ProductID": "PROD010", "ProductName": "Smartwatch X", "Quantity": 1, "UnitPrice": 299.99, "OrderDate": "2024-01-22", "Country": "Germany"},
    {"OrderID": "ORD009", "CustomerID": "CUST101", "ProductID": "PROD002", "ProductName": "Smartphone B", "Quantity": 1, "UnitPrice": 850.00, "OrderDate": "2024-01-23", "Country": "USA"},
    {"OrderID": "ORD010", "CustomerID": "CUST107", "ProductID": "PROD004", "ProductName": "Keyboard Pro", "Quantity": 2, "UnitPrice": 75.00, "OrderDate": "2024-01-24", "Country": "France"},
    {"OrderID": "ORD011", "CustomerID": "CUST108", "ProductID": "PROD001", "ProductName": "Laptop A", "Quantity": 1, "UnitPrice": 1200.00, "OrderDate": "2024-01-25", "Country": "USA"},
    {"OrderID": "ORD011", "CustomerID": "CUST108", "ProductID": "PROD005", "ProductName": "Webcam HD", "Quantity": 1, "UnitPrice": 49.99, "OrderDate": "2024-01-25", "Country": "USA"},
    {"OrderID": "ORD012", "CustomerID": "CUST109", "ProductID": "PROD007", "ProductName": "Headphones ANC", "Quantity": 1, "UnitPrice": 199.99, "OrderDate": "2024-01-26", "Country": "Japan"},
    {"OrderID": "ORD013", "CustomerID": "CUST101", "ProductID": "PROD003", "ProductName": "Mouse XYZ", "Quantity": 3, "UnitPrice": 25.50, "OrderDate": "2024-01-27", "Country": "USA"},
    {"OrderID": "ORD014", "CustomerID": "CUST110", "ProductID": "PROD006", "ProductName": "Monitor 27in", "Quantity": 1, "UnitPrice": 300.00, "OrderDate": "2024-01-28", "Country": "UK"},
    {"OrderID": "ORD015", "CustomerID": "CUST102", "ProductID": "PROD008", "ProductName": "External SSD 1TB", "Quantity": 1, "UnitPrice": 150.00, "OrderDate": "2024-01-29", "Country": "Canada"},
    {"OrderID": "ORD016", "CustomerID": "CUST103", "ProductID": "PROD009", "ProductName": "Printer Laser", "Quantity": 1, "UnitPrice": 220.00, "OrderDate": "2024-01-30", "Country": "UK"},
    {"OrderID": "ORD017", "CustomerID": "CUST104", "ProductID": "PROD010", "ProductName": "Smartwatch X", "Quantity": 1, "UnitPrice": 299.99, "OrderDate": "2024-01-31", "Country": "Australia"}
]

In [18]:
# Build the DataFrame straight from the list of dicts; no schema is passed,
# so Spark infers column names and types from the records.
df_list = spark.createDataFrame(data=retail_orders_data)

In [19]:
# Parse the "yyyy-MM-dd" strings into a date column.
# NOTE(review): the source key is "OrderDate" but the column is addressed as
# "orderDate"; this relies on Spark resolving column names case-insensitively,
# and the replaced column ends up named "orderDate" in the schema.
df_list = df_list.withColumn(
    "orderDate",
    to_date(col("orderDate"), "yyyy-MM-dd"),
)

In [20]:
# Inspect the resulting column names and types (the date column should now
# report `date` rather than `string`).
df_list.printSchema()

root
 |-- Country: string (nullable = true)
 |-- CustomerID: string (nullable = true)
 |-- orderDate: date (nullable = true)
 |-- OrderID: string (nullable = true)
 |-- ProductID: string (nullable = true)
 |-- ProductName: string (nullable = true)
 |-- Quantity: long (nullable = true)
 |-- UnitPrice: double (nullable = true)



In [None]:
#df_list.show()

In [None]:
# Keep only the order lines whose Country is "USA".
usa_orders = df_list.filter(col("Country") == "USA")

# Render the result as a table. `show` fetches only the rows it displays
# (20 by default), unlike `print(usa_orders.collect())`, which pulls the
# whole result to the driver and prints raw Row reprs.
usa_orders.show(truncate=False)

In [None]:
# Order lines where more than one unit was bought.
# The column is referenced with its declared casing ("Quantity") so the filter
# does not depend on Spark's case-insensitive column resolution.
quantity_product = df_list.filter(col("Quantity") > 1)

In [None]:
# Revenue per order line = units bought * price per unit.
# The DataFrame has no "price" or "order_id" columns; the actual names from
# the source records are "UnitPrice" and "OrderID".
total_revenu = df_list.withColumn(
    "totalRevenu",
    col("Quantity") * col("UnitPrice"),
)
total_revenu.select("OrderID", "totalRevenu").show()

In [None]:
#spark.stop()