In [0]:
# Read the sales CSV into a Spark DataFrame: the first row is treated as the
# header, and column types are inferred from the data instead of all-strings.
# NOTE(review): the path is absolute and user-specific — consider moving it to
# a shared, configurable location.
df = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .csv("/Workspace/Users/parthasarathy_1@msn.com/Sales.csv")
)

# Render the loaded rows with the notebook's rich table viewer
display(df)

# Print the column names together with the inferred types
df.printSchema()

In [0]:
# Register the DataFrame as a temporary view named "sales" (replacing any
# existing view of that name) so it can be queried with SQL.
df.createOrReplaceTempView("sales")

In [0]:
%sql
-- Total revenue and total profit per order date, one row per `Order Date`.
-- NOTE(review): if `Order Date` was inferred as a string, this ordering is
-- lexical rather than chronological — confirm the column type.
SELECT
  `Order Date`,
  SUM(`Total Revenue`) AS TotalRevenue,
  SUM(`Total Profit`)  AS TotalProfit
FROM sales
GROUP BY `Order Date`
ORDER BY `Order Date` ASC

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Aggregate revenue and profit per order date with Spark SQL
result_df = spark.sql("""
  SELECT `Order Date`,
       SUM(`Total Revenue`) AS TotalRevenue,
       SUM(`Total Profit`) AS TotalProfit
FROM sales
GROUP BY `Order Date`
ORDER BY `Order Date`
""")

# Collect the aggregated rows to the driver as a pandas DataFrame and parse
# the date column so matplotlib treats the x-axis as real dates.
pdf = result_df.toPandas().assign(
    **{"Order Date": lambda frame: pd.to_datetime(frame["Order Date"])}
)

# Draw both series on one explicit Axes: (column, legend label, line colour)
fig, ax = plt.subplots(figsize=(12, 6))
for metric_column, legend_label, line_color in (
    ("TotalRevenue", "Total Revenue", "blue"),
    ("TotalProfit", "Total Profit", "green"),
):
    sns.lineplot(
        data=pdf,
        x="Order Date",
        y=metric_column,
        label=legend_label,
        color=line_color,
        ax=ax,
    )

# Set the labels after plotting so they override the column names seaborn applies
ax.set(
    xlabel="Order Date",
    ylabel="Amount",
    title="Time-Series Report: Revenue & Profit Over Time",
)
ax.legend()
# Tilt the date tick labels to keep them from overlapping
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()