In [0]:
%sql
-- Set the session defaults so unqualified table names in later cells resolve
-- to basab_catalog_retail.gold_tables.
USE CATALOG basab_catalog_retail;
USE SCHEMA gold_tables;

In [0]:
%sql
-- Inspect every row of day_wise_sale in the current schema.
SELECT *
FROM day_wise_sale

In [0]:
%sql
-- Stores ranked by total revenue, highest first.
SELECT
    store_id,
    total_revenue
FROM pos_stores_summary
ORDER BY total_revenue DESC

In [0]:
%sql
-- Peek at three rows of daily_sales.
SELECT *
FROM daily_sales
LIMIT 3

In [0]:
# Load the silver-layer POS table; the two-part name resolves in the session's
# current catalog.
pos_df = spark.read.table("silver_tables.pos_silver")

In [0]:
# Preview two rows of the POS data.
pos_df.limit(2).display()

In [0]:
# Load the silver-layer products table.
products_df = spark.read.table("silver_tables.products_silver")

In [0]:
# Inner-join POS rows to their product attributes on sku_id, then drop the
# product-side key so the result has a single sku_id column (the one from pos_df).
pos_join_products = pos_df.join(
    products_df,
    on=pos_df.sku_id == products_df.sku_id,
    how="inner",
).drop(products_df.sku_id)

In [0]:
# Preview three rows of the POS/product join.
pos_join_products.limit(3).display()

In [0]:
# Persist the POS/product join as a Delta table, replacing any existing contents.
(
    pos_join_products.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("silver_tables.pos_join_products")
)

In [0]:
%sql
-- Revenue per store and product category (units_sold * price, rounded to 2 dp),
-- leaving out rows whose category is 'Unknown'.
CREATE OR REPLACE TABLE basab_catalog_retail.gold_tables.store_category_revenue AS
SELECT
    store_id,
    category,
    ROUND(SUM(units_sold * price), 2) AS total_category_revenue
FROM basab_catalog_retail.silver_tables.pos_join_products
WHERE category != 'Unknown'
GROUP BY store_id, category
ORDER BY store_id, total_category_revenue DESC


In [0]:
# Read the store/category revenue gold table back as a DataFrame.
store_cat_rev_df = spark.table("gold_tables.store_category_revenue")

In [0]:
# Show the store/category revenue rows.
store_cat_rev_df.display()

In [0]:
# Load the silver-layer inventory table.
inventory_df = spark.table("silver_tables.inventory_silver")

In [0]:
# Inner-join inventory rows to their product attributes on sku_id.
# The product-side key is dropped by Column reference: drop() given the string
# "products_df.sku_id" looks for a column literally named that, finds none, and
# silently leaves two sku_id columns in the result.
inventory_join_products = (
    inventory_df.join(products_df, inventory_df.sku_id == products_df.sku_id, "inner")
    .drop(products_df.sku_id)
)

In [0]:
# Show the joined inventory/product rows.
inventory_join_products.display()

In [0]:
# Print the column names and types of the inventory/product join.
inventory_join_products.printSchema()


In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Total stock per (store_id, category), sorted by store.
# `sum` and `col` are the Spark functions from the pyspark.sql.functions star
# import in the preceding cell, not Python builtins.
inventory_summary = (
    inventory_join_products
    .groupBy("store_id", "category")
    .agg(sum(col("stock_level")).alias("total_stock_level"))
    .sort("store_id")
)
inventory_summary.display()

In [0]:


# Keep only rows whose category is not the literal 'Unknown'.
inventory_summary2 = inventory_summary.where(col("category") != "Unknown")


In [0]:
# Show stock totals per store, largest category first within each store.
(
    inventory_summary2
    .orderBy(col("store_id"), col("total_stock_level").desc())
    .display()
)

In [0]:
# Show the store/category revenue rows again, ahead of joining stock onto them.
store_cat_rev_df.display()

In [0]:
# Attach stock totals to the revenue rows by (store_id, category).
# Left join: a revenue row with no matching stock row is kept, with a null
# total_stock_level.
store_cat_full_df = store_cat_rev_df.join(
    inventory_summary2, ["store_id", "category"], "left"
)

store_cat_full_df.display()

In [0]:
# Replace null stock totals (revenue rows that had no inventory match) with 0.
store_cat_full_df2 = store_cat_full_df.na.fill({"total_stock_level": 0})



In [0]:
# Show the revenue/stock table after null stock totals were filled.
store_cat_full_df2.display()

In [0]:
# Persist the revenue/stock table as a Delta gold table, replacing any existing contents.
(
    store_cat_full_df2.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_tables.Store_Category_Report")
)

In [0]:
# Stock totals strictly below this value are flagged "High"; everything else is "Low".
threshold = 500

store_cat_full_df3 = store_cat_full_df2.withColumn(
    "replenish_needed",
    when(col("total_stock_level") < lit(threshold), "High").otherwise("Low"),
)

store_cat_full_df3.display()

In [0]:
# Show only the store/category rows flagged "High" for replenishment.
store_cat_full_df3.where(col("replenish_needed") == "High").display()

In [0]:
%sql
-- Inspect every row of the daily_sales gold table.
SELECT *
FROM basab_catalog_retail.gold_tables.daily_sales

In [0]:
%sql
-- Two-column (date, total_revenue) extract of daily_sales.
CREATE OR REPLACE TABLE basab_catalog_retail.gold_tables.date_revenue_report AS
SELECT
    date,
    total_revenue
FROM basab_catalog_retail.gold_tables.daily_sales

In [0]:
%sql
-- Inspect the date/revenue report that was just created.
SELECT *
FROM basab_catalog_retail.gold_tables.date_revenue_report

In [0]:
import pandas as pd

# Collect the date/revenue report to the driver as a pandas DataFrame and parse
# the date column into pandas datetimes.
daily_revenue_pd = (
    spark.table("basab_catalog_retail.gold_tables.date_revenue_report")
    .toPandas()
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)

In [0]:
import matplotlib.pyplot as plt

# Line chart of total revenue per date, one marker per data point.
plt.figure(figsize=(15, 5))
plt.plot(
    daily_revenue_pd["date"],
    daily_revenue_pd["total_revenue"],
    marker="o",
)
plt.title("Daily Total Revenue")
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.grid(True)
plt.xticks(rotation=45)  # slant the date labels
plt.show()


In [0]:
%sql
-- Dates whose revenue is above the average over all dates, highest revenue first.
SELECT *
FROM basab_catalog_retail.gold_tables.date_revenue_report
WHERE total_revenue > (
    SELECT AVG(total_revenue)
    FROM basab_catalog_retail.gold_tables.date_revenue_report
)
ORDER BY total_revenue DESC

In [0]:
%sql
-- Inspect every row of the silver-layer holiday table.
SELECT *
FROM basab_catalog_retail.silver_tables.holiday_silver

In [0]:
%sql
-- Re-establish the session defaults used by the unqualified and two-part
-- table names in the cells that follow.
USE CATALOG basab_catalog_retail;
USE SCHEMA gold_tables;


In [0]:
%sql
-- Every revenue date, with the holiday_silver columns populated where that
-- table has a row for the same date and NULL otherwise.
SELECT *
FROM date_revenue_report AS r
LEFT JOIN silver_tables.holiday_silver AS h
    ON r.date = h.date

In [0]:
%sql
-- Revenue per (date, store, SKU) alongside the stock level of that SKU in that store.
-- Rows dated 1900-01-01 are excluded (presumably a placeholder date -- confirm).
WITH temp1 AS (
    SELECT
        date,
        store_id,
        sku_id,
        ROUND(SUM(units_sold * price), 2) AS total_revenue
    FROM silver_tables.pos_silver
    WHERE date != '1900-01-01'
    GROUP BY date, store_id, sku_id
    -- No ORDER BY here: sorting inside the CTE has no effect on the final result,
    -- which is sorted by the outer ORDER BY.
)
SELECT
    t.date,
    t.store_id,
    t.sku_id,
    t.total_revenue,
    i.stock_level
FROM temp1 AS t
JOIN silver_tables.inventory_silver AS i
    -- Match on store as well as SKU. Joining on sku_id alone pairs each sales
    -- row with the inventory rows of every store that carries the SKU.
    ON  t.sku_id   = i.sku_id
    AND t.store_id = i.store_id
ORDER BY t.date, t.store_id, t.sku_id

    



In [0]:
%sql
-- Build gold_tables.time_series_inventory: one row per (date, store_id, sku_id)
-- with that day's revenue and the stock level of the SKU in the same store.
CREATE OR REPLACE TABLE basab_catalog_retail.gold_tables.time_series_inventory
AS
WITH temp1 AS (
    -- Daily revenue per store and SKU; rows dated 1900-01-01 are excluded
    -- (presumably a placeholder date -- confirm).
    SELECT
        date,
        store_id,
        sku_id,
        ROUND(SUM(units_sold * price), 2) AS total_revenue
    FROM silver_tables.pos_silver
    WHERE date != '1900-01-01'
    GROUP BY date, store_id, sku_id
),
joined AS (
    SELECT
        t.date,
        t.store_id,
        t.sku_id,
        t.total_revenue,
        i.stock_level,
        -- If inventory_silver still holds several rows for the same store/SKU,
        -- keep a single one per output key: the row with the highest stock_level.
        ROW_NUMBER() OVER (
            PARTITION BY t.date, t.store_id, t.sku_id
            ORDER BY i.stock_level DESC
        ) AS rn
    FROM temp1 t
    JOIN silver_tables.inventory_silver i
        -- Match on store as well as SKU. Joining on sku_id alone made rn = 1
        -- select the largest stock level held by ANY store for the SKU.
        ON  t.sku_id   = i.sku_id
        AND t.store_id = i.store_id
)
SELECT date, store_id, sku_id, total_revenue, stock_level
FROM joined
WHERE rn = 1
ORDER BY date, store_id, sku_id;


In [0]:
# Load time_series_inventory from the session's current catalog and schema.
df = spark.table("time_series_inventory")

In [0]:
# Show the time_series_inventory rows.
df.display()

In [0]:
from pyspark.sql.functions  import concat_ws, first
from pyspark.sql.functions import *
from pyspark.sql import functions as F

# Pivot table: rows = date, columns = sku_id, values = revenue summed over stores.
# df has one row per (date, store_id, sku_id), so several store rows can fall in
# the same (date, sku_id) cell. first() kept an arbitrary one of them, which made
# the per-SKU series non-deterministic; summing gives the SKU's total for the day.
# F.sum is used explicitly so the call cannot resolve to Python's builtin sum.
pivot_df = (
    df.groupBy("date")
    .pivot("sku_id")
    .agg(F.sum("total_revenue"))
    .orderBy("date")  # sort by date
)



In [0]:
# Show the date-by-SKU revenue pivot.
pivot_df.display()

In [0]:
# Replace nulls in the pivot (date/SKU cells with no revenue row) with 0.
df_filled = pivot_df.na.fill(0)


In [0]:
# Show the pivot after nulls were filled with 0.
df_filled.display()

In [0]:
# Install the prophet package through a shell pip call; no version is pinned.
# NOTE(review): on Databricks the %pip magic is the usual way to install
# notebook-scoped libraries, but it may reset Python state, so it would need to
# move to the first cell rather than replace this line in place -- TODO confirm.
!pip install prophet

In [0]:
# Build the series for one SKU in the layout Prophet requires:
# 'ds' holds the date and 'y' holds the value.
ts = (
    df_filled.select("date", "SKU00002")
    .toPandas()
    .rename(columns={"date": "ds", "SKU00002": "y"})
)
ts["ds"] = pd.to_datetime(ts["ds"])

In [0]:
from prophet import Prophet

# Fit a Prophet model to the single-SKU series held in `ts`.
# NOTE(review): daily_seasonality=True models a within-day cycle; if `ts` has one
# row per calendar day this term may add nothing -- confirm it is intended.
model = Prophet(daily_seasonality=True)  # add weekly/monthly seasonality if needed
model.fit(ts)

In [0]:
# Extend the model's date frame by 7 periods and predict over the whole frame.
future = model.make_future_dataframe(periods=7)  # forecast next 7 days
forecast = model.predict(future)

In [0]:
# Show the last 7 forecast rows: point estimate plus lower/upper bounds.
forecast[['ds','yhat','yhat_lower','yhat_upper']].tail(7)


In [0]:
# Forecast the next 7 periods for every SKU column of the filled pivot.
# The pivot is collected to pandas once, before the loop: calling toPandas()
# per SKU re-ran the Spark job behind df_filled on every iteration.
history_pd = df_filled.toPandas()
history_pd['date'] = pd.to_datetime(history_pd['date'])

# Every column except the date is one SKU's series. The loop variable is not
# named `col`, to avoid confusion with the star-imported Spark col() function.
skus = [name for name in history_pd.columns if name != 'date']

forecast_dict = {}  # sku_id -> DataFrame holding the 7 forecast rows (ds, yhat)

for sku in skus:
    # Prophet requires the columns to be named 'ds' (date) and 'y' (value).
    ts = history_pd[['date', sku]].rename(columns={'date': 'ds', sku: 'y'})

    model = Prophet(daily_seasonality=True)
    model.fit(ts)

    future = model.make_future_dataframe(periods=7)
    forecast = model.predict(future)

    # Keep only the 7 newly forecast rows.
    forecast_dict[sku] = forecast[['ds', 'yhat']].tail(7)

In [0]:
# Show the 7-row forecast stored for SKU00085.
forecast_dict['SKU00085']

In [0]:
# Show the 7-row forecast stored for SKU00121.
forecast_dict['SKU00121']