# LLM

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
# Load the workbook, parse 'Date' as datetimes, order the rows by date,
# and make 'Date' the index so the frame can be resampled over time.
df = (
    pd.read_excel('/kaggle/input/dataflow/final.xlsx')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
    .set_index('Date')
)

In [4]:
# Display the frame without the listed columns. The result is shown but not
# assigned, so `df` itself still holds every column after this cell.
df.drop(
    ["COGS", "Product", "State", "Unnamed: 0", "Category", "Segment", "City", "Region", "District"],
    axis="columns",
)

Unnamed: 0_level_0,ProductID,Zip,Units,Revenue
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-07-04,604,47946,1,5767.65
2010-07-04,1859,20008,1,1877.40
2010-07-04,485,44830,1,10218.60
2010-07-04,485,55008,1,10218.60
2010-07-04,485,47380,1,10218.60
...,...,...,...,...
2022-07-01,2269,21012,1,4718.70
2022-07-01,2269,48313,1,4718.70
2022-07-01,465,40299,1,14174.37
2022-07-01,506,40291,1,16316.37


In [7]:
def compute_metrics(actual, forecast):
    """Score a forecast against observed values.

    Parameters
    ----------
    actual : array-like
        Observed values.
    forecast : array-like
        Predicted values, in the same order and of the same length as ``actual``.

    Returns
    -------
    tuple
        ``(rmse, mape, r2)`` where ``rmse`` is the root mean squared error,
        ``mape`` is the mean absolute percentage error expressed in percent,
        and ``r2`` is the coefficient of determination from ``r2_score``.

    Notes
    -----
    Both inputs are converted to float NumPy arrays first. This lets plain
    lists be passed, and makes pandas Series compare by position instead of
    being aligned on their index labels during the subtraction.
    """
    actual = np.asarray(actual, dtype=float)
    forecast = np.asarray(forecast, dtype=float)

    rmse = np.sqrt(mean_squared_error(actual, forecast))

    # Avoid dividing by zero: where the observed value is 0 the denominator
    # is replaced by 1, so that point contributes its absolute error.
    denom = np.where(actual == 0, 1, actual)
    mape = np.mean(np.abs((actual - forecast) / denom)) * 100
    r2 = r2_score(actual, forecast)
    return rmse, mape, r2

In [None]:
# Sum the rows into one value per month for each target series.
monthly_revenue = df['Revenue'].resample('M').sum()
monthly_sales = df['Units'].resample('M').sum()

# Split by calendar year: months up to 2020 train the model,
# months from 2021 onward are held out for evaluation.
train_sales = monthly_sales.loc[monthly_sales.index.year <= 2020]
test_sales = monthly_sales.loc[monthly_sales.index.year >= 2021]

train_revenue = monthly_revenue.loc[monthly_revenue.index.year <= 2020]
test_revenue = monthly_revenue.loc[monthly_revenue.index.year >= 2021]

In [2]:
# !pip install -c conda-forge fbprophet -y
!pip install prophet
!pip install --upgrade plotly



In [None]:
from prophet import Prophet

In [None]:
# Monthly totals for the two series that are forecast below.
monthly_sales = df['Units'].resample('M').sum()
monthly_revenue = df['Revenue'].resample('M').sum()

# Training window: every month through 2020.
train_revenue = monthly_revenue.loc[monthly_revenue.index.year <= 2020]
train_sales = monthly_sales.loc[monthly_sales.index.year <= 2020]

# Evaluation window: every month from 2021 onward.
test_revenue = monthly_revenue.loc[monthly_revenue.index.year >= 2021]
test_sales = monthly_sales.loc[monthly_sales.index.year >= 2021]

# --- Revenue model ---
# Prophet expects a frame with a 'ds' (date) column and a 'y' (value) column.
train_rev_df = train_revenue.reset_index().rename(
    columns={'Date': 'ds', 'Revenue': 'y'}
)
revenue_model = Prophet().fit(train_rev_df)
# Extend the training dates by one monthly step per held-out month.
future_rev = revenue_model.make_future_dataframe(
    periods=test_revenue.shape[0], freq='M'
)
forecast_revenue = revenue_model.predict(future_rev)

# --- Units model ---
train_sales_df = train_sales.reset_index().rename(
    columns={'Date': 'ds', 'Units': 'y'}
)
sales_model = Prophet().fit(train_sales_df)
future_sales = sales_model.make_future_dataframe(
    periods=test_sales.shape[0], freq='M'
)
forecast_sales = sales_model.predict(future_sales)

# Index the revenue forecast by date so it can be matched to the held-out months.
forecast_revenue = forecast_revenue.set_index('ds')
common_rev_index = forecast_revenue.index.intersection(test_revenue.index)

# Score only the dates that appear in both the forecast and the test series.
pred_rev = forecast_revenue['yhat'].loc[common_rev_index]
actual_rev = test_revenue.loc[common_rev_index]
rmse_rev, mape_rev, r2_rev = compute_metrics(
    actual_rev.to_numpy(), pred_rev.to_numpy()
)
print(
    "Revenue Forecast Metrics - RMSE: {:.2f}, MAPE: {:.2f}%, R2: {:.2f}".format(
        rmse_rev, mape_rev, r2_rev
    )
)

# Index the units forecast by date so it can be matched to the held-out months.
forecast_sales = forecast_sales.set_index('ds')
common_sales_index = forecast_sales.index.intersection(test_sales.index)

# Score only the dates that appear in both the forecast and the test series.
pred_sales = forecast_sales['yhat'].loc[common_sales_index]
actual_sales = test_sales.loc[common_sales_index]
rmse_sales, mape_sales, r2_sales = compute_metrics(
    actual_sales.to_numpy(), pred_sales.to_numpy()
)
print(
    "Units Forecast Metrics - RMSE: {:.2f}, MAPE: {:.2f}%, R2: {:.2f}".format(
        rmse_sales, mape_sales, r2_sales
    )
)


  monthly_revenue = df['Revenue'].resample('M').sum()
  monthly_sales   = df['Units'].resample('M').sum()
08:54:42 - cmdstanpy - INFO - Chain [1] start processing
08:54:42 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
08:54:42 - cmdstanpy - INFO - Chain [1] start processing
08:54:42 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(


Revenue Forecast Metrics - RMSE: 12600356.11, MAPE: 57.20%, R2: -0.07
Units Forecast Metrics - RMSE: 2476.50, MAPE: 71.49%, R2: -1.59


Comment: The model cannot find a pattern in either series (R2 is negative for both); this seems to be caused largely by the dataset being incomplete.

In [None]:
import logging
import warnings

# Raise the cmdstanpy logger threshold to WARNING.
logging.getLogger("cmdstanpy").setLevel(logging.WARNING)

# Register "ignore" filters for FutureWarning and for the base Warning class.
warnings.simplefilter('ignore', FutureWarning)
warnings.simplefilter('ignore', Warning)
