In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

In [None]:
# Load the Superstore export directly from its zip archive (latin1-encoded CSV).
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine until it is pointed at a local copy of the file.
csv_path = r"C:\Users\Tanishq\Downloads\Sample - Superstore.csv.zip"
df = pd.read_csv(csv_path, compression="zip", encoding="latin1")

# Preview the first rows.
df.head()


In [None]:
# Quick size check: (number of rows, number of columns).
# A bare `df.info` (no parentheses) never calls the method; it only echoes the
# bound method's repr, so it is replaced with an expression that shows a result.
df.shape


In [None]:
# Print the frame summary: column names, non-null counts and dtypes.
df.info()


In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) for the frame's columns.
df.describe()

In [None]:
# Parse the order dates, put the rows in chronological order, and promote the
# date to the index so the frame can be resampled by calendar period.
order_dates = pd.to_datetime(df['Order Date'])
df = (
    df.assign(**{'Order Date': order_dates})
      .sort_values('Order Date')
      .set_index('Order Date')
)

# Show the columns that remain after 'Order Date' moved into the index.
df.columns

In [None]:
# Total sales per calendar month: bin the date-indexed 'Sales' column into
# month-end periods and sum each bin.
monthly_sales = df['Sales'].resample('M').sum()

In [None]:
# Line chart of the monthly sales totals over the whole history.
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(monthly_sales, label='Monthly Sales')
ax.set_title('Monthly Total Sales')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Chronological hold-out: the final 12 months form the test window and
# everything before them is training data.
train, test = monthly_sales.iloc[:-12], monthly_sales.iloc[-12:]

In [None]:
# Naive one-step-ahead forecast: each test month is predicted with the actual
# sales of the month immediately before it.
# The shift must be applied to the full series and then restricted to the test
# months. Shifting `train` and taking its tail returns training-period values
# whose dates do not overlap the test window at all, so the score would compare
# each test month with sales from more than a year earlier.
naive_preds = monthly_sales.shift(1).loc[test.index]

# mean_absolute_error pairs values positionally, so both inputs must cover the
# same months in the same order.
mae_naive = mean_absolute_error(test, naive_preds)

mae_naive


In [None]:
# Supervised-learning frame: one row per month, with 'Sales' as the target.
lr_df = monthly_sales.to_frame(name='Sales')

# Lag features: sales 1, 3 and 6 months before the row's month.
lr_df['lag_1'] = lr_df['Sales'].shift(1)
lr_df['lag_3'] = lr_df['Sales'].shift(3)
lr_df['lag_6'] = lr_df['Sales'].shift(6)

# Rolling averages over the PREVIOUS 3 and 6 months.
# The windows are computed on the one-month-lagged series so they never contain
# the row's own 'Sales' value; rolling directly over 'Sales' would leak the
# target into the features and make the model look better than it is.
lr_df['rolling_3'] = lr_df['lag_1'].rolling(3).mean()
lr_df['rolling_6'] = lr_df['lag_1'].rolling(6).mean()

# Seasonality: calendar month number taken from the datetime index.
lr_df['month'] = lr_df.index.month

# Drop the leading rows whose lag/rolling features are undefined (NaN).
# Reassigning instead of inplace=True keeps the cell free of in-place mutation.
lr_df = lr_df.dropna()

lr_df.head()


In [None]:
# Chronological split of the feature frame: the last 12 rows are held out.
train_lr, test_lr = lr_df.iloc[:-12], lr_df.iloc[-12:]

# Separate the target column ('Sales') from the predictor columns in each split.
X_train, y_train = train_lr.drop(columns='Sales'), train_lr['Sales']
X_test, y_test = test_lr.drop(columns='Sales'), test_lr['Sales']


In [None]:
# Fit a linear regression on the training rows (fit returns the estimator
# itself), then predict the held-out rows.
lr_model = LinearRegression().fit(X_train, y_train)
lr_preds = lr_model.predict(X_test)


In [None]:
# Mean absolute error of the regression predictions against the held-out targets.
mae_lr = mean_absolute_error(y_true=y_test, y_pred=lr_preds)
mae_lr


In [None]:
# Report both errors, one per line, rounded to two decimals.
print(
    f"Naive Forecast MAE: {mae_naive:.2f}",
    f"Linear Regression MAE: {mae_lr:.2f}",
    sep="\n",
)


In [None]:
# Overlay the actual test-window sales with both forecasts on a shared date axis.
fig, ax = plt.subplots(figsize=(12, 5))

for values, label in (
    (test.values, 'Actual'),
    (naive_preds.values, 'Naive Forecast'),
    (lr_preds, 'Linear Regression Forecast'),
):
    # Every series is drawn against the test-window dates.
    ax.plot(test.index, values, label=label, marker='o')

ax.set_title('Actual vs Forecasted Sales (Last 12 Months)')
ax.set_xlabel('Month')
ax.set_ylabel('Sales')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Flat naive projection: carry the most recent observed month forward.
last_value = monthly_sales.iloc[-1]

# Six month-end dates, starting one month-end after the last observed date.
first_future_date = monthly_sales.index[-1] + pd.offsets.MonthEnd(1)
future_dates = pd.date_range(start=first_future_date, periods=6, freq='M')

# A scalar broadcasts across the index, giving the same value for every future date.
future_forecast = pd.Series(last_value, index=future_dates)

# Plot the history followed by the dashed constant projection.
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(monthly_sales, label='Historical Sales')
ax.plot(future_forecast, label='Future Forecast', linestyle='--')
ax.set_title('Future Sales Forecast (Naive Model)')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
ax.grid(True)
plt.show()