In [None]:
import numpy as np
import pandas as pd


In [None]:
# Re-execute full logic after code environment reset
#
# Simulates 52 weekly rows for every (store, product) pair. Each row records
# that week's drivers, the sales level carried in (`prev_week_sales`) and the
# level after applying the week's effect (`next_week_sales`); the latter is
# fed forward as the next row's `prev_week_sales`.

import pandas as pd
import numpy as np

# Set seed for reproducibility
np.random.seed(42)

# Define constants
n_stores = 5
n_products = 10
weeks = list(range(1, 53))
seasons = ['winter', 'spring', 'summer', 'autumn']
# 52 weeks shared evenly by four seasons -> 13 consecutive weeks per season.
weeks_per_season = len(weeks) // len(seasons)

# Simulate dataset with predictable patterns
data = []

for store_id in range(1, n_stores + 1):
    for product_id in range(1, n_products + 1):
        # Random starting level for this store/product series.
        prev_sales = np.random.randint(40, 70)
        for week in weeks:
            is_holiday = int(week in [1, 5, 10])
            is_weekend = int(week % 6 == 0 or week % 7 == 0)
            # Fix: the previous `(week - 1) // 3 % 4` switched season every
            # three weeks; each season now covers one contiguous 13-week span.
            # The trailing modulo keeps the index in range for any week list.
            season = seasons[(week - 1) // weeks_per_season % len(seasons)]
            # NOTE: the order of the random draws below is unchanged, so all
            # simulated numeric columns are identical to the earlier version.
            discount_percent = np.random.choice([0, 10, 20, 30])
            price_per_unit = round(10 + np.random.randn() * 2, 2)

            # Predictable influence pattern
            effect = (
                (discount_percent * 0.5) +
                (is_holiday * 8) +
                (is_weekend * 4) -
                (price_per_unit * 0.3) +
                np.random.normal(0, 2)
            )
            # Sales are floored at zero.
            next_sales = max(0, prev_sales + effect)

            data.append({
                'week_of_year': week,
                'is_holiday': is_holiday,
                'is_weekend': is_weekend,
                'season': season,
                'discount_percent': discount_percent,
                'price_per_unit': price_per_unit,
                'storeid': store_id,
                'productid': product_id,
                'prev_week_sales': prev_sales,
                'next_week_sales': next_sales
            })

            # Carry this week's result into the next week's row.
            prev_sales = next_sales

# Create DataFrame
predictable_df = pd.DataFrame(data)
# Integer-encode the season; category codes follow alphabetical order
# (autumn=0, spring=1, summer=2, winter=3).
predictable_df['season'] = predictable_df['season'].astype('category').cat.codes
predictable_df.head(20)


Unnamed: 0,week_of_year,is_holiday,is_weekend,season,discount_percent,price_per_unit,storeid,productid,prev_week_sales,next_week_sales
0,1,1,0,3,30,7.78,1,1,46.0,67.303804
1,2,0,0,3,20,9.81,1,1,67.303804,72.503148
2,3,0,0,3,0,13.16,1,1,72.503148,70.090018
3,4,0,0,1,10,10.16,1,1,70.090018,71.722985
4,5,1,0,1,10,8.84,1,1,71.722985,81.020645
5,6,0,1,1,0,12.92,1,1,81.020645,84.222075
6,7,0,1,2,0,6.55,1,1,84.222075,85.1325
7,8,0,0,2,20,8.94,1,1,85.1325,92.215549
8,9,0,0,2,30,8.59,1,1,92.215549,100.359308
9,10,1,0,0,20,7.05,1,1,100.359308,119.218269


In [None]:
# Alias only (no copy): `df` and `predictable_df` name the same DataFrame object.
df = predictable_df

In [None]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Feature lists
all_features = [
    'week_of_year', 'is_holiday', 'is_weekend', 'season',
    'discount_percent', 'price_per_unit', 'storeid', 'productid', 'prev_week_sales'
]
target = 'next_week_sales'

# Features to normalize
features_to_normalize = ['is_holiday', 'is_weekend', 'season',
                         'discount_percent', 'price_per_unit', 'prev_week_sales']

# Create a copy to avoid modifying original df
df_scaled = df.copy()

# Fix (data leakage): learn the min/max from the training weeks only, then
# apply that same transform to every row. Fitting on the full frame let the
# held-out weeks influence the scaling the models are trained on.
# The cutoff mirrors the `week_of_year <= 40` train/test split used later in
# this notebook; keep the two in sync.
train_weeks = df_scaled['week_of_year'] <= 40

scaler = MinMaxScaler()
scaler.fit(df_scaled.loc[train_weeks, features_to_normalize])

# Held-out rows may now fall outside [0, 1]; that is expected and harmless
# for the regressors trained below.
df_scaled[features_to_normalize] = scaler.transform(df_scaled[features_to_normalize])


In [None]:
# From here on `df` refers to the scaled frame.
df = df_scaled
# Preview only: a bare `.head()` that is not the cell's last expression shows
# nothing, and echoing the whole frame adds no information over a preview.
df.head()

Unnamed: 0,week_of_year,is_holiday,is_weekend,season,discount_percent,price_per_unit,storeid,productid,prev_week_sales,next_week_sales
0,1,1.0,0.0,1.000000,1.000000,0.301744,1,1,0.013971,67.303804
1,2,0.0,0.0,1.000000,0.666667,0.455648,1,1,0.063576,72.503148
2,3,0.0,0.0,1.000000,0.000000,0.709629,1,1,0.075682,70.090018
3,4,0.0,0.0,0.333333,0.333333,0.482183,1,1,0.070063,71.722985
4,5,1.0,0.0,0.333333,0.333333,0.382108,1,1,0.073866,81.020645
...,...,...,...,...,...,...,...,...,...,...
2595,48,0.0,1.0,0.000000,0.000000,0.411676,5,10,0.807613,383.665641
2596,49,0.0,1.0,1.000000,0.666667,0.767248,5,10,0.800210,395.165795
2597,50,0.0,0.0,1.000000,0.000000,0.411676,5,10,0.826987,390.201915
2598,51,0.0,0.0,1.000000,1.000000,0.564822,5,10,0.815429,404.050312


In [None]:
# Model inputs, listed in the exact column order passed to every regressor.
features = (
    ['week_of_year', 'is_holiday', 'is_weekend', 'season']  # calendar flags
    + ['discount_percent', 'price_per_unit']                # pricing
    + ['storeid', 'productid']                              # identifiers
    + ['prev_week_sales']                                   # lagged sales
)

# Column the models are trained to predict.
target = 'next_week_sales'


In [None]:

# Chronological hold-out: rows up to week 40 train the models, later weeks
# are kept for evaluation.
week_no = df['week_of_year']
train = df[week_no <= 40]
test = df[week_no > 40]


In [None]:
# Sanity check: highest week number present, i.e. the upper end of the
# `week_of_year > 40` test range.
df['week_of_year'].max()

52

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Linear baseline: fit on the training weeks, score on the held-out weeks.
lr = LinearRegression().fit(train[features], train[target])
preds = lr.predict(test[features])
lr_mse = mean_squared_error(test[target], preds)

print("Linear Regression MSE:", lr_mse)


Linear Regression MSE: 4.230568639327552


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with 100 trees and a fixed seed, trained and scored on the
# same split as the other models.
# NOTE(review): the recorded MSE is far above the linear model's. Tree
# ensembles predict within the range of targets seen in training, so a sales
# level that keeps rising into the held-out weeks would explain it — confirm
# by comparing train/test target ranges.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    train[features], train[target]
)
preds = rf.predict(test[features])
rf_mse = mean_squared_error(test[target], preds)

print("Random Forest MSE:", rf_mse)


Random Forest MSE: 116.14397785100915


In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting with a fixed seed; hyper-parameters are collected in one
# place so they are easy to spot and tune.
gbr_settings = {'n_estimators': 100, 'learning_rate': 0.1, 'random_state': 42}
gbr = GradientBoostingRegressor(**gbr_settings).fit(train[features], train[target])
preds = gbr.predict(test[features])
gbr_mse = mean_squared_error(test[target], preds)

print("Gradient Boosting MSE:", gbr_mse)


Gradient Boosting MSE: 131.50705075165982


In [None]:
import xgboost as xgb

# XGBoost regressor configured like the gradient-boosting model above
# (100 estimators, learning rate 0.1, fixed seed).
xgb_settings = {'n_estimators': 100, 'learning_rate': 0.1, 'random_state': 42}
xgb_model = xgb.XGBRegressor(**xgb_settings)
xgb_model.fit(train[features], train[target])

preds = xgb_model.predict(test[features])
xgb_mse = mean_squared_error(test[target], preds)

print("XGBoost MSE:", xgb_mse)


XGBoost MSE: 209.721584235696


In [None]:
import matplotlib.pyplot as plt

# Step 1: generate predictions for all test rows from an explicitly named model.
# `preds` is reassigned by every model cell above, so reading it here would
# plot whichever model happened to run last. XGBoost is the last model in
# notebook order, so predicting with it directly matches a top-to-bottom run
# while staying correct when cells are re-run out of order.
plot_preds = xgb_model.predict(test[features])

# Step 2: add predictions as a new column to the test set
# (work on a copy so the slice of `df` is not written through).
test = test.copy()
test['prediction'] = plot_preds

# Step 3: now you can filter by store/product safely
store_id = 5
product_id = 4

is_selected = (test['storeid'] == store_id) & (test['productid'] == product_id)
sample = test[is_selected].sort_values(by='week_of_year')

# Step 4: plot actual vs. predicted sales for the selected series
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(sample['week_of_year'], sample['next_week_sales'], label='Actual', marker='o')
ax.plot(sample['week_of_year'], sample['prediction'], label='Predicted', marker='x')
ax.set_title(f'Sales Forecast (Store {store_id}, Product {product_id})')
ax.set_xlabel('Week')
ax.set_ylabel('Sales')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


NameError: name 'test' is not defined

In [None]:
import shap


In [None]:
# Training frame without the target column.
train_shap = train.drop(columns=['next_week_sales'])

In [None]:
# Print each linear-regression coefficient next to the feature it belongs to.
# `lr` was fitted on `train[features]`, so `features` is the authoritative
# name/order for `lr.coef_`; indexing another frame's columns by position
# would silently mislabel (or overrun) if that frame's columns ever differed.
for name, coef in zip(features, lr.coef_):
  print(name, "=", coef.round(5))

week_of_year = -0.00862
is_holiday = 7.96624
is_weekend = 3.93856
season = 0.12511
discount_percent = 15.2033
price_per_unit = -4.03566
storeid = -0.01267
productid = 0.0017
prev_week_sales = 430.16534
