### Load and Prepare Data

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

# Load data
# The base directory is a raw string: in an ordinary string literal the "\U" of
# "C:\Users" starts a Unicode escape (a SyntaxError) and "\r" in "\raw" would be
# read as a carriage return, so the original paths could not be used as written.
# NOTE(review): this is still a machine-specific absolute path — consider making
# it relative to the project root or configurable.
DATA_DIR = Path(r'C:\Users\wwwsu\Desktop\All folders\Logistics_demand\data\raw')

calendar = pd.read_csv(DATA_DIR / 'calendar.csv')
sell_prices = pd.read_csv(DATA_DIR / 'sell_prices.csv')
sales_train = pd.read_csv(DATA_DIR / 'sales_train_evaluation.csv')

# Filter for one store (e.g., CA_1) or department for memory efficiency
sales_train = sales_train[sales_train['store_id'] == 'CA_1'].reset_index(drop=True)

### Melt Sales Data to Long Format 

In [None]:
# Identifier columns are carried through unchanged; every column whose name
# starts with 'd_' is unpivoted into one (d, sales) row per original row.
id_cols = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
value_cols = list(filter(lambda name: name.startswith('d_'), sales_train.columns))

sales_long = sales_train.melt(
    id_vars=id_cols,
    value_vars=value_cols,
    var_name='d',
    value_name='sales',
)

### Merge Calendar and Sell Prices

In [None]:
# Left joins keep every melted sales row: first attach the calendar row for
# each 'd', then the price row for each (store_id, item_id, wm_yr_wk).
sales_long = (
    sales_long
    .merge(calendar, how='left', on='d')
    .merge(sell_prices, how='left', on=['store_id', 'item_id', 'wm_yr_wk'])
)


### Downcast and Convert to Category

In [None]:
# Downcast numerics: each numeric column is passed through
# pd.to_numeric(..., downcast='float') and written back in place.
numeric_cols = sales_long.select_dtypes(include='number').columns
for name in numeric_cols:
    sales_long[name] = pd.to_numeric(sales_long[name], downcast='float')

# Convert strings to category (columns missing from the frame are skipped)
category_candidates = (
    'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id',
    'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2',
    'weekday',
)
for name in (c for c in category_candidates if c in sales_long.columns):
    sales_long[name] = sales_long[name].astype('category')


### Feature Engineering 

In [None]:
# Feature engineering on the long sales frame: per-item lags and rolling means,
# one 0/1 flag per calendar event, a SNAP flag, then removal of warm-up rows.
lags = [1, 7, 14, 28]
windows = [7, 28]

# Per-item sales series. Row order inside each `id` is the order the d_* columns
# were melted in.
# NOTE(review): assumes the d_* columns are in chronological order — confirm.
sales_by_id = sales_long.groupby('id')['sales']

# Lag features
for lag in lags:
    sales_long[f'lag_{lag}'] = sales_by_id.shift(lag).astype(np.float16)

# Rolling means of the previous days' sales, computed *within* each item.
# Rolling over the plain result of groupby().shift() would slide the window
# across neighbouring rows, which belong to different items after the melt.
prev_day_sales = sales_by_id.shift(1)
for window in windows:
    rolled = prev_day_sales.groupby(sales_long['id']).rolling(window).mean()
    # groupby().rolling() prepends the group key to the index; drop it so the
    # result aligns with sales_long's own row index on assignment.
    sales_long[f'rolling_mean_{window}'] = (
        rolled.reset_index(level=0, drop=True).astype(np.float16)
    )

# Event flags: a flag is 1 when the event appears in either event slot.
# Building the two slots separately let the second loop overwrite the flag of
# any event name that also occurs in event_name_1.
event_names = list(dict.fromkeys([
    *sales_long['event_name_1'].dropna().unique(),
    *sales_long['event_name_2'].dropna().unique(),
]))
for event in event_names:
    in_either_slot = (
        (sales_long['event_name_1'] == event)
        | (sales_long['event_name_2'] == event)
    )
    sales_long[f'event_{event}'] = in_either_slot.astype(np.int8)

# SNAP flag: use the snap_* column of the row's own state. Taking the max over
# all three states marks a day as SNAP for a store whenever any state has SNAP.
is_snap = pd.Series(0, index=sales_long.index, dtype=np.int8)
for state in ('CA', 'TX', 'WI'):
    in_state = sales_long['state_id'] == state
    is_snap[in_state] = sales_long.loc[in_state, f'snap_{state}'].astype(np.int8)
sales_long['is_snap'] = is_snap

# Drop NA (from lag/rolling) — restricted to the engineered columns. A bare
# dropna() also removes every row with a null anywhere else, e.g. days without
# an event in event_name_1/event_name_2 or rows without a sell_price.
engineered_cols = (
    [f'lag_{lag}' for lag in lags]
    + [f'rolling_mean_{window}' for window in windows]
)
sales_long = sales_long.dropna(subset=engineered_cols).reset_index(drop=True)


### Aggregate for Weekly Forecasting

In [None]:
# Collapse the long frame to one row per wm_yr_wk, then drop any week that
# still has a missing aggregate.
# NOTE(review): lag_1 and rolling_mean_7 averaged over a week draw on sales from
# days inside that same week — confirm this is acceptable for forecasting.
weekly_agg = (
    sales_long
    .groupby('wm_yr_wk')
    .agg(
        sales=('sales', 'sum'),
        sell_price=('sell_price', 'mean'),
        month=('month', 'first'),
        year=('year', 'first'),
        is_snap=('is_snap', 'sum'),
        lag_1=('lag_1', 'mean'),
        lag_7=('lag_7', 'mean'),
        lag_14=('lag_14', 'mean'),
        lag_28=('lag_28', 'mean'),
        rolling_mean_7=('rolling_mean_7', 'mean'),
        rolling_mean_28=('rolling_mean_28', 'mean'),
        # You can add event columns here if needed
    )
    .reset_index()
    .dropna()
    .reset_index(drop=True)
)


### Train/Test Split

In [None]:
# Model inputs and the column being predicted
target = 'sales'
features = [
    'sell_price', 'month', 'year', 'is_snap',
    'lag_1', 'lag_7', 'lag_14', 'lag_28',
    'rolling_mean_7', 'rolling_mean_28',
]

# Hold out the final 5 rows of weekly_agg for testing; everything before
# them is used for training.
test = weekly_agg.iloc[-5:]
train = weekly_agg.iloc[:-5]

X_train, y_train = train[features], train[target]
X_test, y_test = test[features], test[target]


### Train LightGBM Model

In [None]:
from lightgbm import LGBMRegressor

# Gradient-boosted regressor fitted on the weekly training rows.
model = LGBMRegressor(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    # LightGBM only performs row subsampling (bagging) when the bagging
    # frequency is positive; without this, subsample=0.8 has no effect.
    subsample_freq=1,
    colsample_bytree=0.8,
    random_state=42
)
model.fit(X_train, y_train)


### Predict, Evaluate, and Plot

In [None]:
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Score the held-out weeks with root-mean-squared error
preds = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, preds))
print(f'RMSE: {rmse:.2f}')

# Actual vs. predicted weekly sales over the test weeks
weeks = test['wm_yr_wk']
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(weeks, y_test, marker='o', label='Actual Sales')
ax.plot(weeks, preds, marker='x', label='Predicted Sales')
ax.set_xlabel('wm_yr_wk')
ax.set_ylabel('Sales')
ax.set_title('Aggregated Weekly Sales Forecast (Next 5 Weeks)')
ax.legend()
ax.grid(True)
plt.show()
