In [19]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the cleaned dataset from Phase 1
df = pd.read_csv('C:/Users/Saisa/Downloads/Walmart_Sales/data/processed/processed_sales.csv', parse_dates=['date'])

# Sort by store and date
df = df.sort_values(by=['store', 'date'])


In [20]:
# Date Parts
df['day'] = df['date'].dt.day
df['week'] = df['date'].dt.isocalendar().week
df['month'] = df['date'].dt.month
df['quarter'] = df['date'].dt.quarter
df['year'] = df['date'].dt.year
df['weekday'] = df['date'].dt.weekday
df['is_weekend'] = df['weekday'].isin([5, 6]).astype(int)



In [21]:
# Lag Features
for lag in [1, 2, 4, 13, 26, 52]:
    df[f'lag_{lag}'] = df.groupby('store')['weekly_sales'].shift(lag)


In [22]:
# Rolling averages
for window in [4, 13, 26, 52]:
    df[f'sales_roll_{window}'] = (
        df.groupby('store')['weekly_sales']
        .shift(1)
        .rolling(window=window)
        .mean()
    )

In [23]:
# Sales difference 
df['sales_diff_1'] = df.groupby('store')['weekly_sales'].diff(1)
df['sales_diff_4'] = df.groupby('store')['weekly_sales'].diff(4)

In [24]:
# Cumulative Sales

df['cumulative_sales'] = df.groupby('store')['weekly_sales'].cumsum()


In [25]:
# Store-wise average sales
df['avg_sales_per_store'] = df.groupby('store')['weekly_sales'].transform('mean')


In [26]:
# Promotion Features
df['promo_last_week'] = df.groupby('store')['is_holiday'].shift(1)
df['promo_next_week'] = df.groupby('store')['is_holiday'].shift(-1)


In [28]:
# Label Encoding

le_store = LabelEncoder()
le_dept = LabelEncoder()

if 'store' in df.columns:
    df['store_encoded'] = le_store.fit_transform(df['store'])

if 'dept' in df.columns:
    df['dept_encoded'] = le_dept.fit_transform(df['dept'])


In [29]:
# Missing Values

df.fillna(0, inplace=True)

In [30]:
output_path = 'C:/Users/Saisa/Downloads/Walmart_Sales/data/processed/feature_engineered_sales.csv'
df.to_csv(output_path, index=False)
print("✅ Feature engineering completed and file saved to:")
print(output_path)

✅ Feature engineering completed and file saved to:
C:/Users/Saisa/Downloads/Walmart_Sales/data/processed/feature_engineered_sales.csv


In [31]:
df.head()

Unnamed: 0,store,date,weekly_sales,holiday_flag,temperature,fuel_price,cpi,unemployment,year,month,...,sales_roll_13,sales_roll_26,sales_roll_52,sales_diff_1,sales_diff_4,cumulative_sales,avg_sales_per_store,promo_last_week,promo_next_week,store_encoded
14,1,2010-01-10,1453329.5,0,71.89,2.603,211.671989,7.838,2010,1,...,0.0,0.0,0.0,0.0,0.0,1453329.5,1601845.0,0.0,0.0,0
4,1,2010-02-04,1594968.28,0,62.27,2.719,210.82045,7.808,2010,2,...,0.0,0.0,0.0,141638.78,0.0,3048297.78,1601845.0,0.0,0.0,0
9,1,2010-02-07,1492418.14,0,80.91,2.669,211.223533,7.787,2010,2,...,0.0,0.0,0.0,-102550.14,0.0,4540715.92,1601845.0,0.0,0.0,0
12,1,2010-03-09,1540163.53,0,81.21,2.577,211.531248,7.787,2010,3,...,0.0,0.0,0.0,47745.39,0.0,6080879.45,1601845.0,0.0,0.0,0
18,1,2010-03-12,1548033.78,0,49.27,2.708,211.607193,7.838,2010,3,...,0.0,0.0,0.0,7870.25,94704.28,7628913.23,1601845.0,0.0,0.0,0
