## Feature Engineering
Create new features from the raw Walmart sales data and save the result to a CSV file.


### 2.1) Creating more lag features
`lag_k` holds the same store's weekly sales from k weeks earlier:
`lag_1` -> last week's sales, `lag_2` -> sales two weeks ago, and so on up to `lag_7`.

In [3]:
import pandas as pd 

# Load the raw weekly sales table.
df = pd.read_csv('../data/Walmart.csv')

# Build lag_1 .. lag_7: for each row, the same store's Weekly_Sales from
# 1 to 7 rows earlier. Grouping by Store stops a lag from reaching back
# into a different store's rows.
# NOTE(review): shift() works on row position, so this assumes the rows are
# already in chronological order within each store -- confirm for this file.
sales_by_store = df.groupby('Store')['Weekly_Sales']
for weeks_back in range(1, 8):
    df[f'lag_{weeks_back}'] = sales_by_store.shift(weeks_back)



In [8]:
# Lower-case every column header so later cells can use names like
# 'store' and 'weekly_sales'.
df.columns = df.columns.map(str.lower)

# Parse the day-month-year date strings into datetime values.
parsed_dates = pd.to_datetime(df['date'], format='%d-%m-%Y')
df['date'] = parsed_dates

Unnamed: 0,store,date,weekly_sales,holiday_flag,temperature,fuel_price,cpi,unemployment,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7
0,1,2010-02-05,1643690.90,0,42.31,2.572,211.096358,8.106,,,,,,,
1,1,2010-02-12,1641957.44,1,38.51,2.548,211.242170,8.106,1643690.90,,,,,,
2,1,2010-02-19,1611968.17,0,39.93,2.514,211.289143,8.106,1641957.44,1643690.90,,,,,
3,1,2010-02-26,1409727.59,0,46.63,2.561,211.319643,8.106,1611968.17,1641957.44,1643690.90,,,,
4,1,2010-03-05,1554806.68,0,46.50,2.625,211.350143,8.106,1409727.59,1611968.17,1641957.44,1643690.90,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6430,45,2012-09-28,713173.95,0,64.88,3.997,192.013558,8.684,723086.20,702238.27,766512.66,734297.87,718232.26,722496.93,733037.32
6431,45,2012-10-05,733455.07,0,64.89,3.985,192.170412,8.667,713173.95,723086.20,702238.27,766512.66,734297.87,718232.26,722496.93
6432,45,2012-10-12,734464.36,0,54.47,4.000,192.327265,8.667,733455.07,713173.95,723086.20,702238.27,766512.66,734297.87,718232.26
6433,45,2012-10-19,718125.53,0,56.47,3.969,192.330854,8.667,734464.36,733455.07,713173.95,723086.20,702238.27,766512.66,734297.87


### 2.2) Rolling window statistics
Rolling-window statistics (mean and standard deviation over the previous 4 and 12 weeks of sales) capture short-term trends.

In [9]:
# Rolling mean / std of past sales.
# `prior_sales` is each store's weekly sales shifted one row back, so every
# window below ends at the previous week and never includes the current
# row's own sales.
prior_sales = df.groupby('store')['weekly_sales'].shift(1)

# The windows run over the flat shifted series rather than per store. The
# per-store shift leaves a NaN on each store's first row, and rolling() with
# its default min_periods (equal to the window size) yields NaN for any
# window containing a NaN, so a window that would straddle two stores comes
# out as NaN (assuming each store's rows are contiguous).
four_week_window = prior_sales.rolling(window=4)
df['rolling_mean_4'] = four_week_window.mean()
df['rolling_std_4'] = four_week_window.std()

twelve_week_window = prior_sales.rolling(window=12)
df['rolling_mean_12'] = twelve_week_window.mean()


### 2.3) Date-based features

In [10]:
# Calendar features derived from the parsed `date` column.
date_parts = df['date'].dt
df['year'] = date_parts.year
df['month'] = date_parts.month

# isocalendar() returns a frame of ISO year / week / day; keep the week
# number and cast it to a plain int column.
iso_week = date_parts.isocalendar().week
df['weekofyear'] = iso_week.astype(int)

# Monday == 0 ... Sunday == 6.
# NOTE(review): the rows shown in the output above are exactly one week
# apart, so this column may be constant -- verify before relying on it.
df['dayofweek'] = date_parts.dayofweek

### 2.4) Holiday Lag

In [11]:
# Interaction feature: the product of holiday_flag and lag_1. Where the flag
# is 0 the product is 0; where lag_1 is NaN the product stays NaN.
holiday_indicator = df['holiday_flag']
df['holiday_lag1'] = holiday_indicator.mul(df['lag_1'])

### 2.5) External features smoothing

In [12]:
# Smooth each external signal with a 4-row rolling mean computed per store.
# A plain df[col].rolling(4) runs over the stacked table, so the first three
# rows of every store after the first would average in the previous store's
# last readings. Grouping by store keeps every window inside one store; the
# first three rows of each store are NaN instead.
# Assumes rows are in chronological order within each store.
for col in ['temperature', 'fuel_price', 'cpi', 'unemployment']:
    df[f'{col}_rolling4'] = (
        df.groupby('store')[col]
        .transform(lambda series: series.rolling(window=4).mean())
    )

### 2.6) Handle missing values and save to a new file


In [14]:
# Drop every row that still has a missing value in any column (the lag and
# rolling features are NaN where there is not enough history to fill them).
df = df.dropna()

# Write the engineered feature table without the index column.
df.to_csv('../data/walmart_features.csv', index=False)