In [92]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Location of the raw Walmart weekly-sales CSV. The leading "~" is expanded by
# pandas to the current user's home directory, so the notebook is not tied to
# one specific account name on one machine.
WALMART_CSV_PATH = "~/Downloads/Walmart.csv"

# Raw table exactly as read from disk; left untouched by the rest of the notebook
# so it can always be re-inspected without re-reading the file.
data = pd.read_csv(WALMART_CSV_PATH)

# Work on an independent copy: later cells add and overwrite columns on `df`,
# and those edits must never reach `data`.
df = data.copy()

df.head()

Unnamed: 0,Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment
0,1,05-02-2010,1643690.9,0,42.31,2.572,211.096358,8.106
1,1,12-02-2010,1641957.44,1,38.51,2.548,211.24217,8.106
2,1,19-02-2010,1611968.17,0,39.93,2.514,211.289143,8.106
3,1,26-02-2010,1409727.59,0,46.63,2.561,211.319643,8.106
4,1,05-03-2010,1554806.68,0,46.5,2.625,211.350143,8.106


In [9]:
# Collapse fully identical rows, retaining the first occurrence of each.
df = df.drop_duplicates(keep="first")

# Number of missing entries in every column (isna is the same check as isnull).
missing_values = df.isna().sum()

print(missing_values)



Store           0
Date            0
Weekly_Sales    0
Holiday_Flag    0
Temperature     0
Fuel_Price      0
CPI             0
Unemployment    0
dtype: int64


In [96]:
# ---- Feature engineering ----------------------------------------------------
# Dates are parsed as day-month-year (e.g. "05-02-2010" -> 5 Feb 2010).
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y')

df['Month'] = df['Date'].dt.month
# isocalendar().week comes back with pandas' nullable UInt32 dtype; cast it to a
# plain int64 so the feature matrix handed to scikit-learn is purely NumPy-backed.
df['Week'] = df['Date'].dt.isocalendar().week.astype('int64')

# Rows must be in date order within each store for shift(1) to mean
# "the previous recorded row of the same store".
df = df.sort_values(by=['Store', 'Date'])

# Previous row's sales for the same store; NaN on each store's first row.
df['Lag_Weekly_Sales'] = df.groupby('Store')['Weekly_Sales'].shift(1)

# NOTE(review): the missing first-row lags are filled with the mean of *all*
# Weekly_Sales values (every store, every date). That value is computed before
# the train/test split and is not store-specific -- consider dropping these rows
# for modelling instead. Left as-is so `df` keeps a fully populated lag column.
df['Lag_Weekly_Sales'] = df['Lag_Weekly_Sales'].fillna(df['Weekly_Sales'].mean())


# ---- Train / test split -----------------------------------------------------
features = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment', 'Holiday_Flag', 'Month', 'Week', 'Lag_Weekly_Sales']

X = df[features]
y = df['Weekly_Sales']

# NOTE(review): this is a random split over date-ordered rows that carry a lag
# feature, so test rows are interleaved in time with training rows. A
# chronological hold-out would give a more honest forecast estimate -- confirm
# which evaluation is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# ---- Scaling ----------------------------------------------------------------
# Fit the scaler on the training rows ONLY; fitting on the full matrix would let
# test-set means/variances leak into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Whole feature matrix expressed with the training-set statistics. The name is
# kept so any code that reads `X_scaled` continues to work.
X_scaled = scaler.transform(X)

# ---- Model ------------------------------------------------------------------
# The linear model is fit on the unscaled features: ordinary least squares gives
# the same predictions with or without standardisation, and the coefficients
# stay in the original feature units. The scaled arrays above are not used here.
model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# ---- Evaluation on the held-out rows ----------------------------------------
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"R2 Score:{r2:.2f}")
print(f"RMSE: {rmse:.2f}")

R2 Score:0.91
RMSE: 172995.84


In [90]:
# Show the first five rows of the frame, including the engineered columns.
df.head(n=5)

Unnamed: 0,Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment,Month,Week,Lag_Weekly_Sales
0,1,2010-02-05,1643690.9,0,42.31,2.572,211.096358,8.106,2,5,
1,1,2010-02-12,1641957.44,1,38.51,2.548,211.24217,8.106,2,6,1643690.9
2,1,2010-02-19,1611968.17,0,39.93,2.514,211.289143,8.106,2,7,1641957.44
3,1,2010-02-26,1409727.59,0,46.63,2.561,211.319643,8.106,2,8,1611968.17
4,1,2010-03-05,1554806.68,0,46.5,2.625,211.350143,8.106,3,9,1409727.59
