In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score ,  mean_absolute_error
import joblib

In [14]:
df = pd.read_csv("data/cleaned_sales_data.csv")
df.head()

Unnamed: 0,date,units_sold,price,discount,month,day_of_week,is_weekend,competitor_price
0,2023-01-31,3,106.59,0.0,1,1,0,114.889809
1,2023-12-30,1,251.37,0.05,12,5,1,262.207677
2,2022-05-10,3,35.03,0.1,5,1,0,34.773672
3,2023-07-18,5,33.58,0.15,7,1,0,34.275574
4,2023-02-04,2,515.64,0.25,2,5,1,510.883201


In [16]:
df['log_units_sold'] = np.log1p(df['units_sold'])

In [18]:
features = ['price','discount','competitor_price','month','is_weekend']
target = 'log_units_sold'

In [20]:
X = df[features]   # inputs
y = df[target]     # output we predict

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
model = LinearRegression()
model.fit(X_train, y_train)

In [26]:
pred = model.predict(X_test)
print("R² Score:", r2_score(y_test, pred))
print("MAE (Avg error in units):", mean_absolute_error(y_test, pred))

R² Score: -5.07008157724087e-05
MAE (Avg error in units): 0.3354328474261596


In [28]:
coefficients = pd.DataFrame({'Feature': features,'Impact_on_Demand': model.coef_})

print("\nFeature Impact:")
print(coefficients)


Feature Impact:
            Feature  Impact_on_Demand
0             price          0.000089
1          discount         -0.002543
2  competitor_price         -0.000078
3             month          0.000343
4        is_weekend          0.001147


In [30]:
joblib.dump(model,"data/demand_model.pkl")

['data/demand_model.pkl']