In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error


# Load the raw sales table from a CSV file located next to the notebook.
SALES_CSV_PATH = 'sales_data.csv'
df = pd.read_csv(SALES_CSV_PATH)

In [2]:
# Convert the 'Date' column to pandas datetimes (replaces the column in place on `df`).
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

In [3]:
# Reduce each date to its calendar year; the column keeps the name 'Date' at this point.
# NOTE: not re-runnable on its own -- once the column holds plain years the `.dt`
# accessor no longer applies, so reload/re-parse before executing this cell again.
year_values = df['Date'].dt.year
df['Date'] = year_values


In [4]:
# The column now contains years rather than full dates, so give it a matching name.
# The renamed frame is rebound to `df` instead of using inplace=True, which avoids
# mutating the existing object behind the reader's back.
df = df.rename(columns={'Date': 'Year'})

In [5]:
# Columns routed through the numeric branch of the preprocessing step
# (mean imputation followed by scaling).
numeric_columns = [
    'Inventory Level',
    'Units Sold',
    'Units Ordered',
    'Price',
    'Discount',
    'Competitor Pricing',
    'Year',
    'Promotion',
]

# Columns routed through the categorical branch (one-hot encoding).
categorical_columns = [
    'Store ID',
    'Product ID',
    'Category',
    'Region',
    'Weather Condition',
    'Seasonality',
]


In [6]:
# Feature matrix: the numeric columns first, followed by the categorical ones.
feature_columns = numeric_columns + categorical_columns
X = df[feature_columns]

# Regression target.
y = df['Demand']

In [7]:
# Numeric branch: fill missing values with the column mean, then rescale with MinMaxScaler.
numeric_steps = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', MinMaxScaler()),
])

# Categorical branch: one-hot encode; handle_unknown='ignore' means categories
# not seen during fitting do not raise at transform time.
categorical_encoder = OneHotEncoder(handle_unknown='ignore')

# Apply each branch to its own column list.
prepocesing = ColumnTransformer(
    transformers=[
        ('num', numeric_steps, numeric_columns),
        ('cat', categorical_encoder, categorical_columns),
    ]
)

In [8]:
# Full estimator: the column preprocessing followed by a linear regression.
pipeline_steps = [
    ('Preprocesing', prepocesing),
    ('regressor', LinearRegression()),
]
model = Pipeline(steps=pipeline_steps)

In [9]:
# Use 80% of the rows for training and keep the rest for evaluation;
# the fixed random_state makes the split repeatable between runs.
split_parts = train_test_split(X, y, train_size=0.8, random_state=123)
X_train, X_test, y_train, y_test = split_parts

# Fit preprocessing and regressor together on the training portion only.
model.fit(X_train, y_train)

In [10]:
# Score the held-out rows.
y_pred = model.predict(X_test)

residual = y_test - y_pred

# NOTE: the 'Residual mean' line reports the mean of the *absolute* residuals,
# which is the same quantity as the MAE printed last.
metric_lines = (
    ('Residual mean:', residual.abs().mean()),
    ('MSE:', mean_squared_error(y_test, y_pred)),
    ('MAE:', mean_absolute_error(y_test, y_pred)),
)
for label, value in metric_lines:
    print(label, value)

Residual mean: 17.086431114392735
MSE: 527.4542056359727
MAE: 17.086431114392735


In [11]:
# Example prediction: score one hand-written record with the fitted pipeline.
example_numeric = {
    'Inventory Level': 210,
    'Units Sold': 95,
    'Units Ordered': 180,
    'Price': 64.50,
    'Discount': 12,
    'Competitor Pricing': 66.00,
    'Year': 2025,
    'Promotion': 1,
}
example_categorical = {
    'Store ID': 'S999',
    'Product ID': 'P0999',
    'Category': 'Accessories',
    'Region': 'West',
    'Weather Condition': 'Rainy',
    'Seasonality': 'Spring',
}

# Merge in the same key order as the training features (numeric, then categorical).
data = [{**example_numeric, **example_categorical}]

# One-row frame whose column names match those the pipeline was fitted on.
df_test = pd.DataFrame(data)

df_test_pred = model.predict(df_test)
print(df_test_pred)

[109.05989643]


In [12]:
#Saving the model
# Persist the whole fitted pipeline (preprocessing + regressor) to disk so it can
# be loaded later for inference without retraining.
import joblib

joblib.dump(model, 'model.pkl')


['model.pkl']