In [27]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
import numpy as np
import pickle

# Data Cleaning and grouping

In [28]:

# Load data: the sales fact table and the date dimension table
fact_sales = pd.read_csv('fact_sales.csv')
dim_date = pd.read_csv('dim_date.csv')

# Merge data: join each sales row to its date row on the shared 'id_date' key
# (pd.merge defaults to an inner join, so rows without a matching date are dropped)
merged_data = pd.merge(fact_sales, dim_date, on='id_date')

# Extract necessary columns.
# The explicit .copy() makes `data` an independent frame rather than a slice of
# `merged_data`, so the assignment below cannot raise SettingWithCopyWarning.
data = merged_data[['access_date', 'number_person']].copy()

# Convert 'access_date' to datetime.
# Plain column assignment replaces the whole column, so its dtype becomes
# datetime64. `data.loc[:, 'access_date'] = ...` would instead try to write the
# values into the existing column in place (pandas 2.x behaviour) and could
# leave it as object dtype.
data['access_date'] = pd.to_datetime(data['access_date'])


# Group by date and calculate total number of persons per distinct 'access_date'
# NOTE(review): if 'access_date' carries a time-of-day component this groups per
# timestamp, not per calendar day — confirm against the CSV contents.
daily_persons = data.groupby('access_date')['number_person'].sum().reset_index()

# Training the Model

In [29]:

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
train, test = train_test_split(daily_persons, test_size=0.2, random_state=42)

# Feature frame (date column only) and target series for each partition
X_train, y_train = train[['access_date']], train['number_person']
X_test, y_test = test[['access_date']], test['number_person']


def _ordinal_column(dates):
    """Return each date's ordinal (via ``toordinal()``) as an (n, 1) array."""
    return dates.apply(lambda stamp: stamp.toordinal()).values.reshape(-1, 1)


# The regression needs a numeric feature, so dates are encoded as ordinals
X_train_ordinal = _ordinal_column(X_train['access_date'])
X_test_ordinal = _ordinal_column(X_test['access_date'])

# Fit a linear trend on the training ordinals, then score the held-out dates
model = LinearRegression().fit(X_train_ordinal, y_train)
predictions = model.predict(X_test_ordinal)


# Evaluate

In [30]:

# Score the hold-out predictions: mean absolute difference from the actual totals
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
print("Mean Absolute Error:", mae)

Mean Absolute Error: 182.7064430409508


# Predict Future Dates

In [31]:
# Forecast the target for every date in the range below (date_range's default daily frequency)
future_dates = pd.date_range('2023-05-06', '2027-05-12')
# Encode the dates as a single column of ordinals, the same encoding used at fit time
future_dates_ordinal = np.array([day.toordinal() for day in future_dates])[:, np.newaxis]
future_dates_pred = model.predict(future_dates_ordinal)
print("Predictions for future dates:", future_dates_pred)


Predictions for future dates: [369.52526989 369.44115065 369.3570314  ... 246.29057791 246.20645867
 246.12233942]


# Exporting Model

In [32]:
# Persist the fitted model to disk with pickle.
# The `with` block guarantees the file handle is flushed and closed even if
# pickling raises; a bare open() passed to pickle.dump is never closed explicitly.
# NOTE: only unpickle this file from a trusted location — pickle.load can
# execute arbitrary code.
filename ='number_of_persons.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)