In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

In [2]:
extracted_df = pd.read_csv('./Data/cleaned_data.csv')
extracted_df.shape

(11778, 6)

## Decision Tree model - kept 2023, 2024 data for test, remaining as train

In [3]:
label_encoder = LabelEncoder()

extracted_df['Product ID'] = label_encoder.fit_transform(extracted_df['Product ID'])

print(extracted_df.head())

train_data = extracted_df[extracted_df['Order Year'] < 2023]
test_data = extracted_df[extracted_df['Order Year'] >= 2023]

print(train_data.shape,test_data.shape)
X_train = train_data[['Product ID', 'Order Month', 'Order Year', 'Product Unit Selling Price']]
y_train = train_data['Product Quantity']

X_test = test_data[['Product ID', 'Order Month', 'Order Year', 'Product Unit Selling Price']]
y_test = test_data['Product Quantity']

X = train_data[['Product ID', 'Order Month', 'Order Year', 'Product Unit Selling Price']]
y = train_data['Product Quantity']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

   Product ID  Order Month  Order Year  Product Unit Selling Price  \
0           0            9        2015                        0.00   
1           1            4        2013                        3.95   
2           1            4        2014                        3.95   
3           1            4        2015                        4.10   
4           1            4        2016                        4.10   

   Product Quantity  Total Selling Price  
0                 5                 0.00  
1                13                51.35  
2                 8                31.60  
3                16                65.60  
4                20                82.00  
(10507, 6) (1271, 6)


In [6]:
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("Mean Absolute Error:", mae)
print("R-squared:", r2)

Mean Squared Error: 507.89724072312083
Root Mean Squared Error: 22.53657562104591
Mean Absolute Error: 9.219790675547099
R-squared: 0.10952532606119714


In [7]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("Mean Absolute Error:", mae)
print("R-squared:", r2)

Mean Squared Error: 541.7953445476443
Root Mean Squared Error: 23.276497686457134
Mean Absolute Error: 10.572621619076587
R-squared: 0.05009321946556056
