In [10]:
import pandas as pd
import joblib

from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

import numpy as np

import math

# Training split. The user identifier and the target column are removed from
# the feature matrix; the target itself is modelled on a natural-log scale.
train = pd.read_csv("../data/train_data.csv")
X = train.drop(["User_ID", "Purchase"], axis=1)
Y = train["Purchase"].pipe(np.log)

# Test split, prepared exactly like the training split so that predictions
# and targets are compared on the same (log) scale.
test = pd.read_csv("../data/test_data.csv")
X_test = test.drop(["User_ID", "Purchase"], axis=1)
Y_test = test["Purchase"].pipe(np.log)

In [11]:
from sklearn.linear_model import LinearRegression

# Linear regression baseline, fitted on the log-scale target.
# `fit` returns the estimator itself, so construction and fitting are chained.
lr = LinearRegression().fit(X, Y)

# Serialize the fitted estimator to disk.
joblib.dump(lr, "../models/linear_model.pkl")

# Score on the test split; both metrics are computed on the log-scale target.
Y_pred = lr.predict(X_test)

lr_rmse = math.sqrt(mse(Y_test, Y_pred))
print(lr_rmse)
lr_r2 = r2_score(Y_test, Y_pred)
print(lr_r2)

1.687759272265369
0.37407083445386946


In [12]:
from sklearn.ensemble import RandomForestRegressor

# Random forest: 100 trees with depth capped at 10, a fixed seed for
# repeatable results, and n_jobs=-1 to train in parallel.
# `fit` returns the estimator itself, so construction and fitting are chained.
rfr = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    random_state=42,
    n_jobs=-1,
).fit(X, Y)

# Serialize the fitted estimator to disk.
joblib.dump(rfr, "../models/random_forest_model.pkl")

# Score on the test split; both metrics are computed on the log-scale target.
Y_pred = rfr.predict(X_test)

rfr_rmse = math.sqrt(mse(Y_test, Y_pred))
print(rfr_rmse)
rfr_r2 = r2_score(Y_test, Y_pred)
print(rfr_r2)

0.9678034388272168
0.7941842979168563


In [13]:
# MLP Regressor

from sklearn.neural_network import MLPRegressor

# Multi-layer perceptron regressor with three hidden layers (10, 50 and 30
# units), ReLU activations and the Adam solver, limited to 300 iterations and
# seeded for repeatable results.
# `fit` returns the estimator itself, so construction and fitting are chained.
mlp = MLPRegressor(
    hidden_layer_sizes=(10, 50, 30),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=20,
).fit(X, Y)

# Serialize the fitted estimator to disk.
joblib.dump(mlp, "../models/mlp_model.pkl")

# Score on the test split; both metrics are computed on the log-scale target.
Y_pred = mlp.predict(X_test)

mlp_rmse = math.sqrt(mse(Y_test, Y_pred))
print(mlp_rmse)
mlp_r2 = r2_score(Y_test, Y_pred)
print(mlp_r2)

0.9846310066878726
0.7869648831297031


In [14]:
# Validation split, prepared like the other splits: identifier and target are
# dropped from the features and the target is put on a natural-log scale.
validation = pd.read_csv("../data/validation_data.csv")
X_val = validation.drop(["User_ID", "Purchase"], axis=1)
Y_val = validation["Purchase"].pipe(np.log)

# One row per fitted model: (estimator, RMSE label, R2 label).
# The labels are printed verbatim, including their leading newlines.
# NOTE: the "MLP Classifier" label refers to `mlp`, which is a regressor; the
# text is kept unchanged so the printed report stays the same.
val_reports = [
    (lr, "Linear Regression RMSE: ", "\nLinear Regression R2: "),
    (rfr, "\nRandom Forest RMSE: ", "\nRandom Forest R2: "),
    (mlp, "\nMLP Classifier RMSE: ", "\nMLP Classifier R2: "),
]

# Score every model on the validation split, in the order listed above.
for estimator, rmse_label, r2_label in val_reports:
    Y_pred = estimator.predict(X_val)
    print(rmse_label, math.sqrt(mse(Y_val, Y_pred)))
    print(r2_label, r2_score(Y_val, Y_pred))

Linear Regression RMSE:  1.660081104885919

Linear Regression R2:  0.38142769703748014

Random Forest RMSE:  0.948364497933683

Random Forest R2:  0.798125049624682

MLP Classifier RMSE:  0.9617936671488875

MLP Classifier R2:  0.7923673323869171
