# Model Training

In [8]:
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split, cross_validate
from surprise import accuracy
import pandas as pd
import pickle

In [9]:
# Define the data files
interaction_data_file = 'Data/interaction_data_prepared.csv'

# Import data file into dataframe
interaction_dataframe = pd.read_csv(interaction_data_file)

In [10]:
# Prepare data for Surprise using the weighted interactions
reader = Reader(rating_scale=(interaction_dataframe['SCORE'].min(), interaction_dataframe['SCORE'].max()))
data = Dataset.load_from_df(interaction_dataframe[['USER_ID', 'BRAND_ID', 'SCORE']], reader)

# Split the data into train and test sets
trainset, testset = train_test_split(data, test_size=0.25)

# Define the best KNN model parameters
best_knn_params = {
    'k': 10,
    'sim_options': {
        'name': 'msd',
        'user_based': False
    }
}

# Initialize the KNN model with the best parameters
best_knn = KNNBasic(k=best_knn_params['k'], sim_options=best_knn_params['sim_options'])

# Train the model on the training set
best_knn.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x160ee48d0>

In [11]:
# Evaluate the model using cross-validation
cv_results = cross_validate(best_knn, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

# Print the cross-validation results
print("\nCross-validation results:")
print(f"Mean RMSE: {cv_results['test_rmse'].mean()}")
print(f"Mean MAE: {cv_results['test_mae'].mean()}")

# Test the model on the test set
test_predictions = best_knn.test(testset)

# Compute and print test set RMSE and MAE
test_rmse = accuracy.rmse(test_predictions, verbose=True)
test_mae = accuracy.mae(test_predictions, verbose=True)

# Print final evaluation metrics on the test set
print(f"\nTest set results:")
print(f"Test RMSE: {test_rmse}")
print(f"Test MAE: {test_mae}")

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.1723  0.1719  0.1705  0.1695  0.1711  0.1711  0.0010  
MAE (testset)     0.1306  0.1304  0.1303  0.1289  0.1301  0.1301  0.0006  
Fit time          0.07    0.08    0.08    0.09    0.08    0.08    0.00    
Test time         0.82    0.73    0.70    0.69    0.75    0.74    0.04    

Cross-validation results:
Mean RMSE: 0.17105387444020723
Mean MAE: 0.13009017417429872
RMSE: 0.1412
MAE:  0.1039

Test set results:
Test RMSE: 0.1412021854722519
Test MAE: 0.1039464004264746


In [12]:
# Save the trained model to a file
with open('Data/knn_brand_recommendation_model.pkl', 'wb') as model_file:
    pickle.dump(best_knn, model_file)