# In [2]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

# Read the ratings table from disk; the rest of the script relies on its
# 'userId', 'movieId' and 'rating' columns.
ratings = pd.read_csv('ratings.csv')

# Hold out 20% of the rows for evaluation. The fixed seed keeps the
# train/test partition the same from one run to the next.
train, test = train_test_split(
    ratings,
    test_size=0.2,
    random_state=42,
)

# Score a table of true vs. predicted ratings with RMSE and MAE
def evaluate(predictions):
    """Return ``(rmse, mae)`` for a table of observed and predicted ratings.

    Args:
        predictions: Table indexable by column name, holding the observed
            values under ``'rating'`` and the model output under
            ``'prediction'``.

    Returns:
        A 2-tuple ``(rmse, mae)``: the square root of the mean squared error
        and the mean absolute error between the two columns.
    """
    observed = predictions['rating']
    predicted = predictions['prediction']

    squared_error = mean_squared_error(observed, predicted)
    absolute_error = mean_absolute_error(observed, predicted)

    return np.sqrt(squared_error), absolute_error

# K-nearest-neighbours baseline (labelled "KNN CF" in the results table).
# The regressor is fitted on the (userId, movieId) pair of every training
# rating and predicts from the 10 nearest training pairs.
# NOTE(review): the two ID columns are used directly as numeric features, so
# "nearest" means close in raw ID value - confirm this is the intended notion
# of collaborative filtering.
knn_cf = KNeighborsRegressor(n_neighbors=10).fit(
    train[['userId', 'movieId']],
    train['rating'],
)
knn_cf_preds = knn_cf.predict(test[['userId', 'movieId']])

# Put each held-out rating next to its prediction, then score the pairs.
knn_cf_predictions = test[['rating']].assign(prediction=knn_cf_preds)
knn_cf_rmse, knn_cf_mae = evaluate(knn_cf_predictions)

# Support-vector regression with the estimator's default settings, fitted on
# the (userId, movieId) pair of every training rating.
# NOTE(review): the ID columns are passed in unscaled; scikit-learn's SVM
# guide recommends scaling inputs - confirm whether that matters here.
svm_regressor = SVR().fit(
    train[['userId', 'movieId']],
    train['rating'],
)
svm_preds = svm_regressor.predict(test[['userId', 'movieId']])

# Line the predictions up with the held-out ratings and score them.
svm_predictions = test[['rating']].assign(prediction=svm_preds)
svm_rmse, svm_mae = evaluate(svm_predictions)

# Decision Tree
# Regression tree fitted on the (userId, movieId) pair of every training
# rating. random_state is pinned because scikit-learn permutes the candidate
# features at each split and picks at random between equally good splits, so
# an unseeded tree can report a different RMSE/MAE on every run even though
# the train/test split itself is seeded.
dt_regressor = DecisionTreeRegressor(random_state=42)
dt_regressor.fit(train[['userId', 'movieId']], train['rating'])
dt_preds = dt_regressor.predict(test[['userId', 'movieId']])

# Pair the held-out ratings with the tree's predictions and score them.
dt_predictions = pd.DataFrame({'rating': test['rating'], 'prediction': dt_preds})
dt_rmse, dt_mae = evaluate(dt_predictions)

# Principal Component Analysis (PCA) followed by a decision tree
# The "PCA" row of the results is a regression tree trained on PCA-transformed
# (userId, movieId) features, not PCA on its own. The projection is learned on
# the training rows only and then applied unchanged to the test rows.
# NOTE(review): PCA() is left at its default n_components, which per the
# scikit-learn docs keeps every component - with two input columns this
# re-expresses the features rather than reducing them. Confirm that is intended.
pca = PCA()
pca_train = pca.fit_transform(train[['userId', 'movieId']])
pca_test = pca.transform(test[['userId', 'movieId']])

# Seed the tree: an unseeded DecisionTreeRegressor breaks ties between equally
# good splits at random, which would make this row vary between runs and blur
# the comparison against the plain decision tree.
pca_regressor = DecisionTreeRegressor(random_state=42)
pca_regressor.fit(pca_train, train['rating'])
pca_preds = pca_regressor.predict(pca_test)

# Pair the held-out ratings with the predictions and score them.
pca_predictions = pd.DataFrame({'rating': test['rating'], 'prediction': pca_preds})
pca_rmse, pca_mae = evaluate(pca_predictions)

# Side-by-side summary: one row per model with its test-set RMSE and MAE.
results = pd.DataFrame(
    [
        ('KNN CF', knn_cf_rmse, knn_cf_mae),
        ('SVM', svm_rmse, svm_mae),
        ('Decision Tree', dt_rmse, dt_mae),
        ('PCA', pca_rmse, pca_mae),
    ],
    columns=['Model', 'RMSE', 'MAE'],
)

print(results)


# Recorded output of print(results) from an earlier run:
#            Model      RMSE       MAE
# 0         KNN CF  1.042631  0.821140
# 1            SVM  1.065239  0.832667
# 2  Decision Tree  1.282515  0.947813
# 3            PCA  1.283235  0.949871
