In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

# Read the ratings table from disk (path is relative to the notebook's working directory)
df = pd.read_csv('../../data/kaggle_20m/movie_genres_ratings.csv')

# Target: the 'rating' column.
# Features: every remaining column once the identifier columns
# ('movieId', 'title') and the target itself are removed.
y = df['rating']
X = df.drop(columns=['movieId', 'title', 'rating'])

# Hold out 20% of the rows as a test set; the fixed random_state keeps
# the partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [2]:
# SVR settings, kept as named values so they are easy to tweak and re-run
kernel = 'linear'
C = 1.0
epsilon = 0.1

# Build the regressor and train it on the training split
# (fit() returns the estimator itself, so the chained form binds the fitted model)
svr_model = SVR(kernel=kernel, C=C, epsilon=epsilon).fit(X_train, y_train)

# Score the held-out rows
svr_predictions = svr_model.predict(X_test)

# Error metrics on the test split: MAE directly, MSE directly, RMSE as sqrt(MSE)
svr_mae = mean_absolute_error(y_test, svr_predictions)
svr_mse = mean_squared_error(y_test, svr_predictions)
svr_rmse = np.sqrt(svr_mse)

# Report each metric rounded to four decimal places
print(f"SVR MSE on Test Set: {svr_mse:.4f}")
print(f"SVR RMSE on Test Set: {svr_rmse:.4f}")
print(f"SVR MAE on Test Set: {svr_mae:.4f}")

SVR MSE on Test Set: 0.3933
SVR RMSE on Test Set: 0.6271
SVR MAE on Test Set: 0.4565
