In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load the rating data into a pandas DataFrame
ratings = pd.read_csv('/kaggle/input/breakthrough-tech-ai-studio-challenge/train.csv')
ratings[['userId', 'movieId']] = ratings['userId_movieId'].str.split('_', expand=True)

# Create a Surprise Reader object with rating scale of 1 to 5
reader = Reader(rating_scale=(1, 5))

# Load the rating data into the Surprise Dataset object
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Split the data into training and testing sets
trainset, testset = train_test_split(data, test_size=0.2)

# Define the SVD model with modified hyperparameters
model = SVD(n_factors=160, reg_all=0.2, n_epochs=40, lr_all=0.01)

# Fit the model on the training data
model.fit(trainset)

# Use the fitted model to predict the ratings of the testing set
predictions = model.test(testset)

# Compute RMSE and MAE on the testing set
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)

print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")

# Load the test data into a pandas DataFrame
test = pd.read_csv('/kaggle/input/breakthrough-tech-ai-studio-challenge/test.csv')
test[['userId', 'movieId']] = test['userId_movieId'].str.split('_', expand=True)

# Create a list of tuples where each tuple contains the userId and movieId
test_data = list(zip(test['userId'], test['movieId']))

# Create a Surprise Reader object with rating scale of 0.5 to 5
reader = Reader(rating_scale=(0.5, 5.0))

# Load the rating data into the Surprise Dataset object and build full trainset
train_data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset = train_data.build_full_trainset()

# Train the SVD model on the entire dataset with modified hyperparameters
model = SVD(n_factors=150, reg_all=0.1, n_epochs=30, lr_all=0.01)
model.fit(trainset)

# Use the trained model to make predictions on the test data
predictions = []
for userId, movieId in test_data:
    prediction = model.predict(userId, movieId)
    predictions.append((f'{userId}_{movieId}', prediction.est))
    
# Write the predictions to an output file
with open('output.csv', 'w') as f:
    f.write('userId_movieId,rating\n')
    for userId_movieId, rating in predictions:
        f.write(f'{userId_movieId},{rating:.4f}\n')

RMSE: 0.3605
MAE:  0.2925
RMSE: 0.3605
MAE: 0.2925
