In [None]:
# Mount Google Drive into the Colab runtime so files stored there are reachable.
from google.colab import drive
drive.mount('/content/drive')
import os
# Work from the project folder so the relative file names used by later cells
# (the CSV inputs and the saved model) resolve against it.
os.chdir('/content/drive/MyDrive/module2')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# ---- Read the two input tables from the working directory ----
user_reviews = pd.read_csv('user_reviews.csv')
movie_genres = pd.read_csv('movie_genres.csv')

# ---- Build the numeric matrices ----
# The first two columns of each table are skipped; everything from column
# index 2 onward is treated as the numeric payload.
user_reviews_cleaned = user_reviews.iloc[:, 2:].to_numpy()  # user x movie ratings
# Scale by the global maximum so the largest rating becomes 1.
user_reviews_cleaned = user_reviews_cleaned / user_reviews_cleaned.max()

movie_genres_cleaned = movie_genres.iloc[:, 2:].to_numpy()  # movie x genre matrix

# ---- Move to PyTorch (float32) ----
user_reviews_tensor = torch.tensor(user_reviews_cleaned, dtype=torch.float32)  # (num_users, num_movies)
movie_genres_tensor = torch.tensor(movie_genres_cleaned, dtype=torch.float32)  # (num_movies, num_genres)

# ---- Problem dimensions ----
num_users, num_movies = user_reviews_tensor.shape  # rows = users, columns = movies
num_genres = movie_genres_tensor.size(1)           # columns of the genre matrix

# ---- Observation mask ----
# A strictly positive entry counts as an observed rating (1.0); anything else
# is treated as missing (0.0).
mask = user_reviews_tensor.gt(0).float()

# Define the neural network with one hidden layer
class RecommendationNN(nn.Module):
    """Fully connected network: input -> hidden (ReLU) -> output.

    Args:
        input_size: Number of input features.
        hidden_size1: Number of units in the hidden layer.
        output_size: Number of output values produced per input row.
    """

    def __init__(self, input_size, hidden_size1,output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)  # Hidden layer
        self.fc2 = nn.Linear(hidden_size1, output_size)  # Output layer (predict ratings)

    def forward(self, x):
        """Map a batch of shape (batch, input_size) to (batch, output_size)."""
        # The ReLU is what makes this more than a single affine map: without a
        # non-linearity between them, fc2(fc1(x)) collapses to one linear layer.
        x = torch.relu(self.fc1(x))
        # No activation on the output: predictions are unbounded regression values.
        return self.fc2(x)

# Hyperparameters
hidden_size1 = 32  # Number of neurons in the hidden layer

learning_rate = 0.05
reg_param = 0.001  # L2 regularization strength (passed to Adam as weight_decay)
num_epochs = 5000
loss_threshold = 0.1  # Stop early once the summed squared error drops below this
log_interval = 50  # Print the loss every this many epochs
seed = 0  # Fixes weight initialization so a re-run reproduces the same training

# Seed before building the model: nn.Linear draws its initial weights from the
# global torch RNG.
torch.manual_seed(seed)

# Initialize the model.
# The network takes the whole movie-genre matrix flattened into one vector and
# emits one value per (user, movie) pair.
input_size = num_genres * num_movies
output_size = num_users * num_movies
model = RecommendationNN(input_size, hidden_size1, output_size)

# Define the loss function and optimizer
criterion = nn.MSELoss(reduction='sum')  # Sum (not mean) of squared errors
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg_param)  # L2 regularization via weight_decay

# Loop-invariant tensors, computed once instead of on every epoch.
flattened_input = movie_genres_tensor.reshape(1, -1)  # Shape: (1, num_genres * num_movies)
masked_targets = user_reviews_tensor * mask  # Shape: (num_users, num_movies)

# Training loop
for epoch in range(num_epochs):
    # Forward pass: predict ratings
    predictions = model(flattened_input)  # Shape: (1, num_users * num_movies)

    # Reshape predictions to match the shape of user_reviews_tensor
    predictions_reshaped = predictions.view(num_users, num_movies)  # Shape: (num_users, num_movies)

    # Sum of squared errors over observed ratings only: multiplying both sides
    # by the 0/1 mask zeroes the contribution of every unobserved entry.
    loss = criterion(predictions_reshaped * mask, masked_targets)

    # Backward pass and weight update
    optimizer.zero_grad()  # Clear the gradients
    loss.backward()  # Compute gradients
    optimizer.step()  # Update weights

    # Pull the scalar out once; it is used for both logging and the stop test.
    loss_value = loss.item()

    # Print the loss periodically
    if (epoch + 1) % log_interval == 0:
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss_value:.4f}")

    # Early stopping once the fit on observed ratings is tight enough
    if loss_value < loss_threshold:
        break

# Persist only the learned parameters (state_dict), not the whole module object.
model_path = "recommendation_model.pth"
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")

Epoch 50/5000, Loss: 578147.6250
Epoch 100/5000, Loss: 9016.0947
Epoch 150/5000, Loss: 1669.9355
Epoch 200/5000, Loss: 1382.2190
Epoch 250/5000, Loss: 1239.6235
Epoch 300/5000, Loss: 1098.4055
Epoch 350/5000, Loss: 961.9930
Epoch 400/5000, Loss: 833.1893
Epoch 450/5000, Loss: 713.8941
Epoch 500/5000, Loss: 605.2703
Epoch 550/5000, Loss: 507.8827
Epoch 600/5000, Loss: 421.8181
Epoch 650/5000, Loss: 346.7896
Epoch 700/5000, Loss: 282.2308
Epoch 750/5000, Loss: 227.3782
Epoch 800/5000, Loss: 181.3426
Epoch 850/5000, Loss: 143.1699
Epoch 900/5000, Loss: 111.8908
Epoch 950/5000, Loss: 86.5591
Epoch 1000/5000, Loss: 66.2810
Epoch 1050/5000, Loss: 50.2348
Epoch 1100/5000, Loss: 37.6823
Epoch 1150/5000, Loss: 27.9747
Epoch 1200/5000, Loss: 20.5527
Epoch 1250/5000, Loss: 14.9428
Epoch 1300/5000, Loss: 10.7507
Epoch 1350/5000, Loss: 7.6537
Epoch 1400/5000, Loss: 5.3919
Epoch 1450/5000, Loss: 3.7588
Epoch 1500/5000, Loss: 2.5932
Epoch 1550/5000, Loss: 1.7706
Epoch 1600/5000, Loss: 1.1967
Epoch 16

In [None]:
# Set the model to evaluation mode
model.eval()

# Movie titles: the rating columns start at column index 2, matching the slice
# used to build user_reviews_tensor, so header[i] names tensor column i.
header = user_reviews.columns.tolist()[2:]

# Column index 1 of the reviews table is used as each user's display name.
first_column = user_reviews.iloc[:, 1]
name = first_column.to_numpy()

# Predict ratings for all users and movies
with torch.no_grad():
    flattened_input = movie_genres_tensor.reshape(1, -1)  # Shape: (1, num_genres * num_movies)
    predictions = model(flattened_input)  # Shape: (1, num_users * num_movies)
    predictions_reshaped = predictions.view(num_users, num_movies)  # Shape: (num_users, num_movies)

top_k = 5  # Movies to recommend per user
users_to_show = min(5, num_users)  # Never index past the last user

# Recommendations for the first few users
recommendations = {}
for user_idx in range(users_to_show):
    user_ratings = user_reviews_tensor[user_idx]  # Actual ratings by the user
    predicted_ratings = predictions_reshaped[user_idx]  # Predicted ratings by the model

    # True for movies not rated by the user
    unrated_mask = user_ratings == 0

    # Exclude already-rated movies by pushing them to -inf. Zeroing them out is
    # not enough: predictions for unrated movies can be negative, in which case
    # a rated movie scored 0 would outrank them and get recommended again.
    predicted_ratings = predicted_ratings.masked_fill(~unrated_mask, float('-inf'))

    # Cap k at the number of unrated movies so no excluded entry can slip in.
    k = min(top_k, int(unrated_mask.sum()))

    # Indices of the k highest-scoring unrated movies, best first
    recommended_movie_indices = torch.topk(predicted_ratings, k).indices

    # Map indices to movie names
    recommended_movies = [header[idx] for idx in recommended_movie_indices.tolist()]
    recommendations[user_idx] = recommended_movies

# Print recommendations
for user_idx, movie_list in recommendations.items():
    print(f"{name[user_idx]}: {movie_list}")

Vincent: ['Sugar Hill', 'Dinner for Schmucks', 'Elmer Gantry', 'Metropolitan', 'Dumb and Dumber To']
Edgar: ['Rollerball', 'Pet Sematary', 'Pretty Woman', 'Blade II', 'Multiplicity']
Addilyn: ['Dumb & Dumber', 'Firewall', 'Flushed Away', 'Mi America', 'Jack Reacher']
Marlee: ['Middle of Nowhere', 'Ted 2', 'The Story of Us', 'Pootie Tang', 'The Heart of Me']
Javier: ['Just My Luck', 'Machete', 'House of 1000 Corpses', 'Dear John', 'Once in a Lifetime: The Extraordinary Story of the New York Cosmos']


Epoch 50/5000, Loss: 1168503.7500
Epoch 100/5000, Loss: 26529.5371
Epoch 150/5000, Loss: 1503.5192
Epoch 200/5000, Loss: 1339.9778
Epoch 250/5000, Loss: 1202.4587
Epoch 300/5000, Loss: 1065.8695
Epoch 350/5000, Loss: 933.8777
Epoch 400/5000, Loss: 809.2027
Epoch 450/5000, Loss: 693.6890
Epoch 500/5000, Loss: 588.4667
Epoch 550/5000, Loss: 494.0864
Epoch 600/5000, Loss: 410.6362
Epoch 650/5000, Loss: 337.8440
Epoch 700/5000, Loss: 275.1679
Epoch 750/5000, Loss: 221.8756
Epoch 800/5000, Loss: 177.1134
Epoch 850/5000, Loss: 139.9644
Epoch 900/5000, Loss: 109.4957
Epoch 950/5000, Loss: 84.7959
Epoch 1000/5000, Loss: 65.0031
Epoch 1050/5000, Loss: 49.3235
Epoch 1100/5000, Loss: 37.0438
Epoch 1150/5000, Loss: 27.5357
Epoch 1200/5000, Loss: 20.2570
Epoch 1250/5000, Loss: 14.7481
Epoch 1300/5000, Loss: 10.6258
Epoch 1350/5000, Loss: 7.5761
Epoch 1400/5000, Loss: 5.3454
Epoch 1450/5000, Loss: 3.7323
Epoch 1500/5000, Loss: 2.5790
Epoch 1550/5000, Loss: 1.7638
Epoch 1600/5000, Loss: 1.1941
Epoch 

KeyboardInterrupt: 

RuntimeError: The size of tensor a (600) must match the size of tensor b (2000) at non-singleton dimension 0