In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load data
user_reviews = pd.read_csv('user_reviews.csv')
movie_genres = pd.read_csv('movie_genres.csv')
header = user_reviews.columns.tolist()[2:]
first_column = user_reviews.iloc[:, 1]  
name = first_column.to_numpy()

user_reviews_cleaned = user_reviews.iloc[:, 2:].values  # User-movie ratings
user_reviews_cleaned = user_reviews_cleaned / np.max(user_reviews_cleaned)  # Normalize ratings
movie_genres_cleaned = movie_genres.iloc[:, 2:].values  # Movie-genre matrix


def item_based_recommendation(user_reviews_tensor, num_recommendations=5):
    item_similarity = cosine_similarity(user_reviews_tensor.T)  
    recommendations = {}
    for user_idx in range(5):  
        user_ratings = user_reviews_tensor[user_idx]
        
        predicted_ratings = np.zeros_like(user_ratings)
        for movie_idx in range(len(user_ratings)):
            if user_ratings[movie_idx] > 0:
                continue
            
            weighted_sum = 0  
            sim_sum = 0  
            for other_movie_idx in range(len(user_ratings)):
                if user_ratings[other_movie_idx] > 0: 
                    weighted_sum += item_similarity[movie_idx, other_movie_idx] * user_ratings[other_movie_idx]
                    sim_sum += item_similarity[movie_idx, other_movie_idx]
            
            if sim_sum > 0:
                predicted_ratings[movie_idx] = weighted_sum / sim_sum
        
        recommended_movie_indices = np.argsort(-predicted_ratings)[:num_recommendations]
        recommendations[name[user_idx]] = [header[idx] for idx in recommended_movie_indices]
    
    return recommendations

item_based_rec = item_based_recommendation(user_reviews_cleaned)


print("Item-Based Recommendations:")
for user, movies in item_based_rec.items():
    print(f"{user}: {movies}")

Item-Based Recommendations:
Vincent: ['The Alamo', 'The Broadway Melody', 'Vessel', 'Indiana Jones and the Temple of Doom', 'Ip Man 3']
Edgar: ['The Game', 'Metropolitan', 'A Passage to India', 'The Nativity Story', 'The Final Destination']
Addilyn: ['Molière', 'Enemy of the State', 'Flash of Genius', 'Top Spin', 'Shooting Fish']
Marlee: ['Superman', 'The Ninth Gate', 'Pitch Black', "Valley of the Heart's Delight", 'Intolerable Cruelty']
Javier: ["Pirates of the Caribbean: At World's End", 'A Dangerous Method', 'The Great Escape', "The World's Fastest Indian", 'Falcon Rising']


In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load data
user_reviews = pd.read_csv('user_reviews.csv')
movie_genres = pd.read_csv('movie_genres.csv')
header = pd.read_csv('user_reviews.csv', nrows=0).columns.tolist()[2:]
first_column = user_reviews.iloc[:, 1]  
name = first_column.to_numpy() 

user_reviews_tensor = user_reviews.iloc[:, 2:].values  
movie_genres_cleaned = movie_genres.iloc[:, 2:].values  

# User-Based Collaborative Filtering

def user_based_recommendation(user_reviews_tensor, num_recommendations=5):
    # Compute cosine similarity between users
    user_similarity = cosine_similarity(user_reviews_tensor)  # Shape: (num_users, num_users)
    
    # Generate recommendations for the first 5 users
    recommendations = {}
    for user_idx in range(5):  # First 5 users
        # Find similar users (excluding the user themselves)
        similar_users = np.argsort(-user_similarity[user_idx])  # Sort in descending order of similarity
        similar_users = similar_users[similar_users != user_idx]  # Exclude self
        
        # Aggregate ratings from similar users
        user_ratings = user_reviews_tensor[user_idx]
        predicted_ratings = np.zeros_like(user_ratings)
        similarity_sums = np.zeros_like(user_ratings)  # To store the sum of similarities for normalization
        
        for similar_user in similar_users:
            predicted_ratings += user_similarity[user_idx, similar_user] * user_reviews_tensor[similar_user]
            similarity_sums += user_similarity[user_idx, similar_user] * (user_reviews_tensor[similar_user] != 0)
        
        predicted_ratings = np.divide(
            predicted_ratings, similarity_sums, where=similarity_sums != 0
        )  # Avoid division by zero
        
        # Mask already rated movies
        unrated_mask = user_ratings == 0
        predicted_ratings = predicted_ratings * unrated_mask  # Only consider unrated movies
        
        # Recommend top N movies
        recommended_movie_indices = np.argsort(-predicted_ratings)[:num_recommendations]
        recommendations[name[user_idx]] = [header[idx] for idx in recommended_movie_indices]
    
    return recommendations

user_based_recommendations = user_based_recommendation(user_reviews_tensor)

# Print recommendations
print("User-Based Recommendations:")
for user, movies in user_based_recommendations.items():
    print(f"{user}: {movies}")

User-Based Recommendations:
Vincent: ['Jonah: A VeggieTales Movie', 'The Great Escape', 'Addicted', 'The Last Five Years', "It's Complicated"]
Edgar: ['Mad City', 'Get Carter', 'Maid in Manhattan', 'The Contender', 'Final Fantasy: The Spirits Within']
Addilyn: ['Sex and the City 2', 'The Tempest', 'Amadeus', 'Space: Above and Beyond', 'Highlander: Endgame']
Marlee: ['To Kill a Mockingbird', 'Boiler Room', 'Don Juan DeMarco', 'The Grandmaster', 'Big']
Javier: ['Bad Company', "The Astronaut's Wife", 'The Phantom', 'Home', 'Superman']


In [11]:
#item based:baseline
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load data
user_reviews = pd.read_csv('user_reviews.csv')
movie_genres = pd.read_csv('movie_genres.csv')
header = user_reviews.columns.tolist()[2:]
first_column = user_reviews.iloc[:, 1]  
name = first_column.to_numpy()

user_reviews_cleaned = user_reviews.iloc[:, 2:].values  # User-movie ratings
user_reviews_cleaned = user_reviews_cleaned / np.max(user_reviews_cleaned)  # Normalize ratings
movie_genres_cleaned = movie_genres.iloc[:, 2:].values  # Movie-genre matrix

def item_based_recommendation(user_reviews_tensor, num_recommendations=5):
    item_similarity = cosine_similarity(user_reviews_tensor.T)  
    recommendations = {}
    
    # Calculate user averages, ignoring zeros
    user_means = np.array([np.mean(user_reviews_tensor[user_idx][user_reviews_tensor[user_idx] > 0]) 
                           for user_idx in range(user_reviews_tensor.shape[0])])
    
    for user_idx in range(5):  
        user_ratings = user_reviews_tensor[user_idx]
        
        predicted_ratings = np.zeros_like(user_ratings)
        for movie_idx in range(len(user_ratings)):
            if user_ratings[movie_idx] > 0:
                continue
            
            weighted_sum = 0  
            sim_sum = 0  
            for other_movie_idx in range(len(user_ratings)):
                if user_ratings[other_movie_idx] > 0: 
                    # Adjust ratings by subtracting the user's average
                    adjusted_rating = user_ratings[other_movie_idx] - user_means[user_idx]
                    weighted_sum += item_similarity[movie_idx, other_movie_idx] * adjusted_rating
                    sim_sum += item_similarity[movie_idx, other_movie_idx]
            
            # Apply the baseline adjustment
            if sim_sum > 0:
                predicted_ratings[movie_idx] = weighted_sum / sim_sum + user_means[user_idx]
                
        unrated_mask = user_ratings == 0
        predicted_ratings = predicted_ratings * unrated_mask  # Only consider unrated movies
        
        recommended_movie_indices = np.argsort(-predicted_ratings)[:num_recommendations]
        recommendations[name[user_idx]] = [header[idx] for idx in recommended_movie_indices]
    
    return recommendations

item_based_rec = item_based_recommendation(user_reviews_cleaned)

print("Item-Based Recommendations:")
for user, movies in item_based_rec.items():
    print(f"{user}: {movies}")

Item-Based Recommendations:
Vincent: ['The Alamo', 'The Broadway Melody', 'Vessel', 'Indiana Jones and the Temple of Doom', 'Ip Man 3']
Edgar: ['The Game', 'Metropolitan', 'A Passage to India', 'The Nativity Story', 'The Final Destination']
Addilyn: ['Molière', 'Enemy of the State', 'Flash of Genius', 'Top Spin', 'Shooting Fish']
Marlee: ['Superman', 'The Ninth Gate', 'Pitch Black', "Valley of the Heart's Delight", 'Intolerable Cruelty']
Javier: ["Pirates of the Caribbean: At World's End", 'Rebecca', 'The House of the Devil', 'The Juror', 'Boom Town']


In [10]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load data
user_reviews = pd.read_csv('user_reviews.csv')
movie_genres = pd.read_csv('movie_genres.csv')
header = pd.read_csv('user_reviews.csv', nrows=0).columns.tolist()[2:]
first_column = user_reviews.iloc[:, 1]  
name = first_column.to_numpy() 

user_reviews_tensor = user_reviews.iloc[:, 2:].values  
movie_genres_cleaned = movie_genres.iloc[:, 2:].values  

# User-Based Collaborative Filtering

def user_based_recommendation(user_reviews_tensor, num_recommendations=5):
    # Compute cosine similarity between users
    user_similarity = cosine_similarity(user_reviews_tensor)  # Shape: (num_users, num_users)
    
    # Calculate user averages, ignoring zeros
    user_means = np.array([np.mean(user_reviews_tensor[user_idx][user_reviews_tensor[user_idx] > 0]) 
                           for user_idx in range(user_reviews_tensor.shape[0])])
    
    # Generate recommendations for the first 5 users
    recommendations = {}
    for user_idx in range(5):  # First 5 users
        # Find similar users (excluding the user themselves)
        similar_users = np.argsort(-user_similarity[user_idx])  # Sort in descending order of similarity
        similar_users = similar_users[similar_users != user_idx]  # Exclude self
        
        # Aggregate ratings from similar users
        user_ratings = user_reviews_tensor[user_idx]
        predicted_ratings = np.zeros_like(user_ratings)
        similarity_sums = np.zeros_like(user_ratings)  # To store the sum of similarities for normalization
        
        for similar_user in similar_users:
            predicted_ratings += user_similarity[user_idx, similar_user] * user_reviews_tensor[similar_user]
            similarity_sums += user_similarity[user_idx, similar_user] * (user_reviews_tensor[similar_user] != 0)
        
        predicted_ratings = np.divide(
            predicted_ratings, similarity_sums, where=similarity_sums != 0
        )  # Avoid division by zero
        
        # Apply baseline adjustment
        predicted_ratings += user_means[user_idx]
        
        # Mask already rated movies
        unrated_mask = user_ratings == 0
        predicted_ratings = predicted_ratings * unrated_mask  # Only consider unrated movies
        
        # Recommend top N movies
        recommended_movie_indices = np.argsort(-predicted_ratings)[:num_recommendations]
        recommendations[name[user_idx]] = [header[idx] for idx in recommended_movie_indices]
    
    return recommendations

user_based_recommendations = user_based_recommendation(user_reviews_tensor)

# Print recommendations
print("User-Based Recommendations:")
for user, movies in user_based_recommendations.items():
    print(f"{user}: {movies}")

User-Based Recommendations:
Vincent: ['AVP: Alien vs. Predator', 'Highlander: Endgame', 'The Phantom', 'The Informers', 'Neighbors']
Edgar: ['Edtv', 'Torque', 'Clockwatchers', 'Flyboys', 'Wicked Blood']
Addilyn: ['Space: Above and Beyond', 'Amadeus', 'Sex and the City 2', 'The Tempest', 'Highlander: Endgame']
Marlee: ['The Death and Life of Bobby Z', 'The Ballad of Cable Hogue', 'The Puffy Chair', 'Martha Marcy May Marlene', 'Beloved']
Javier: ['Bad Company', 'The Phantom', 'The Tempest', "The Astronaut's Wife", 'Thirteen']
