<a href="https://colab.research.google.com/github/Girisaivinayguttula/midterm-assignment/blob/main/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

# Step 1: Data Collection and Preparation

# Load the dataset
movies = pd.read_csv('movie.csv')
ratings = pd.read_csv('rating.csv')

# Display the first few rows of the datasets
print("Movies DataFrame:")
print(movies.head())
print("\nRatings DataFrame:")
print(ratings.head())

# Step 2: Data Preprocessing

# Create a user-item matrix
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Display the user-item matrix
print("\nUser-Item Matrix:")
print(user_item_matrix.head())

# Step 3: Building the Recommendation Model

# Convert user-item matrix to a sparse matrix
user_item_matrix_sparse = csr_matrix(user_item_matrix.values)

# Decompose the matrix
U, sigma, Vt = svds(user_item_matrix_sparse, k=50)

# Convert sigma to a diagonal matrix
sigma = np.diag(sigma)

# Reconstruct the matrix
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt)

# Convert to a DataFrame
preds_df = pd.DataFrame(all_user_predicted_ratings, columns=user_item_matrix.columns)

# Display the predictions for the first few users
print("\nPredicted Ratings DataFrame:")
print(preds_df.head())

# Step 4: Making Recommendations

def recommend_movies(user_id, preds_df, movies_df, original_ratings_df, num_recommendations=5):
    # Get and sort the user's predictions
    user_row_number = user_id - 1  # User ID starts at 1
    sorted_user_predictions = preds_df.iloc[user_row_number].sort_values(ascending=False)

    # Get the user's data and merge in the movie information
    user_data = original_ratings_df[original_ratings_df.userId == user_id]
    user_full = user_data.merge(movies_df, how='left', left_on='movieId', right_on='movieId').sort_values(['rating'], ascending=False)

    print(f'\nUser {user_id} has already rated {user_full.shape[0]} movies.')
    print(f'Recommending the highest {num_recommendations} predicted ratings movies not already rated.')

    # Recommend the highest predicted rating movies that the user hasn't seen yet
    recommendations = (movies_df[~movies_df['movieId'].isin(user_full['movieId'])].
                       merge(pd.DataFrame(sorted_user_predictions).reset_index(), how='left',
                             left_on='movieId',
                             right_on='movieId').
                       rename(columns={user_row_number: 'Predictions'}).
                       sort_values('Predictions', ascending=False).
                       iloc[:num_recommendations, :-1])

    return user_full, recommendations

# Get recommendations for a specific user
user_id = 1
already_rated, predictions = recommend_movies(user_id, preds_df, movies, ratings, 10)

# Display the already rated movies and the predictions
print("\nAlready Rated Movies:")
print(already_rated.head(10))
print("\nRecommended Movies:")
print(predictions)


Movies DataFrame:
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  

Ratings DataFrame:
   userId  movieId  rating            timestamp
0       1      2.0     3.5  2005-04-02 23:53:47
1       1     29.0     3.5  2005-04-02 23:31:16
2       1     32.0     3.5  2005-04-02 23:33:39
3       1     47.0     3.5  2005-04-02 23:32:07
4       1     50.0     3.5  2005-04-02 23:29:40

User-Item Matrix:
movieId  NaN       1.0       2.0       3.0       4.0       5