<a href="https://colab.research.google.com/github/Haranth/Machine-learning/blob/main/Movie_Recomendatoin_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from scipy.sparse.linalg import svds

# Load MovieLens dataset
ratings = pd.read_csv("/content/ratings.csv")
movies = pd.read_csv("/content/movies.csv")

# Display the first few rows of the datasets
print(ratings.head())
print(movies.head())


   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


In [3]:
# Create a user-item ratings matrix
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Convert user-item matrix into a numpy array for SVD
user_item_matrix_np = user_item_matrix.to_numpy()

# Mean normalization of ratings
user_ratings_mean = np.mean(user_item_matrix_np, axis=1)
user_item_matrix_demeaned = user_item_matrix_np - user_ratings_mean.reshape(-1, 1)

# Perform Singular Value Decomposition
U, sigma, Vt = svds(user_item_matrix_demeaned, k=50)

# Convert sigma into a diagonal matrix
sigma = np.diag(sigma)

# Reconstruct the user-item matrix using SVD components
predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)
predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=user_item_matrix.columns)

print(predicted_ratings_df.head())


movieId    1         2         3         4         5         6         7       \
0        2.167328  0.402751  0.840184 -0.076281 -0.551337  2.504091 -0.890114   
1        0.211459  0.006658  0.033455  0.017419  0.183430 -0.062473  0.083037   
2        0.003588  0.030518  0.046393  0.008176 -0.006247  0.107328 -0.012416   
3        2.051549 -0.387104 -0.252199  0.087562  0.130465  0.270210  0.477835   
4        1.344738  0.778511  0.065749  0.111744  0.273144  0.584426  0.254930   

movieId    8         9         10      ...    193565    193567    193571  \
0       -0.026443  0.196974  1.593259  ... -0.023453 -0.019967 -0.026939   
1        0.024158  0.049330 -0.152530  ...  0.019498  0.016777  0.022219   
2        0.003779  0.007297 -0.059362  ...  0.005909  0.006209  0.005610   
3        0.040313  0.025858 -0.017365  ...  0.004836  0.004172  0.005500   
4        0.128788 -0.085541  1.023455  ... -0.008042 -0.007419 -0.008664   

movieId    193573    193579    193581    193583    19358

In [4]:
# Preprocessing: Convert genres into a bag-of-words representation
movies['genres'] = movies['genres'].str.split('|')
movies['genres'] = movies['genres'].fillna("").astype('str')

# Create a content matrix (genre-based)
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(token_pattern=r'[a-zA-Z0-9\-]+')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Function to get recommendations based on movie title
def content_based_recommendations(title, cosine_sim=cosine_sim):
    idx = movies[movies['title'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    movie_indices = [i[0] for i in sim_scores]
    return movies['title'].iloc[movie_indices]

# Example recommendation based on a movie title
print(content_based_recommendations('Toy Story (1995)'))


1706                                          Antz (1998)
2355                                   Toy Story 2 (1999)
2809       Adventures of Rocky and Bullwinkle, The (2000)
3000                     Emperor's New Groove, The (2000)
3568                                Monsters, Inc. (2001)
6194                                     Wild, The (2006)
6486                               Shrek the Third (2007)
6948                       Tale of Despereaux, The (2008)
7760    Asterix and the Vikings (Astérix et les Viking...
8219                                         Turbo (2013)
Name: title, dtype: object


In [5]:
def hybrid_recommendations(user_id, movie_title, predicted_ratings_df=predicted_ratings_df, cosine_sim=cosine_sim):
    # Get collaborative filtering recommendations
    user_ratings = predicted_ratings_df.loc[user_id].sort_values(ascending=False)
    collaborative_recommendations = user_ratings.index[:10]

    # Get content-based filtering recommendations
    content_recommendations = content_based_recommendations(movie_title, cosine_sim)

    # Combine recommendations
    hybrid_recs = set(collaborative_recommendations).union(set(content_recommendations))
    return list(hybrid_recs)

# Example hybrid recommendation for a user and a movie title
print(hybrid_recommendations(1, 'Toy Story (1995)'))


[48516, 91529, 'Turbo (2013)', 2571, 'Asterix and the Vikings (Astérix et les Vikings) (2006)', 2959, 'Tale of Despereaux, The (2008)', 'Shrek the Third (2007)', 'Monsters, Inc. (2001)', 74458, 79132, 'Toy Story 2 (1999)', 'Wild, The (2006)', 'Adventures of Rocky and Bullwinkle, The (2000)', 99114, 'Antz (1998)', "Emperor's New Groove, The (2000)", 68157, 318, 58559]
