**COLLABORATIVE FILTERING**

In [1]:
import pandas as pd
movies = pd.read_csv('/content/movie.csv')
ratings = pd.read_csv('/content/rating.csv')
print(movies.head())
print(ratings.head())


   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
   userId  movieId  rating            timestamp
0       1        2     3.5  2005-04-02 23:53:47
1       1       29     3.5  2005-04-02 23:31:16
2       1       32     3.5  2005-04-02 23:33:39
3       1       47     3.5  2005-04-02 23:32:07
4       1       50     3.5  2005-04-02 23:29:40


In [2]:
data = pd.merge(ratings, movies, on='movieId')
user_item_matrix = data.pivot_table(index='userId', columns='title', values='rating')
user_item_matrix.fillna(0, inplace=True)
print(user_item_matrix.head())


title   'Round Midnight (1986)  'Til There Was You (1997)  'burbs, The (1989)  \
userId                                                                          
1                          0.0                        0.0                 0.0   
2                          0.0                        0.0                 0.0   
3                          0.0                        0.0                 0.0   
4                          0.0                        0.0                 0.0   
5                          0.0                        0.0                 0.0   

title   'night Mother (1986)  (500) Days of Summer (2009)  \
userId                                                      
1                        0.0                          0.0   
2                        0.0                          0.0   
3                        0.0                          0.0   
4                        0.0                          0.0   
5                        0.0                          0.0   

titl

In [3]:
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=20)
matrix_svd = svd.fit_transform(user_item_matrix)
print(matrix_svd.shape)

(459, 20)


In [5]:
import numpy as np

def recommend_movies(user_id, user_item_matrix, matrix_svd, num_recommendations=5):
    user_row = user_item_matrix.loc[user_id].values.reshape(1, -1)
    user_svd = svd.transform(user_row)

    similarity = np.dot(matrix_svd, user_svd.T).flatten()
    movie_indices = similarity.argsort()[-num_recommendations:][::-1]

    recommended_movies = user_item_matrix.columns[movie_indices]
    return recommended_movies

user_id = 5
recommendations = recommend_movies(user_id, user_item_matrix, matrix_svd, num_recommendations=5)
print(f"Recommended movies for user {user_id}:")
print(recommendations)


Recommended movies for user 5:
Index(['Addicted to Love (1997)', '50 First Dates (2004)',
       'American Gigolo (1980)', 'Animal Crackers (1930)',
       '22 Jump Street (2014)'],
      dtype='object', name='title')




**CONTENT BASED FILTERING**

In [7]:
import pandas as pd
movies = pd.read_csv('/content/Tamil_movies_dataset.csv')
print(movies.head())

          MovieName   Genre  Rating              Director  \
0        Mouna Guru  Action     7.7          Santha Kumar   
1       7 Aum Arivu  Action     6.2       A.R. Murugadoss   
2  Vaagai Sooda Vaa  Comedy     8.0           A. Sarkunam   
3          Mankatha  Action     7.6         Venkat Prabhu   
4  Kanchana: Muni 2  Comedy     6.5  Lawrence Raghavendra   

                  Actor  PeopleVote  Year  Hero_Rating  movie_rating  \
0             Arulnithi         746  2011            8             8   
1                Suriya        9479  2011            9             9   
2                 Vimal       14522  2011            8             7   
3           Ajith Kumar       12276  2011            6             8   
4  Lawrence Raghavendra        1044  2011            8             9   

   content_rating  
0        7.900000  
1        8.066667  
2        7.666667  
3        7.200000  
4        7.833333  


In [8]:
movies.fillna('', inplace=True)
movies['combined_features'] = movies['Genre'] + ' ' + movies['Director'] + ' ' + movies['Actor']
print(movies['combined_features'].head())

0                       Action Santha Kumar Arulnithi
1                       Action A.R. Murugadoss Suriya
2                            Comedy A. Sarkunam Vimal
3                    Action Venkat Prabhu Ajith Kumar
4    Comedy Lawrence Raghavendra Lawrence Raghavendra
Name: combined_features, dtype: object


In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(movies['combined_features'])
print(tfidf_matrix.shape)

(329, 513)


In [10]:
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
print(cosine_sim.shape)

(329, 329)


In [12]:
def get_recommendations(title, movies_df, cosine_sim, num_recommendations=10):
    title = title.strip().lower()

    if title not in movies_df['MovieName'].str.lower().values:
        raise ValueError(f"Movie '{title}' not found in the dataset.")

    idx = movies_df[movies_df['MovieName'].str.lower() == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    movie_indices = [i[0] for i in sim_scores[1:num_recommendations+1]]
    return movies_df['MovieName'].iloc[movie_indices]

recommendations = get_recommendations('7 Aum Arivu', movies, cosine_sim)
print(recommendations)

12     Thuppakki
47       Kaththi
223       Sarkar
272     Kaappaan
96            24
35      Singam 2
110     Singam 3
297          NGK
145       Spyder
65     Pasanga 2
Name: MovieName, dtype: object
