# Content-Based Filtering

In [39]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Location of the movies CSV, kept in one named constant so it can be changed
# in a single place. The value is specific to one machine; adjust it when
# running the notebook elsewhere.
MOVIES_CSV_PATH = "C:/Users/OM/Downloads/CSV Data/Movies Data.csv"

# Load dataset
movies = pd.read_csv(MOVIES_CSV_PATH)

# Fail fast, with a readable message, if a column used in this section is absent.
_required_columns = {'Movie Title', 'Genres', 'Description'}
_missing_columns = _required_columns - set(movies.columns)
if _missing_columns:
    raise KeyError(
        "Movies CSV is missing expected column(s): {}".format(sorted(_missing_columns))
    )

# Build one text field per movie from its genres and description. Missing
# values become empty strings so the concatenation never produces NaN.
movies['combined_features'] = movies['Genres'].fillna('') + " " + movies['Description'].fillna('')

# Convert the combined text to a TF-IDF matrix (one row per movie), ignoring
# English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['combined_features'])

# Pairwise similarity between every pair of movies. TfidfVectorizer L2-normalises
# each row by default, so the plain dot product computed by linear_kernel is the
# cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Recommendation function
def recommend(movie_title, cosine_sim=cosine_sim, top_n=5, catalog=movies):
    """Return the titles of the movies most similar to ``movie_title``.

    Parameters
    ----------
    movie_title : str
        Exact value to look up in the ``'Movie Title'`` column. If several
        rows carry the same title, the first one is used.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix; row/column ``i`` must correspond to row
        position ``i`` of ``catalog``. Defaults to the module-level matrix.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).
    catalog : pandas.DataFrame, optional
        Frame holding the ``'Movie Title'`` column. It is bound when the
        function is defined, so the function keeps using the frame the
        similarity matrix was built from even if the global name ``movies``
        is later rebound to another frame.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` titles, most similar first. The queried row itself is
        never included.

    Raises
    ------
    IndexError
        If ``movie_title`` does not occur in the ``'Movie Title'`` column.
    """
    titles = catalog['Movie Title']

    # Resolve the row *position* (not the index label): both the similarity
    # matrix and the final .iloc lookup are positional.
    hits = (titles == movie_title).to_numpy().nonzero()[0]
    if len(hits) == 0:
        raise IndexError(
            "movie title {!r} not found in the dataset".format(movie_title)
        )
    idx = int(hits[0])

    # Drop the queried row explicitly instead of assuming it sorts first; that
    # assumption fails for duplicate rows or an all-zero similarity row.
    # sorted() is stable, so ties keep their original row order.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    movie_indices = [pos for pos, _ in ranked[:max(top_n, 0)]]
    return titles.iloc[movie_indices]

# Demo: print the titles that recommend() returns for "Avatar 2".
print(recommend("Avatar 2"))

10          La La Land 11
27             Titanic 28
74     The Dark Knight 75
136        Fight Club 137
160          Parasite 161
Name: Movie Title, dtype: object


# Collaborative Filtering

In [None]:
import pandas as pd
from sklearn.decomposition import TruncatedSVD

# Read the ratings table (userId, movieId, rating) and the movies table
# (its 'movieId' and 'title' columns are used further down).
# NOTE: this rebinds the name `movies`, which the content-based section above
# also uses for a different CSV.
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

# User x movie matrix: one row per userId, one column per movieId.
# (user, movie) pairs without a rating are filled with 0.
ratings_matrix = (
    ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

# Reduce the matrix to 20 components with truncated SVD (fixed seed).
svd = TruncatedSVD(random_state=42, n_components=20)
matrix_svd = svd.fit_transform(ratings_matrix)

# Map the reduced representation back onto the original movie columns and
# label the result with the same user/movie axes as the input matrix.
approx_ratings = svd.inverse_transform(matrix_svd)
pred_ratings = pd.DataFrame(
    approx_ratings,
    index=ratings_matrix.index,
    columns=ratings_matrix.columns,
)

# Recommendation function
def recommend_movies(user_id, n=5, exclude_rated=False):
    """Return the titles of the ``n`` movies with the highest predicted rating.

    Parameters
    ----------
    user_id
        Row label of the user in ``pred_ratings``.
    n : int, optional
        Number of movies to pick (default 5).
    exclude_rated : bool, optional
        When true, movies whose entry for this user in ``ratings_matrix`` is
        non-zero are skipped. Missing ratings were filled with 0 when that
        matrix was built, so a non-zero entry means the user rated the movie.
        Defaults to False, which ranks every movie.

    Returns
    -------
    pandas.Series
        Titles taken from ``movies['title']``, ordered from highest to lowest
        predicted rating. Selected movie ids that do not occur in
        ``movies['movieId']`` are omitted.

    Raises
    ------
    KeyError
        If ``user_id`` is not a row of ``pred_ratings``.
    """
    if user_id not in pred_ratings.index:
        raise KeyError("user_id {!r} not found in the ratings matrix".format(user_id))

    user_ratings = pred_ratings.loc[user_id]
    if exclude_rated:
        already_rated = ratings_matrix.loc[user_id] != 0
        user_ratings = user_ratings[~already_rated]

    top_movies = user_ratings.sort_values(ascending=False).head(n).index

    # isin() alone returns rows in catalogue order, which discards the ranking;
    # reorder the selected rows by each movie's position in top_movies.
    rank_by_movie = {movie_id: rank for rank, movie_id in enumerate(top_movies)}
    selected = movies[movies['movieId'].isin(top_movies)]
    order = selected['movieId'].map(rank_by_movie).to_numpy().argsort(kind='stable')
    return selected['title'].iloc[order]

# Demo: print the titles that recommend_movies() returns for user 1, asking for 5 movies.
print(recommend_movies(user_id=1, n=5))
