# 🎬 Movie Recommendation System
This notebook demonstrates a **Movie Recommendation System** using both **Collaborative Filtering** and **Content-Based Filtering** techniques.

## 🔍 Use Cases
- Personalized recommendations based on user preferences
- Discovering similar movies based on genre, actors, or ratings
- Improving user engagement on movie streaming platforms


In [13]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')


## 📂 Load MovieLens Dataset

In [23]:
movies = pd.read_csv("movies.dat", sep="::", engine="python", encoding="ISO-8859-1",
                     names=["MovieID", "Title", "Genres"])

ratings = pd.read_csv("ratings.dat", sep="::", engine="python", encoding="ISO-8859-1",
                      names=["UserID", "MovieID", "Rating", "Timestamp"])

users = pd.read_csv("users.dat", sep="::", engine="python", encoding="ISO-8859-1",
                    names=["UserID", "Gender", "Age", "Occupation", "Zip-code"])



In [25]:
movies.to_csv("movies.csv", index=False)
ratings.to_csv("ratings.csv", index=False)
users.to_csv("users.csv", index=False)

In [49]:
movies.head(), movies.tail()

(   MovieID                               Title                        Genres
 0        1                    Toy Story (1995)   Animation|Children's|Comedy
 1        2                      Jumanji (1995)  Adventure|Children's|Fantasy
 2        3             Grumpier Old Men (1995)                Comedy|Romance
 3        4            Waiting to Exhale (1995)                  Comedy|Drama
 4        5  Father of the Bride Part II (1995)                        Comedy,
       MovieID                       Title          Genres
 3878     3948     Meet the Parents (2000)          Comedy
 3879     3949  Requiem for a Dream (2000)           Drama
 3880     3950            Tigerland (2000)           Drama
 3881     3951     Two Family House (2000)           Drama
 3882     3952       Contender, The (2000)  Drama|Thriller)

In [43]:
ratings.head(), ratings.tail()

(   UserID  MovieID  Rating  Timestamp
 0       1     1193       5  978300760
 1       1      661       3  978302109
 2       1      914       3  978301968
 3       1     3408       4  978300275
 4       1     2355       5  978824291,
          UserID  MovieID  Rating  Timestamp
 1000204    6040     1091       1  956716541
 1000205    6040     1094       5  956704887
 1000206    6040      562       5  956704746
 1000207    6040     1096       4  956715648
 1000208    6040     1097       4  956715569)

In [41]:
users.head(), users.tail()

(   UserID Gender  Age  Occupation Zip-code
 0       1      F    1          10    48067
 1       2      M   56          16    70072
 2       3      M   25          15    55117
 3       4      M   45           7    02460
 4       5      M   25          20    55455,
       UserID Gender  Age  Occupation Zip-code
 6035    6036      F   25          15    32603
 6036    6037      F   45           1    76006
 6037    6038      F   56           1    14706
 6038    6039      F   45           0    01060
 6039    6040      M   25           6    11106)

## 🎯 Content-Based Recommendation

In [27]:
# Step 1: TF-IDF on genres
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['Genres'].fillna(''))

# Step 2: Fit KNN model
knn_model = NearestNeighbors(metric='cosine', algorithm='brute')
knn_model.fit(tfidf_matrix)

# Step 3: Map movie titles to index
indices = pd.Series(movies.index, index=movies['Title'])

# Step 4: Recommend similar movies
def content_recommendations(title, num=5):
    if title not in indices:
        return ["Movie not found."]
    idx = indices[title]
    distances, neighbors = knn_model.kneighbors(tfidf_matrix[idx], n_neighbors=num+1)
    movie_indices = neighbors.flatten()[1:]  # skip the first (it's the movie itself)
    return movies['Title'].iloc[movie_indices].tolist()


## 👥 Collaborative Filtering (User-Based)

In [51]:
# Step 1: Create pivot table (users x movies)
user_movie_ratings = ratings.pivot_table(index='UserID', columns='MovieID', values='Rating').fillna(0)

# Step 2: Fit Nearest Neighbors model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(user_movie_ratings.values)

# Step 3: Define recommendation function
def collaborative_recommendations(user_id, num=5):
    if user_id not in user_movie_ratings.index:
        return ["❌ User ID not found."]
    
    # Get similar users
    distances, indices = model_knn.kneighbors([user_movie_ratings.loc[user_id]], n_neighbors=num+1)
    neighbors = indices.flatten()[1:]  # Exclude the input user
    
    # Aggregate ratings from neighbors
    neighbor_ratings = ratings[ratings['UserID'].isin(user_movie_ratings.index[neighbors])]
    top_movies = (neighbor_ratings.groupby('MovieID')
                                .mean()['Rating']
                                .sort_values(ascending=False)
                                .head(num))

    recommended_titles = movies[movies['MovieID'].isin(top_movies.index)]['Title'].tolist()
    return recommended_titles if recommended_titles else ["⚠️ No recommendations available."]


# 🧪 Try it Out!

# Content-Based Example

In [60]:
movie_title = 'Toy Story (1995)'
print(f"🎬 Content-based recommendations for '{movie_title}':")
try:
    for movie in content_recommendations(movie_title):
        print(f"👉 {movie}")
except Exception as e:
    print(f"❌ Error: {e}")


🎬 Content-based recommendations for 'Toy Story (1995)':
👉 Saludos Amigos (1943)
👉 Aladdin and the King of Thieves (1996)
👉 American Tail, An (1986)
👉 Toy Story 2 (1999)
👉 Chicken Run (2000)


# Collaborative Example

In [63]:
user_id = 1
print(f"\n👥 Collaborative recommendations for User ID {user_id}:")
try:
    for movie in collaborative_recommendations(user_id):
        print(f"👉 {movie}")
except Exception as e:
    print(f"❌ Error: {e}")


👥 Collaborative recommendations for User ID 1:
👉 Schindler's List (1993)
👉 English Patient, The (1996)
👉 Titanic (1997)
👉 Dark City (1998)
👉 Christmas Story, A (1983)
