# 🎬 Human Behavior Through Movies: Building a Personalized Recommendation System

## 📖 Project Overview
This project explores how artificial intelligence can understand and predict **human behavior** through movie preferences.  
By analyzing user ratings and identifying patterns of similarity, we aim to create a **personalized movie recommendation system** that adapts to individual tastes — demonstrating how machines can learn from human interactions.


In [None]:
# Import the required packages
import pandas as pd  # for data manipulation
import numpy as np   # for numerical operations
import matplotlib.pyplot as plt  # for plotting
import seaborn as sns    # for advanced plotting
from scipy.sparse import csr_matrix  # for sparse matrix representation
from sklearn.decomposition import TruncatedSVD # for dimensionality reduction
from sklearn.metrics.pairwise import cosine_similarity  # for similarity calculations
from tqdm import tqdm  # for progress bars



In [None]:
# Read the movie metadata and the user ratings from their CSV files
print(' Loading Data ... (this may take a while)')

# Files are read in order: movie metadata first, then the ratings
Moves, Ratings = (pd.read_csv(csv_name) for csv_name in ('movies.csv', 'ratings.csv'))

print('Data Loaded Successfully')
for label, frame in (('Movies :', Moves), ('Ratings :', Ratings)):
    print(label, frame.shape)

 Loading Data ... (this may take a while)
Data Loaded Successfully
Movies : (62423, 3)
Ratings : (25000095, 4)


In [10]:
# Replace raw user/movie IDs with compact integer category codes

print('Encoding user and movie IDs ....')

# One code series per ID column (users first, then movies)
user_ids, movie_ids = (
    Ratings[id_col].astype('category').cat.codes
    for id_col in ('userId', 'movieId')
)

# Store the codes alongside the original IDs
Ratings['user_idx'], Ratings['movie_idx'] = user_ids, movie_ids

# Number of distinct encoded users and movies
n_users, n_movies = (
    Ratings[idx_col].nunique() for idx_col in ('user_idx', 'movie_idx')
)

print(f'Users: {n_users}, Movies: {n_movies}')

Encoding user and movie IDs ....
Users: 162541, Movies: 59047


In [14]:
# Assemble the sparse user-item rating matrix
print('Building sparse Matrix ...')

# Rows are encoded users, columns are encoded movies, values are the ratings
row_idx, col_idx = Ratings["user_idx"], Ratings["movie_idx"]
rating_matrix = csr_matrix(
    (Ratings["rating"], (row_idx, col_idx)), shape=(n_users, n_movies)
)

n_stored = rating_matrix.nnz
print(f'Sparse Matrix shape: {rating_matrix.shape}, with {n_stored} non-zero entries')
print(f'Matrix Density: {100 * n_stored / (n_users * n_movies):.4f}%')



Building sparse Matrix ...
Sparse Matrix shape: (162541, 59047), with 25000095 non-zero entries
Matrix Density: 0.2605%


In [15]:
# Compress the ratings into dense per-movie vectors with Truncated SVD
# (works directly on the sparse matrix, so memory use stays small)
print("Training SVD model (dimensionality reduction)...")

# Transposing puts movies on the rows, so each output row is one movie's embedding
movie_user_matrix = rating_matrix.transpose()
svd = TruncatedSVD(random_state=42, n_components=100)
movie_embeddings = svd.fit_transform(movie_user_matrix)

print(' SVD training completed.')
print(f'Movie embeddings shape: {movie_embeddings.shape}')

Training SVD model (dimensionality reduction)...
 SVD training completed.
Movie embeddings shape: (59047, 100)


In [None]:
# Compute similarity (on-demand)

def recommend_movies(title, top_n=10):
    """
    Recommend movies similar to a given title based on SVD-compressed embeddings.

    Uses the notebook-level objects ``Moves`` (movie metadata with ``movieId``
    and ``title``), ``Ratings`` (ratings with ``movieId`` and the encoded
    ``movie_idx``) and ``movie_embeddings`` (one row per ``movie_idx``).

    Parameters
    ----------
    title : str
        Exact movie title as it appears in ``Moves['title']``.
    top_n : int, default 10
        Maximum number of recommendations; values below zero are treated as 0.

    Returns
    -------
    numpy.ndarray or list
        Titles of the most similar movies, best match first. An empty list is
        returned (after printing a message) when the title is unknown or the
        movie has no ratings and therefore no embedding.
    """
    title_matches = Moves.loc[Moves['title'] == title, 'movieId']
    if title_matches.empty:
        print(f"Movie '{title}' not found.")
        return []
    # If several movies share the title, the first one listed is used.
    movie_id = title_matches.iloc[0]

    # Build the embedding-row -> movieId lookup from the ratings themselves.
    # Embedding rows are indexed by `movie_idx`, which only covers *rated*
    # movies, so a row number is NOT a row position in `Moves`.
    idx_to_movie_id = np.full(movie_embeddings.shape[0], -1, dtype=np.int64)
    idx_to_movie_id[Ratings['movie_idx'].to_numpy()] = Ratings['movieId'].to_numpy()

    target_rows = np.flatnonzero(idx_to_movie_id == movie_id)
    if target_rows.size == 0:
        # The movie exists in the metadata but nobody rated it.
        print(f"Movie '{title}' has no ratings, so no recommendations are available.")
        return []
    movie_idx = int(target_rows[0])

    # Cosine similarity between the target movie and every movie embedding
    target_vec = movie_embeddings[movie_idx].reshape(1, -1)
    sim_scores = cosine_similarity(target_vec, movie_embeddings)[0]

    # Rank by descending similarity and drop the query movie explicitly
    # instead of assuming it always lands in first place.
    ranked = np.argsort(sim_scores)[::-1]
    ranked = ranked[ranked != movie_idx]
    top_indices = ranked[:max(int(top_n), 0)]

    # Translate embedding rows back to movieIds, then to titles via `Moves`.
    # A rated movie missing from `Moves` would show up as NaN here.
    rec_ids = idx_to_movie_id[top_indices]
    title_by_id = Moves.drop_duplicates(subset='movieId').set_index('movieId')['title']
    rec_titles = title_by_id.reindex(rec_ids).to_numpy()

    print(f"\nðŸŽ¬ Because you liked '{title}', you might also enjoy:")
    for i, rec in enumerate(rec_titles, 1):
        print(f"{i}. {rec}")

    return rec_titles


In [19]:
# Demo: show recommendations for one sample title

example_movie = "Toy Story (1995)"  # swap in any title present in the dataset
recommend_movies(title=example_movie, top_n=10)


ðŸŽ¬ Because you liked 'Toy Story (1995)', you might also enjoy:
1. Toy Story 2 (1999)
2. Bug's Life, A (1998)
3. Willy Wonka & the Chocolate Factory (1971)
4. Independence Day (a.k.a. ID4) (1996)
5. Mission: Impossible (1996)
6. Star Wars: Episode IV - A New Hope (1977)
7. Jurassic Park (1993)
8. Monsters, Inc. (2001)
9. Back to the Future (1985)
10. Men in Black (a.k.a. MIB) (1997)


array(['Toy Story 2 (1999)', "Bug's Life, A (1998)",
       'Willy Wonka & the Chocolate Factory (1971)',
       'Independence Day (a.k.a. ID4) (1996)',
       'Mission: Impossible (1996)',
       'Star Wars: Episode IV - A New Hope (1977)',
       'Jurassic Park (1993)', 'Monsters, Inc. (2001)',
       'Back to the Future (1985)', 'Men in Black (a.k.a. MIB) (1997)'],
      dtype=object)