# Matrix - Movie Recommendation
Idea: similar people might have similar movie preferences. For this simple project, we try to recommend movies to a user based on the ratings of the other user whose tastes match theirs most closely.

In [11]:
# Testing
import pandas as pd

# Sample ratings, grouped per user as (movie, rating) pairs.
ratings_by_user = {
    'Alice': [('Inception', 5), ('Interstellar', 4), ('Titanic', 1)],
    'Bob': [
        ('Inception', 4), ('Interstellar', 5), ('The Matrix', 2),
        ('Titanic', 5), ('Avatar', 5),
    ],
    'Charlie': [('The Godfather', 5), ('Pulp Fiction', 4), ('Inception', 1)],
    'David': [('The Godfather', 2), ('Pulp Fiction', 5), ('Titanic', 4)],
    'Eve': [('The Godfather', 5), ('Pulp Fiction', 4), ('Interstellar', 2)],
    'Frank': [('The Matrix', 5), ('Inception', 4), ('Interstellar', 4)],
}

# Flatten into [user, movie, rating] rows, keeping the order listed above.
raw_data = [
    [user, movie, rating]
    for user, movie_ratings in ratings_by_user.items()
    for movie, rating in movie_ratings
]

# Long format: one row per (user, movie) rating.
df = pd.DataFrame(raw_data, columns=['User', 'Movie', 'Rating'])

print("Regular Dataframe (Long Format):")
df

Regular Dataframe (Long Format):


Unnamed: 0,User,Movie,Rating
0,Alice,Inception,5
1,Alice,Interstellar,4
2,Alice,Titanic,1
3,Bob,Inception,4
4,Bob,Interstellar,5
5,Bob,The Matrix,2
6,Bob,Titanic,5
7,Bob,Avatar,5
8,Charlie,The Godfather,5
9,Charlie,Pulp Fiction,4


In [12]:
# Keep just the rows where the user rated the movie 4 or 5.
liked_mask = df['Rating'].ge(4)
df_high_rated = df.loc[liked_mask].copy()

# Pivot to a user x movie grid; movies a user did not rate highly become 0.
matrix = (
    df_high_rated
    .pivot(index='User', columns='Movie', values='Rating')
    .fillna(0)
)

print("Matrix (Only includes 4s and 5s):")
display(matrix)

Matrix (Only includes 4s and 5s):


Movie,Avatar,Inception,Interstellar,Pulp Fiction,The Godfather,The Matrix,Titanic
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Alice,0.0,5.0,4.0,0.0,0.0,0.0,0.0
Bob,5.0,4.0,5.0,0.0,0.0,0.0,5.0
Charlie,0.0,0.0,0.0,4.0,5.0,0.0,0.0
David,0.0,0.0,0.0,5.0,0.0,0.0,4.0
Eve,0.0,0.0,0.0,4.0,5.0,0.0,0.0
Frank,0.0,4.0,4.0,0.0,0.0,5.0,0.0


In [13]:
from sklearn.metrics.pairwise import cosine_similarity

Note:
### Why use Cosine instead of regular Correlation?
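1. Magnitude doesn't matter as much: Cosine similarity measures the angle between two rating vectors, so it compares the pattern of the ratings rather than their absolute values (a user who rates a set of movies all 4s and another who rates the same movies all 5s still get a similarity of 1).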
2. The "Zero" problem: In recommendation systems, we have a lot of zeros (unwatched movies). Cosine similarity handles these "sparse" matrices efficiently: only the dimensions where both vectors have non-zero values contribute to the dot product in the numerator, while each vector's length in the denominator depends only on that user's own ratings.

In [14]:
# Pairwise cosine similarity between users, computed on the high-rating matrix.
user_sim = cosine_similarity(matrix)

# Label both axes with the user names so similarities can be looked up by name.
user_labels = matrix.index
user_sim_df = pd.DataFrame(data=user_sim, index=user_labels, columns=user_labels)

In [15]:
def recommend_smart(target_user, sim_df=None, ratings=None, min_rating=4):
    """Print movie recommendations for ``target_user`` taken from their best match.

    The best match is the *other* user with the highest similarity to
    ``target_user``. Recommended movies are the ones that user rated at least
    ``min_rating`` and that ``target_user`` has not rated at all.

    Parameters
    ----------
    target_user : str
        Name of the user to recommend for; must be a column of ``sim_df``.
    sim_df : pandas.DataFrame, optional
        User-by-user similarity table (users as both index and columns).
        Defaults to the notebook-level ``user_sim_df``.
    ratings : pandas.DataFrame, optional
        Long-format ratings with ``User``, ``Movie`` and ``Rating`` columns.
        Defaults to the notebook-level ``df``.
    min_rating : int or float, optional
        Lowest rating that counts as "loved" by the matched user (default 4).

    Returns
    -------
    None
        The result is printed, not returned.

    Raises
    ------
    KeyError
        If ``target_user`` is not present in ``sim_df``.
    """
    # Fall back to the notebook-level frames so existing calls keep working.
    if sim_df is None:
        sim_df = user_sim_df
    if ratings is None:
        ratings = df

    if target_user not in sim_df.columns:
        raise KeyError(
            f"{target_user!r} is not in the similarity matrix "
            "(unknown user, or a user without any high ratings)"
        )

    # Remove the target's own entry BEFORE ranking. Relying on "self sorts
    # first" (and then taking position 1) breaks when another user ties with
    # the self-similarity, e.g. two users with identical rating vectors: the
    # target could then be matched with themselves.
    other_users = (
        sim_df[target_user]
        .drop(labels=target_user, errors="ignore")
        .sort_values(ascending=False)
    )

    print(f"Target User: {target_user}")

    if other_users.empty:
        # Only one user in the matrix: there is nobody to borrow tastes from.
        print("Matched with: nobody (no other users to compare against).")
        return

    similar_user = other_users.index[0]
    similarity_score = other_users.iloc[0]

    # Everything the target has rated counts as "seen", whatever the rating.
    target_seen = ratings.loc[ratings['User'] == target_user, 'Movie'].unique()

    # Movies the matched user rated highly ...
    loved_mask = (ratings['User'] == similar_user) & (ratings['Rating'] >= min_rating)
    sim_user_loves = ratings.loc[loved_mask]

    # ... minus the ones the target has already seen.
    recommendations = sim_user_loves[~sim_user_loves['Movie'].isin(target_seen)]

    print(f"Matched with: {similar_user} (Similarity Score: {similarity_score:.2f})")

    if not recommendations.empty:
        print(f"Recommended Movies: {recommendations['Movie'].tolist()}")
    else:
        print("Recommended Movies: No new high-rated movies found from this match.")

# Demo: print the best-matching user and the resulting recommendations for Alice.
recommend_smart('Alice')

Target User: Alice
Matched with: Frank (Similarity Score: 0.74)
Recommended Movies: ['The Matrix']
