## CodeSoft Task 3: Hybrid Movie Recommendation System
### Nitya Joshi

In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# ---------------------------------------------------------------------------
# Toy dataset: ten (user, movie, rating) observations and a four-movie
# catalogue, both laid out column-wise for the DataFrame constructor.
# ---------------------------------------------------------------------------
ratings_dict = {
    'user_id':  [1, 1, 1, 2, 2, 3, 3, 4, 4, 4],
    'movie_id': [1, 2, 3, 1, 2, 2, 3, 1, 3, 4],
    'rating':   [5, 4, 3, 5, 2, 4, 4, 2, 4, 5],
}

movies_dict = {
    'movie_id': [1, 2, 3, 4],
    'title':    ['Movie A', 'Movie B', 'Movie C', 'Movie D'],
    'genre':    ['Action', 'Comedy', 'Action', 'Drama'],
}

# Catalogue first, then the ratings joined to it on the shared movie_id key so
# that every rating row also carries the movie's title and genre.
movies_df = pd.DataFrame(movies_dict)
ratings_df = pd.DataFrame(ratings_dict).merge(movies_df, on='movie_id')

# ---------------------------------------------------------------------------
# User-to-user similarity
# ---------------------------------------------------------------------------
# Rows are users, columns are titles; a cell is NaN where the user gave no rating.
user_item_matrix = pd.pivot_table(
    ratings_df,
    values='rating',
    index='user_id',
    columns='title',
)

# cosine_similarity cannot take NaN, so unrated cells are treated as 0 here.
# The NaN-preserving matrix above is kept for everything else.
user_item_matrix_filled = user_item_matrix.fillna(0)

user_similarity = cosine_similarity(user_item_matrix_filled)
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

# ---------------------------------------------------------------------------
# Movie-to-movie similarity from genres
# ---------------------------------------------------------------------------
# One indicator column per distinct genre, one row per title.
genre_matrix = pd.get_dummies(movies_df.set_index('title')['genre'])

# Scale each row by its own total. With a single genre per movie every row
# already sums to 1, so the values are unchanged in this dataset.
genre_matrix_normalized = genre_matrix.div(
    genre_matrix.sum(axis='columns'),
    axis='index',
)

genre_similarity = cosine_similarity(genre_matrix_normalized)
genre_similarity_df = pd.DataFrame(
    genre_similarity,
    index=genre_matrix.index,
    columns=genre_matrix.index,
)

def get_hybrid_recommendations(user_id, user_item_matrix, user_similarity_df, genre_similarity_df, alpha=0.5, n_recommendations=2):
    """Rank the movies a user has not rated by a blended recommendation score.

    The score of a candidate title is
    ``alpha * collaborative + (1 - alpha) * content`` where

    * *collaborative* is the mean rating the other users gave the title, and
    * *content* is the mean of the title's ``genre_similarity_df`` values
      against every title the user has already rated.

    Parameters
    ----------
    user_id
        Label of the target user. Must be a row label of ``user_item_matrix``
        and a column label of ``user_similarity_df``.
    user_item_matrix : pandas.DataFrame
        Users as rows, titles as columns, ratings as values. NaN marks a
        title the user has not rated.
    user_similarity_df : pandas.DataFrame
        User-by-user similarity table. Only its row labels are used, to
        enumerate the other users; the similarity values themselves do not
        weight the average.
        NOTE(review): a similarity-weighted mean may be what was intended.
    genre_similarity_df : pandas.DataFrame
        Title-by-title similarity table. Must have a column and a row for
        every title the user has rated.
    alpha : float, default 0.5
        Weight of the collaborative score; the content score gets
        ``1 - alpha``. The collaborative score is a mean rating and the
        content score is a mean similarity, so the two components are on
        different scales and ``alpha`` is not scale-free.
    n_recommendations : int, default 2
        Maximum number of titles to return.

    Returns
    -------
    pandas.Series
        Blended scores indexed by title, highest first, with at most
        ``n_recommendations`` entries. Titles for which either component
        cannot be computed (no other user rated them, or they are absent
        from one of the tables) are omitted rather than returned with a
        NaN score, so fewer entries than requested may come back.

    Raises
    ------
    KeyError
        If ``user_id`` or one of the user's rated titles is missing from a
        table that is indexed with it.
    """
    # Every other user counts as a peer. The target is removed by label:
    # dropping the first entry of a similarity-sorted list removes the wrong
    # user whenever another user ties with the target at the top similarity.
    peer_ids = user_similarity_df[user_id].drop(labels=user_id, errors="ignore").index

    # Titles the target user has already rated are never recommended.
    rated_titles = user_item_matrix.loc[user_id].dropna().index

    # Collaborative component: column-wise mean skips NaN, so each title is
    # averaged over the peers who actually rated it.
    collaborative_scores = user_item_matrix.loc[peer_ids].mean().drop(rated_titles)

    # Content component: average similarity to the user's rated titles.
    content_scores = genre_similarity_df[rated_titles].mean(axis=1).drop(rated_titles)

    # The addition aligns on title; a title missing from either side yields
    # NaN, which is discarded so it can never surface as a recommendation.
    hybrid_scores = alpha * collaborative_scores + (1 - alpha) * content_scores
    ranked = hybrid_scores.dropna().sort_values(ascending=False)

    return ranked.head(n_recommendations)

# Demo run for user 1: the collaborative score is weighted 70% and the
# content-based score the remaining 30%.
user_id = 1
alpha = 0.7
recommendations = get_hybrid_recommendations(
    user_id,
    user_item_matrix,
    user_similarity_df,
    genre_similarity_df,
    alpha=alpha,
)
# A single print call with a newline separator emits the header line
# followed by the Series on the next line.
print(f"Hybrid Recommendations for User {user_id}:", recommendations, sep="\n")


Hybrid Recommendations for User 1:
title
Movie D    3.5
dtype: float64
