### Data Loading

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import matplotlib.gridspec as gridspec
from sklearn.model_selection import train_test_split

In [None]:
# Load the cleaned movie ratings and discard the 'Unnamed: 0' column
# (presumably a row index that was written out when the CSV was saved).
movie_df = pd.read_csv(
    '/content/drive/MyDrive/Movie-Product-Recommendation /Datasets/Cleaned Data/final_data/model_build_data_movies.csv'
).drop(columns=['Unnamed: 0'])
movie_df.head()

Unnamed: 0,movieId,title,userId,rating,watch_date,Season,genres
0,15,Cutthroat Island (1995),7.0,2.0,2002-01-16,Winter,Action & Adventure
1,15,Cutthroat Island (1995),29.0,4.0,1996-06-24,Summer,Action & Adventure
2,15,Cutthroat Island (1995),34.0,3.0,1996-10-28,Fall,Action & Adventure
3,15,Cutthroat Island (1995),69.0,1.0,1997-06-20,Summer,Action & Adventure
4,15,Cutthroat Island (1995),116.0,0.5,2005-11-24,Fall,Action & Adventure


In [None]:
# Load the merged product ratings and discard the 'Unnamed: 0' column
# (presumably a row index that was written out when the CSV was saved).
product_df = pd.read_csv(
    '/content/drive/MyDrive/Movie-Product-Recommendation /Datasets/Cleaned Data/final_data/merged_final.csv'
).drop(columns=['Unnamed: 0'])
product_df.head()

Unnamed: 0,user_id,product_id,rating,movie_generes,season
0,AKM1MP6P0OYPR,B0000AMRTM,0.5,Thriller & Mystery,Spring
1,A30ZD4ECGOT4DU,B0000AMRTM,2.5,Thriller & Mystery,Spring
2,A3R0IDSWV4KXX7,B0000AMRTM,5.0,Thriller & Mystery,Spring
3,AOHB3TUD9G5LU,B0000AMRTM,0.5,Thriller & Mystery,Spring
4,A1X1CV5GAQKAF4,B0000AMRTM,1.5,Thriller & Mystery,Spring


In [None]:
# Show the movie ratings frame; the notebook renders a truncated view (first and last rows).
movie_df

Unnamed: 0,movieId,title,userId,rating,watch_date,Season,genres
0,15,Cutthroat Island (1995),7.0,2.0,2002-01-16,Winter,Action & Adventure
1,15,Cutthroat Island (1995),29.0,4.0,1996-06-24,Summer,Action & Adventure
2,15,Cutthroat Island (1995),34.0,3.0,1996-10-28,Fall,Action & Adventure
3,15,Cutthroat Island (1995),69.0,1.0,1997-06-20,Summer,Action & Adventure
4,15,Cutthroat Island (1995),116.0,0.5,2005-11-24,Fall,Action & Adventure
...,...,...,...,...,...,...,...
20212802,2631,Frogs for Snakes (1998),107326.0,2.0,2002-09-12,Fall,Comedy
20212803,2631,Frogs for Snakes (1998),112282.0,1.0,2000-05-22,Spring,Comedy
20212804,2631,Frogs for Snakes (1998),114360.0,1.0,1999-07-01,Summer,Comedy
20212805,2631,Frogs for Snakes (1998),126269.0,2.0,2001-02-15,Winter,Comedy


In [None]:
# Show the product ratings frame; the notebook renders a truncated view (first and last rows).
product_df

Unnamed: 0,user_id,product_id,rating,movie_generes,season
0,AKM1MP6P0OYPR,B0000AMRTM,0.5,Thriller & Mystery,Spring
1,A30ZD4ECGOT4DU,B0000AMRTM,2.5,Thriller & Mystery,Spring
2,A3R0IDSWV4KXX7,B0000AMRTM,5.0,Thriller & Mystery,Spring
3,AOHB3TUD9G5LU,B0000AMRTM,0.5,Thriller & Mystery,Spring
4,A1X1CV5GAQKAF4,B0000AMRTM,1.5,Thriller & Mystery,Spring
...,...,...,...,...,...
7824477,A3PD7MGSTSZZ93,B00005TN7L,4.5,Comedy,Summer
7824478,A3I9DXZ2C833JT,B00005TN7L,4.5,Comedy,Summer
7824479,A1MRHPHUUXGOOM,B00005TN7L,0.5,Comedy,Summer
7824480,A3UUZYZFMI0L07,B00005TN7L,2.5,Comedy,Summer


In [None]:
# (rows, columns) of each frame, products first and movies second.
product_df.shape, movie_df.shape

((7824482, 5), (20212807, 7))

In [None]:
# User preferences: one row per (userId, genres) pair found in the movie data,
# with `counts` holding how many movie_df rows that user has for that genre.
user_genre_preferences = (
    movie_df
    .groupby(['userId', 'genres'])
    .size()
    .reset_index(name='counts')
)


In [None]:
def recommend_products(user_id):
    """Return every product rating row whose genre the user has watched.

    The user's genres are read from the notebook-level
    ``user_genre_preferences`` frame, and the matching rows are taken from the
    notebook-level ``product_df`` frame. No ranking or de-duplication is
    applied, so the result contains one row per product rating.

    Args:
        user_id: Value compared with ``==`` against
            ``user_genre_preferences['userId']``.

    Returns:
        DataFrame with the columns ``product_id``, ``movie_generes`` and
        ``rating``.
    """
    # Genres that appear for this user in the movie data
    is_user = user_genre_preferences['userId'] == user_id
    liked_genres = user_genre_preferences.loc[is_user, 'genres']

    # Product rows whose genre is one of those genres
    genre_match = product_df['movie_generes'].isin(liked_genres)
    return product_df.loc[genre_match, ['product_id', 'movie_generes', 'rating']]

# Display recommendations for a given user (the next cell uses example user ID 29.0)

In [None]:
# Example call for user 29.0: print a header line, then the frame returned by
# recommend_products.
user_id = 29.0
recommendations = recommend_products(user_id)
print("Recommended Products for User ID:", user_id)
print(recommendations)

Recommended Products for User ID: 29.0
         product_id       movie_generes  rating
0        B0000AMRTM  Thriller & Mystery     0.5
1        B0000AMRTM  Thriller & Mystery     2.5
2        B0000AMRTM  Thriller & Mystery     5.0
3        B0000AMRTM  Thriller & Mystery     0.5
4        B0000AMRTM  Thriller & Mystery     1.5
...             ...                 ...     ...
7824477  B00005TN7L              Comedy     4.5
7824478  B00005TN7L              Comedy     4.5
7824479  B00005TN7L              Comedy     0.5
7824480  B00005TN7L              Comedy     2.5
7824481  B00005TN7L              Comedy     2.0

[6350337 rows x 3 columns]


In [None]:
def recommend_products(user_id, num_recommendations=5, preferences=None, products=None):
    """Recommend the best-rated products in the genres a user has watched.

    Products whose ``movie_generes`` matches any genre recorded for the user
    are averaged per (``product_id``, ``movie_generes``) and ranked by mean
    rating, highest first. Ties are broken by ``product_id`` (ascending) so
    the same inputs always give the same ordering.

    Args:
        user_id: Value compared with ``==`` against ``preferences['userId']``.
        num_recommendations: Maximum number of products to return; must be
            non-negative.
        preferences: Optional frame with ``userId`` and ``genres`` columns.
            Defaults to the notebook-level ``user_genre_preferences``.
        products: Optional frame with ``product_id``, ``movie_generes`` and
            ``rating`` columns. Defaults to the notebook-level ``product_df``.

    Returns:
        DataFrame with the columns ``product_id``, ``movie_generes`` and
        ``average_rating``, holding at most ``num_recommendations`` rows.

    Raises:
        ValueError: If ``num_recommendations`` is negative. ``head()`` would
            otherwise interpret it as "all rows except the last n".
    """
    if num_recommendations < 0:
        raise ValueError("num_recommendations must be non-negative")

    # Fall back to the notebook-level frames when none are passed in, so
    # existing calls such as recommend_products(7) keep working unchanged.
    if preferences is None:
        preferences = user_genre_preferences
    if products is None:
        products = product_df

    # Get the genres liked by the user from the movies dataset
    liked_genres = preferences.loc[preferences['userId'] == user_id, 'genres']

    # Find products matching these genres
    candidates = products[products['movie_generes'].isin(liked_genres)]

    # Calculate the average rating for each distinct product
    averaged = candidates.groupby(['product_id', 'movie_generes']).agg(
        average_rating=pd.NamedAgg(column='rating', aggfunc='mean')
    ).reset_index()

    # Sort by average rating; the secondary key makes the order of equally
    # rated products deterministic instead of depending on the sort algorithm.
    ranked = averaged.sort_values(
        by=['average_rating', 'product_id'], ascending=[False, True]
    )

    return ranked.head(num_recommendations)[['product_id', 'movie_generes', 'average_rating']]



In [None]:
# Display recommendations for a given user (example user ID 7).
# The integer 7 is compared with `==` against the `userId` column, which the
# data preview shows as floats (7.0), so the lookup still matches.
user_id = 7
recommendations = recommend_products(user_id)
print("Recommended Products for User ID:", user_id)
print(recommendations)

Recommended Products for User ID: 7
       product_id       movie_generes  average_rating
11537  B00006HS9N              Comedy        2.990826
44745  B000BXEUCA               Drama        2.990066
15475  B00008XRUD               Drama        2.984252
10980  B00006BBA8  Action & Adventure        2.981618
11361  B00006HNOC             Romance        2.977477


In [None]:
# Repeat the example for user 29.0 and print the returned recommendations.
user_id=29.0
recommendations = recommend_products(user_id)
print("Recommended Products for User ID:", user_id)
print(recommendations)

Recommended Products for User ID: 29.0
       product_id       movie_generes  average_rating
10444  B00006HS9N              Comedy        2.990826
38852  B000BXEUCA               Drama        2.990066
14110  B00008XRUD               Drama        2.984252
9933   B00006BBA8  Action & Adventure        2.981618
39871  B000CPQ20A               Drama        2.968966


In [None]:
# from sklearn.feature_extraction.text import TfidfVectorizer

# # Combine genres from both datasets for vectorization
# all_genres = pd.concat([movie_df['genres'], product_df['movie_generes']]).unique()

# # Vectorizing genres
# vectorizer = TfidfVectorizer(token_pattern='(?u)\\b\\w+\\b')
# genre_vectors = vectorizer.fit_transform(all_genres)


In [None]:
# from sklearn.metrics.pairwise import cosine_similarity

# # Assume we have genre vectors for movies and products separately
# movie_genre_vectors = vectorizer.transform(movie_df['genres'])
# product_genre_vectors = vectorizer.transform(product_df['movie_generes'])

# # Calculate cosine similarity between movie genres and product genres
# similarity_matrix = cosine_similarity(movie_genre_vectors, product_genre_vectors)


In [None]:
# import cupy as cp

# # Convert your data to CuPy arrays (assuming data is already loaded and available as numpy arrays)
# movie_genre_vectors_gpu = cp.asarray(movie_genre_vectors.toarray())  # Ensure data is in dense format if coming from sparse
# product_genre_vectors_gpu = cp.asarray(product_genre_vectors.toarray())

# # Function to compute cosine similarity on GPU
# def cosine_similarity_gpu(X, Y):
#     X_norm = cp.linalg.norm(X, axis=1, keepdims=True)
#     Y_norm = cp.linalg.norm(Y, axis=1, keepdims=True)
#     similarity = cp.dot(X, Y.T) / cp.dot(X_norm, Y_norm.T)
#     return similarity

# # Compute similarity
# similarity_matrix_gpu = cosine_similarity_gpu(movie_genre_vectors_gpu, product_genre_vectors_gpu)


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import tensorflow as tf


In [None]:
# Fit a single TF-IDF model over the genre strings of both datasets,
# movie rows first and product rows after them.
vectorizer = TfidfVectorizer()
all_genres = movie_df['genres'].tolist()
all_genres.extend(product_df['movie_generes'].tolist())

# Fit and transform the genres
tfidf_matrix = vectorizer.fit_transform(all_genres)

# Rows come back in input order: the first len(movie_df) rows are the movie
# TF-IDF features and the remaining rows are the product TF-IDF features.
movie_genres_tfidf, product_genres_tfidf = (
    tfidf_matrix[:len(movie_df)],
    tfidf_matrix[len(movie_df):],
)


In [None]:
# A row-by-row similarity matrix would have shape
# (len(movie_df), len(product_df)) = (20212807, 7824482); as float64 that is
# more than a petabyte, and the dense tf.matmul over all rows failed with a
# ResourceExhaustedError (OOM).
#
# Each row's TF-IDF vector depends only on its genre string, so the similarity
# of any (movie row, product row) pair is fully determined by the two genre
# labels. Compute the label-by-label matrix instead and look pairs up in it.
# Assumes the number of distinct genre labels is small.
movie_genre_labels = movie_df['genres'].dropna().drop_duplicates().tolist()
product_genre_labels = product_df['movie_generes'].dropna().drop_duplicates().tolist()

# Re-use the vectorizer already fitted on the full data so the IDF weights are
# unchanged; .toarray() gives a plain ndarray for the tensor conversion.
movie_genres_tensor = tf.convert_to_tensor(vectorizer.transform(movie_genre_labels).toarray())
product_genres_tensor = tf.convert_to_tensor(vectorizer.transform(product_genre_labels).toarray())

# Normalize the vectors
movie_genres_norm = tf.nn.l2_normalize(movie_genres_tensor, axis=1)
product_genres_norm = tf.nn.l2_normalize(product_genres_tensor, axis=1)

# Compute cosine similarity: cosine_sim[i, j] compares movie_genre_labels[i]
# with product_genre_labels[j].
cosine_sim = tf.matmul(movie_genres_norm, product_genres_norm, transpose_b=True)

# Labelled view for lookups: genre_similarity.loc[movie_genre, product_genre]
genre_similarity = pd.DataFrame(
    cosine_sim.numpy(), index=movie_genre_labels, columns=product_genre_labels
)
genre_similarity


ResourceExhaustedError: {{function_node __wrapped__MatMul_device_/job:localhost/replica:0/task:0/device:CPU:0}} OOM when allocating tensor with shape[20212807,7824482] and type double on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu [Op:MatMul] name: 

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Downsample both frames to 1% of their original size (frac=0.01).
# The fraction and seed are named once so both frames always use the same
# settings; the fixed seed keeps the sample reproducible.
SAMPLE_FRACTION = 0.01
SAMPLE_SEED = 42
movie_df_sampled = movie_df.sample(frac=SAMPLE_FRACTION, random_state=SAMPLE_SEED)
product_df_sampled = product_df.sample(frac=SAMPLE_FRACTION, random_state=SAMPLE_SEED)

In [None]:
# Fit a fresh TF-IDF model on the sampled frames only. This rebinds the
# notebook-level names `vectorizer` and `all_genres`.
vectorizer = TfidfVectorizer()

# Combine genres for vectorization: sampled movie rows first, then product rows
num_movies = len(movie_df_sampled)
all_genres = [
    *movie_df_sampled['genres'].tolist(),
    *product_df_sampled['movie_generes'].tolist(),
]
genre_vectors = vectorizer.fit_transform(all_genres)

# Split the vectors back into movie genres and product genres
movie_genres, product_genres = genre_vectors[:num_movies], genre_vectors[num_movies:]


In [None]:
# Pairwise cosine similarity between every sampled movie row and every sampled
# product row; the result has one row per movie vector and one column per
# product vector.
# NOTE(review): with the default dense output and 1% samples of frames with
# 20,212,807 and 7,824,482 rows, this is roughly 202k x 78k float64 values
# (on the order of 100+ GB) - confirm it fits in memory before running.
cosine_sim = cosine_similarity(X=movie_genres, Y=product_genres)
