In [55]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf

In [45]:
product_df = pd.read_csv('merged_final.csv')
product_df.drop(['Unnamed: 0'], axis=1, inplace=True)
product_df.head()

Unnamed: 0,user_id,product_id,rating,movie_generes,season
0,AKM1MP6P0OYPR,B0000AMRTM,0.5,Thriller & Mystery,Spring
1,A30ZD4ECGOT4DU,B0000AMRTM,2.5,Thriller & Mystery,Spring
2,A3R0IDSWV4KXX7,B0000AMRTM,5.0,Thriller & Mystery,Spring
3,AOHB3TUD9G5LU,B0000AMRTM,0.5,Thriller & Mystery,Spring
4,A1X1CV5GAQKAF4,B0000AMRTM,1.5,Thriller & Mystery,Spring


In [46]:
movie_df = pd.read_csv('model_build_data_movies.csv')
movie_df.drop(['Unnamed: 0'], axis=1, inplace=True)
movie_df.head()

Unnamed: 0,movieId,title,userId,rating,watch_date,Season,genres
0,15,Cutthroat Island (1995),7.0,2.0,2002-01-16,Winter,Action & Adventure
1,15,Cutthroat Island (1995),29.0,4.0,1996-06-24,Summer,Action & Adventure
2,15,Cutthroat Island (1995),34.0,3.0,1996-10-28,Fall,Action & Adventure
3,15,Cutthroat Island (1995),69.0,1.0,1997-06-20,Summer,Action & Adventure
4,15,Cutthroat Island (1995),116.0,0.5,2005-11-24,Fall,Action & Adventure


In [48]:
movie_df_recommend=movie_df[["movieId","genres"]]

In [49]:
product_df_recommend=product_df[["product_id","movie_generes"]]

In [50]:
product_df_recommend

Unnamed: 0,product_id,movie_generes
0,B0000AMRTM,Thriller & Mystery
1,B0000AMRTM,Thriller & Mystery
2,B0000AMRTM,Thriller & Mystery
3,B0000AMRTM,Thriller & Mystery
4,B0000AMRTM,Thriller & Mystery
...,...,...
7824477,B00005TN7L,Comedy
7824478,B00005TN7L,Comedy
7824479,B00005TN7L,Comedy
7824480,B00005TN7L,Comedy


In [51]:
movie_df_recommend

Unnamed: 0,movieId,genres
0,15,Action & Adventure
1,15,Action & Adventure
2,15,Action & Adventure
3,15,Action & Adventure
4,15,Action & Adventure
...,...,...
20212802,2631,Comedy
20212803,2631,Comedy
20212804,2631,Comedy
20212805,2631,Comedy


In [83]:
# Sample 50% of each dataframe
movie_df_sampled = movie_df_recommend.sample(frac=0.002, random_state=42)  # Use random state for reproducibility
product_df_sampled = product_df_recommend.sample(frac=0.002, random_state=42)


In [84]:
movie_df_sampled.shape,product_df_sampled.shape

((40426, 2), (15649, 2))

In [74]:
encoder = LabelEncoder()
all_genres = pd.concat([movie_df_sampled['genres'], product_df_sampled['movie_generes']])
encoded_genres = encoder.fit_transform(all_genres)

# Split back into movies and products
num_movies = movie_df_sampled.shape[0]
movie_genres_encoded = encoded_genres[:num_movies]
product_genres_encoded = encoded_genres[num_movies:]

# Convert to TensorFlow tensors
movie_genres_tensor = tf.convert_to_tensor(movie_genres_encoded, dtype=tf.float32)
product_genres_tensor = tf.convert_to_tensor(product_genres_encoded, dtype=tf.float32)

In [75]:
def cosine_similarity_tf(embeddings_a, embeddings_b):
    norm_a = tf.nn.l2_normalize(embeddings_a, axis=1)
    norm_b = tf.nn.l2_normalize(embeddings_b, axis=1)
    return tf.matmul(norm_a, norm_b, transpose_b=True)

similarity_scores = cosine_similarity_tf(movie_genres_tensor[:, tf.newaxis], product_genres_tensor[:, tf.newaxis])


In [76]:
def get_recommendations(similarity_matrix, top_k=10):
    recommendations = {}
    for i in range(similarity_matrix.shape[0]):
        top_indices = tf.argsort(similarity_matrix[i], direction='DESCENDING')[:top_k]
        recommended_products = product_df.iloc[top_indices.numpy()]['product_id'].values
        recommendations[movie_df.iloc[i]['movieId']] = recommended_products
    return recommendations

recommendations = get_recommendations(similarity_scores)
print("Recommendations for each movie:")
print(recommendations)


Recommendations for each movie:
{15: array(['B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM',
       'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM',
       'B0000AMRTM', 'B0000AMRTM'], dtype=object), 940: array(['B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM',
       'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM',
       'B0000AMRTM', 'B0000AMRTM'], dtype=object), 8167: array(['B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM',
       'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM',
       'B0000AMRTM', 'B0000AMRTM'], dtype=object), 25840: array(['B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM',
       'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM',
       'B0000AMRTM', 'B0000AMRTM'], dtype=object), 25866: array(['B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM',
       'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM',
       'B0000AMRTM', 'B0000AMRTM'], dtype=object), 25962: array(['B0000AMRTM', 'B0000AMRTM', 'B0000AMRTM

In [85]:

# Combine and vectorize genres
vectorizer = TfidfVectorizer()
all_genres = movie_df_sampled['genres'].tolist() + product_df_sampled['movie_generes'].tolist()
genre_vectors = vectorizer.fit_transform(all_genres)

# Convert to dense tensor
genre_tensor = tf.convert_to_tensor(genre_vectors.todense(), dtype=tf.float32)

# Split tensor back into movies and products
num_movies = len(movie_df_sampled)
movie_genres_tensor = genre_tensor[:num_movies]
product_genres_tensor = genre_tensor[num_movies:]

# Calculate cosine similarity using TensorFlow
def cosine_similarity_tf(matA, matB):
    matA_norm = tf.nn.l2_normalize(matA, axis=1)
    matB_norm = tf.nn.l2_normalize(matB, axis=1)
    return tf.matmul(matA_norm, matB_norm, transpose_b=True)

similarity_scores = cosine_similarity_tf(movie_genres_tensor, product_genres_tensor)

# Function to get recommendations for a given movie ID
def get_recommendations_for_movie(movie_id, similarity_matrix, top_k=10):
    # Find the index of the movie ID in movie_df
    movie_idx = movie_df.index[movie_df['movieId'] == movie_id].tolist()
    if not movie_idx:
        return "Movie ID not found."
    movie_idx = movie_idx[0]  # Assuming unique movie IDs
    
    # Get top-k recommendations
    top_indices = tf.argsort(similarity_matrix[movie_idx], direction='DESCENDING')[:top_k]
    recommended_products = product_df.iloc[top_indices.numpy()]['product_id'].values
    return recommended_products

# Example usage: Get top 10 recommendations for movie ID 15
recommended_products = get_recommendations_for_movie(15, similarity_scores, top_k=10)
print("Recommended Products for Movie ID 15:")
print(recommended_products)


Recommended Products for Movie ID 15:
['B0000AMRTM' 'B0000AMRTM' 'B0000AMRTM' 'B0000AMRTM' 'B0000AMRTM'
 'B0000AMRTM' 'B0000AMRTM' 'B0000AMRTM' 'B0000AMRTM' 'B0000AMRTM']


In [91]:
movie_df_unique=movie_df_recommend.drop_duplicates(subset="movieId").reset_index().drop(columns="index")
movie_df_unique

Unnamed: 0,movieId,genres
0,15,Action & Adventure
1,940,Action & Adventure
2,8167,Action & Adventure
3,25840,Action & Adventure
4,25866,Action & Adventure
...,...,...
26508,127042,Action & Adventure
26509,127044,Action & Adventure
26510,2367,Fantasy & Sci-Fi
26511,2422,Action & Adventure


In [92]:
product_df_unique=product_df_recommend.drop_duplicates(subset="product_id").reset_index().drop(columns="index")
product_df_unique

Unnamed: 0,product_id,movie_generes
0,B0000AMRTM,Thriller & Mystery
1,B000EPSUI0,Action & Adventure
2,B0002ZP4DQ,Action & Adventure
3,B00009WPPA,Romance
4,B00004SD8X,Comedy
...,...,...
57276,B0009Y76CW,Sports & Outdoors
57277,B00005OOJZ,Family & Kids
57278,B0007LXRC0,Family & Kids
57279,B00062GJ8O,Comedy


In [103]:
# Sample 50% of each dataframe
movie_df_sampled = movie_df_unique.sample(frac=0.3, random_state=42)  # Use random state for reproducibility
product_df_sampled = product_df_unique.sample(frac=0.3, random_state=42)


In [104]:
product_df_sampled.shape,movie_df_sampled.shape

((17184, 2), (7954, 2))

In [105]:

# Combine and vectorize genres
vectorizer = TfidfVectorizer()
all_genres = movie_df_sampled['genres'].tolist() + product_df_sampled['movie_generes'].tolist()
genre_vectors = vectorizer.fit_transform(all_genres)

# Convert to dense tensor
genre_tensor = tf.convert_to_tensor(genre_vectors.todense(), dtype=tf.float32)

# Split tensor back into movies and products
num_movies = len(movie_df_sampled)
movie_genres_tensor = genre_tensor[:num_movies]
product_genres_tensor = genre_tensor[num_movies:]

# Calculate cosine similarity using TensorFlow
def cosine_similarity_tf(matA, matB):
    matA_norm = tf.nn.l2_normalize(matA, axis=1)
    matB_norm = tf.nn.l2_normalize(matB, axis=1)
    return tf.matmul(matA_norm, matB_norm, transpose_b=True)

similarity_scores = cosine_similarity_tf(movie_genres_tensor, product_genres_tensor)

# Function to get recommendations for a given movie ID
def get_recommendations_for_movie(movie_id, similarity_matrix, top_k=10):
    # Find the index of the movie ID in movie_df
    movie_idx = movie_df.index[movie_df['movieId'] == movie_id].tolist()
    if not movie_idx:
        return "Movie ID not found."
    movie_idx = movie_idx[0]  # Assuming unique movie IDs
    
    # Get top-k recommendations
    top_indices = tf.argsort(similarity_matrix[movie_idx], direction='DESCENDING')[:top_k]
    recommended_products = product_df_sampled.iloc[top_indices.numpy()]['product_id'].values
    return recommended_products

# Example usage: Get top 10 recommendations for movie ID 15
recommended_products = get_recommendations_for_movie(15, similarity_scores, top_k=10)
print("Recommended Products for Movie ID 15:")
print(recommended_products)


Recommended Products for Movie ID 15:
['B0002F79XE' 'B00005QFYE' 'B0002NXCKU' 'B0002805TQ' 'B0002OPKE0'
 'B0002OO0T6' 'B00006I5DP' 'B00004Z5LZ' 'B0002QJWZQ' 'B00028OKB0']
