This notebook implements the Cross-Domain Recommendation Engine using the K-Nearest Neighbors (KNN) algorithm. The goal is to recommend movies based on the genres of books a user is interested in. By leveraging pre-classified genres and similarity measures, the system identifies and suggests movies that align with the user's book preferences.

In [1]:
import ast
import json
import pickle

import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

In [2]:
# Read the genre-classified movie catalogue from disk into a DataFrame.
movies_df = pd.read_csv(filepath_or_buffer='movies_book_classified.csv')

# Preview the leading rows as a quick sanity check of the available columns.
movies_df.head(n=5)

Unnamed: 0,name,genre,description,clean_description,labels
0,Creed III,"Drama, Action","After dominating the boxing world, Adonis Cree...",dominating boxing world adonis creed thriving ...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,Avatar: The Way of Water,"Science Fiction, Adventure, Action",Set more than a decade after the events of the...,set decade events first film learn story sully...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,The Super Mario Bros. Movie,"Animation, Adventure, Family, Fantasy, Comedy","While working underground to fix a water main,...",working underground fix water main brooklyn pl...,"[1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
3,Mummies,"Animation, Comedy, Family, Adventure, Fantasy","Through a series of unfortunate events, three ...",series unfortunate events three mummies end pr...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,Supercell,Action,Good-hearted teenager William always lived in ...,good hearted teenager william always lived hop...,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [33]:
# Convert the 'labels' column from its string representation to actual lists.
# Each entry in 'labels' is a stringified list of binary genre indicators.
# ast.literal_eval only accepts Python literals, so -- unlike eval() -- text
# coming from the CSV file can never be executed as code.
train_arr = np.array([ast.literal_eval(entry) for entry in movies_df['labels']])

# Every row must carry the same number of indicators, otherwise the result is
# not a proper (n_movies, n_genres) matrix that the KNN model can be fitted on.
assert train_arr.ndim == 2, "label vectors must all have the same length"

# Display the training array to verify its structure
train_arr

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 1, 0, ..., 0, 0, 0]])

In [34]:
# Build a 5-neighbour cosine-distance index and fit it on the label matrix in
# one step; fit() returns the estimator itself, so `model` is the fitted object.
model = NearestNeighbors(metric='cosine', n_neighbors=5).fit(train_arr)

# Echo the fitted estimator as the cell output.
model

In [35]:
# Define a sample test input representing a user's genre preferences.
# The vector holds one binary indicator per genre label and must have exactly
# as many entries as a training row (17 in this sample), since it is compared
# position-by-position against the movie label vectors.
test_data = [1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0]

# kneighbors() expects a 2-D array, so reshape the single query to (1, n_genres)
test_arr = np.array(test_data).reshape(1, -1)

# Fail early with a readable message if the query width does not match the
# width of the matrix the model was fitted on.
assert test_arr.shape[1] == train_arr.shape[1], (
    f"query has {test_arr.shape[1]} genre indicators, "
    f"but the model was trained on {train_arr.shape[1]}"
)

# Find the 5 nearest movies: `distances` holds the cosine distances and
# `indices` the matching row positions in the training matrix.
distances, indices = model.kneighbors(test_arr, n_neighbors=5)

In [53]:
# Scratch check: collapsing a small 2x2 array row by row yields a flat Python list.
k = np.array([[1, 23],
              [12, 3]])
k.ravel().tolist()

[1, 23, 12, 3]

In [51]:
# Collapse the (n_queries, n_neighbors) index matrix into one plain Python list
# of recommended movie row positions, then show it.
recommended_indices = indices.ravel().tolist()
recommended_indices

[9829, 565, 448, 6109, 537]

In [55]:
# Pull a single catalogue row by integer position (565 here) for inspection.
selected_movie = movies_df.iloc[565, :]

# Show every field of the chosen movie.
selected_movie

name                                                         Day Shift
genre                                  Action, Fantasy, Horror, Comedy
description          An LA vampire hunter has a week to come up wit...
clean_description    la vampire hunter week come cash pay kids tuit...
labels               [1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, ...
Name: 565, dtype: object

In [39]:
# Load the book genres to map label-vector positions to genre names.
# JSON text is UTF-8 by specification, so the encoding is pinned explicitly
# instead of relying on the platform default.
with open('book_genres.json', 'r', encoding='utf-8') as f:
    book_genres = json.load(f)

# Extract the genre names, keeping the order of the entries in the file
book_labels = [x['genre'] for x in book_genres]

In [44]:
# Get the binary genre labels for the selected movie (row position 565).
# The cell holds a stringified list; ast.literal_eval parses it as a literal
# without executing it, which eval() would do with arbitrary file content.
predicted_labels = ast.literal_eval(movies_df.iloc[565]['labels'])
predicted_labels

[1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0]

In [45]:
# Print the name of every genre whose indicator is set for the selected movie,
# each followed by a single space, on one line.
active_genres = (
    book_labels[position]
    for position, flag in enumerate(predicted_labels)
    if flag
)
print(''.join(f'{genre} ' for genre in active_genres))

Fiction Romance Young Adult Fantasy Horror 


In [48]:
# Persist the fitted KNN model to disk by pickling it into a binary file.
with open('cross-recommender-model-v1.pkl', 'wb') as model_file:
    pickle.Pickler(model_file).dump(model)

print("Model saved successfully as 'cross-recommender-model-v1.pkl'")