In [2]:
import ast
import json

import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [20]:
# Survey answers are read straight from a Google Sheet: the gviz endpoint is
# asked for CSV output (tqx=out:csv) of the tab named in `sheet_name`.
# Running this cell requires network access.
sheet_id = '1u85B-IL-btlQaqji6wXwgvbdZc_PZ9Y8EVCIRg-DMY0'
sheet_name = 'Movie_Preference_Information'
url = (
    "https://docs.google.com/spreadsheets/d/" + sheet_id
    + "/gviz/tq?tqx=out:csv&sheet=" + sheet_name
)
prefs_df = pd.read_csv(url)

In [3]:
# Trained recommendation network (stored as an .h5 file).
model = keras.models.load_model('movie_recommendation_model.h5')

# Preprocessing objects persisted with joblib, loaded in a fixed order.
# joblib files are pickles, so only load artifacts from a trusted source.
scaler, min_max_scaler, director_encoder, title_tokenizer = (
    joblib.load(artifact_path)
    for artifact_path in (
        'scaler.pkl',
        'min_max_scaler.pkl',
        'director_encoder.pkl',
        'title_tokenizer.pkl',
    )
)

# Actor vocabulary: actor name -> column index of the cast encoding, plus the
# full actor list (its length sets the width of that encoding).
# allow_pickle=True also executes pickled objects, so the same trust caveat applies.
with open('actor_to_index.json', 'r') as f:
    actor_to_index = json.load(f)
all_actors = np.load('all_actors.npy', allow_pickle=True).tolist()

# Names of the genre columns selected from the movies table.
with open('genre_columns.json', 'r') as f:
    genre_columns = json.load(f)

In [4]:
# Movie feature table; presumably the output of an earlier preprocessing
# step (inferred from the file name only) — TODO confirm provenance.
movies = pd.read_csv(filepath_or_buffer="neural_net_ready_preprocessed_movies.csv")

In [5]:
# Encode directors only when the loaded table does not already carry the
# encoded column; otherwise the existing values are kept untouched.
director_column_missing = 'Director_Encoded' not in movies
if director_column_missing:
    movies['Director_Encoded'] = director_encoder.transform(movies['Director'])

In [6]:
# Turn every movie title into a fixed-length sequence of token ids.
#
# `title_data` is read unconditionally by the later prediction cells, and no
# cell ever stores or reads a 'Title_Sequences' column, so the sequences are
# always built here. Guarding on that column would only leave `title_data`
# undefined (NameError further down) whenever the column happened to exist.
sequences = title_tokenizer.texts_to_sequences(movies['Title'])
# NOTE(review): maxlen=20 presumably matches the title input length the model
# was trained with — confirm against the training notebook.
padded_sequences = pad_sequences(sequences, maxlen=20)
title_data = padded_sequences

In [7]:
def _parse_cast_list(value):
    """Return one movie's cast as a list of actor names.

    Accepts the textual list literal stored in the CSV, an already-parsed
    list/tuple (so the cell can be re-run safely), or a missing value.
    Anything that is not a list-like literal yields an empty cast.
    """
    if isinstance(value, str):
        if not value.strip():
            return []
        # literal_eval only accepts Python literals, unlike eval(), which
        # would execute arbitrary code embedded in the CSV.
        parsed = ast.literal_eval(value)
        return list(parsed) if isinstance(parsed, (list, tuple, set)) else []
    if isinstance(value, (list, tuple)):
        return list(value)
    return []  # NaN / None: no cast information for this movie


# Parse the cast column in place; strings become lists, lists stay lists.
movies['Star Cast List Clean'] = movies['Star Cast List Clean'].apply(_parse_cast_list)

# Multi-hot cast matrix: one row per movie, one column per known actor.
# It is always built because the later prediction cells read
# `star_cast_encoded` unconditionally and no cell stores it as a
# 'Star_Cast_Encoded' column.
star_cast_encoded = np.zeros((len(movies), len(all_actors)))
for i, actors in enumerate(movies['Star Cast List Clean']):
    for actor in actors:
        index = actor_to_index.get(actor)
        if index is not None:  # actors outside the vocabulary are ignored
            star_cast_encoded[i, index] = 1

In [8]:
# Per-movie feature arrays that are later handed to model.predict.
# NOTE(review): `scaler` and `min_max_scaler` are loaded earlier but are not
# applied here; presumably the CSV is already scaled — confirm.
numeric_feature_columns = ['IMDb Rating', 'Duration (minutes)', 'Year']
numerical_data = movies[numeric_feature_columns].values
genre_data = movies[genre_columns].values
director_data = movies['Director_Encoded'].values

In [9]:
# Number of candidate movies (one per row) and the user to score them for.
num_movies = movies.shape[0]
user_id_to_predict = 5

In [10]:
# Parallel id arrays, one entry per movie: row positions 0..num_movies-1 as
# movie ids, each paired with the same user id.
# NOTE(review): assumes the model's movie ids are row positions — confirm.
movie_id_array = np.arange(0, num_movies)
user_id_array = np.array(num_movies * [user_id_to_predict])

In [11]:
# The whole catalogue is scored, so the "all_*" inputs are simply aliases of
# the arrays built above (no copies are made).
(all_numerical_data,
 all_director_data,
 all_genre_data,
 all_star_cast_data,
 all_title_data) = (
    numerical_data,
    director_data,
    genre_data,
    star_cast_encoded,
    title_data,
)

In [12]:
# Score every movie for the selected user in a single batch.
# The list order presumably has to match the model's input order — confirm
# against the model definition before reordering.
model_inputs = [
    user_id_array,
    movie_id_array,
    all_numerical_data,
    all_director_data,
    all_genre_data,
    all_star_cast_data,
    all_title_data,
]
predictions = model.predict(model_inputs)



In [13]:
# Collapse the model output to one score per movie and attach it to the
# table; scores line up with `movies` by row position.
liking_scores = predictions.flatten()
movies['predicted_liking'] = liking_scores

In [14]:
# Best-scored movies first; `movies` itself is left in its original order.
recommendations = movies.sort_values('predicted_liking', ascending=False)

In [15]:
# Show the top_n highest-scored titles with the score rendered as a percentage.
top_n = 10
top_rows = recommendations.head(top_n)
print(f"Top {top_n} recommendations for User ID {user_id_to_predict}:")
for title, liking in zip(top_rows['Title'], top_rows['predicted_liking']):
    print(f"- {title} (Predicted Liking: {liking:.2%})")

Top 10 recommendations for User ID 5:
- rebel moon - part two: the scargiver (Predicted Liking: 100.00%)
- hollywoodland (Predicted Liking: 100.00%)
- black ' (Predicted Liking: 100.00%)
- retrograde (Predicted Liking: 100.00%)
- mark of the devil (Predicted Liking: 100.00%)
- driven (Predicted Liking: 100.00%)
- ip man  (Predicted Liking: 100.00%)
- scandal (Predicted Liking: 100.00%)
- marshall (Predicted Liking: 100.00%)
- geronimo: an american legend (Predicted Liking: 100.00%)


In [21]:
# Id arrays for a "generic" user (id 0, or substitute an average user id).
# NOTE(review): `generic_user_array` is not passed to the model in any cell
# visible in this section — confirm whether a prediction step is missing.
generic_user_id = 0 # Or an average user ID
movie_id_array = np.arange(0, num_movies)
generic_user_array = np.array(num_movies * [generic_user_id])

In [23]:
def _answer_or_blank(raw_prefs_row, column):
    """Return the answer stored under `column`, or '' when absent or unanswered.

    pandas represents an empty survey cell as float NaN; that value (and None)
    is normalised to '' so callers see the same "no answer" marker as they do
    for a missing column.
    """
    value = raw_prefs_row.get(column, '')
    if value is None or (isinstance(value, float) and value != value):
        return ''
    return value


def process_preferences(raw_prefs_row, all_genre_columns):
    """Convert one row of raw survey answers into a structured dictionary.

    Args:
        raw_prefs_row: Mapping-like row with a ``.get(key, default)`` method
            (a pandas Series or a dict) keyed by the survey question text.
        all_genre_columns: Currently unused; kept so existing callers keep
            working.

    Returns:
        dict with the keys:
            'genres'           - list of stripped, non-empty genre names
            'era'              - raw answer, or '' when unanswered
            'length'           - raw answer, or '' when unanswered
            'actors_directors' - list of stripped names; empty when the answer
                                 is blank, not text, or a bare yes/no reply
            'acclaim'          - raw answer, or '' when unanswered
    """
    processed = {}
    # Adjust keys based on your actual DataFrame column names
    raw_genres = _answer_or_blank(raw_prefs_row, 'What Genres do you enjoy?')
    if not isinstance(raw_genres, str):
        # A non-text answer cannot be split into genre names.
        raw_genres = ''
    processed['genres'] = [g.strip() for g in raw_genres.split(',') if g.strip()]

    processed['era'] = _answer_or_blank(
        raw_prefs_row, 'Do you prefer older classics or newer releases?')
    processed['length'] = _answer_or_blank(
        raw_prefs_row, 'Do you prefer shorter movies or longer epics?')

    raw_actors_directors = _answer_or_blank(
        raw_prefs_row, 'Are there any actors or directors your particularly enjoy?')
    # Replies such as "no"/"yes" answer the question without naming anyone.
    non_answers = ['no', 'yes', 'nope', '']
    if isinstance(raw_actors_directors, str) and raw_actors_directors.lower().strip() not in non_answers:
        processed['actors_directors'] = [
            name.strip() for name in raw_actors_directors.split(',') if name.strip()
        ]
    else:
        processed['actors_directors'] = []

    processed['acclaim'] = _answer_or_blank(
        raw_prefs_row, 'Do you prefer critically acclaimed movies or more popular ones?')
    return processed

def apply_filtering(movies_with_scores_df, processed_prefs):
    """Return the movies that match the user's stated preferences.

    Only the genre preference is applied: a movie is kept when the sum of its
    values across the requested genre columns is positive. Requested genres
    that are not columns of the frame are ignored, and when no requested genre
    is usable the full (copied) frame is returned. The input frame is never
    modified.

    Args:
        movies_with_scores_df: DataFrame of movies; genre columns are expected
            to be numeric indicator columns named after the genre.
        processed_prefs: dict of preferences; only the 'genres' entry is read.

    Returns:
        A new DataFrame with the surviving rows.
    """
    result = movies_with_scores_df.copy()

    # Genre filter: keep rows flagged for at least one requested genre.
    requested_genres = processed_prefs.get('genres') or []
    usable_genres = [name for name in requested_genres if name in result.columns]
    if usable_genres:
        genre_hits = result[usable_genres].sum(axis=1)
        result = result[genre_hits > 0]

    # TODO: era filter (needs an 'Original_Year' column).
    # TODO: length filter (needs an 'Original_Duration' column).
    # TODO: actor/director and acclaim filters (acclaim may need mapping to
    #       IMDb rating ranges).

    return result

In [26]:
# Build a top-N list for every survey respondent.
# NOTE(review): the ranking uses the 'predicted_liking' column that an earlier
# cell filled with scores for `user_id_to_predict`; respondents therefore
# differ only through the preference filter — confirm this is intended.
top_n = 10
all_user_recommendations = []
for index, user_row in prefs_df.iterrows():
    print(f"Processing user index: {index}") # Progress indicator

    # Raw survey row (a pandas Series) -> structured preferences.
    processed_prefs = process_preferences(user_row, genre_columns)

    # Keep only the movies compatible with those preferences.
    filtered_recommendations = apply_filtering(movies, processed_prefs)

    # Rank what is left and keep the best top_n as plain dict records.
    final_recommendations = filtered_recommendations.sort_values(
        'predicted_liking', ascending=False
    )
    user_top_movies = (
        final_recommendations[['Title', 'predicted_liking']]
        .head(top_n)
        .to_dict(orient='records')
    )

    # The DataFrame index serves as the user identifier; another unique
    # column (e.g. a timestamp) could be used instead.
    all_user_recommendations.append({
        'user_identifier': index,
        'preferences': processed_prefs,
        'recommendations': user_top_movies,
    })

Processing user index: 0
Processing user index: 1
Processing user index: 2
Processing user index: 3
Processing user index: 4
Processing user index: 5
Processing user index: 6
Processing user index: 7
Processing user index: 8
Processing user index: 9
Processing user index: 10
Processing user index: 11


In [27]:
# Pretty-print the first respondent's result as a sanity check; nothing is
# printed when no respondent was processed.
if len(all_user_recommendations) > 0:
    first_user_result = all_user_recommendations[0]
    print(json.dumps(first_user_result, indent=2))

{
  "user_identifier": 0,
  "preferences": {
    "genres": [
      "Action",
      "Adventure",
      "Animation",
      "Comedy",
      "Fantasy",
      "Romance",
      "Sci-Fi",
      "Thriller"
    ],
    "era": "Newer Releases (Post-2000)",
    "length": "Longer epics (~120 minutes or longer)",
    "actors_directors": [],
    "acclaim": ""
  },
  "recommendations": [
    {
      "Title": "rebel moon - part two: the scargiver",
      "predicted_liking": 1.0
    },
    {
      "Title": "racing stripes",
      "predicted_liking": 1.0
    },
    {
      "Title": "getting even with dad",
      "predicted_liking": 1.0
    },
    {
      "Title": "lassie",
      "predicted_liking": 1.0
    },
    {
      "Title": "extinct",
      "predicted_liking": 1.0
    },
    {
      "Title": "the peanut butter solution",
      "predicted_liking": 1.0
    },
    {
      "Title": "the muppet christmas carol",
      "predicted_liking": 1.0
    },
    {
      "Title": "fireheart",
      "predicted_liki