In [11]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import numpy as np
import joblib
import json
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [12]:
# Location of the survey responses: the worksheet (tab) name and the
# spreadsheet ID, both of which are interpolated into the export URL.
sheet_name = "Movie_Preference_Information"
sheet_id = "1u85B-IL-btlQaqji6wXwgvbdZc_PZ9Y8EVCIRg-DMY0"

In [13]:
# CSV export endpoint (gviz) for the chosen worksheet. sheet_name is inserted
# verbatim, so a tab name containing spaces or special characters would have
# to be URL-encoded before being used here.
url = (
    "https://docs.google.com/spreadsheets/d/"
    f"{sheet_id}"
    "/gviz/tq?tqx=out:csv"
    f"&sheet={sheet_name}"
)

In [14]:
# Download the worksheet's CSV export into a DataFrame, then display it.
data = pd.read_csv(filepath_or_buffer=url)
data

Unnamed: 0,Timestamp,What Genres do you enjoy?,Do you prefer older classics or newer releases?,Do you prefer shorter movies or longer epics?,Are there any actors or directors your particularly enjoy?,"Do you prefer critically acclaimed movies or more popular, crowd-pleasing films?"
0,3/6/2025 10:09:02,"Action, Adventure, Animation, Comedy, Fantasy,...",Newer Releases (Post-2000),Longer epics (~120 minutes or longer),,All of the above - no preference
1,3/6/2025 20:07:15,"Action, Adventure",Newer Releases (Post-2000),Shorter movies (~100 minutes or less),Christian Bale,"Popular, Crowd-Pleasing Films"
2,3/6/2025 20:26:51,"Action, Adventure, Animation, Fantasy, Sci-Fi",All of the above - no preference,All of the above - no preference,Yes,All of the above - no preference
3,3/6/2025 21:08:18,"Biography, Comedy",Older Classics (Pre-1980),Longer epics (~120 minutes or longer),Kirk Camron,"Popular, Crowd-Pleasing Films"
4,3/7/2025 12:58:25,"Action, Adventure, Comedy, Crime, Drama, Histo...",Newer Releases (Post-2000),All of the above - no preference,,"Popular, Crowd-Pleasing Films"
5,3/7/2025 12:59:23,"Action, Adventure, Comedy, Crime, Drama, Famil...",All of the above - no preference,All of the above - no preference,"Kevin Costner, Mark Washburn, Chris Pratt",All of the above - no preference
6,3/7/2025 13:04:25,"Animation, Comedy, Fantasy, Horror, Musical, R...",Newer Releases (Post-2000),All of the above - no preference,Nope,"Popular, Crowd-Pleasing Films"
7,3/7/2025 13:08:43,"Action, Adventure, Comedy, Fantasy, Horror, Mu...",All of the above - no preference,Shorter movies (~100 minutes or less),Keanu Reveess,All of the above - no preference
8,3/7/2025 13:20:03,"Action, Adventure, Biography, Comedy, Fantasy,...",All of the above - no preference,All of the above - no preference,No,All of the above - no preference
9,3/7/2025 18:28:34,"Action, Adventure, Comedy, Fantasy, Sci-Fi",In Between (1980-2000),Shorter movies (~100 minutes or less),,"Popular, Crowd-Pleasing Films"


In [15]:
# Load previously saved artifacts from the working directory
# (presumably written by the model-training step — TODO confirm).
# NOTE(review): joblib.load and np.load(allow_pickle=True) unpickle the file
# contents, which can execute arbitrary code; only load files from a trusted
# source.
scaler = joblib.load('scaler.pkl')
min_max_scaler = joblib.load('min_max_scaler.pkl')
director_encoder = joblib.load('director_encoder.pkl')
title_tokenizer = joblib.load('title_tokenizer.pkl')
# JSON lookup tables are read with the standard json module.
with open('actor_to_index.json', 'r') as f:
    actor_to_index = json.load(f)
# The stored array is converted to a plain Python list.
all_actors = np.load('all_actors.npy', allow_pickle=True).tolist()
with open('genre_columns.json', 'r') as f:
    genre_columns = json.load(f)

In [18]:
# Hand-written example response (question text -> raw answer) with the same
# values as one row of the survey sheet shown above.
user_prefs_raw = {
    "Timestamp": "3/6/2025 20:07:15",
    "Do you prefer older classics or newer releases?": "Newer Releases (Post-2000)",
    "What Genres do you enjoy?": "Action, Adventure",
    "Do you prefer shorter movies or longer epics?": "Shorter movies (~100 minutes or less)",
    # The spelling "your particularly" is kept exactly as in the sheet header.
    "Are there any actors or directors your particularly enjoy?": "Christian Bale",
    # Shortened form of the sheet header, which ends in
    # "... or more popular, crowd-pleasing films?"; this shortened spelling is
    # the key that process_preferences looks up.
    "Do you prefer critically acclaimed movies or more popular ones?": "Popular, Crowd-Pleasing Films",
}

In [19]:
# Keys under which the acclaim answer may be stored, in lookup order: the
# shortened key used by hand-written examples, then the full column header as
# it appears in the survey sheet.
_ACCLAIM_QUESTIONS = (
    'Do you prefer critically acclaimed movies or more popular ones?',
    'Do you prefer critically acclaimed movies or more popular, crowd-pleasing films?',
)

# Lower-cased free-text replies that do not name any actor or director.
_NON_ANSWERS = frozenset({'no', 'yes', 'nope', ''})


def _get_answer(raw_prefs, *questions):
    """Return the first non-blank string answer stored under any of ``questions``, else ''."""
    for question in questions:
        answer = raw_prefs.get(question, '')
        # Empty spreadsheet cells are read as float NaN (or None), not str;
        # treat anything that is not a non-blank string as "no answer".
        if isinstance(answer, str) and answer.strip():
            return answer
    return ''


def _split_names(text):
    """Split a comma-separated answer into stripped, non-empty items."""
    return [item.strip() for item in text.split(',') if item.strip()]


def process_preferences(raw_prefs, all_genre_columns):
    """Convert one raw survey response into a structured dictionary for filtering.

    Args:
        raw_prefs: Mapping from survey question text to the raw answer. Any
            object exposing ``.get(key, default)`` works (a plain dict, or a
            DataFrame row). Missing keys and non-string answers such as
            ``None`` or NaN are treated as an empty answer instead of raising.
        all_genre_columns: Known genre column names. Currently unused; kept so
            existing callers keep working and for a possible multi-hot genre
            encoding later.

    Returns:
        dict with the keys, in this order:
            'genres': list of genre names split on commas and stripped.
            'era': the era answer, or '' when absent.
            'length': the length answer, or '' when absent.
            'actors_directors': list of names split on commas; empty when the
                answer is blank or a non-answer such as 'No', 'Yes', 'Nope'.
            'acclaim': the acclaim answer, looked up under the shortened key
                first and the full sheet header second; '' when absent.
    """
    genres_answer = _get_answer(raw_prefs, 'What Genres do you enjoy?')
    people_answer = _get_answer(
        raw_prefs, 'Are there any actors or directors your particularly enjoy?'
    )

    # Replies like "No" or "Yes" carry no names; everything else is split on
    # commas (basic split, names containing commas are not supported).
    if people_answer.lower().strip() in _NON_ANSWERS:
        people = []
    else:
        people = _split_names(people_answer)

    return {
        'genres': _split_names(genres_answer),
        'era': _get_answer(raw_prefs, 'Do you prefer older classics or newer releases?'),
        'length': _get_answer(raw_prefs, 'Do you prefer shorter movies or longer epics?'),
        'actors_directors': people,
        'acclaim': _get_answer(raw_prefs, *_ACCLAIM_QUESTIONS),
    }

In [20]:
# Run the hand-written example response through the preference parser.
processed_user_prefs = process_preferences(
    raw_prefs=user_prefs_raw,
    all_genre_columns=genre_columns,
)

In [21]:
# Show the structured preferences produced for the example response.
print(processed_user_prefs)

{'genres': ['Action', 'Adventure'], 'era': 'Newer Releases (Post-2000)', 'length': 'Shorter movies (~100 minutes or less)', 'actors_directors': ['Christian Bale'], 'acclaim': 'Popular, Crowd-Pleasing Films'}
