In [2]:
import streamlit as st
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
# All tensors and the loaded model are placed on the CPU; load_model passes this as map_location.
device = torch.device('cpu')

In [3]:
# LOAD EVERYTHING
def load_model(fp):
    """Load a TorchScript archive and return it switched to eval mode.

    Args:
        fp: Path (or file-like object) handed to ``torch.jit.load``.

    Returns:
        The loaded module, mapped onto the module-level ``device`` and
        with ``eval()`` already called on it.
    """
    scripted = torch.jit.load(fp, map_location=device)
    # Inference only: turn off training-time behaviour.
    scripted.eval()
    return scripted

def load_utils(fp):
    """Read the pickled helper object stored at ``fp`` and return it.

    NOTE(review): ``pd.read_pickle`` unpickles arbitrary objects, so only
    point this at files from a trusted source.

    Args:
        fp: Path to the pickle file.

    Returns:
        Whatever object was pickled into the file.
    """
    return pd.read_pickle(fp)

def name2itemId(names):
    """Translate one name or a list of names into encoded movie indices.

    Each name is looked up in the module-level ``d_name2le`` mapping and the
    resulting values are passed through ``d['movieId'].transform``.

    Args:
        names: A single name, or a ``list`` of names. Anything that is not a
            ``list`` is treated as one name.

    Returns:
        The output of ``d['movieId'].transform`` for the looked-up values,
        in the same order as ``names``.

    Raises:
        KeyError: If a name is not present in ``d_name2le``.
    """
    titles = names if isinstance(names, list) else [names]
    looked_up = [d_name2le[title] for title in titles]
    return d['movieId'].transform(looked_up)

In [4]:
# Model: serialized archive read through load_model (torch.jit.load).
fp = 'mdls/fm_pt.pkl'
mdl = load_model(fp)

# Helper objects: one pickled dict, unpacked into the globals used below.
fp = 'data/d_utils.pkl'
utils = load_utils(fp)
d, feature_offsets, movie_embs, movies, d_name2le = (
    utils[key]
    for key in ('label_encoder', 'feature_offsets', 'movie_embs', 'movies', 'd_name2le')
)
# Offset added to an encoded movie index before it is fed to mdl.x_emb.
movie_offset = feature_offsets['movieId_index']
# Alphabetical list of every title in the movies frame.
movie_list = sorted(movies['title'].tolist())

In [5]:
# GET RECOMMENDATIONS
# Rank every movie by cosine similarity between its embedding and the
# embedding of a seed title, then print the eight closest matches.
movie = 'Toy Story 2 (1999)'
IDX = name2itemId(movie)[0] # movieId_idx in original data, after labelEncoder, before FM offset
IDX = IDX + movie_offset # Add offset to get input movie emb
with torch.no_grad():  # inference only, so no autograd graph is needed
    emb_toy2 = mdl.x_emb(torch.tensor(IDX, device=device)) # Using embedding only here
    # One broadcast call over all rows instead of a Python loop per movie.
    # Assumes movie_embs is a 2-D tensor with one row per movie (the cold-start
    # cell already reduces it with .sum(1)) -- TODO confirm.
    cosine_sim = F.cosine_similarity(emb_toy2.unsqueeze(0), movie_embs, dim=1)
top8 = cosine_sim.argsort(descending=True)[:8]
movie_sims = cosine_sim[top8]
# Row i of movie_embs is taken to describe row i of `movies`, as in the original lookup.
movie_recs = movies.iloc[top8.numpy()]['title'].values
for rec, sim in zip(movie_recs, movie_sims):
    print(f'{sim.tolist():0.3f} - {rec}')

1.000 - Toy Story 2 (1999)
0.795 - Toy Story (1995)
0.625 - Babe (1995)
0.596 - Bug's Life, A (1998)
0.558 - Beauty and the Beast (1991)
0.525 - Aladdin (1992)
0.524 - Last Days, The (1998)
0.522 - Illtown (1996)


In [6]:
# Cold start: with no rating history, recommendations are driven by user
# metadata (age, gender) instead.
# Each LabelEncoder's classes_ lists the raw MovieLens value for every encoded
# integer; the loop below prints that mapping for both features.
# Metadata info: https://files.grouplens.org/datasets/movielens/ml-1m-README.txt
feats = ['gender', 'age']
for feat in feats:
    print(feat)
    classes = d[feat].classes_
    for code, raw_value in enumerate(classes):
        print(f'{code} : {raw_value}')

gender
0 : F
1 : M
age
0 : 1
1 : 18
2 : 25
3 : 35
4 : 45
5 : 50
6 : 56


In [65]:
# Cold-start recommendations for a male user aged 18-24.
# Encoded values, per the LabelEncoder listing printed earlier:
GENDER = 1  # 'M'
AGE = 1     # age code 18

# Per-movie bias terms, taken at each movie's index in the model's bias vector.
idxs_movies = torch.tensor(movies['movieId_index'].values, device=device)
movie_biases = mdl.x_bias[idxs_movies]

# Look up the embedding of each metadata feature at (feature offset + encoded value).
gender_idx = feature_offsets['gender_index'] + GENDER
age_idx = feature_offsets['age_index'] + AGE
gender_emb = mdl.x_emb(torch.tensor(gender_idx, device=device))
age_emb = mdl.x_emb(torch.tensor(age_idx, device=device))
metadata_emb = gender_emb + age_emb

# Score = movie bias + dot product of the metadata embedding with each movie embedding.
rankings = (movie_biases + (metadata_emb * movie_embs).sum(dim=1)).detach()
best_first = rankings.argsort(descending=True)
cold_start_recs = movies.iloc[best_first]['title'].values[:8]
for i, rec in enumerate(cold_start_recs):
    print(i, rec)

0 Shawshank Redemption, The (1994)
1 Usual Suspects, The (1995)
2 Life Is Beautiful (La Vita è bella) (1997)
3 Braveheart (1995)
4 Sanjuro (1962)
5 Star Wars: Episode V - The Empire Strikes Back (1980)
6 Star Wars: Episode IV - A New Hope (1977)
7 Schindler's List (1993)


In [20]:
# Human-readable age bracket -> raw MovieLens age code.
d_age_meta = {
    'Under 18': 1,
    '18-24': 18,
    '25-34': 25,
    '35-44': 35,
    '45-49': 45,
    '50-55': 50,
    '56+': 56,
}
# Raw age code -> its position in the age LabelEncoder's classes_.
d_age = {age_code: position for position, age_code in enumerate(d['age'].classes_)}
d_age

{1: 0, 18: 1, 25: 2, 35: 3, 45: 4, 50: 5, 56: 6}

In [10]:
# Encoded gender index -> display label. The gender LabelEncoder lists
# 0 : F and 1 : M, so 0 must be 'Female' and 1 must be 'Male'.
dd = {0: 'Female', 1: 'Male'}
# The options are the encoded indices, so the widget's return value can be
# added straight onto the gender feature offset. format_func shows the
# label instead of the bare 0/1 a plain dict would display.
# (Widgets only render under `streamlit run`; in a notebook this just returns
# the default option.)
st.selectbox('Gender', options=list(dd), format_func=dd.get)

2023-07-05 14:39:44.612 
  command:

    streamlit run /home/holynec/anaconda3/envs/streamlit/lib/python3.8/site-packages/ipykernel_launcher.py [ARGUMENTS]


0