In [74]:
from fastapi import FastAPI
import pandas as pd
import json
import gzip
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np

Train data

In [81]:
df = pd.read_csv('./datasets/streaming.csv')
df_sample = df.sample(n=3000, random_state=42)

df = df_sample.drop(['type', 'Unnamed: 0', 'show_id', 'director', 'country', 'date_added',
                     'release_year', 'duration_int', 'duration_type', 'description', 'cast'], axis=1)
genre_dummies = df_sample['listed_in'].str.join('|').str.get_dummies()
features = pd.concat([genre_dummies, df['score']], axis=1)
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)
cosine_sim = cosine_similarity(features_scaled)

# Save the preprocessed data and trained model to files without compression
with open('preprocessed_data.pickle', 'wb') as f:
    pickle.dump((df_sample, features, cosine_sim), f)

with open('trained_model.pickle', 'wb') as f:
    pickle.dump((scaler,), f)


Recomendation

In [82]:
def get_recommendations_new(title: str, num_recommendations=5):
    """
    This model will give you a recommendation based on a given movie

    Parameters:
    title (str): The title of the movie you want to get recommendations from

    Returns:
    A list of 5 movies this model recommends you
    """
    # Load preprocessed data and trained model from files without compression
    with open('preprocessed_data.pickle', 'rb') as f:
        df_sample, features, cosine_sim = pickle.load(f)

    with open('trained_model.pickle', 'rb') as f:
        scaler, = pickle.load(f)

    # Preprocess the input title
    title_features = pd.DataFrame(
        features.iloc[df_sample[df_sample['title'] == title].index[0]]).T
    title_features_scaled = scaler.transform(title_features)

    # Calculate cosine similarity between input title and all other titles
    sim_scores = list(
        enumerate(cosine_sim[df_sample[df_sample['title'] == title].index[0]]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:num_recommendations+1]

    # Get recommended movie titles
    movie_indices = [i[0] for i in sim_scores]
    recommended_movies = df_sample['title'].iloc[movie_indices].tolist()

    return recommended_movies


In [83]:
get_recommendations_new('homecoming')

['jcvd',
 'tomorrow never dies',
 'the man with the golden arm',
 'blackmark',
 'kadavul paathi mirugam paathi']