In [84]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.pipeline import make_pipeline
import pandas as pd
import joblib

Train data

In [85]:
df = pd.read_csv('./datasets/streaming.csv')
df_sample = df.sample(n=3000, random_state=42)

df = df_sample.drop(['type', 'Unnamed: 0', 'show_id', 'director', 'country', 'date_added',
                     'release_year', 'duration_int', 'duration_type', 'description', 'cast'], axis=1)
genre_dummies = df_sample['listed_in'].str.join('|').str.get_dummies()
features = pd.concat([genre_dummies, df['score']], axis=1)

scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

cosine_sim = cosine_similarity(features_scaled)

joblib.dump((df_sample, features, cosine_sim), 'preprocessed_data.joblib')
joblib.dump(scaler, 'trained_model.joblib')


['trained_model.joblib']

Recomendation

In [86]:
def get_recommendations_new(title: str, num_recommendations=5):
    """
    This model will give you a recommendation based on a given movie

    Parameters:
    title (str): The title of the movie you want to get recommendations from

    Returns:
    A list of 5 movies this model recommends you
    """
    # Load preprocessed data and trained model from joblib files
    df_sample, features, cosine_sim = joblib.load('preprocessed_data.joblib')
    scaler = joblib.load('trained_model.joblib')

    # Preprocess the input title
    title_features = pd.DataFrame(
        features.iloc[df_sample[df_sample['title'] == title].index[0]]).T
    title_features_scaled = scaler.transform(title_features)

    # Calculate cosine similarity between input title and all other titles
    sim_scores = list(
        enumerate(cosine_sim[df_sample[df_sample['title'] == title].index[0]]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:num_recommendations+1]

    # Get recommended movie titles
    movie_indices = [i[0] for i in sim_scores]
    recommended_movies = df_sample['title'].iloc[movie_indices].tolist()

    return recommended_movies

In [87]:
get_recommendations_new('homecoming')

['jcvd',
 'tomorrow never dies',
 'the man with the golden arm',
 'blackmark',
 'kadavul paathi mirugam paathi']