In [3]:
# Data handling dependencies
import pandas as pd
import numpy as np
import csv
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Custom Libraries
import sys
sys.path.append(r'C:\Users\Kamogelo\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\Scripts\surprise.exe')
from utils.data_loader import load_movie_titles
from recommenders.collaborative_based import collab_model
from recommenders.content_based import content_model


In [10]:

# Data Loading
title_list = load_movie_titles('resources/data/movies.csv')
movie_df =  pd.read_csv('resources/data/movies.csv', index_col='movieId')
df_links = pd.read_csv('resources/data/links.csv')

## Content-based filtering

In [3]:

def data_preprocessing(subset_size):
    """Prepare data for use within Content filtering algorithm.
    Pandas Dataframe
        Subset of movies selected for content-based filtering.

    """
    # Split genre data into individual words.
    movies['keyWords'] = movies['genres'].str.replace('|', ' ')
    # Subset of the data
    movies_subset = movies[:subset_size]
    return movies_subset
 
def content_model(movie_list,top_n=10): 
    """Performs Content filtering based upon a list of movies supplied
       by the app user.
    """
    # Initializing the empty list of recommended movies
    data = data_preprocessing(2000)
    # Instantiating and generating the count matrix
    count_vec = CountVectorizer()
    count_matrix = count_vec.fit_transform(data['keyWords'])
    indices = pd.Series(data['title'])
    cosine_sim = cosine_similarity(count_matrix, count_matrix)
    cosine_sim = pd.DataFrame(cosine_sim, index = data.index, columns = data.index)
    # Getting the index of the movie that matches the title
    idx_1 = indices[indices == movie_list[0]].index[0]
    idx_2 = indices[indices == movie_list[1]].index[0]
    idx_3 = indices[indices == movie_list[2]].index[0]
    # Creating a Series with the similarity scores in descending order
    rank_1 = cosine_sim[idx_1]
    rank_2 = cosine_sim[idx_2]
    rank_3 = cosine_sim[idx_3]
    # Calculating the scores
    score_series_1 = pd.Series(rank_1).sort_values(ascending = False)
    score_series_2 = pd.Series(rank_2).sort_values(ascending = False)
    score_series_3 = pd.Series(rank_3).sort_values(ascending = False)
    # Getting the indexes of the 10 most similar movies
    listings = pd.concat([score_series_1, score_series_2, score_series_3]).sort_values(ascending=False)
    # Store movie names
    recommended_movies = []
    # Appending the names of movies
    top_50_indexes = list(listings.iloc[1:50].index)
    # Removing chosen movies
    top_indexes = np.setdiff1d(top_50_indexes,[idx_1,idx_2,idx_3])
    for i in top_indexes[:top_n]:
        recommended_movies.append(list(movies['title'])[i])
    return recommended_movies

In [4]:
movies = movies_df.dropna()
movie_list = [title_list[1],title_list[2],title_list[3]]
content_model(movie_list,top_n=10)

['Dracula: Dead and Loving It (1995)',
 'Lamerica (1994)',
 'Confessional, The (Confessionnal, Le) (1995)',
 'Eye for an Eye (1996)',
 'In the Bleak Midwinter (1995)',
 "Jupiter's Wife (1994)",
 'Frankie Starlight (1995)',
 'Stars Fell on Henrietta, The (1995)',
 'Drop Zone (1994)',
 'Ed Wood (1994)']

## Collaborative-based filtering

In [5]:
import pandas as pd
import numpy as np
import pickle
import copy
from surprise import Reader, Dataset
from surprise import SVD, NormalPredictor, BaselineOnly, KNNBasic, NMF
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [7]:
# Importing data
df_movies = pd.read_csv('resources/data/movies.csv',sep = ',')
ratings_df = pd.read_csv('resources/data/ratings.csv')
ratings_df.drop(['timestamp'], axis=1,inplace=True)

# We make use of an SVD model trained on a subset of the MovieLens 10k dataset.
model=pickle.load(open('resources/models/SVD.pkl', 'rb'))

NameError: name 'pickle' is not defined

In [7]:

# We make use of an SVD model trained on a subset of the MovieLens 10k dataset.
model=pickle.load(open('resources/models/SVD.pkl', 'rb'))

def prediction_item(item_id):
    """Map a given favourite movie to users within the
       MovieLens dataset with the same preference.

    Parameters
    ----------
    item_id : int
        A MovieLens Movie ID.

    Returns
    -------
    list
        User IDs of users with similar high ratings for the given movie.

    """
    # Data preprosessing
    reader = Reader(rating_scale=(0, 5))
    load_df = Dataset.load_from_df(ratings_df,reader)
    a_train = load_df.build_full_trainset()

    predictions = []
    for ui in a_train.all_users():
        predictions.append(model.predict(iid=item_id,uid=ui, verbose = False))
    return predictions

def pred_movies(movie_list):
    """Maps the given favourite movies selected within the app to corresponding
    users within the MovieLens dataset.

    Parameters
    ----------
    movie_list : list
        Three favourite movies selected by the app user.

    Returns
    -------
    list
        User-ID's of users with similar high ratings for each movie.

    """
    # Store the id of users
    id_store=[]
    # For each movie selected by a user of the app,
    # predict a corresponding user within the dataset with the highest rating
    for i in movie_list:
        predictions = prediction_item(item_id = i)
        predictions.sort(key=lambda x: x.est, reverse=True)
        # Take the top 10 user id's from each movie with highest rankings
        for pred in predictions[:10]:
            id_store.append(pred.uid)
    # Return a list of user id's
    return id_store

# !! DO NOT CHANGE THIS FUNCTION SIGNATURE !!
# You are, however, encouraged to change its content.  
def collab_model(movie_list, top_n=10):
    """Performs Collaborative filtering based upon a list of movies supplied
       by the app user.

    Parameters
    ----------
    movie_list : list (str)
        Favorite movies chosen by the app user.
    top_n : type
        Number of top recommendations to return to the user.

    Returns
    -------
    list (str)
        Titles of the top-n movie recommendations to the user.
    """
    
    # Retrieving indices of movies
    indices = pd.Series(movies_df['title'])
    
    # Getting movie IDs based on the provided list
    movie_ids = pred_movies(movie_list)
    
    # Collecting ratings for users who rated the movies from the provided list
    df_init_users = ratings_df[ratings_df['userId'] == movie_ids[0]]
    
    # Concatenating ratings for each movie in the list
    for i in movie_ids[1:]:  # Loop starts from the second movie_id
        df_init_users = pd.concat([df_init_users, ratings_df[ratings_df['userId'] == i]])

    # Getting the cosine similarity matrix
    cosine_sim = cosine_similarity(df_init_users.drop(columns=['userId']), df_init_users.drop(columns=['userId']))
    
    # Retrieving indices of movies from the provided list
    idx_1 = indices[indices == movie_list[0]].index[0]
    idx_2 = indices[indices == movie_list[1]].index[0]
    idx_3 = indices[indices == movie_list[2]].index[0]
    
    # Creating a Series with the similarity scores in descending order
    rank_1 = cosine_sim[idx_1]
    rank_2 = cosine_sim[idx_2]
    rank_3 = cosine_sim[idx_3]
    
    # Calculating the scores
    score_series_1 = pd.Series(rank_1).sort_values(ascending=False)
    score_series_2 = pd.Series(rank_2).sort_values(ascending=False)
    score_series_3 = pd.Series(rank_3).sort_values(ascending=False)
    
    # Appending the names of movies
    listings = pd.concat([score_series_1, score_series_2, score_series_3]).sort_values(ascending=False)
    
    recommended_movies = []
    
    # Choose top 50
    top_50_indexes = list(listings.iloc[1:50].index)
    # Removing chosen movies
    top_indexes = np.setdiff1d(top_50_indexes, [idx_1, idx_2, idx_3])
    
    # Getting the titles of the top-n recommended movies
    for i in top_indexes[:top_n]:
        recommended_movies.append(list(movies_df['title'])[i])
    
    return recommended_movies


In [8]:

movie_list = [title_list[1],title_list[2],title_list[3]]
collab_model(movie_list,top_n=10)

['Juror, The (1996)',
 'White Balloon, The (Badkonake sefid) (1995)',
 'Under Siege 2: Dark Territory (1995)',
 'Burnt by the Sun (Utomlyonnye solntsem) (1994)',
 "Muriel's Wedding (1994)",
 'Baby-Sitters Club, The (1995)',
 'Ace Ventura: Pet Detective (1994)',
 'Higher Learning (1995)',
 'I Love Trouble (1994)',
 'Tough and Deadly (1995)']

## API

In [31]:
api_key = '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'

In [32]:
from sk import generate_summary

In [33]:
api_key = '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'
movie_description = "A summary of the movie..."
summary = sk.generate_summary(movie_description, api_key)
print(summary)

NameError: name 'sk' is not defined

In [51]:
import requests
from IPython.display import display, HTML

def fetch_poster(movie_id):
    url = "https://api.themoviedb.org/3/movie/{}?api_key=c7ec19ffdd3279641fb606d19ceb9bb1&language=en-US".format(movie_id)
    data=requests.get(url)
    data=data.json()
    poster_path = data['poster_path']
    full_path = "https://image.tmdb.org/t/p/w500/"+poster_path
    return full_path

In [52]:
movies_df.head()

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


In [54]:
movie_id = 499546

In [55]:
from IPython.display import Image, display

image_url = fetch_poster(movie_id)
display(Image(url=image_url))


In [33]:
from IPython.display import IFrame

# Define the URL
url = "https://www.imdb.com/title/tt0113497/"

# Display the content in an iframe
IFrame(url, width=800, height=600)


In [None]:
https://www.imdb.com/title/tt0(113497)/

In [25]:
!pip install google-api-python-client

Collecting google-api-python-client
  Obtaining dependency information for google-api-python-client from https://files.pythonhosted.org/packages/cb/4e/0b6ce3651cecea77346255b966a69ac74b544b0bb02ed463e405a8e7cf7b/google_api_python_client-2.125.0-py2.py3-none-any.whl.metadata
  Downloading google_api_python_client-2.125.0-py2.py3-none-any.whl.metadata (6.6 kB)
Collecting httplib2<1.dev0,>=0.19.0 (from google-api-python-client)
  Obtaining dependency information for httplib2<1.dev0,>=0.19.0 from https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl.metadata
  Downloading httplib2-0.22.0-py3-none-any.whl.metadata (2.6 kB)
Collecting google-auth!=2.24.0,!=2.25.0,<3.0.0.dev0,>=1.32.0 (from google-api-python-client)
  Obtaining dependency information for google-auth!=2.24.0,!=2.25.0,<3.0.0.dev0,>=1.32.0 from https://files.pythonhosted.org/packages/9e/8d/ddbcf81ec751d8ee5fd18ac11ff38a0e110f39dfbf105e6d9db69d5

In [27]:
import os
import googleapiclient.discovery

def get_movie_trailer(movie_name):
    # Set up YouTube Data API client
    api_service_name = "youtube"
    api_version = "v3"
    api_key = "AIzaSyCxeFJnqlUpLw8vRA1jXLbq-a9FHhsOMi0"  # Replace with your own API key
    youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=api_key)

    # Search for movie trailers
    request = youtube.search().list(
        q=movie_name + " trailer",
        part="snippet",
        maxResults=1,
        type="video"
    )
    response = request.execute()

    # Extract trailer video ID
    if 'items' in response:
        items = response['items']
        if items:
            trailer_id = items[0]['id']['videoId']
            trailer_url = f"https://www.youtube.com/watch?v={trailer_id}"
            return trailer_url
        else:
            return "No trailer found."
    else:
        return "Error fetching data."

# Example usage
movie_name = input("Enter the name of the movie: ")
trailer_url = get_movie_trailer(movie_name)
print("Trailer URL:", trailer_url)


Enter the name of the movie: Toy Story (1995)
Trailer URL: https://www.youtube.com/watch?v=v-PjgYDrg70


In [30]:
get_movie_trailer('Waiting to Exhale (1995)')

'https://www.youtube.com/watch?v=t5iE3Wt2cJs'

In [None]:
'https://www.youtube.com/watch?v=t5iE3Wt2cJs


In [28]:
from IPython.display import IFrame

# Define the URL
url = "https://www.youtube.com/watch?v=v-PjgYDrg70"

# Display the content in an iframe
IFrame(url, width=800, height=600)

In [34]:
import streamlit as st

In [35]:
top_recommendations = ['Grumpier Old Men (1995)','Ace Ventura: When Nature Calls (1995)','Father of the Bride Part II (1995)']

In [37]:
links = [get_movie_trailer(movie_name) for movie_name in top_recommendations]

# Display movie trailers
for l in links:
    # Display URL using Markdown with iframe
        print(l)

https://www.youtube.com/watch?v=1lMmYysiB9A
https://www.youtube.com/watch?v=T8aos7_L4kA
https://www.youtube.com/watch?v=BbvnDlu_Zjc


In [38]:
for movie_name in top_recommendations:
    trailer_url = get_movie_trailer(movie_name)
    print(f"Movie: {movie_name}")
    print(f"Trailer URL: {trailer_url}\n")

Movie: Grumpier Old Men (1995)
Trailer URL: https://www.youtube.com/watch?v=1lMmYysiB9A

Movie: Ace Ventura: When Nature Calls (1995)
Trailer URL: https://www.youtube.com/watch?v=T8aos7_L4kA

Movie: Father of the Bride Part II (1995)
Trailer URL: https://www.youtube.com/watch?v=BbvnDlu_Zjc



In [44]:
df_links = pd.read_csv('resources/data/links.csv')
df_movies =  pd.read_csv('resources/data/movies.csv', index_col='movieId')

In [45]:
# Drop rows with non-finite values
df_links.dropna(subset=['tmdbId'], inplace=True)

# Convert float to integer in the 'tmdbId' column
df_links['tmdbId'] = df_links['tmdbId'].astype(int)



In [2]:
movie_df = pd.merge(df_movies, df_links, on='movieId', how='inner')
movie_df

NameError: name 'pd' is not defined

In [62]:
def get_movie_id(movie_title):
    movie_id = movie_df.loc[movie_df['title'] == movie_title, 'tmdbId'].values
    if len(movie_id) > 0:
        return movie_id[0]
    else:
        return "Movie not found"

# Example usage:
movie_title = "To Wong Foo, Thanks for Everything! Julie Newmar (1995)"
movie_id = get_movie_id(movie_title)

In [63]:
from IPython.display import Image, display

image_url = fetch_poster(movie_id)
display(Image(url=image_url))

In [64]:
fav_movies1 = ['Grumpier Old Men (1995)', 'Ace Ventura: When Nature Calls (1995)', 'Father of the Bride Part II (1995)']

for movie_name in fav_movies1:
    movie_id = get_movie_id(movie_name)
    if movie_id != "Movie not found":
        trailer_url = get_movie_trailer(movie_name)
        image_url = fetch_poster(movie_id)
        img = Image(url=image_url)
        display(img)
        print(f"Movie: {movie_name}")
        print(f"Trailer URL: {trailer_url}")
        print("---")
    else:
        print(f"Movie: {movie_name}")
        print("Movie not found")
        print("---")

Movie: Grumpier Old Men (1995)
Trailer URL: https://www.youtube.com/watch?v=1lMmYysiB9A
---


Movie: Ace Ventura: When Nature Calls (1995)
Trailer URL: https://www.youtube.com/watch?v=T8aos7_L4kA
---


Movie: Father of the Bride Part II (1995)
Trailer URL: https://www.youtube.com/watch?v=BbvnDlu_Zjc
---


In [73]:
import openai

def generate_movie_summary(movie_title):
    prompt = f"Generate a summary for the movie '{movie_title}'."
    response = openai.Completion.create(
        engine="text-davinci-003",  # Use Davinci engine
        prompt=prompt,
        max_tokens=100  # Adjust token limit as per free tier restrictions
    )
    return response.choices[0].text.strip()

generate_movie_summary('Father of the Bride Part II (1995)')


APIRemovedInV1: 

You tried to access openai.Completion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


In [76]:
generate_movie_summary('Father of the Bride Part II (1995)')

APIRemovedInV1: 

You tried to access openai.Completion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


In [74]:
!pip install openai==0.28

Collecting openai==0.28
  Obtaining dependency information for openai==0.28 from https://files.pythonhosted.org/packages/ae/59/911d6e5f1d7514d79c527067643376cddcf4cb8d1728e599b3b03ab51c69/openai-0.28.0-py3-none-any.whl.metadata
  Using cached openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Using cached openai-0.28.0-py3-none-any.whl (76 kB)
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.16.1
    Uninstalling openai-1.16.1:
      Successfully uninstalled openai-1.16.1
Successfully installed openai-0.28.0


In [11]:
movie_id = movie_df.loc[movie_df['title'] == movie_name, 'tmdbId'].values[0]

NameError: name 'movie_name' is not defined

In [13]:
df_links.columns

Index(['movieId', 'imdbId', 'tmdbId'], dtype='object')

In [16]:
movie_df

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy
...,...,...
209157,We (2018),Drama
209159,Window of the Soul (2001),Documentary
209163,Bad Poems (2018),Comedy|Drama
209169,A Girl Thing (2001),(no genres listed)


In [17]:
df_links.dropna(subset=['tmdbId'], inplace=True)
df_links['tmdbId'] = df_links['tmdbId'].astype(int)
movie_df = pd.merge(movies_df, df_links, on='movieId', how='inner')

In [19]:
movie_df.columns

Index(['movieId', 'title', 'genres', 'imdbId', 'tmdbId'], dtype='object')

In [21]:
movie_name = 'Toy Story (1995)'

In [27]:
movie_imdbId = movie_df.loc[movie_df['title'] == movie_name, 'imdbId'].values[0]
movie_imdbId

114709

In [26]:
https://www.imdb.com/title/tt0{movie_imdbId}/

SyntaxError: invalid syntax (714329094.py, line 1)

In [28]:
def create_imdb_link(movie_imdbId):
    imdb_url = f"https://www.imdb.com/title/tt0{movie_imdbId}/"
    return imdb_url


In [29]:
movie_imdbId = 114709
create_imdb_link(movie_imdbId)

'https://www.imdb.com/title/tt0114709/'