In [42]:
import pandas as pd
import numpy as np

In [43]:
movies = pd.read_csv('datasets/tmdb_5000_movies.csv')
df = pd.read_csv('datasets/tmdb_5000_credits.csv')

In [44]:
# df.info
df.columns = ['id', 'tittle', 'cast', 'crew']
movies = movies.merge(df, on='id')

In [None]:
movies.info()

In [None]:
movies.head(3)

In [None]:
movies['overview'].head(3)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# removing english stop word like a, and , the 
tfidf = TfidfVectorizer(analyzer = 'word',stop_words = 'english')

#NaN -> ''
movies['overview'] = movies['overview'].fillna('')

tfidf_matrix =  tfidf.fit_transform(movies['overview'])

tfidf_matrix.shape

similarity score of this matrix.


In [None]:
from sklearn.metrics.pairwise import linear_kernel

cosin_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosin_sim[0]

In [None]:
# reverse map indices and movie titles
index_of_movies = pd.Series(movies.index, index=movies['title']).drop_duplicates()
index_of_movies

In [51]:
def get_recommendations(title, cosin_sim=cosin_sim):
    idx = index_of_movies[title]
    
    sim_scores = list(enumerate(cosin_sim[idx]))
    # sorting of moviesidx based on similarity score
    sim_scores = sorted(sim_scores, key = lambda x:x[1], reverse = True)
    # get top 10 of sorted 
    sim_scores = sim_scores[1:31]
    
    movies_idx = [i[0] for i in sim_scores]
    
    return movies['title'].iloc[movies_idx]


In [None]:
get_recommendations('The Godfather').head(10)

### Improving the recommender with another metadatas

In [53]:
from ast import literal_eval

features = ['cast', 'crew', 'keywords', 'genres']
for f in features:
    movies[f] = movies[f].apply(literal_eval)

In [54]:
def get_director(x):
    for i in x:
        if i['job'] == 'Director':
            return i['name']
    return np.nan

In [55]:
# get top 3 elements of list
def get_list(x):
    if isinstance(x, list):
        names = [ i['name'] for i in x]
        
        if len(names)  > 3:
            names = names[:3]
        return names
    return []

In [56]:
movies['director'] = movies['crew'].apply(get_director)

features = ['cast', 'keywords', 'genres']
for f in features:
    movies[f] = movies[f].apply(get_list)

In [None]:
movies[['title', 'cast', 'director', 'keywords', 'genres']].head(10)

In [58]:
# striping
def clean_data(x):
    if isinstance(x, list):
        return [str.lower(i.replace(' ', '')) for i in x]
    else:
        if isinstance(x, str):
            return str.lower(x.replace(' ', ''))
        else:
            return ''

In [59]:
features = ['cast', 'keywords', 'director', 'genres']
for f in features:
    movies[f] = movies[f].apply(clean_data)

In [60]:
def create_soup(x):
    return ' '.join(x['keywords'])+' '+' '.join(x['cast'])+' '+x['director']+' '+' '.join(x['genres'])

movies['soup'] = movies.apply(create_soup, axis=1)

In [61]:
# count vectorizer
from sklearn.feature_extraction.text import CountVectorizer

count = CountVectorizer(stop_words = 'english')
count_matrix = count.fit_transform(movies['soup'])

In [62]:
from sklearn.metrics.pairwise import cosine_similarity

cosin_sim2 = cosine_similarity(count_matrix, count_matrix)

In [None]:
get_recommendations('The Godfather', cosin_sim2).head(10)

### collobrative filtering

In [None]:

from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

# Load the ratings dataset
ratings = pd.read_csv('datasets/ratings_small.csv')

# Initialize the reader with the rating scale found in the dataset
reader = Reader()

# Load dataset into the Surprise Dataset format
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Initialize the SVD algorithm
svd = SVD()

# Run 5-fold cross-validation and print the results (including RMSE)
cross_validate(svd, data, measures=['RMSE'], cv=5, verbose=True)


In [None]:
train = data.build_full_trainset()
svd.fit(train)

In [None]:
ratings[ratings['userId'] == 1]

lets's predict the user 1's rating on the movie id 302.

In [None]:
svd.predict(1, 302)

In [None]:
svd.predict(2, 5001)

In [69]:
## Hybrid recommender


In [70]:
# convert float val to int
def conv_int(x):
    try:
        return int(x)
    except:
        return np.nan

In [None]:
movie_id = pd.read_csv('datasets/links.csv')[['movieId', 'tmdbId']]
movie_id['tmdbId'] = movie_id['tmdbId'].apply(conv_int)
movie_id.columns = ['movieId', 'id']
movie_id = movie_id.merge(movies[['title', 'id']], on='id').set_index('title')
print(movie_id.shape)
movie_id

In [72]:
index_map = movie_id.set_index('id')

In [None]:
# Import necessary libraries
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
import uvicorn
import nest_asyncio
import time
import requests
import pandas as pd  # Assuming you're using pandas for DataFrames
import httpx

# Allow nested event loops
nest_asyncio.apply()

# Create the FastAPI app
app = FastAPI()

# Setup Jinja2 templates
templates = Jinja2Templates(directory="templates")

# Create a requests session for API calls
session = requests.Session()

def get_poster(tmdb_id):
    global session  # Ensure the session is recognized as a global variable
    api_key = '20c67d9e5a125f9516102db2eb6bf8dc'
    url = f"https://api.themoviedb.org/3/movie/{tmdb_id}?api_key={api_key}"
    
    # Delay to avoid hitting rate limit
    response = session.get(url)
    
    # Check if request was successful
    if response.status_code == 200:
        rate_limit_remaining = response.headers.get('X-RateLimit-Remaining')
        rate_limit_reset = response.headers.get('X-RateLimit-Reset')
        print(f"Remaining: {rate_limit_remaining}, Reset in: {rate_limit_reset} seconds")
        
        data = response.json()
        poster_path = data.get('poster_path', None)
        if poster_path:
            return f"https://image.tmdb.org/t/p/w500{poster_path}"
    return None

def recommend_for(userId, title):
    # Assuming index_of_movies and movie_id are defined elsewhere in your code
    index = index_of_movies[title]
    tmdbId = movie_id.loc[title]['id']
    
    # Content-based filtering
    sim_scores = list(enumerate(cosin_sim2[int(index)]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:30]  # Get top 29 similar movies
    movie_indices = [i[0] for i in sim_scores]
    
    mv = movies.iloc[movie_indices][['title', 'vote_count', 'vote_average', 'id']]
    mv = mv[mv['id'].isin(movie_id['id'])]

    # Collaborative filtering with SVD
    mv['est'] = mv['id'].apply(lambda x: svd.predict(userId, index_map.loc[x]['movieId']).est)

    mv = mv.sort_values('est', ascending=False)

    # Fetch posters from TMDb API for the top 10 recommended movies
    mv['poster_url'] = mv['id'].apply(get_poster)
    
    # Return the top 10 recommendations
    recommendations = mv[['title', 'vote_average', 'poster_url']].head(10).to_dict(orient='records')
    return {'recommendations': recommendations}

import random

def get_random_movie_title(movies):
    # Extract the list of movie titles
    movie_titles = movies['title'].tolist()
    
    # Select a random title from the list
    random_title = random.choice(movie_titles)
    
    return random_title


@app.get("/get_movie", response_class=HTMLResponse)
async def get_movie(request: Request, title: str = None):
    if not title:
        # Get a random movie title if no title is provided
        title = get_random_movie_title(movies[['title', 'cast', 'director', 'keywords', 'genres']])
        header_title = "Random Movie Suggestion"  # Default header when a random movie is selected
    else:
        header_title = f"Similar Movies Like '{title}'"  # Header when a specific movie is entered
    
    # Get recommendations for the selected movie title
    result = recommend_for(4, title)
    
    # Pass the selected movie title and header title to the template
    return templates.TemplateResponse("page.html", {
        "request": request,
        "result": result,
        "header_title": header_title  # Include dynamic header title for the page
    })


API_KEY = '20c67d9e5a125f9516102db2eb6bf8dc'

async def get_movie_details_by_title(movie_title: str):
    # Search for the movie to get its ID
    search_url = f"https://api.themoviedb.org/3/search/movie?api_key={API_KEY}&query={movie_title}&language=en-US"
    
    async with httpx.AsyncClient() as client:
        try:
            search_response = await client.get(search_url)
            search_response.raise_for_status()  # Raise an error for bad responses
            results = search_response.json().get("results", [])
            
            if results:
                movie_id = results[0]['id']  # Get the ID of the first result
                details_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={API_KEY}&language=en-US"
                details_response = await client.get(details_url)
                details_response.raise_for_status()
                return details_response.json()
        except httpx.HTTPStatusError as exc:
            print(f"HTTP error occurred: {exc}")
        except httpx.RequestError as exc:
            print(f"A network error occurred: {exc}")
    
    return None  # Return None if no movie is found or an error occurred

@app.get("/movie/{movie_title}", response_class=HTMLResponse)
async def movie_detail(request: Request, movie_title: str):
    # Fetch movie details by title
    movie_details = await get_movie_details_by_title(movie_title)

    if movie_details:
        # Create a movie object with relevant details
        movie = {
            "title": movie_details.get("title"),
            "poster_url": f"https://image.tmdb.org/t/p/w500{movie_details.get('poster_path')}",
            "description": movie_details.get("overview"),
            "release_date": movie_details.get("release_date"),
            "rating": movie_details.get("vote_average")
        }
        
        # Render the HTML template with movie details
        return templates.TemplateResponse("movies_d.html", {"request": request, "movie": movie})
    
    return templates.TemplateResponse("error.html", {"request": request, "error": "Movie not found."})  # Error template



# Run the FastAPI app with uvicorn
def run_app():
    uvicorn.run(app, host="127.0.0.1", port=8000)

# Call the run_app function
if __name__ == "__main__":
    run_app()


INFO:     Started server process [15992]
INFO:     Waiting for application startup.


INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:52312 - "GET /get_movie?title=Flicka HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 466, in _make_request
    self._validate_conn(conn)
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 1095, in _validate_conn
    conn.connect()
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 730, in connect
    sock_and_verified = _ssl_wrap_socket_and_match_hostname(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 909, in _ssl_wrap_socket_and_match_hostname
    ssl_sock = ssl_wrap_socket(
               ^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\util\ssl_.py", line 469, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(s

INFO:     127.0.0.1:52313 - "GET /get_movie?title=Flicka HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 466, in _make_request
    self._validate_conn(conn)
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 1095, in _validate_conn
    conn.connect()
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 730, in connect
    sock_and_verified = _ssl_wrap_socket_and_match_hostname(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 909, in _ssl_wrap_socket_and_match_hostname
    ssl_sock = ssl_wrap_socket(
               ^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\util\ssl_.py", line 469, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(s

INFO:     127.0.0.1:52315 - "GET /get_movie?title=Flicka HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 466, in _make_request
    self._validate_conn(conn)
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 1095, in _validate_conn
    conn.connect()
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 730, in connect
    sock_and_verified = _ssl_wrap_socket_and_match_hostname(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 909, in _ssl_wrap_socket_and_match_hostname
    ssl_sock = ssl_wrap_socket(
               ^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\util\ssl_.py", line 469, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(s

INFO:     127.0.0.1:52318 - "GET /get_movie?title=Flicka HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 466, in _make_request
    self._validate_conn(conn)
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 1095, in _validate_conn
    conn.connect()
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 730, in connect
    sock_and_verified = _ssl_wrap_socket_and_match_hostname(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 909, in _ssl_wrap_socket_and_match_hostname
    ssl_sock = ssl_wrap_socket(
               ^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\util\ssl_.py", line 469, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(s

INFO:     127.0.0.1:52320 - "GET /get_movie?title=Flicka HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 466, in _make_request
    self._validate_conn(conn)
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 1095, in _validate_conn
    conn.connect()
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 730, in connect
    sock_and_verified = _ssl_wrap_socket_and_match_hostname(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 909, in _ssl_wrap_socket_and_match_hostname
    ssl_sock = ssl_wrap_socket(
               ^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\util\ssl_.py", line 469, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(s

INFO:     127.0.0.1:52322 - "GET /get_movie?title=Flicka HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 466, in _make_request
    self._validate_conn(conn)
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connectionpool.py", line 1095, in _validate_conn
    conn.connect()
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 730, in connect
    sock_and_verified = _ssl_wrap_socket_and_match_hostname(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\connection.py", line 909, in _ssl_wrap_socket_and_match_hostname
    ssl_sock = ssl_wrap_socket(
               ^^^^^^^^^^^^^^^^
  File "d:\Games\Movie-Recommendation-system-main\myenv\Lib\site-packages\urllib3\util\ssl_.py", line 469, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(s

Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
Remaining: None, Reset in: None seconds
INFO:     127.0.0.1:52324 - "GET /get_movie?title=Flicka HTTP/1.1" 200 OK
Remain