# Movie Title Search and Recommendations

In [8]:
from google.colab import drive
drive.mount('/content/drive')
import numpy as np
import pandas as pd
from IPython.display import display

# Load the movie catalogue and the user ratings from the mounted Drive folder.
movies = pd.read_csv('/content/drive/MyDrive/movies.csv')
ratings = pd.read_csv('/content/drive/MyDrive/ratings.csv')

# Only the last expression of a cell is rendered automatically, so the
# previews have to be displayed explicitly to show up at all.
display(movies.head())
display(ratings.head())

# Lookup table: movieId -> title.
movies_name = movies.set_index('movieId')['title'].to_dict()

# Count distinct users and distinct rated movies.
n_users = ratings.userId.nunique()
n_items = ratings.movieId.nunique()
print("Number of unique users:", n_users)
print("Number of unique movies:", n_items)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Number of unique users: 88237
Number of unique movies: 54860


In [7]:
import numpy as np
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
# Read the movie catalogue from the mounted Drive folder.
movies = pd.read_csv('/content/drive/MyDrive/movies.csv')

# Fail fast when the column the title search is built on is missing
# (membership on a DataFrame tests its column labels).
if "title" not in movies:
    raise KeyError("The dataset does not contain a 'title' column.")
def clean_title(title):
    """Return ``title`` with every character other than ASCII letters,
    digits and spaces removed.

    Example: ``"Toy Story (1995)"`` becomes ``"Toy Story 1995"``.
    """
    return re.sub(r"[^a-zA-Z0-9 ]", "", title)
# Store a punctuation-free copy of each title next to the original.
movies["clean_title"] = movies["title"].map(clean_title)

# Fit a TF-IDF model over single words and word pairs of the cleaned titles;
# `tfidf` holds one row per movie, in the same order as `movies`.
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
tfidf = vectorizer.fit_transform(movies["clean_title"])

print(movies.head())

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  \
0  Adventure|Animation|Children|Comedy|Fantasy   
1                   Adventure|Children|Fantasy   
2                               Comedy|Romance   
3                         Comedy|Drama|Romance   
4                                       Comedy   

                        clean_title  
0                    Toy Story 1995  
1                      Jumanji 1995  
2             Grumpier Old Men 1995  
3            Waiting to Exhale 1995  
4  Father of the Bride Part II 1995  


In [None]:
# Show the movies DataFrame (the recorded output includes the clean_title column).
movies


Unnamed: 0,movieId,title,genres,clean_title
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,Jumanji 1995
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men 1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,Waiting to Exhale 1995
4,5,Father of the Bride Part II (1995),Comedy,Father of the Bride Part II 1995
...,...,...,...,...
62418,209157,We (2018),Drama,We 2018
62419,209159,Window of the Soul (2001),Documentary,Window of the Soul 2001
62420,209163,Bad Poems (2018),Comedy|Drama,Bad Poems 2018
62421,209169,A Girl Thing (2001),(no genres listed),A Girl Thing 2001


In [11]:
from sklearn.metrics.pairwise import cosine_similarity


def search(title):
    """Return up to five movies whose titles best match ``title``.

    The query is normalised with ``clean_title``, vectorised with the
    notebook-level ``vectorizer`` and compared against the notebook-level
    ``tfidf`` matrix using cosine similarity.

    Parameters
    ----------
    title : str
        Free-text movie title typed by the user.

    Returns
    -------
    pandas.DataFrame
        Rows of ``movies`` ordered from most to least similar, so that
        ``.iloc[0]`` is the best match.
    """
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Never ask for more rows than exist; argpartition raises otherwise.
    top_n = min(5, similarity.size)
    if top_n == 0:
        return movies.iloc[[]]

    # argpartition only guarantees that the last `top_n` positions hold the
    # largest scores, not that they are ordered, so rank them explicitly.
    indices = np.argpartition(similarity, -top_n)[-top_n:]
    indices = indices[np.argsort(similarity[indices])[::-1]]

    return movies.iloc[indices]

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Text box the user types a movie title into.
movie_input = widgets.Text(value='We', description='Movie Title:', disabled=False)

# Output area that shows the search results for the current text.
movie_list = widgets.Output()


def on_type(data):
    """Refresh the result list when the text box value changes.

    ``data`` is the change notification passed by ``observe``; its ``"new"``
    entry is read as the current text. Results are shown only when the text
    is longer than five characters.
    """
    with movie_list:
        movie_list.clear_output()
        query = data["new"]
        if len(query) <= 5:
            return
        display(search(query))


# Call the handler on every change of the text box value.
movie_input.observe(on_type, names='value')

display(movie_input, movie_list)

Text(value='We', description='Movie Title:')

Output()

In [13]:
import pandas as pd
# Reload the catalogue from the CSV on Drive.
movies = pd.read_csv('/content/drive/MyDrive/movies.csv')

# movieId used as the worked example in the following cells.
movie_id = 89745

# Catalogue row(s) for the example movie.
movie = movies.loc[movies["movieId"] == movie_id]

In [10]:
# Load the ratings table from Drive and show the dtype of each column.
ratings = pd.read_csv("/content/drive/MyDrive/ratings.csv")
ratings.dtypes

Unnamed: 0,0
userId,int64
movieId,int64
rating,float64
timestamp,int64


In [14]:
# Users who rated the example movie above 4.
similar_users = ratings.loc[
    (ratings["movieId"] == movie_id) & (ratings["rating"] > 4), "userId"
].unique()

# Every movie those users rated above 4, as a share of the similar users.
similar_user_recs = ratings.loc[
    ratings["userId"].isin(similar_users) & (ratings["rating"] > 4), "movieId"
]
similar_user_recs = similar_user_recs.value_counts() / len(similar_users)

# Keep only movies liked by more than 10% of the similar users.
similar_user_recs = similar_user_recs[similar_user_recs > .10]

# How often the same movies are rated above 4 across all users who liked
# at least one of them.
all_users = ratings[
    ratings["movieId"].isin(similar_user_recs.index) & (ratings["rating"] > 4)
]
all_user_recs = all_users["movieId"].value_counts() / len(all_users["userId"].unique())

# Put both shares side by side, indexed by movieId.
rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
rec_percentages.columns = ["similar", "all"]
rec_percentages

Unnamed: 0_level_0,similar,all
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
89745,1.000000,0.040448
58559,0.571627,0.146928
59315,0.529903,0.054658
79132,0.521001,0.132831
2571,0.502643,0.246163
...,...,...
1258,0.103477,0.083900
31658,0.102921,0.033483
1193,0.102364,0.119926
780,0.101808,0.053668


In [15]:
# Score = share among similar users relative to the share among all users;
# the highest scores come first.
rec_percentages = rec_percentages.assign(
    score=rec_percentages["similar"] / rec_percentages["all"]
).sort_values("score", ascending=False)

# Attach title and genres to the ten best-scoring movies.
rec_percentages.head(10).merge(movies, left_index=True, right_on="movieId")

Unnamed: 0,similar,all,score,movieId,title,genres
17067,1.0,0.040448,24.723227,89745,"Avengers, The (2012)",Action|Adventure|Sci-Fi|IMAX
25058,0.242837,0.012399,19.585641,122892,Avengers: Age of Ultron (2015),Action|Adventure|Sci-Fi
20513,0.107928,0.005536,19.497179,106072,Thor: The Dark World (2013),Action|Adventure|Fantasy|IMAX
16725,0.212796,0.011757,18.098821,88140,Captain America: The First Avenger (2011),Action|Adventure|Sci-Fi|Thriller|War
19678,0.208067,0.01151,18.077198,102125,Iron Man 3 (2013),Action|Sci-Fi|Thriller|IMAX
16312,0.173574,0.010115,17.160504,86332,Thor (2011),Action|Adventure|Drama|Fantasy|IMAX
21348,0.285953,0.016708,17.114799,110102,Captain America: The Winter Soldier (2014),Action|Adventure|Sci-Fi|IMAX
25071,0.21363,0.012984,16.453586,122920,Captain America: Civil War (2016),Action|Sci-Fi|Thriller
25061,0.1363,0.008607,15.835792,122900,Ant-Man (2015),Action|Adventure|Sci-Fi
14628,0.236161,0.015133,15.605962,77561,Iron Man 2 (2010),Action|Adventure|Sci-Fi|Thriller|IMAX


In [16]:
def find_similar_movies(movie_id, min_rating=4, min_share=0.10, top_n=10,
                        ratings_df=None, movies_df=None):
    """Recommend movies liked by the users who liked ``movie_id``.

    A user "likes" a movie when their rating is strictly greater than
    ``min_rating``. Candidates are the movies liked by more than
    ``min_share`` of the users who liked ``movie_id``. Each candidate is
    scored as its share among those users divided by its share among all
    users who liked at least one candidate.

    Parameters
    ----------
    movie_id : int
        The movieId to find recommendations for.
    min_rating : float, default 4
        A rating must be strictly above this to count as a like.
    min_share : float, default 0.10
        Minimum fraction of similar users that must like a candidate.
    top_n : int, default 10
        Maximum number of recommendations returned.
    ratings_df : pandas.DataFrame, optional
        Frame with ``userId``, ``movieId`` and ``rating`` columns. Defaults
        to the notebook-level ``ratings``.
    movies_df : pandas.DataFrame, optional
        Frame with ``movieId``, ``title`` and ``genres`` columns. Defaults
        to the notebook-level ``movies``.

    Returns
    -------
    pandas.DataFrame
        Columns ``score``, ``title`` and ``genres``, highest score first.
        Empty when nobody liked ``movie_id``.
    """
    # Fall back to the notebook-level frames when none are passed in.
    if ratings_df is None:
        ratings_df = ratings
    if movies_df is None:
        movies_df = movies

    # The "liked" mask is shared by every filter below, so build it once.
    liked = ratings_df["rating"] > min_rating

    similar_users = ratings_df.loc[
        liked & (ratings_df["movieId"] == movie_id), "userId"
    ].unique()
    if len(similar_users) == 0:
        # Nobody liked this movie: nothing to recommend, and the share
        # computation below would divide by zero.
        return pd.DataFrame(columns=["score", "title", "genres"])

    # Share of similar users that liked each movie.
    similar_user_recs = ratings_df.loc[
        liked & ratings_df["userId"].isin(similar_users), "movieId"
    ]
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    similar_user_recs = similar_user_recs[similar_user_recs > min_share]

    # Share of all users (who liked any candidate) that liked each movie.
    all_users = ratings_df[liked & ratings_df["movieId"].isin(similar_user_recs.index)]
    all_user_recs = all_users["movieId"].value_counts() / all_users["userId"].nunique()

    rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]

    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
    rec_percentages = rec_percentages.sort_values("score", ascending=False)
    return rec_percentages.head(top_n).merge(
        movies_df, left_index=True, right_on="movieId"
    )[["score", "title", "genres"]]

In [21]:
import ipywidgets as widgets
from IPython.display import display

# Text box the user types a movie title into.
movie_name_input = widgets.Text(value='Toy Story', description='Movie Title:', disabled=False)

# Output area that shows the recommendations for the current text.
recommendation_list = widgets.Output()


def on_type(data):
    """Refresh the recommendations when the text box value changes.

    ``data`` is the change notification passed by ``observe``; its ``"new"``
    entry is read as the current text. When the text is longer than five
    characters, the first row returned by ``search`` is used as the movie to
    recommend from.
    """
    with recommendation_list:
        recommendation_list.clear_output()
        query = data["new"]
        if len(query) <= 5:
            return
        first_hit = search(query).iloc[0]
        display(find_similar_movies(first_hit["movieId"]))


# Call the handler on every change of the text box value.
movie_name_input.observe(on_type, names='value')

display(movie_name_input, recommendation_list)

Text(value='Toy Story', description='Movie Title:')

Output()

In [26]:
from flask import Flask, request, jsonify
import random

# The app and the sample data get their own names so they do not rebind the
# notebook-level `movie` / `movies` DataFrames that `search` and
# `find_similar_movies` read.
app = Flask(__name__)

# Sample movie data, keyed by lower-case genre name.
GENRE_MOVIES = {
    'action': ['Mad Max: Fury Road', 'John Wick', 'Die Hard'],
    'comedy': ['Superbad', 'The Hangover', 'Step Brothers'],
    'drama': ['The Shawshank Redemption', 'Forrest Gump', 'The Godfather'],
    'horror': ['Get Out', 'A Quiet Place', 'The Conjuring'],
    'romance': ['The Notebook', 'Pride and Prejudice', 'La La Land']
}


@app.route('/recommend', methods=['GET'])
def recommend():
    """Return the sample movies for the ``genre`` query parameter as JSON.

    The lookup ignores case and surrounding whitespace. A missing or unknown
    genre yields ``{"movies": []}``.
    """
    genre = request.args.get('genre', '').strip().lower()
    recommended_movies = GENRE_MOVIES.get(genre, [])
    return jsonify({'movies': recommended_movies})


if __name__ == '__main__':
    # The auto-reloader restarts the process ("Restarting with stat"), which
    # does not work from inside a notebook kernel, so it is switched off
    # while debug mode stays on.
    # NOTE(review): debug mode enables the interactive debugger; do not
    # expose this server beyond localhost.
    app.run(debug=True, use_reloader=False)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
