In [159]:
import pickle
import pandas as pd
import numpy as np
new_df = pickle.load(open('new.pkl','rb'))
movies = pickle.load(open('movie_dict.pkl','rb'))
similarity = pickle.load(open('similarity.pkl','rb'))
print(type(new_df))


<class 'pandas.core.frame.DataFrame'>


Search

In [160]:
import json
import urllib.request as urllib_request
import ssl
import pickle

def fetch_poster_path(api_key, base_url, movie_id):
    poster_url = f"{base_url}{movie_id}?api_key={api_key}&language=en-US"
    response = urllib_request.urlopen(poster_url)
    json_data = json.loads(response.read())

    # Extract the poster path from the API response
    poster_path = json_data.get('poster_path', '')
    
    return f"https://image.tmdb.org/t/p/w500/{poster_path}"

In [161]:
def recommend_10(movie):
    movie_list = []

    # Check if movie is None or an empty string
    if movie is None or not movie.strip():
        print("Error: Movie is None or empty.")
        return movie_list

    # Normalize movie title to lowercase and remove leading/trailing spaces
    movie = movie.strip().lower()

    # Check if movie exists in DataFrame
    if movie in new_df['title'].str.lower().values:
        index = new_df[new_df['title'].str.lower() == movie].index[0]
    else:
        # Movie not found in DataFrame
        print(f"Movie '{movie}' not found in DataFrame.")
        return movie_list

    distances = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda x: x[1])

    for i in distances[0:20]:
        movie_index = i[0]
        movie_title = new_df.iloc[movie_index]['title']

        # Fetch the poster path dynamically from the TMDB API
        api_key = "ff1e18b29203a4007ed0df1907cccf71"
        base_url = "https://api.themoviedb.org/3/movie/"
        movie_id = new_df.iloc[movie_index]['movie_id']
        print(movie_id)
        poster_path = fetch_poster_path(api_key, base_url, movie_id)

        movie_list.append({
            'title': movie_title,
            'poster_path': poster_path
        })

    return movie_list

In [162]:
recommend_10('Avatar')

19995
7450
602
440
11551
44943
849
80274
11954
270938
76757
137113
50357
79698
205126
698
18
106
34851
601


[{'title': 'Avatar',
  'poster_path': 'https://image.tmdb.org/t/p/w500//kyeqWdyUXW608qlYkRqosgbbJyK.jpg'},
 {'title': 'Titan A.E.',
  'poster_path': 'https://image.tmdb.org/t/p/w500//el2iHk3LTJWfEnwrvcRkvWY501G.jpg'},
 {'title': 'Independence Day',
  'poster_path': 'https://image.tmdb.org/t/p/w500//p0BPQGSPoSa8Ml0DAf2mB2kCU0R.jpg'},
 {'title': 'Aliens vs Predator: Requiem',
  'poster_path': 'https://image.tmdb.org/t/p/w500//jCyJN1vj8jqJJ0vNw4hDH2KlySO.jpg'},
 {'title': 'Small Soldiers',
  'poster_path': 'https://image.tmdb.org/t/p/w500//2nuUjSzHsoYlRvTPmLo7m7gCQry.jpg'},
 {'title': 'Battle: Los Angeles',
  'poster_path': 'https://image.tmdb.org/t/p/w500//jloyGeVYZSxM9zsLFvVOWuj2ey4.jpg'},
 {'title': 'Krull',
  'poster_path': 'https://image.tmdb.org/t/p/w500//2dyMDv6W3ugMfaC8DEXPiqMcurD.jpg'},
 {'title': "Ender's Game",
  'poster_path': 'https://image.tmdb.org/t/p/w500//vrEpLNkv30qw7JiVyorgR6NOWDm.jpg'},
 {'title': 'Lifeforce',
  'poster_path': 'https://image.tmdb.org/t/p/w500//953hMDf9

Collaborative Filtering

In [163]:
df_ratings = pd.read_csv('rate.csv')

In [164]:
from scipy.sparse import csr_matrix

def create_X(df):
    """
    Generates a sparse matrix from ratings dataframe.

    Args:
        df: pandas dataframe containing 3 columns (userId, movieId, rating)

    Returns:
        X: sparse matrix
        user_mapper: dict that maps user id's to user indices
        user_inv_mapper: dict that maps user indices to user id's
        movie_mapper: dict that maps movie id's to movie indices
        movie_inv_mapper: dict that maps movie indices to movie id's
    """
    M = df['userId'].nunique()
    N = df['movie_id'].nunique()
    user_mapper = dict(zip(np.unique(df["userId"]), list(range(M))))
    movie_mapper = dict(zip(np.unique(df["movie_id"]), list(range(N))))

    user_inv_mapper = dict(zip(list(range(M)), np.unique(df["userId"])))
    movie_inv_mapper = dict(zip(list(range(N)), np.unique(df["movie_id"])))

    user_index = [user_mapper[i] for i in df['userId']]
    item_index = [movie_mapper[i] for i in df['movie_id']]

    X = csr_matrix((df["rating"], (user_index,item_index)), shape=(M,N))

    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper

X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_X(df_ratings)

In [165]:
from sklearn.neighbors import NearestNeighbors

def knn_similar_users(userId, X, user_mapper, user_inv_mapper, k, metric='cosine'):
    """
    Finds k-nearest neighbours for a given movie id.

    Args:
        movie_id: id of the movie of interest
        X: user-item utility matrix
        k: number of similar movies to retrieve
        metric: distance metric for kNN calculations

    Output: returns list of k similar movie ID's
    """
    neighbour_ids = []

    user_index = user_mapper[userId]
    user_vector = X[user_index]
    if isinstance(user_vector, (np.ndarray)):
        user_vector = user_vector.reshape(1,-1)
    # use k+1 since kNN output includes the movieId of interest
    kNN = NearestNeighbors(n_neighbors=k+1, algorithm="brute", metric=metric)
    kNN.fit(X)
    neighbour = kNN.kneighbors(user_vector, return_distance=False)
    for i in range(0,k):
        n = neighbour.item(i)
        neighbour_ids.append(user_inv_mapper[n])
    neighbour_ids.pop(0)
    return neighbour_ids

In [166]:
def recommended_movies_by_collab_filtering(userId):
    movie_list = []
    similar_users = knn_similar_users(userId,X,user_mapper,user_inv_mapper,k=10)
    df1 = df_ratings.loc[df_ratings['userId'] == userId].sort_values(by='rating', ascending = False)
    df2 = df_ratings.loc[df_ratings['userId'] == similar_users[0]].sort_values(by='rating', ascending = False)
    movies_notseen = list(set(df2['movie_id']) - set(df1['movie_id']))
    filtered_df = df_ratings[(df_ratings['userId'] == similar_users[0]) & (df_ratings['movie_id'].isin(movies_notseen))].sort_values(by='rating',ascending=False)
    movies = filtered_df.iloc[0:20,1:2]
    rec_movies = movies['movie_id'].tolist()

    for i in rec_movies:
        movie_id = i
        movie_title = new_df.loc[new_df['movie_id'] == i, 'title'].iloc[0]
        
        # Fetch the poster path dynamically from the TMDB API
        api_key = "ff1e18b29203a4007ed0df1907cccf71"
        base_url = "https://api.themoviedb.org/3/movie/"
        
        poster_path = fetch_poster_path(api_key, base_url, movie_id)

        movie_list.append({
            'title': movie_title,
            'poster_path': poster_path
        })

    return movie_list

Recommend by Genre

In [167]:
movies_genres = pd.read_csv('tmdb_5000_movies.csv')
movies_genres = movies_genres.rename(columns={"id":"movie_id"})
movies_genres = movies_genres[['movie_id','title','genres','vote_average']]

In [168]:
import ast

def convert(text):            #helper function to fetch appropriate element from genres and keywords
    lst = []
    for i in ast.literal_eval(text):        #converts string of list to list
        lst.append(i['name'])
    return lst

movies_genres['genres'] = movies_genres['genres'].apply(convert)

In [169]:
def filter_movies_by_genres(df, genres):
    """
    Filter movies DataFrame based on genres entered by the user.
    
    Args:
        df (DataFrame): DataFrame containing movies and genres.
        genres (list): List of genres entered by the user.
        
    Returns:
        DataFrame: Filtered DataFrame containing movies matching the genres.
    """
    filtered_df = df[df['genres'].apply(lambda x: all(genre in x for genre in genres))]
    return filtered_df

In [171]:
def genre_filterer_by_fans(genres):
    movie_list = []
    filtered_movies = filter_movies_by_genres(movies_genres, genres)
    sorted_frame = filtered_movies.sort_values(by='vote_average',ascending=False)
    final = sorted_frame.iloc[0:20,0:1]
    rec_movies = final['movie_id'].tolist()
    
    for i in rec_movies:
        movie_id = i
        movie_title = new_df.loc[new_df['movie_id']==i,'title'].iloc[0]

        # Fetch the poster path dynamically from the TMDB API
        api_key = "ff1e18b29203a4007ed0df1907cccf71"
        base_url = "https://api.themoviedb.org/3/movie/"

        poster_path = fetch_poster_path(api_key, base_url, movie_id)

        movie_list.append({
            'title': movie_title,
            'poster_path': poster_path
        })

    return movie_list

In [172]:
new_df.head(20)

Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"in the 22nd century, a parapleg marin is dispa..."
1,285,Pirates of the Caribbean: At World's End,"captain barbossa, long believ to be dead, ha c..."
2,206647,Spectre,a cryptic messag from bond’ past send him on a...
3,49026,The Dark Knight Rises,follow the death of district attorney harvey d...
4,49529,John Carter,"john carter is a war-weary, former militari ca..."
5,559,Spider-Man 3,the seemingli invinc spider-man goe up against...
6,38757,Tangled,when the kingdom' most wanted-and most charmin...
7,99861,Avengers: Age of Ultron,when toni stark tri to jumpstart a dormant pea...
8,767,Harry Potter and the Half-Blood Prince,"as harri begin hi sixth year at hogwarts, he d..."
9,209112,Batman v Superman: Dawn of Justice,fear the action of a god-lik super hero left u...


In [173]:
# from flask import Flask
# from flask_mysqldb import MySQL
# from flask import render_template, redirect, request, url_for, flash, session,request as flask_request
# import hashlib

# app = Flask(__name__)

# app.config['MYSQL_HOST'] = 'localhost'
# app.config['MYSQL_USER'] = 'root'
# app.config['MYSQL_PASSWORD'] = '@gArwal80okD3p0rt'
# app.config['MYSQL_DB'] = 'movie_rec'

# app.config['MYSQL_CURSORCLASS'] = 'DictCursor'

# api_key = "ff1e18b29203a4007ed0df1907cccf71"
# base_url = "https://api.themoviedb.org/3/discover/movie?api_key=" + api_key

# mysql = MySQL(app)

# app.secret_key = 'your_secret_key'

# # Hashing function
# def hash_password(password):
#     return hashlib.sha256(password.encode()).hexdigest()

# @app.route('/register', methods=['GET', 'POST'])
# def register():
#     if request.method == 'POST':
#         username = request.form['username']
#         password = request.form['password']
#         name = request.form['name']
#         try:
#             cur = mysql.connection.cursor()
#             cur.execute("INSERT INTO users (keyname, username, pass) VALUES (%s, %s, %s)",
#                         (name, username, password))
#             mysql.connection.commit()
#             cur.close()

#             flash('Registration successful. You can now login.', 'success')
#             return redirect(url_for('login'))
#         except Exception as e:
#             flash('Registration failed. Please try again later.', 'danger')
#             print(f"An error occurred during registration: {e}")
            
#     return render_template('register.html')


# @app.route('/login', methods=['GET', 'POST'])
# def login():
#     if request.method == 'POST':
#         username = request.form['username']
#         # password = hashlib.sha256(request.form['password'].encode()).hexdigest()
#         password = request.form['password']
#         print(username)
#         print(password)

#         try:
#             cur = mysql.connection.cursor()
#             cur.execute("SELECT * FROM users WHERE username = %s AND pass = %s", (username, password))
#             user = cur.fetchone()
#             cur.close()

#             if user:
#                 session['username'] = user['username']
#                 session['user_id'] = user['id']
#                 flash('Login successful', 'success')
#                 return redirect(url_for('home'))
#             else:
#                 flash('Invalid credentials', 'danger')
#         except Exception as e:
#             flash('Login failed. Please try again later.', 'danger')
#             print(f"An error occurred during login: {e}")
#     return render_template('login.html')


# @app.route('/logout')
# def logout():
#     session.clear()
#     flash('You have been logged out.', 'info')
#     return redirect(url_for('login'))

# @app.route('/')
# def index():
#     return redirect(url_for('login'))

# @app.route('/home', methods=['GET', 'POST'])
# def home():
    
#     if flask_request.method == 'POST':
#         search_query = flask_request.form.get('search')
#         print(f"Search Query: {search_query}")
#         recommended_movies = recommend_10(search_query)
#         print(f"Recommended Movies: {recommended_movies}")
#         return render_template("home.html", data=recommended_movies, search_query=search_query)
    
#     userId = session['user_id']
#     movieList = recommended_movies_by_collab_filtering(userId)
#     print(movieList)
#     return render_template("home.html", data=movieList, search_query=None)

# @app.route('/discover', methods=['GET', 'POST'])
# def discover():
    
#     if flask_request.method == 'POST':
#         search_query = flask_request.form.get('search')
#         print(f"Search Query: {search_query}")

#         recommended_movies = recommend_10(search_query)
#         print(f"Recommended Movies: {recommended_movies}")

#         return render_template("index.html", data=recommended_movies, search_query=search_query)

#     ssl._create_default_https_context = ssl._create_unverified_context
#     conn = urllib_request.urlopen(base_url)
#     json_data = json.loads(conn.read())
#     return render_template("index.html", data=json_data["results"], search_query=None)

# @app.route('/submit', methods=['GET','POST'])
# def submit():
#     movie=''
#     if request.method == 'POST':
#         movie = request.form['search']
#         movie_list = recommend_10(movie)
#     return render_template('results.html',movies=movie_list)

# @app.route('/details')
# def details():
#     movie = request.args.get('movie')
#     return render_template('movie.html',movie = movie)


# @app.before_request
# def require_login():
#     allowed_routes = ['login', 'register','logout']  # add other allowed routes
#     if 'user_id' not in session and request.endpoint not in allowed_routes:
#         return redirect(url_for('login'))
    
# if __name__ == "__main__":
#     app.run()
    
    

In [183]:
from flask import Flask
from flask_mysqldb import MySQL
from flask import render_template, redirect, request, url_for, flash, session,request as flask_request
import hashlib

app = Flask(__name__)

app.config['MYSQL_HOST'] = 'localhost'
app.config['MYSQL_USER'] = 'root'
app.config['MYSQL_PASSWORD'] = '@gArwal80okD3p0rt'
app.config['MYSQL_DB'] = 'movie_rec'

app.config['MYSQL_CURSORCLASS'] = 'DictCursor'

api_key = "ff1e18b29203a4007ed0df1907cccf71"
base_url = "https://api.themoviedb.org/3/discover/movie?api_key=" + api_key

mysql = MySQL(app)

app.secret_key = 'your_secret_key'

# Hashing function
def hash_password(password):
    return hashlib.sha256(password.encode()).hexdigest()

@app.route('/register', methods=['GET', 'POST'])
def register():
    if request.method == 'POST':
        username = request.form['username']
        password = request.form['password']
        name = request.form['name']
        try:
            cur = mysql.connection.cursor()
            cur.execute("INSERT INTO users (keyname, username, pass) VALUES (%s, %s, %s)",
                        (name, username, password))
            mysql.connection.commit()
            cur.close()

            flash('Registration successful. You can now login.', 'success')
            return redirect(url_for('login'))
        except Exception as e:
            flash('Registration failed. Please try again later.', 'danger')
            print(f"An error occurred during registration: {e}")
            
    return render_template('register.html')


@app.route('/login', methods=['GET', 'POST'])
def login():
    if request.method == 'POST':
        username = request.form['username']
        # password = hashlib.sha256(request.form['password'].encode()).hexdigest()
        password = request.form['password']
        print(username)
        print(password)

        try:
            cur = mysql.connection.cursor()
            cur.execute("SELECT * FROM users WHERE username = %s AND pass = %s", (username, password))
            user = cur.fetchone()
            cur.close()

            if user:
                session['username'] = user['username']
                session['user_id'] = user['id']
                # session['genres'] = user['genres']
                flash('Login successful', 'success')
                return redirect(url_for('home'))
            else:
                flash('Invalid credentials', 'danger')
        except Exception as e:
            flash('Login failed. Please try again later.', 'danger')
            print(f"An error occurred during login: {e}")
    return render_template('login.html')


@app.route('/logout')
def logout():
    session.clear()
    flash('You have been logged out.', 'info')
    return redirect(url_for('login'))

@app.route('/')
def index():
    return redirect(url_for('login'))

@app.route('/home', methods=['GET', 'POST'])
def home():
    
    if flask_request.method == 'POST':
        search_query = flask_request.form.get('search')
        print(f"Search Query: {search_query}")
        recommended_movies = recommend_10(search_query)
        print(f"Recommended Movies: {recommended_movies}")
        return render_template("home.html", recommended_movies=recommended_movies, search_query=search_query)
    
    userId = session['user_id']
    movieList = recommended_movies_by_collab_filtering(userId)
    genres = session['genres']
    Movies_by_genre = genre_filterer_by_fans([genres])
    print(Movies_by_genre)
    # print(Movies_by_genre)
    return render_template("home.html", data=movieList, movies = Movies_by_genre,search_query=None)

@app.route('/discover', methods=['GET', 'POST'])
def discover():
    
    if flask_request.method == 'POST':
        search_query = flask_request.form.get('search')
        print(f"Search Query: {search_query}")

        recommended_movies = recommend_10(search_query)
        print(f"Recommended Movies: {recommended_movies}")

        return render_template("index.html", recommended_movies=recommended_movies, search_query=search_query)

    ssl._create_default_https_context = ssl._create_unverified_context
    conn = urllib_request.urlopen(base_url)
    json_data = json.loads(conn.read())
    return render_template("index.html", data=json_data["results"], search_query=None)

# @app.route('/submit', methods=['GET','POST'])
# def submit():
#     movie=''
#     if request.method == 'POST':
#         movie = request.form['search']
#         movie_list = recommend_10(movie)
#     return render_template('results.html',movies=movie_list)

# @app.route('/details')
# def details():
#     movie = request.args.get('movie')
#     return render_template('movie.html',movie = movie)


@app.before_request
def require_login():
    allowed_routes = ['login', 'register','logout']  # add other allowed routes
    if 'user_id' not in session and request.endpoint not in allowed_routes:
        return redirect(url_for('login'))
    
if __name__ == "__main__":
    app.run()
    
    

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [10/Mar/2024 17:02:42] "GET / HTTP/1.1" 302 -
127.0.0.1 - - [10/Mar/2024 17:02:42] "GET /login HTTP/1.1" 200 -
127.0.0.1 - - [10/Mar/2024 17:02:47] "POST /login HTTP/1.1" 302 -


hel
hel


127.0.0.1 - - [10/Mar/2024 17:02:49] "GET /home HTTP/1.1" 200 -
127.0.0.1 - - [10/Mar/2024 17:02:49] "GET /static/style.css HTTP/1.1" 304 -


[{'title': 'Dancer, Texas Pop. 81', 'poster_path': 'https://image.tmdb.org/t/p/w500//khV9zPcvgOjrqwlk3dI1lGojZib.jpg'}, {'title': 'Me You and Five Bucks', 'poster_path': 'https://image.tmdb.org/t/p/w500//ukQf5WWnLerLsMR1bo33HmQ6EUG.jpg'}, {'title': "One Man's Hero", 'poster_path': 'https://image.tmdb.org/t/p/w500//49ZysIrg57lE0vXN5aJeHtvYA8d.jpg'}, {'title': 'There Goes My Baby', 'poster_path': 'https://image.tmdb.org/t/p/w500//lxvY6as28ykgbPEuvZ5T29am99L.jpg'}, {'title': 'The Shawshank Redemption', 'poster_path': 'https://image.tmdb.org/t/p/w500//9cqNxx0GxF0bflZmeSMuL5tnGzr.jpg'}, {'title': 'The Prisoner of Zenda', 'poster_path': 'https://image.tmdb.org/t/p/w500//tvMF7iljOgOXLvkVsWGr5GrPE4j.jpg'}, {'title': 'The Godfather', 'poster_path': 'https://image.tmdb.org/t/p/w500//3bhkrj58Vtu7enYsRolD1fZdja1.jpg'}, {'title': 'Fight Club', 'poster_path': 'https://image.tmdb.org/t/p/w500//pB8BM7pdSp6B6Ih7QZ4DrQ3PmJK.jpg'}, {'title': "Schindler's List", 'poster_path': 'https://image.tmdb.org/t/p/