In [2]:
import pickle
import pandas as pd
import numpy as np
new_df = pickle.load(open('new.pkl','rb'))
similarity = pickle.load(open('similarity.pkl','rb'))
cleaned_df = pd.read_csv('cleaned_df.csv')

Search

In [3]:
import json
import urllib.request as urllib_request
import ssl

def fetch_poster_path(api_key, base_url, movie_id):
    poster_url = f"{base_url}{movie_id}?api_key={api_key}&language=en-US"
    response = urllib_request.urlopen(poster_url)
    json_data = json.loads(response.read())

    # Extract the poster path from the API response
    poster_path = json_data.get('poster_path', '')
    
    return f"https://image.tmdb.org/t/p/w500/{poster_path}"

In [4]:
def fetch_movie_details(api_key, base_url, movie_id):
    details_url = f"{base_url}{movie_id}?api_key={api_key}&language=en-US"
    response = urllib_request.urlopen(details_url)
    details_data = json.loads(response.read())

    # Extract the movie overview from the API response
    overview = details_data.get('overview', '')
    
    return overview

In [5]:
def recommend_20(movie):
    movie_list = []

    # Check if movie is None or an empty string
    if movie is None or not movie.strip():
        print("Error: Movie is None or empty.")
        return movie_list

    # Normalize movie title to lowercase and remove leading/trailing spaces
    movie = movie.strip().lower()

    # Check if movie exists in DataFrame
    if movie in new_df['title'].str.lower().values:
        index = new_df[new_df['title'].str.lower() == movie].index[0]
    else:
        # Movie not found in DataFrame
        print(f"Movie '{movie}' not found in DataFrame.")
        return movie_list

    distances = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda x: x[1])

    for i in distances[0:20]:
        movie_index = i[0]
        movie_title = new_df.iloc[movie_index]['title']

        # Fetch the poster path dynamically from the TMDB API
        api_key = "ff1e18b29203a4007ed0df1907cccf71"
        base_url = "https://api.themoviedb.org/3/movie/"
        movie_id = new_df.iloc[movie_index]['movie_id']

        poster_path = fetch_poster_path(api_key, base_url, movie_id)
        overview = fetch_movie_details(api_key, base_url, movie_id)

        movie_list.append({
            'title': movie_title,
            'poster_path': poster_path,
            'overview': overview,
            'movie_id': movie_id
        })

    return movie_list

Collaborative Filtering

In [6]:
df_ratings = pd.read_csv('rate.csv')

In [7]:
from scipy.sparse import csr_matrix

def create_X(df):
    """
    Generates a sparse matrix from ratings dataframe.

    Args:
        df: pandas dataframe containing 3 columns (userId, movieId, rating)

    Returns:
        X: sparse matrix
        user_mapper: dict that maps user id's to user indices
        user_inv_mapper: dict that maps user indices to user id's
        movie_mapper: dict that maps movie id's to movie indices
        movie_inv_mapper: dict that maps movie indices to movie id's
    """
    M = df['userId'].nunique()
    N = df['movie_id'].nunique()
    user_mapper = dict(zip(np.unique(df["userId"]), list(range(M))))
    movie_mapper = dict(zip(np.unique(df["movie_id"]), list(range(N))))

    user_inv_mapper = dict(zip(list(range(M)), np.unique(df["userId"])))
    movie_inv_mapper = dict(zip(list(range(N)), np.unique(df["movie_id"])))

    user_index = [user_mapper[i] for i in df['userId']]
    item_index = [movie_mapper[i] for i in df['movie_id']]

    X = csr_matrix((df["rating"], (user_index,item_index)), shape=(M,N))

    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper

X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_X(df_ratings)

In [8]:
from sklearn.neighbors import NearestNeighbors

def knn_similar_users(userId, X, user_mapper, user_inv_mapper, k, metric='cosine'):
    """
    Finds k-nearest neighbours for a given movie id.

    Args:
        movie_id: id of the movie of interest
        X: user-item utility matrix
        k: number of similar movies to retrieve
        metric: distance metric for kNN calculations

    Output: returns list of k similar movie ID's
    """
    neighbour_ids = []

    user_index = user_mapper[userId]
    user_vector = X[user_index]
    if isinstance(user_vector, (np.ndarray)):
        user_vector = user_vector.reshape(1,-1)
    # use k+1 since kNN output includes the movieId of interest
    kNN = NearestNeighbors(n_neighbors=k+1, algorithm="brute", metric=metric)
    kNN.fit(X)
    neighbour = kNN.kneighbors(user_vector, return_distance=False)
    for i in range(0,k):
        n = neighbour.item(i)
        neighbour_ids.append(user_inv_mapper[n])
    neighbour_ids.pop(0)
    return neighbour_ids

In [9]:
def recommended_movies_by_collab_filtering(userId):
    movie_list = []
    similar_users = knn_similar_users(userId,X,user_mapper,user_inv_mapper,k=10)
    df1 = df_ratings.loc[df_ratings['userId'] == userId].sort_values(by='rating', ascending = False)
    df2 = df_ratings.loc[df_ratings['userId'] == similar_users[0]].sort_values(by='rating', ascending = False)
    movies_notseen = list(set(df2['movie_id']) - set(df1['movie_id']))
    filtered_df = df_ratings[(df_ratings['userId'] == similar_users[0]) & (df_ratings['movie_id'].isin(movies_notseen))].sort_values(by='rating',ascending=False)
    movies = filtered_df.iloc[0:20,1:2]
    rec_movies = movies['movie_id'].tolist()
    print(rec_movies)
    for i in rec_movies:
        movie_id = i
        movie_title = new_df.loc[new_df['movie_id'] == i, 'title'].iloc[0]
        
        # Fetch the poster path dynamically from the TMDB API
        api_key = "ff1e18b29203a4007ed0df1907cccf71"
        base_url = "https://api.themoviedb.org/3/movie/"
        
        poster_path = fetch_poster_path(api_key, base_url, movie_id)

        movie_list.append({
            'title': movie_title,
            'poster_path': poster_path
        })

    return movie_list

In [10]:
recommended_movies_by_collab_filtering(1)

[10743, 2976, 256962, 40652, 93456, 257091, 824, 9034, 5494, 12596, 74465, 15059, 64682, 188207, 98557, 2898, 9702, 78383, 44147, 10207]


[{'title': 'Confidence',
  'poster_path': 'https://image.tmdb.org/t/p/w500//yJzmAtY9DmhhjJwFnW0Zjlvz6yg.jpg'},
 {'title': 'Hairspray',
  'poster_path': 'https://image.tmdb.org/t/p/w500//fgMka3HtFvI5OgW1eYdR9XpySxH.jpg'},
 {'title': 'Little Boy',
  'poster_path': 'https://image.tmdb.org/t/p/w500//lAU9pgq3niI6sFemcKm4FGGmrnW.jpg'},
 {'title': 'The Married Woman',
  'poster_path': 'https://image.tmdb.org/t/p/w500//s3IKTtCCcaD3dAv7Uiqtd1PFQIR.jpg'},
 {'title': 'Despicable Me 2',
  'poster_path': 'https://image.tmdb.org/t/p/w500//5Fh4NdoEnCjCK9wLjdJ9DJNFl2b.jpg'},
 {'title': 'Get Hard',
  'poster_path': 'https://image.tmdb.org/t/p/w500//rJPbS2cYOYhDCjT5NmW1Fm6gFl3.jpg'},
 {'title': 'Moulin Rouge!',
  'poster_path': 'https://image.tmdb.org/t/p/w500//2kjM5CUZRIU5yOANUowrbJcRL9L.jpg'},
 {'title': 'The Banger Sisters',
  'poster_path': 'https://image.tmdb.org/t/p/w500//bfXXqz4yvsHnQwwOSa23ASUaGXA.jpg'},
 {'title': 'George and the Dragon',
  'poster_path': 'https://image.tmdb.org/t/p/w500//50Nxk

Recommend by Genre

In [11]:
movies_genres = pd.read_csv('tmdb_5000_movies.csv')
movies_genres = movies_genres.rename(columns={"id":"movie_id"})
movies_genres = movies_genres[['movie_id','title','genres','vote_average']]

In [12]:
import ast

def convert(text):            #helper function to fetch appropriate element from genres and keywords
    lst = []
    for i in ast.literal_eval(text):        #converts string of list to list
        lst.append(i['name'])
    return lst

movies_genres['genres'] = movies_genres['genres'].apply(convert)

In [13]:
def filter_movies_by_genres(df, genres):
    """
    Filter movies DataFrame based on genres entered by the user.
    
    Args:
        df (DataFrame): DataFrame containing movies and genres.
        genres (list): List of genres entered by the user.
        
    Returns:
        DataFrame: Filtered DataFrame containing movies matching the genres.
    """
    filtered_df = df[df['genres'].apply(lambda x: all(genre in x for genre in genres))]
    return filtered_df

In [14]:
new_df.head()

Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"in the 22nd century, a parapleg marin is dispa..."
1,285,Pirates of the Caribbean: At World's End,"captain barbossa, long believ to be dead, ha c..."
2,206647,Spectre,a cryptic messag from bond’ past send him on a...
3,49026,The Dark Knight Rises,follow the death of district attorney harvey d...
4,49529,John Carter,"john carter is a war-weary, former militari ca..."


In [15]:
def genre_filterer_by_fans(genres):
    movie_list = []
    filtered_movies = filter_movies_by_genres(movies_genres, genres)
    sorted_frame = filtered_movies.sort_values(by='vote_average',ascending=False)
    final = sorted_frame.iloc[0:20,0:1]
    rec_movies = final['movie_id'].tolist()
    
    for i in rec_movies:
        movie_id = i
        movie_title = new_df.loc[new_df['movie_id']==i,'title'].iloc[0]

        # Fetch the poster path dynamically from the TMDB API
        api_key = "ff1e18b29203a4007ed0df1907cccf71"
        base_url = "https://api.themoviedb.org/3/movie/"

        poster_path = fetch_poster_path(api_key, base_url, movie_id)

        movie_list.append({
            'title': movie_title,
            'poster_path': poster_path
        })

    return movie_list

In [16]:
new_df.head(20)

Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"in the 22nd century, a parapleg marin is dispa..."
1,285,Pirates of the Caribbean: At World's End,"captain barbossa, long believ to be dead, ha c..."
2,206647,Spectre,a cryptic messag from bond’ past send him on a...
3,49026,The Dark Knight Rises,follow the death of district attorney harvey d...
4,49529,John Carter,"john carter is a war-weary, former militari ca..."
5,559,Spider-Man 3,the seemingli invinc spider-man goe up against...
6,38757,Tangled,when the kingdom' most wanted-and most charmin...
7,99861,Avengers: Age of Ultron,when toni stark tri to jumpstart a dormant pea...
8,767,Harry Potter and the Half-Blood Prince,"as harri begin hi sixth year at hogwarts, he d..."
9,209112,Batman v Superman: Dawn of Justice,fear the action of a god-lik super hero left u...


In [17]:
# from flask import Flask
# from flask_mysqldb import MySQL
# from flask import render_template, redirect, request, url_for, flash, session,request as flask_request
# import hashlib

# app = Flask(__name__)

# app.config['MYSQL_HOST'] = 'localhost'
# app.config['MYSQL_USER'] = 'root'
# app.config['MYSQL_PASSWORD'] = '@gArwal80okD3p0rt'
# app.config['MYSQL_DB'] = 'movie_rec'

# app.config['MYSQL_CURSORCLASS'] = 'DictCursor'

# api_key = "ff1e18b29203a4007ed0df1907cccf71"
# base_url = "https://api.themoviedb.org/3/discover/movie?api_key=" + api_key

# mysql = MySQL(app)

# app.secret_key = 'your_secret_key'

# # Hashing function
# def hash_password(password):
#     return hashlib.sha256(password.encode()).hexdigest()

# @app.route('/register', methods=['GET', 'POST'])
# def register():
#     if request.method == 'POST':
#         username = request.form['username']
#         password = request.form['password']
#         name = request.form['name']
#         try:
#             cur = mysql.connection.cursor()
#             cur.execute("INSERT INTO users (keyname, username, pass) VALUES (%s, %s, %s)",
#                         (name, username, password))
#             mysql.connection.commit()
#             cur.close()

#             flash('Registration successful. You can now login.', 'success')
#             return redirect(url_for('login'))
#         except Exception as e:
#             flash('Registration failed. Please try again later.', 'danger')
#             print(f"An error occurred during registration: {e}")
            
#     return render_template('register.html')


# @app.route('/login', methods=['GET', 'POST'])
# def login():
#     if request.method == 'POST':
#         username = request.form['username']
#         # password = hashlib.sha256(request.form['password'].encode()).hexdigest()
#         password = request.form['password']
#         print(username)
#         print(password)

#         try:
#             cur = mysql.connection.cursor()
#             cur.execute("SELECT * FROM users WHERE username = %s AND pass = %s", (username, password))
#             user = cur.fetchone()
#             cur.close()

#             if user:
#                 session['username'] = user['username']
#                 session['user_id'] = user['id']
#                 flash('Login successful', 'success')
#                 return redirect(url_for('home'))
#             else:
#                 flash('Invalid credentials', 'danger')
#         except Exception as e:
#             flash('Login failed. Please try again later.', 'danger')
#             print(f"An error occurred during login: {e}")
#     return render_template('login.html')


# @app.route('/logout')
# def logout():
#     session.clear()
#     flash('You have been logged out.', 'info')
#     return redirect(url_for('login'))

# @app.route('/')
# def index():
#     return redirect(url_for('login'))

# @app.route('/home', methods=['GET', 'POST'])
# def home():
    
#     if flask_request.method == 'POST':
#         search_query = flask_request.form.get('search')
#         print(f"Search Query: {search_query}")
#         recommended_movies = recommend_10(search_query)
#         print(f"Recommended Movies: {recommended_movies}")
#         return render_template("home.html", data=recommended_movies, search_query=search_query)
    
#     userId = session['user_id']
#     movieList = recommended_movies_by_collab_filtering(userId)
#     print(movieList)
#     return render_template("home.html", data=movieList, search_query=None)

# @app.route('/discover', methods=['GET', 'POST'])
# def discover():
    
#     if flask_request.method == 'POST':
#         search_query = flask_request.form.get('search')
#         print(f"Search Query: {search_query}")

#         recommended_movies = recommend_10(search_query)
#         print(f"Recommended Movies: {recommended_movies}")

#         return render_template("index.html", data=recommended_movies, search_query=search_query)

#     ssl._create_default_https_context = ssl._create_unverified_context
#     conn = urllib_request.urlopen(base_url)
#     json_data = json.loads(conn.read())
#     return render_template("index.html", data=json_data["results"], search_query=None)

# @app.route('/submit', methods=['GET','POST'])
# def submit():
#     movie=''
#     if request.method == 'POST':
#         movie = request.form['search']
#         movie_list = recommend_10(movie)
#     return render_template('results.html',movies=movie_list)

# @app.route('/details')
# def details():
#     movie = request.args.get('movie')
#     return render_template('movie.html',movie = movie)


# @app.before_request
# def require_login():
#     allowed_routes = ['login', 'register','logout']  # add other allowed routes
#     if 'user_id' not in session and request.endpoint not in allowed_routes:
#         return redirect(url_for('login'))
    
# if __name__ == "__main__":
#     app.run()
    
    

In [25]:
from flask import Flask
from flask_mysqldb import MySQL
from flask import render_template, redirect, request, url_for, flash, session,request as flask_request
import hashlib

app = Flask(__name__)

app.config['MYSQL_HOST'] = 'localhost'
app.config['MYSQL_USER'] = 'root'
app.config['MYSQL_PASSWORD'] = '@gArwal80okD3p0rt'
app.config['MYSQL_DB'] = 'movie_rec'

app.config['MYSQL_CURSORCLASS'] = 'DictCursor'

api_key = "ff1e18b29203a4007ed0df1907cccf71"
base_url = "https://api.themoviedb.org/3/discover/movie?api_key=" + api_key

mysql = MySQL(app)

app.secret_key = 'your_secret_key'

# Hashing function
def hash_password(password):
    return hashlib.sha256(password.encode()).hexdigest()

@app.route('/register', methods=['GET', 'POST'])
def register():
    if request.method == 'POST':
        username = request.form['username']
        password = request.form['password']
        name = request.form['name']
        genres = request.form.getlist('genre')
        try:
            cur = mysql.connection.cursor()
            cur.execute("INSERT INTO users (keyname, username, pass, genres) VALUES (%s, %s, %s, %s)",
                        (name, username, password, ','.join(genres)))
            mysql.connection.commit()
            cur.close()

            flash('Registration successful. You can now login.', 'success')
            return redirect(url_for('login'))
        except Exception as e:
            flash('Registration failed. Please try again later.', 'danger')
            print(f"An error occurred during registration: {e}")
            
    return render_template('register.html')


@app.route('/login', methods=['GET', 'POST'])
def login():
    if request.method == 'POST':
        username = request.form['username']
        # password = hashlib.sha256(request.form['password'].encode()).hexdigest()
        password = request.form['password']
        print(username)
        print(password)

        try:
            cur = mysql.connection.cursor()
            cur.execute("SELECT * FROM users WHERE username = %s AND pass = %s", (username, password))
            user = cur.fetchone()
            cur.close()

            if user:
                session['username'] = user['username']
                session['user_id'] = user['id']
                session['genres'] = user['genres']
                flash('Login successful', 'success')
                return redirect(url_for('home'))
            else:
                flash('Invalid credentials', 'danger')
        except Exception as e:
            flash('Login failed. Please try again later.', 'danger')
            print(f"An error occurred during login: {e}")
    return render_template('login.html')


@app.route('/logout')
def logout():
    session.clear()
    flash('You have been logged out.', 'info')
    return redirect(url_for('login'))

@app.route('/')
def index():
    return redirect(url_for('login'))

@app.route('/home', methods=['GET', 'POST'])
def home():
    
    if flask_request.method == 'POST':
        search_query = flask_request.form.get('search')
        print(f"Search Query: {search_query}")
        recommended_movies = recommend_20(search_query)
        print(f"Recommended Movies: {recommended_movies}")
        return render_template("home.html", recommended_movies=recommended_movies, search_query=search_query)
    
    userId = session['user_id']
    movieList = recommended_movies_by_collab_filtering(userId)
    genres = list(session['genres'].split(','))
    Movies_by_genre = genre_filterer_by_fans(genres)
    return render_template("home.html", data=movieList,movies = Movies_by_genre,search_query=None)

@app.route('/discover', methods=['GET', 'POST'])
def discover():
    
    if flask_request.method == 'POST':
        search_query = flask_request.form.get('search')
        print(f"Search Query: {search_query}")

        recommended_movies = recommend_20(search_query)
        print(f"Recommended Movies: {recommended_movies}")

        return render_template("index.html", recommended_movies=recommended_movies, search_query=search_query)

    ssl._create_default_https_context = ssl._create_unverified_context
    # Set the number of movies you want to display
    num_movies_to_display = 50
    
    # Calculate the number of pages required to display the specified number of movies
    num_pages = (num_movies_to_display + 19) // 20

    # Fetch the data for each page and concatenate the results
    all_results = []
    for page in range(1, num_pages + 1):
        page_url = base_url + f"&page={page}"
        conn = urllib_request.urlopen(page_url)
        json_data = json.loads(conn.read())
        all_results.extend(json_data["results"])
    return render_template("index.html", data=all_results, search_query=None)

@app.route("/movie_details/<int:movie_id>")
def movie_details(movie_id):
    # Get details from the DataFrame based on movie_id
    movie_details_data = cleaned_df[cleaned_df['movie_id'] == movie_id].iloc[0]


    api_key = "ff1e18b29203a4007ed0df1907cccf71"
    base_url = "https://api.themoviedb.org/3/movie/"
    
    # Extract relevant information from movie_details_data
    movie_details = {
        'title': movie_details_data['title'],
        'poster_path': fetch_poster_path(api_key, base_url, movie_id),
        'genres': movie_details_data['genres'],
        'cast': movie_details_data['cast'],
        'crew': movie_details_data['crew'],
        'overview': movie_details_data['overview'],
    }

    return render_template("movie_details.html", movie_details=movie_details)

@app.route('/submit', methods=['GET','POST'])
def submit():
    movie=''
    if request.method == 'POST':
        movie = request.form['search']
        movie_list = recommend_20(movie)
    return render_template('results.html',movies=movie_list)

@app.route('/details')
def details():
    movie = request.args.get('movie')
    return render_template('movie.html',movie = movie)


@app.before_request
def require_login():
    allowed_routes = ['login', 'register','logout']  # add other allowed routes
    if 'user_id' not in session and request.endpoint not in allowed_routes:
        return redirect(url_for('login'))
    
if __name__ == "__main__":
    app.run()
    
    

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [10/Mar/2024 20:57:54] "GET / HTTP/1.1" 302 -
127.0.0.1 - - [10/Mar/2024 20:57:54] "GET /login HTTP/1.1" 200 -
127.0.0.1 - - [10/Mar/2024 20:57:58] "POST /login HTTP/1.1" 302 -


vijit
bubu
[10743, 2976, 256962, 40652, 93456, 257091, 824, 9034, 5494, 12596, 74465, 15059, 64682, 188207, 98557, 2898, 9702, 78383, 44147, 10207]


127.0.0.1 - - [10/Mar/2024 20:58:00] "GET /home HTTP/1.1" 200 -
127.0.0.1 - - [10/Mar/2024 20:58:00] "GET /static/style.css HTTP/1.1" 304 -
127.0.0.1 - - [10/Mar/2024 20:58:02] "GET /movie_details/ HTTP/1.1" 404 -
127.0.0.1 - - [10/Mar/2024 20:58:04] "GET /movie_details/ HTTP/1.1" 404 -
127.0.0.1 - - [10/Mar/2024 20:58:08] "GET /movie_details/ HTTP/1.1" 404 -
