# **Movie Recommendation System**

In [1]:
#@title Import Libraries and Setup
# Import necessary libraries
import numpy as np
import pandas as pd
import os
import ast

# Install kagglehub if necessary
!pip install kagglehub
import kagglehub




In [2]:
#@title Download the Dataset
# Download the TMDB dataset using kagglehub
path = kagglehub.dataset_download("tmdb/tmdb-movie-metadata")
print("Path to dataset files:", path)

# Set dataset file paths
movies_path = os.path.join(path, 'tmdb_5000_movies.csv')
credits_path = os.path.join(path, 'tmdb_5000_credits.csv')


Path to dataset files: /kaggle/input/tmdb-movie-metadata


In [3]:
#@title Load the Data
# Load the datasets
movies = pd.read_csv(movies_path)
credits = pd.read_csv(credits_path)

# Merge movies and credits datasets
movies = movies.merge(credits, on='title')
movies = movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]

# Drop missing values
movies.dropna(inplace=True)


In [4]:
#@title Data Preprocessing
# Helper function to extract names from JSON-like strings
def convert(text):
    L = []
    for i in ast.literal_eval(text):
        L.append(i['name'])
    return L

# Apply the function to extract genres and keywords
movies['genres'] = movies['genres'].apply(convert)
movies['keywords'] = movies['keywords'].apply(convert)

# Extract top 3 cast members
movies['cast'] = movies['cast'].apply(convert).apply(lambda x: x[:3])

# Extract director from crew
def fetch_director(text):
    L = []
    for i in ast.literal_eval(text):
        if i['job'] == 'Director':
            L.append(i['name'])
    return L

movies['crew'] = movies['crew'].apply(fetch_director)


In [5]:
#@title Combine Tags for Each Movie
# Remove spaces from text data
def collapse(L):
    return [i.replace(" ", "") for i in L]

movies['genres'] = movies['genres'].apply(collapse)
movies['keywords'] = movies['keywords'].apply(collapse)
movies['cast'] = movies['cast'].apply(collapse)
movies['crew'] = movies['crew'].apply(collapse)
movies['overview'] = movies['overview'].apply(lambda x: x.split())

# Combine all tags into a single column
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']
new = movies[['movie_id', 'title', 'tags']]

# Convert tags list into a single string
new['tags'] = new['tags'].apply(lambda x: " ".join(x))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new['tags'] = new['tags'].apply(lambda x: " ".join(x))


In [6]:
#@title Feature Extraction and Similarity Calculation
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Create a CountVectorizer and transform the tags
cv = CountVectorizer(max_features=5000, stop_words='english')
vector = cv.fit_transform(new['tags']).toarray()

# Compute cosine similarity
similarity = cosine_similarity(vector)


In [7]:
#@title Recommendation Function
def recommend(movie):
    index = new[new['title'] == movie].index[0]
    distances = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda x: x[1])
    for i in distances[1:6]:
        print(new.iloc[i[0]].title)


In [8]:
#@title Test the Recommendation System
# Test the system with an example
recommend('Gandhi')


Gandhi, My Father
The Wind That Shakes the Barley
A Passage to India
Guiana 1838
Ramanujan


In [9]:
#@title Save the Model for Deployment
import pickle

# Save the processed data and similarity matrix
pickle.dump(new, open('movie_list.pkl', 'wb'))
pickle.dump(similarity, open('similarity.pkl', 'wb'))


---

In [10]:
#@title Install Necessary Libraries
# Install Streamlit and SQLite support
!pip install streamlit pyngrok


Collecting streamlit
  Downloading streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.8-py3-none-any.whl.metadata (10 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.45.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m101.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.8-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m106.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (7

In [11]:
#@title Import Libraries
import pickle
import pandas as pd
import requests
import sqlite3
import streamlit as st


In [12]:
#@title Set Up SQLite Database for User Authentication
# Create or connect to SQLite database
conn = sqlite3.connect('users.db')
c = conn.cursor()

# Create users table
c.execute('''
    CREATE TABLE IF NOT EXISTS users (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        username TEXT UNIQUE NOT NULL,
        password TEXT NOT NULL
    )
''')
conn.commit()


In [13]:
#@title Authentication Helper Functions
# Register user
def register_user(username, password):
    try:
        c.execute('INSERT INTO users (username, password) VALUES (?, ?)', (username, password))
        conn.commit()
        return True
    except sqlite3.IntegrityError:
        return False

# Authenticate user
def authenticate_user(username, password):
    c.execute('SELECT * FROM users WHERE username = ? AND password = ?', (username, password))
    return c.fetchone() is not None

# Logout user
def logout_user():
    st.session_state['logged_in'] = False
    st.session_state['username'] = None


In [14]:
#@title Recommendation Functions
def fetch_poster_and_url(movie_id, api_key):
    """Fetch movie poster and TMDB URL using the movie ID."""
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}&language=en-US"
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        poster_path = data.get('poster_path', '')
        poster_url = f"https://image.tmdb.org/t/p/w500/{poster_path}" if poster_path else None
        tmdb_url = f"https://www.themoviedb.org/movie/{movie_id}"
        return poster_url, tmdb_url
    except Exception:
        return None, None

def recommend(movie, movies, similarity, api_key):
    """Get movie recommendations based on similarity scores."""
    try:
        index = movies[movies['title'].str.lower() == movie.lower()].index[0]
        distances = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda x: x[1])
        recommended_movies = []
        for i in distances[1:6]:
            movie_id = movies.iloc[i[0]].movie_id
            poster_url, tmdb_url = fetch_poster_and_url(movie_id, api_key)
            recommended_movies.append({
                "title": movies.iloc[i[0]].title,
                "poster_url": poster_url,
                "tmdb_url": tmdb_url
            })
        return recommended_movies
    except IndexError:
        return []


In [15]:
#@title Load Data
# Load the preprocessed data and similarity matrix
movies = pickle.load(open('/content/movie_list.pkl', 'rb'))
similarity = pickle.load(open('/content/similarity.pkl', 'rb'))

# TMDB API Key
TMDB_API_KEY = "8265bd1679663a7ea12ac168da84d2e8"  # Replace with your TMDB API key


In [16]:
#@title Create Streamlit App Script
%%writefile app.py
import pickle
import pandas as pd
import requests
import sqlite3
import streamlit as st

# Database connection
conn = sqlite3.connect('users.db')
c = conn.cursor()

# Authentication Functions
def register_user(username, password):
    try:
        c.execute('INSERT INTO users (username, password) VALUES (?, ?)', (username, password))
        conn.commit()
        return True
    except sqlite3.IntegrityError:
        return False

def authenticate_user(username, password):
    c.execute('SELECT * FROM users WHERE username = ? AND password = ?', (username, password))
    return c.fetchone() is not None

def logout_user():
    st.session_state['logged_in'] = False
    st.session_state['username'] = None

def fetch_poster_and_url(movie_id, api_key):
    """Fetch movie poster and TMDB URL using the movie ID."""
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}&language=en-US"
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        poster_path = data.get('poster_path', '')
        poster_url = f"https://image.tmdb.org/t/p/w500/{poster_path}" if poster_path else None
        tmdb_url = f"https://www.themoviedb.org/movie/{movie_id}"
        return poster_url, tmdb_url
    except Exception:
        return None, None

def recommend(movie, movies, similarity, api_key):
    """Get movie recommendations based on similarity scores."""
    try:
        index = movies[movies['title'].str.lower() == movie.lower()].index[0]
        distances = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda x: x[1])
        recommended_movies = []
        for i in distances[1:6]:
            movie_id = movies.iloc[i[0]].movie_id
            poster_url, tmdb_url = fetch_poster_and_url(movie_id, api_key)
            recommended_movies.append({
                "title": movies.iloc[i[0]].title,
                "poster_url": poster_url,
                "tmdb_url": tmdb_url
            })
        return recommended_movies
    except IndexError:
        return []

# Load Data
movies = pickle.load(open('/content/movie_list.pkl', 'rb'))
similarity = pickle.load(open('/content/similarity.pkl', 'rb'))
TMDB_API_KEY = "8265bd1679663a7ea12ac168da84d2e8"

# Streamlit App
st.title("🎬 Movie Recommendation System with Authentication")

# Initialize session state
if 'logged_in' not in st.session_state:
    st.session_state['logged_in'] = False
    st.session_state['username'] = None

# Navbar
if st.session_state['logged_in']:
    st.sidebar.markdown(f"👤 Logged in as: **{st.session_state['username']}**")
    if st.sidebar.button("Log Out", key="logout_button"):
        logout_user()
        st.rerun()
else:
    nav_option = st.sidebar.radio("Navigation", ["Log In", "Register"])

# Authentication Pages
if not st.session_state['logged_in']:
    if nav_option == "Log In":
        st.subheader("Log In")
        username = st.text_input("Username")
        password = st.text_input("Password", type="password")
        if st.button("Log In"):
            if authenticate_user(username, password):
                st.session_state['logged_in'] = True
                st.session_state['username'] = username
                st.success("Login successful!")
                st.rerun()
            else:
                st.error("Invalid username or password.")
    elif nav_option == "Register":
        st.subheader("Register")
        new_username = st.text_input("Create Username")
        new_password = st.text_input("Create Password", type="password")
        if st.button("Register"):
            if register_user(new_username, new_password):
                st.success("Registration successful! Please log in.")
            else:
                st.error("Username already exists. Please choose a different username.")
else:
    # Recommendation Section
    st.subheader("Welcome to Movie Recommendations!")
    movie_list = movies['title'].values
    selected_movie = st.selectbox("Type or select a movie:", movie_list)

    if st.button("Show Recommendations"):
        recommended_movies = recommend(selected_movie, movies, similarity, TMDB_API_KEY)
        if recommended_movies:
            cols = st.columns(5)
            for i, col in enumerate(cols):
                if i < len(recommended_movies):
                    movie = recommended_movies[i]
                    with col:
                        st.markdown(f"[![{movie['title']}]({movie['poster_url']})]({movie['tmdb_url']})")
                        st.caption(movie['title'])
        else:
            st.error("No recommendations found!")


Writing app.py


In [17]:
!curl https://loca.lt/mytunnelpassword

35.194.81.131

In [None]:
#@title Run the App
# Run the Streamlit app and expose it using LocalTunnel
!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.194.81.131:8501[0m
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K[1G[0JNeed to install the following packages:
localtunnel@2.0.2
Ok to proceed? (y) [20Gy

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0Kyour url is: https://great-banks-sleep.loca.lt
