In [None]:
import numpy as np
import pandas as pd
import nltk
import pickle
from nltk.stem.porter import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import ast

# Load the two raw CSV exports.
credits = pd.read_csv('tmdb_5000_credits.csv')
movies = pd.read_csv('tmdb_5000_movies.csv')

# Merge datasets on movie title.
# NOTE(review): joining on 'title' pairs every row of one file with every
# same-titled row of the other; if titles are not unique this produces
# mismatched duplicates. Confirm whether a join on the movie id is possible.
movies = movies.merge(credits, on='title')

# Keep only the columns the recommender uses, drop incomplete rows, and
# renumber the rows 0..n-1. The similarity matrix built later is addressed by
# row *position*, so index labels must match positions after rows are dropped.
movies = (
    movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]
    .dropna()
    .reset_index(drop=True)
)

In [None]:
def convert(text):
    """Parse a stringified list of dicts and return each entry's 'name' value.

    Args:
        text: A Python-literal string that evaluates to an iterable of
            mappings, each containing a 'name' key.

    Returns:
        A list with the 'name' of every entry, in the original order.
    """
    names = []
    for entry in ast.literal_eval(text):
        names.append(entry['name'])
    return names

# genres / keywords: keep every listed name.
for json_column in ('genres', 'keywords'):
    movies[json_column] = movies[json_column].apply(convert)

# cast: keep only the first three listed names.
movies['cast'] = movies['cast'].apply(lambda raw: convert(raw)[:3])

# crew: keep only the members whose job is exactly 'Director'.
movies['crew'] = movies['crew'].apply(
    lambda raw: [member['name'] for member in ast.literal_eval(raw) if member['job'] == 'Director']
)

def collapse(L):
    """Return a new list in which every string of L has its spaces removed.

    Args:
        L: An iterable of strings.

    Returns:
        A list of the same length with " " stripped out of each element, so a
        multi-word name becomes a single token.
    """
    squeezed = []
    for item in L:
        squeezed.append(item.replace(" ", ""))
    return squeezed

# Remove the spaces inside every name so each one becomes a single token.
for name_column in ('cast', 'crew', 'genres', 'keywords'):
    movies[name_column] = movies[name_column].map(collapse)

In [None]:
# Split the overview text into a list of words so it can be concatenated with
# the other list-valued columns.
movies['overview'] = movies['overview'].apply(str.split)

# One combined token list per movie: overview words, then genres, keywords,
# cast and crew.
movies['tags'] = (
    movies['overview']
    + movies['genres']
    + movies['keywords']
    + movies['cast']
    + movies['crew']
)

# Keep only the remaining columns plus 'tags', stored as one
# space-separated string per movie.
new = movies.drop(columns=['overview', 'genres', 'keywords', 'cast', 'crew'])
new['tags'] = new['tags'].apply(" ".join)

In [None]:
ps = PorterStemmer()

def stem(text):
    """Return `text` with every whitespace-separated word replaced by its Porter stem."""
    return " ".join(ps.stem(word) for word in text.split())

# Stem BEFORE vectorizing. Previously the tags were stemmed only after
# `fit_transform` had already run, so stemming never influenced `vector` or
# `similarity`.
# NOTE(review): stemmed forms of some stop words may no longer match the
# vectorizer's English stop-word list — confirm this is acceptable.
new['tags'] = new['tags'].apply(stem)

# Bag-of-words counts over the 5000 most frequent terms, English stop words removed.
cv = CountVectorizer(max_features=5000, stop_words='english')
vector = cv.fit_transform(new['tags']).toarray()

# Pairwise cosine similarity between all movies; row/column i is the i-th row of `new`.
similarity = cosine_similarity(vector)

In [None]:
def recommend(movie):
    """Return the titles of the five movies most similar to `movie`.

    Args:
        movie: Exact title to look up in `new['title']`. When several rows
            share the title, the first one is used.

    Returns:
        A list of up to five titles ordered from most to least similar,
        never including the queried row itself.

    Raises:
        IndexError: If no row of `new` has the given title.
    """
    # `similarity` is addressed by row position, so look up the position of
    # the title rather than its index label (the two differ whenever the
    # frame's index is not 0..n-1).
    matches = np.flatnonzero((new['title'] == movie).to_numpy())
    if matches.size == 0:
        raise IndexError(f"movie {movie!r} not found in the movie list")
    position = int(matches[0])

    ranked = sorted(enumerate(similarity[position]), key=lambda pair: pair[1], reverse=True)
    # Exclude the queried row explicitly instead of assuming it sorts first;
    # an equally-scored row could otherwise push it into the results.
    neighbours = [row for row, _ in ranked if row != position][:5]
    return [new['title'].iloc[row] for row in neighbours]

# Example query: print the recommendations returned for one title.
print(recommend('Batman Begins'))

['The Dark Knight', 'The Dark Knight Rises', 'Batman', 'Batman & Robin', 'Batman']


In [None]:
# Persist the two artefacts that app.py loads at start-up. Context managers
# guarantee the files are flushed and closed before the app tries to read them.
with open('movie_list.pkl', 'wb') as movie_file:
    pickle.dump(new, movie_file)
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)

In [None]:
!pip install streamlit
!pip install pyngrok

Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading streamlit-1.39.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [3

In [None]:
# List the contents of the current working directory.
!ls

movie_list.pkl	sample_data  similarity.pkl  tmdb_5000_credits.csv  tmdb_5000_movies.csv


In [None]:
import os
from getpass import getpass

# Never store the TMDB key in the notebook: reuse TMDB_API_KEY if it is already
# set in the environment, otherwise prompt for it without echoing.
# The key that was previously committed in this cell should be treated as
# leaked and rotated.
if not os.environ.get('TMDB_API_KEY'):
    os.environ['TMDB_API_KEY'] = getpass('TMDB API key: ')

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Read the ngrok auth token from NGROK_AUTH_TOKEN, prompting (without echo) only
# when it is not set, so the credential never lives in the notebook.
# The token that was previously committed here should be treated as leaked
# and revoked.
if not os.environ.get('NGROK_AUTH_TOKEN'):
    os.environ['NGROK_AUTH_TOKEN'] = getpass('ngrok auth token: ')
ngrok.set_auth_token(os.environ['NGROK_AUTH_TOKEN'])

In [None]:
# Step 2: Set up ngrok and open the tunnel that exposes the Streamlit port
import os
from getpass import getpass

from pyngrok import ngrok

# Read the ngrok auth token from NGROK_AUTH_TOKEN, prompting (without echo) only
# when it is not set, so the credential never lives in the notebook.
# The token that was previously committed here should be treated as leaked
# and revoked.
if not os.environ.get('NGROK_AUTH_TOKEN'):
    os.environ['NGROK_AUTH_TOKEN'] = getpass('ngrok auth token: ')
ngrok.set_auth_token(os.environ['NGROK_AUTH_TOKEN'])

# Open a tunnel on port 8501 (default port for Streamlit)
public_url = ngrok.connect(8501)
print(f"Public URL: {public_url}")




Public URL: NgrokTunnel: "https://1694-34-74-126-48.ngrok-free.app" -> "http://localhost:8501"


In [None]:
# Source of the Streamlit front end (one recommendation per row); it is written
# verbatim to app.py below. The generated app reads the TMDB key from the
# TMDB_API_KEY environment variable, so no credential is embedded in the file.
app_code = """
import os
import pickle

import pandas as pd
import requests
import streamlit as st

TMDB_API_KEY = os.environ.get('TMDB_API_KEY', '')
TMDB_MOVIE_URL = "https://api.themoviedb.org/3/movie/{movie_id}"
POSTER_BASE_URL = "https://image.tmdb.org/t/p/w500/"
NO_DESCRIPTION = "No description available."


def fetch_poster_and_description(movie_id):
    # Returns (poster_url_or_None, description) for the given TMDB movie id.
    try:
        response = requests.get(
            TMDB_MOVIE_URL.format(movie_id=movie_id),
            params={'api_key': TMDB_API_KEY, 'language': 'en-US'},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, error status or non-JSON body: degrade gracefully
        # instead of crashing the whole page.
        return None, NO_DESCRIPTION

    # TMDB may report a null poster_path / overview; do not concatenate None.
    poster_path = data.get('poster_path')
    full_path = POSTER_BASE_URL + poster_path if poster_path else None
    description = data.get('overview') or NO_DESCRIPTION
    return full_path, description


def recommend(movie):
    # The similarity matrix is addressed by row position, so look up the
    # position of the title rather than its DataFrame index label.
    index = int((movies['title'] == movie).to_numpy().nonzero()[0][0])
    distances = sorted(enumerate(similarity[index]), reverse=True, key=lambda x: x[1])
    # Skip the selected movie explicitly rather than assuming it sorts first.
    neighbours = [pos for pos, _ in distances if pos != index][:5]

    recommended_movie_names = []
    recommended_movie_posters = []
    recommended_movie_descriptions = []

    for pos in neighbours:
        row = movies.iloc[pos]
        poster, description = fetch_poster_and_description(row['movie_id'])
        recommended_movie_posters.append(poster)
        recommended_movie_names.append(row['title'])
        recommended_movie_descriptions.append(description)

    return recommended_movie_names, recommended_movie_posters, recommended_movie_descriptions


# Load the movies and similarity matrix.
# pickle executes code on load: only ever read files produced by the notebook.
with open('movie_list.pkl', 'rb') as movie_file:
    movies = pd.DataFrame(pickle.load(movie_file))
with open('similarity.pkl', 'rb') as similarity_file:
    similarity = pickle.load(similarity_file)

# Streamlit UI
st.header('Movie Recommendation System')

if not TMDB_API_KEY:
    st.warning('TMDB_API_KEY is not set; posters and descriptions will be unavailable.')

# Movie select box
movie_list = movies['title'].values
selected_movie = st.selectbox("Type or select a movie from the dropdown", movie_list)

# Button to show recommendations
if st.button('Show Recommendation'):
    names, posters, descriptions = recommend(selected_movie)

    # One full-width block per recommendation: title, poster, description.
    for name, poster, description in zip(names, posters, descriptions):
        col = st.columns(1)[0]
        with col:
            st.text(name)
            if poster:
                st.image(poster)
            else:
                st.text('Poster not available')
            st.write(f"*Description:* {description}")
"""
with open('app.py', 'w', encoding='utf-8') as f:
    f.write(app_code)

In [None]:
# Run the app.py script in the background so the notebook stays usable.
# The port is stated explicitly so the app is served on the same port (8501)
# that the ngrok tunnel forwards to, rather than relying on Streamlit's default.
get_ipython().system_raw('streamlit run app.py --server.port 8501 &')

In [None]:
# Step 4: Run the Streamlit app in the foreground on port 8501.
# NOTE(review): the recorded output of this cell is "Port 8501 is already in use";
# presumably the background instance started by the previous cell already holds
# the port, which would make this cell redundant — confirm and run only one of the two.
!streamlit run app.py --server.port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
2024-10-11 12:08:51.078 Port 8501 is already in use


In [None]:
# Source of the Streamlit front end (five recommendations side by side); it is
# written verbatim to app.py below, replacing any existing app.py. The generated
# app reads the TMDB key from the TMDB_API_KEY environment variable, so no
# credential is embedded in the file.
app_code = """
import os
import pickle

import pandas as pd
import requests
import streamlit as st

TMDB_API_KEY = os.environ.get('TMDB_API_KEY', '')
TMDB_MOVIE_URL = "https://api.themoviedb.org/3/movie/{movie_id}"
POSTER_BASE_URL = "https://image.tmdb.org/t/p/w500/"
NO_DESCRIPTION = "No description available."


def fetch_poster_and_description(movie_id):
    # Returns (poster_url_or_None, description) for the given TMDB movie id.
    try:
        response = requests.get(
            TMDB_MOVIE_URL.format(movie_id=movie_id),
            params={'api_key': TMDB_API_KEY, 'language': 'en-US'},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, error status or non-JSON body: degrade gracefully
        # instead of crashing the whole page.
        return None, NO_DESCRIPTION

    # TMDB may report a null poster_path / overview; do not concatenate None.
    poster_path = data.get('poster_path')
    full_path = POSTER_BASE_URL + poster_path if poster_path else None
    description = data.get('overview') or NO_DESCRIPTION
    return full_path, description


def recommend(movie):
    # The similarity matrix is addressed by row position, so look up the
    # position of the title rather than its DataFrame index label.
    index = int((movies['title'] == movie).to_numpy().nonzero()[0][0])
    distances = sorted(enumerate(similarity[index]), reverse=True, key=lambda x: x[1])
    # Skip the selected movie explicitly rather than assuming it sorts first.
    neighbours = [pos for pos, _ in distances if pos != index][:5]

    recommended_movie_names = []
    recommended_movie_posters = []
    recommended_movie_descriptions = []

    for pos in neighbours:
        row = movies.iloc[pos]
        poster, description = fetch_poster_and_description(row['movie_id'])
        recommended_movie_posters.append(poster)
        recommended_movie_names.append(row['title'])
        recommended_movie_descriptions.append(description)

    return recommended_movie_names, recommended_movie_posters, recommended_movie_descriptions


# Load the movies and similarity matrix.
# pickle executes code on load: only ever read files produced by the notebook.
with open('movie_list.pkl', 'rb') as movie_file:
    movies = pd.DataFrame(pickle.load(movie_file))
with open('similarity.pkl', 'rb') as similarity_file:
    similarity = pickle.load(similarity_file)

# Streamlit UI
st.header('Movie Recommendation System')

if not TMDB_API_KEY:
    st.warning('TMDB_API_KEY is not set; posters and descriptions will be unavailable.')

# Movie select box
movie_list = movies['title'].values
selected_movie = st.selectbox("Type or select a movie from the dropdown", movie_list)

# Button to show recommendations
if st.button('Show Recommendation'):
    names, posters, descriptions = recommend(selected_movie)

    # Five side-by-side columns, one per recommendation; zip stops early if
    # fewer than five recommendations are available.
    columns = st.columns(5)
    for col, name, poster, description in zip(columns, names, posters, descriptions):
        with col:
            st.text(name)
            if poster:
                st.image(poster)
            else:
                st.text('Poster not available')
            st.write(f"*Description:* {description}")
"""
with open('app.py', 'w', encoding='utf-8') as f:
    f.write(app_code)