In [None]:
import os

import numpy as np
import pandas as pd
import requests
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Directory containing the MovieLens `.dat` files. "/content" is where the
# original loader looked; change this one constant to run elsewhere.
DATA_DIR = "/content"


def read_movielens_table(filename, columns):
    """Read one headerless, '::'-delimited MovieLens file from DATA_DIR.

    Args:
        filename: File name inside DATA_DIR, e.g. "movies.dat".
        columns: Column names to assign, in file order.

    Returns:
        A DataFrame with one row per line of the file.
    """
    return pd.read_csv(
        f"{DATA_DIR}/{filename}",
        sep='::',               # two-character separator
        engine='python',        # the C parser cannot split on multi-character separators
        encoding='ISO-8859-1',  # same encoding for both files
        names=columns,          # the files carry no header row
    )


movies = read_movielens_table("movies.dat", ['movieId', 'title', 'genres'])
ratings = read_movielens_table("ratings.dat", ['userId', 'movieId', 'rating', 'timestamp'])

# Display the first few rows of each
print("Movies:\n", movies.head())
print("\nRatings:\n", ratings.head())

Movies:
    movieId                               title                        genres
0        1                    Toy Story (1995)   Animation|Children's|Comedy
1        2                      Jumanji (1995)  Adventure|Children's|Fantasy
2        3             Grumpier Old Men (1995)                Comedy|Romance
3        4            Waiting to Exhale (1995)                  Comedy|Drama
4        5  Father of the Bride Part II (1995)                        Comedy

Ratings:
    userId  movieId  rating  timestamp
0       1     1193       5  978300760
1       1      661       3  978302109
2       1      914       3  978301968
3       1     3408       4  978300275
4       1     2355       5  978824291


In [None]:
# Attach the movie columns (title, genres) to every rating row. This is an
# inner join on 'movieId', so ratings with no matching movie row are dropped.
movie_data = ratings.merge(movies, how='inner', on='movieId')

In [None]:
# --------- CONTENT-BASED RECOMMENDER ---------
# Missing genre strings become empty documents so the vectorizer accepts them.
movies['genres'] = movies['genres'].fillna('')

# Genres are a '|'-separated list of labels, so each whole label is one token.
# The default word-level token pattern would truncate or split labels that
# contain punctuation (e.g. "Children's" -> "children", hyphenated labels ->
# two separate tokens), which distorts the genre weights.
tfidf = TfidfVectorizer(token_pattern=r'[^|]+', stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Square matrix of pairwise genre similarities; row/column i corresponds to
# the i-th row of `movies`.
content_similarity = cosine_similarity(tfidf_matrix)

In [None]:
# --------- COLLABORATIVE FILTERING (USER RATING) ---------
# Rows are users, columns are movie titles, cells hold the rating
# (pivot_table averages if a user/title pair occurs more than once).
user_movie_matrix = movie_data.pivot_table(
    values='rating',
    index='userId',
    columns='title',
)

# Unrated cells count as 0; transposing makes each row one movie's rating
# vector, so the similarity is computed movie-to-movie.
item_vectors = user_movie_matrix.fillna(0).T
collab_similarity = cosine_similarity(item_vectors)

# Label both axes with the movie titles so scores can be looked up by title.
movie_titles = user_movie_matrix.columns
collab_sim_df = pd.DataFrame(
    collab_similarity,
    index=movie_titles,
    columns=movie_titles,
)

In [None]:
# --------- TMDb API Function ---------
def fetch_poster(title):
    """Look up a movie on TMDb and return the URL of its poster image.

    Credentials are read from the environment instead of being embedded in
    the notebook:

    * ``TMDB_API_TOKEN`` - API Read Access Token (JWT-style); sent as a
      ``Bearer`` Authorization header.
    * ``TMDB_API_KEY`` - v3 API key; sent as the ``api_key`` query
      parameter. Used only when no token is set.

    NOTE(security): the token that used to be hard-coded here has been
    published with the notebook and should be revoked.

    Args:
        title: Movie title to search for.

    Returns:
        The poster URL of the first search hit, or "" when no credential is
        configured, the title is empty, the request fails, or the first hit
        has no poster. The lookup is best-effort and never raises for
        network or API errors.
    """
    token = os.environ.get("TMDB_API_TOKEN")
    api_key = os.environ.get("TMDB_API_KEY")
    if not title or not (token or api_key):
        return ""

    # Passing the title through `params` lets requests URL-encode it, so
    # titles containing '&', '#', spaces, etc. cannot corrupt the query.
    params = {"query": title}
    headers = {"accept": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    else:
        params["api_key"] = api_key

    try:
        response = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params=params,
            headers=headers,
            timeout=10,  # never hang the notebook on a stalled connection
        )
        response.raise_for_status()
        results = response.json().get("results") or []
    except (requests.RequestException, ValueError):
        # Network failure, non-2xx status, or a body that is not JSON.
        return ""

    poster_path = results[0].get("poster_path") if results else None
    if not poster_path:
        return ""
    # TMDb poster paths start with '/'; strip it to avoid a double slash.
    return "https://image.tmdb.org/t/p/w500/" + poster_path.lstrip("/")

In [None]:
# --------- HYBRID RECOMMENDER FUNCTION ---------
def hybrid_recommend(movie_title, top_n=5, content_weight=0.5):
    """Recommend movies by blending genre similarity with rating similarity.

    Reads the notebook-level objects `movies`, `content_similarity` and
    `collab_sim_df`, and calls `fetch_poster` once per returned title.

    Args:
        movie_title: Exact title as it appears in ``movies['title']``.
        top_n: Maximum number of recommendations to return.
        content_weight: Share of the final score taken from the content-based
            similarity, in [0, 1]; the collaborative similarity gets the
            remaining ``1 - content_weight``. The default of 0.5 reproduces
            the original equal blend.

    Returns:
        A list of up to ``top_n`` ``(title, poster_url)`` tuples, best match
        first, never containing ``movie_title`` itself. Returns an empty list
        when ``top_n`` is not positive or the title is unknown.

    Raises:
        ValueError: If ``content_weight`` is outside [0, 1].
    """
    if not 0.0 <= content_weight <= 1.0:
        raise ValueError("content_weight must be between 0 and 1")
    if top_n <= 0:
        return []

    # Locate the movie by row *position*: content_similarity is a plain array
    # aligned with the row order of `movies`, not with its index labels.
    positions = np.flatnonzero((movies['title'] == movie_title).to_numpy())
    if positions.size == 0:
        return []
    idx = positions[0]

    # Content-based: pair every title with its similarity to the query movie.
    # The stable descending sort fixes the insertion order used to break ties.
    titles = movies['title'].tolist()
    content_scores = sorted(
        zip(titles, content_similarity[idx]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Collaborative: a movie without a column in collab_sim_df simply
    # contributes no collaborative score.
    if movie_title in collab_sim_df.columns:
        collab_scores = collab_sim_df[movie_title].sort_values(ascending=False)
    else:
        collab_scores = pd.Series(dtype='float64')

    # Combine scores
    collab_weight = 1.0 - content_weight
    final_scores = {}
    for title, score in content_scores:
        final_scores[title] = final_scores.get(title, 0) + score * content_weight
    for title, score in collab_scores.items():
        final_scores[title] = final_scores.get(title, 0) + score * collab_weight

    # Sort final scores
    ranked = sorted(final_scores.items(), key=lambda item: item[1], reverse=True)

    # Skip the query movie and fetch posters only for titles actually returned.
    result = []
    for title, _ in ranked:
        if title == movie_title:
            continue
        result.append((title, fetch_poster(title)))
        if len(result) >= top_n:
            break
    return result

In [None]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.44.1-py3-none-any.whl (9.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m80.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [1]:
!apt-get install nodejs npm -y
!npm install -g localtunnel

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
npm is already the newest version (8.5.1~ds-1).
nodejs is already the newest version (12.22.9~dfsg-1ubuntu3.6).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.
[K[?25h
changed 22 packages, and audited 23 packages in 3s

3 packages are looking for funding
  run `npm fund` for details

1 [31m[1mhigh[22m[39m severity vulnerability

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.


In [2]:
# Inline preview of the app's widgets. Streamlit only renders when launched
# with `streamlit run`; executed directly in the notebook kernel these calls
# emit warnings like the ones recorded in this cell's output.
import streamlit as st

st.title("🎬 Hybrid Movie Recommender System")
movie = st.text_input(label="Enter a movie name")
st.write("Your selected movie is:", movie)

2025-04-25 16:43:03.743 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-04-25 16:43:03.753 Session state does not function when running a script without `streamlit run`


In [3]:
%%writefile app.py
import streamlit as st

st.title("🎬 Hybrid Movie Recommender System")
movie = st.text_input("Enter a movie name")
st.write("Your selected movie is:", movie)

Overwriting app.py


In [7]:
!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://104.155.214.135:8501[0m
[0m
your url is: https://lazy-trees-kneel.loca.lt
[34m  Stopping...[0m
^C


In [6]:
# Query localtunnel's "mytunnelpassword" endpoint; the recorded response is an
# IP address. Presumably this is the value the tunnel's landing page asks
# visitors to enter — TODO confirm.
!curl https://loca.lt/mytunnelpassword

104.155.214.135