In [1]:
!pip install scikit-surprise streamlit

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 154.4/154.4 kB 4.5 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Collecting streamlit
  Downloading streamlit-1.42.0-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m941.6 kB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.42.0-py2.py3-none-any.whl (9.6

In [1]:
%%writefile app.py
# NOTE(review): this cell has no body, so app.py is written out with no application code.
# The notebook later launches `streamlit run app.py` behind the ngrok tunnel, while the
# Streamlit UI is defined in a separate notebook cell instead of in this file -- presumably
# the public URL therefore shows an empty page. The UI (plus the data/model loading it
# depends on) needs to live in this file.

Writing app.py


In [3]:
# Place the Kaggle API credentials under ~/.kaggle for the `kaggle` CLI.
# kaggle.json must already be present in the current working directory.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the credentials file to owner read/write, then list it to confirm the mode.
!chmod 600 ~/.kaggle/kaggle.json
!ls -l ~/.kaggle/kaggle.json
# Download the dataset archive (GDSC-Recommender.zip) via the Kaggle CLI.
!kaggle datasets download -d hrwarrior/GDSC-Recommender

-rw------- 1 root root 65 Feb 12 08:15 /root/.kaggle/kaggle.json
Dataset URL: https://www.kaggle.com/datasets/hrwarrior/GDSC-Recommender
License(s): CC0-1.0
Downloading GDSC-Recommender.zip to /content
 92% 180M/195M [00:00<00:00, 178MB/s]
100% 195M/195M [00:01<00:00, 197MB/s]


In [4]:
import zipfile

# Unpack the downloaded Kaggle archive into /content.
with zipfile.ZipFile('/content/GDSC-Recommender.zip', mode='r') as archive:
    archive.extractall(path='/content')

In [5]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy
from sklearn.metrics import precision_recall_fscore_support
import streamlit as st

In [6]:
# Read the five dataset CSVs; the paths are relative, so they are resolved against
# the current working directory.
(
    movies,
    ratings,
    tags,
    genome_scores,
    genome_tags,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        'movie.csv',
        'rating.csv',
        'tag.csv',
        'genome_scores.csv',
        'genome_tags.csv',
    )
)

In [7]:
# Turn the pipe-delimited genre string of each movie into a list of genre names.
# Only str values are split: the column is overwritten in place, so on a second run of
# this cell the values are already lists and calling .split on them would raise
# AttributeError. Non-string values (lists from a previous run, NaN) pass through as-is.
movies["genres"] = movies["genres"].apply(
    lambda genres: genres.split("|") if isinstance(genres, str) else genres
)

In [8]:
# Wide matrix: one row per movieId, one column per tagId, cells hold the tag relevance.
# Movie/tag pairs without a score are filled with 0.
movie_tag_matrix = (
    genome_scores
    .pivot(index="movieId", columns="tagId", values="relevance")
    .fillna(0)
)

In [9]:
# Brute-force nearest-neighbour index using cosine distance over the rows of
# movie_tag_matrix (one row per movie).
knn_model = NearestNeighbors(algorithm="brute", metric="cosine")
knn_model.fit(X=movie_tag_matrix)

In [10]:
def get_movie_id(movie_name):
    """Look up a movieId by case-insensitive title substring.

    Args:
        movie_name: Text to search for inside the ``title`` column of ``movies``.
            Surrounding whitespace is ignored.

    Returns:
        The ``movieId`` of the first row of ``movies`` whose title contains
        ``movie_name``, or ``None`` when nothing matches or the input is blank.
    """
    # An empty pattern is a substring of every title, which would silently
    # return the first row of the catalog instead of "not found".
    if not isinstance(movie_name, str) or not movie_name.strip():
        return None

    # regex=False: the text comes from a free-form input box, so it must be matched
    # literally. As a regex, a title such as "Heat (1995)" would be parsed as a group
    # and a lone "(" would raise re.error.
    matches = movies[
        movies["title"].str.contains(movie_name.strip(), case=False, na=False, regex=False)
    ]
    return matches.iloc[0]["movieId"] if not matches.empty else None

def get_similar_movies(movie_id, k=5):
    """Return the ``k`` movies closest to ``movie_id`` in tag-relevance space.

    Args:
        movie_id: movieId of the query movie; must be a row label of
            ``movie_tag_matrix``.
        k: Number of similar movies to return.

    Returns:
        A DataFrame with columns ``movieId`` and ``title``, ordered from most to
        least similar according to ``knn_model``. If ``movie_id`` has no row in
        ``movie_tag_matrix`` the string ``"Movie ID not found"`` is returned instead.
    """
    if movie_id not in movie_tag_matrix.index:
        return "Movie ID not found"

    # Query with a one-row DataFrame (not a list wrapping a Series) so the columns
    # line up with the frame the model was fitted on.
    query = movie_tag_matrix.loc[[movie_id]]
    # One extra neighbour is requested because the query movie is itself part of the
    # fitted data; never ask for more neighbours than there are rows.
    n_neighbors = min(k + 1, len(movie_tag_matrix))
    _, indices = knn_model.kneighbors(query, n_neighbors=n_neighbors)

    neighbour_ids = [movie_tag_matrix.index[i] for i in indices.flatten()]
    # Remove the query movie by id rather than by position: with tied distances it
    # is not guaranteed to be the first neighbour returned.
    similar_movie_ids = [m_id for m_id in neighbour_ids if m_id != movie_id][:k]

    rank = {m_id: position for position, m_id in enumerate(similar_movie_ids)}
    result = movies.loc[movies["movieId"].isin(similar_movie_ids), ["movieId", "title"]]
    # isin() yields rows in catalog order; restore the nearest-first order.
    return result.sort_values("movieId", key=lambda ids: ids.map(rank))

In [11]:
# Wrap the ratings frame (user, item, rating columns, in that order) in a Surprise
# dataset with the declared 0.5-5.0 rating scale.
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[["userId", "movieId", "rating"]], reader)
# Hold out 20% of the ratings for evaluation. random_state is fixed so the split --
# and every metric computed from it -- is the same on each run of the notebook.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [12]:
# Train the collaborative-filtering model on the training split.
# random_state is fixed so repeated runs train the same model and report the same metrics.
svd_model = SVD(random_state=42)
svd_model.fit(trainset)

def predict_rating(user_id, movie_id):
    """Return the rating ``svd_model`` estimates ``user_id`` would give ``movie_id``."""
    prediction = svd_model.predict(user_id, movie_id)
    return prediction.est

def hybrid_recommend(user_id, movie_id, k=5, alpha=0.7):
    """Rank the movies similar to ``movie_id`` for ``user_id``.

    Candidates come from ``get_similar_movies(movie_id, k)``. Each candidate is scored as
    ``alpha * predict_rating(user, movie_id) + (1 - alpha) * predict_rating(user, candidate)``.

    Args:
        user_id: User the predicted ratings are computed for.
        movie_id: Seed movie whose similar movies are the candidates.
        k: Number of candidates requested from ``get_similar_movies``.
        alpha: Weight of the seed movie's predicted rating in the blended score.

    Returns:
        List of candidate titles sorted by descending score. Empty when
        ``get_similar_movies`` cannot produce candidates for ``movie_id``.

    NOTE(review): ``alpha * svd_score`` is identical for every candidate, so for
    ``alpha < 1`` the ordering depends only on each candidate's own predicted rating;
    content similarity does not enter the score. Confirm whether that is intended.
    """
    similar_movies_df = get_similar_movies(movie_id, k)
    # get_similar_movies reports an unknown movie with a message string instead of a
    # DataFrame; indexing that string with "movieId" would raise TypeError.
    if not isinstance(similar_movies_df, pd.DataFrame):
        return []

    svd_score = predict_rating(user_id, movie_id)

    scored_titles = []
    # The candidate frame already carries the titles, so no per-candidate scan of the
    # full movies table is needed.
    for m_id, title in zip(similar_movies_df["movieId"], similar_movies_df["title"]):
        sim_movie_svd_score = predict_rating(user_id, m_id)
        hybrid_score = alpha * svd_score + (1 - alpha) * sim_movie_svd_score
        scored_titles.append((title, hybrid_score))

    scored_titles.sort(key=lambda pair: pair[1], reverse=True)
    return [title for title, _ in scored_titles]

In [13]:
# Predict every held-out rating, then compute the RMSE over those predictions
# (accuracy.rmse prints the value as well as returning it).
predictions = svd_model.test(testset)
rmse = accuracy.rmse(predictions, verbose=True)

RMSE: 0.7862


In [14]:
# Echo the RMSE at full float precision.
print(
    "Root Mean Squared Error (RMSE):",
    rmse,
)

Root Mean Squared Error (RMSE): 0.78621779078266


In [15]:
# Treat a rating at or above `threshold` as "liked" and score the model as a binary
# classifier. Each prediction is read positionally: index 2 is the true rating,
# index 3 the estimated rating.
threshold = 3.5
y_true = [int(pred[2] >= threshold) for pred in predictions]
y_pred = [int(pred[3] >= threshold) for pred in predictions]
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true,
    y_pred,
    average='binary',
)
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

Precision: 0.8172
Recall: 0.7593
F1-Score: 0.7872


In [16]:
# Streamlit front end: asks for a movie title and a user ID, then shows three tables.
# NOTE(review): this cell executes inside the notebook kernel, not under
# `streamlit run` (see the warning in its recorded output). For the widgets to be
# served, this code has to be part of the script Streamlit runs (app.py).
st.title('🎬 Movie Recommender System')
st.markdown("""
    Enter a movie title and your user ID to get personalized recommendations!
""")

movie_input = st.text_input('🎥 Enter Movie Title:')
# NOTE(review): the 1..1000 range is hard-coded -- confirm it matches the user IDs in the ratings data.
user_input = st.number_input('👤 Enter User ID:', min_value=1, max_value=1000, step=1)

if st.button('✨ Get Recommendations'):
    movie_id = get_movie_id(movie_input)
    if movie_id is None:
        st.error('❌ Movie not found! Please try another title.')
    else:
        content_recommendations = get_similar_movies(movie_id)
        if isinstance(content_recommendations, str):
            # get_similar_movies returns a message string (not a table) when the movie
            # has no row in the tag matrix; the other recommenders need those candidates.
            st.error('❌ No tag data is available for this movie. Please try another title.')
        else:
            # alpha=0.0 scores each candidate purely by its own predicted rating
            # (collaborative filtering only); the default alpha gives the blended score.
            # Previously both sections made the exact same call.
            collaborative_recommendations = hybrid_recommend(user_input, movie_id, k=5, alpha=0.0)
            hybrid_recommendations = hybrid_recommend(user_input, movie_id, k=5)

            # Report success only after every recommender has actually produced a result.
            st.success('✅ Recommendations generated successfully!')

            st.subheader('📚 Content-Based Recommendations:')
            st.table(content_recommendations)

            st.subheader('👥 Collaborative Filtering Recommendations:')
            st.table(collaborative_recommendations)

            st.subheader('🌟 Hybrid Recommendations:')
            st.table(hybrid_recommendations)

2025-02-12 08:26:04.505 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-02-12 08:26:04.548 Session state does not function when running a script without `streamlit run`


In [17]:
!pip install pyngrok



In [19]:
from pyngrok import ngrok

# Set your authtoken
ngrok.set_auth_token("2svswOOcmwuULhHiZbAp0w8WY85_6ffi7FaLUh2NkuDoNS9rh")

# Define the tunnel configuration explicitly
http_tunnel = ngrok.connect(
    addr="8501",  # The port where Streamlit is running
    proto="http"  # Protocol (HTTP)
)

# Print the public URL
print(f"Streamlit App URL: {http_tunnel.public_url}")

# Run the Streamlit app in the background
!streamlit run app.py &>/dev/null&

Streamlit App URL: https://2f07-34-125-51-6.ngrok-free.app
