<a href="https://colab.research.google.com/github/KillMonga130/daily-ai-ml-projects/blob/main/Recommendation_System_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install scikit-surprise

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy


Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m153.6/154.4 kB[0m [31m5.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357287 sha256=7fe54bee093bc984a27917db3612a8be27ff19410805f689662d8970bdc335c5
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a

In [4]:
ratings_df = pd.read_csv('ratings.csv')
movies_df = pd.read_csv('movies.csv')

ratings_df.head(), movies_df.head()

(   userId  movieId  rating  timestamp
 0       1        1     4.0  964982703
 1       1        3     4.0  964981247
 2       1        6     4.0  964982224
 3       1       47     5.0  964983815
 4       1       50     5.0  964982931,
    movieId                               title                                       genres
 0        1                    Toy Story (1995)  Adventure|Animation|Children|Comedy|Fantasy
 1        2                      Jumanji (1995)                   Adventure|Children|Fantasy
 2        3             Grumpier Old Men (1995)                               Comedy|Romance
 3        4            Waiting to Exhale (1995)                         Comedy|Drama|Romance
 4        5  Father of the Bride Part II (1995)                                       Comedy)

In [8]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

trainset, testset = train_test_split(data, test_size=0.2)

svd_model = SVD()
svd_model.fit(trainset)

predictions = svd_model.test(testset)
rmse = accuracy.rmse(predictions)
print(f"RMSE: {rmse}")

RMSE: 0.8779
RMSE: 0.8779305743694709


In [11]:
movies_df['genres'] = movies_df['genres'].str.replace('|', ' ')

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_df['genres'])

cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [22]:
def get_reco(user_id, model, movies_df, num_reco=5):
  movie_ids = movies_df['movieId'].unique()
  predictions = [model.predict(user_id, movie_id) for movie_id in movie_ids]

  top_predictions = sorted(predictions, key=lambda x: x.est, reverse=True)[:num_reco]
  top_movie_ids = [pred.iid for pred in top_predictions]

  return movies_df[movies_df['movieId'].isin(top_movie_ids)]

recommended_movies = get_reco(1, svd_model, movies_df)
recommended_movies[['title', 'genres']]



def get_content(movie_id, movies_df, cosine_sim, num_reco=5):

  idx = movies_df.index[movies_df['movieId'] == movie_id][0]

  sim_scores = list(enumerate(cosine_sim[idx]))
  sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

  top_movie_indices = [i[0] for i in sim_scores[1:num_reco + 1]]
  return movies_df.iloc[top_movie_indices]


similar_movies = get_content(1, movies_df, cosine_sim)
similar_movies[['title', 'genres']]

Unnamed: 0,title,genres
1706,Antz (1998),Adventure Animation Children Comedy Fantasy
2355,Toy Story 2 (1999),Adventure Animation Children Comedy Fantasy
2809,"Adventures of Rocky and Bullwinkle, The (2000)",Adventure Animation Children Comedy Fantasy
3000,"Emperor's New Groove, The (2000)",Adventure Animation Children Comedy Fantasy
3568,"Monsters, Inc. (2001)",Adventure Animation Children Comedy Fantasy


In [20]:
!pip install gradio



In [35]:
import gradio as gr

def recommended_movies(recommendation_type, user_id, movie_id):
  if recommendation_type == "Collaborative Filtering":
    recommendations = get_reco(int(user_id), svd_model, movies_df)
  elif recommendation_type == "Content-Based Filtering":
    recommendations = get_content(int(movie_id), movies_df, cosine_sim)
  else:
    return "Please select a valid recommendation type."

  result = recommendations[['title', 'genres']]
  return result.to_string(index=False)

recommendation_type = gr.Radio(["Collaborative Filtering", "Content-Based Filtering"], label="Recommandation Type")
user_id = gr.Textbox(label="User ID (for Collaborative Filtering)", value="1")
movie_id = gr.Textbox(label="Movie ID (for Content-Based Filtering)", value="1")

output = gr.Textbox(label="Recommended Movies")

gr.Interface(
    fn=recommended_movies,
    inputs=[recommendation_type, user_id, movie_id],
    outputs=output,
    title="Movie Recommendation System",
    description="Choose a recommendation type and enter User ID or Movie ID to get recommendations."
).launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://010bacad17c8805329.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


