<a href="https://colab.research.google.com/github/AsraniSanjana/All_Codes/blob/main/All_Semester_Codes/ML_sem7/models/miniproject/ML_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install surprise



In [None]:
import pandas as pd
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Specify the file paths in Google Drive
movies_csv_path = '/content/drive/MyDrive/movies_metadata - movies_metadata.csv'
ratings_csv_path = '/content/drive/MyDrive/ratings_small.csv'

# Read the dataset
movies_data = pd.read_csv(movies_csv_path)
ratings_data = pd.read_csv(ratings_csv_path)

In [None]:
movies_data.columns.tolist()

['id', 'title', 'overview', 'genres', 'vote_average', 'vote_count']

In [None]:
movies_data.head(3)

Unnamed: 0,id,title,overview,genres,vote_average,vote_count
0,862,Toy Story,"Led by Woody, Andy's toys live happily in his ...","[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",7.7,5415.0
1,8844,Jumanji,When siblings Judy and Peter discover an encha...,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",6.9,2413.0
2,15602,Grumpier Old Men,A family wedding reignites the ancient feud be...,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",6.5,92.0


In [None]:
ratings_data.columns.tolist()

['userId', 'movieId', 'rating', 'timestamp']

In [None]:
ratings_data.head(3)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182


In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer  # Add this import
from sklearn.metrics.pairwise import linear_kernel
from surprise import Reader, Dataset, SVD

# Convert 'id' columns to string data type
movies_data['id'] = movies_data['id'].astype(str)

# Convert 'vote_average' column to string data type and fill missing values with 0
movies_data['vote_average'] = movies_data['vote_average'].astype(str)
movies_data['vote_average'] = movies_data['vote_average'].replace('nan', '0')

# Create a TF-IDF vectorizer for content-based filtering using 'vote_average'
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Construct the TF-IDF matrix
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_data['vote_average'])

# Compute the cosine similarity matrix for content-based filtering
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Define a reader and load the ratings dataset for collaborative filtering
reader = Reader()
ratings = ratings_data
data_surprise = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Initialize and train the SVD model for collaborative filtering
svd = SVD()
trainset = data_surprise.build_full_trainset()
svd.fit(trainset)

# Function to get collaborative filtering recommendations
def get_collaborative_filtering_recommendations(userId, svd, movies_data):
    movie_ids = movies_data['id'].tolist()
    user_ratings = []

    for movie_id in movie_ids:
        user_ratings.append((movie_id, svd.predict(userId, movie_id).est))

    user_ratings.sort(key=lambda x: x[1], reverse=True)
    top_movies = user_ratings[:10]
    top_movie_ids = [movie[0] for movie in top_movies]

    top_movie_titles = movies_data[movies_data['id'].isin(top_movie_ids)]['title'].tolist()
    return top_movie_titles

# Function to get content-based recommendations using 'vote_average'
def get_content_based_recommendations_by_vote_average(title, cosine_sim, df):
    idx = df.index[df['title'] == title].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    movie_indices = [i[0] for i in sim_scores]
    return df['title'].iloc[movie_indices]

# Function to get hybrid recommendations using 'vote_average'
def hybrid_recommendations_by_vote_average(userId, title, cosine_sim, movies_data, svd):
    content_based_recommendations = get_content_based_recommendations_by_vote_average(title, cosine_sim, movies_data)
    collaborative_filtering_recommendations = get_collaborative_filtering_recommendations(userId, svd, movies_data)
    return content_based_recommendations, collaborative_filtering_recommendations

# Example: Get hybrid movie recommendations for a user using 'vote_average'
user_id = 3
movie_title = 'Sabrina'
content_based, collaborative_filtering = hybrid_recommendations_by_vote_average(user_id, movie_title, cosine_sim, movies_data, svd)

# print(f"Content-Based Recommendations for User {user_id}:\n{content_based}")
# print(f"Collaborative Filtering Recommendations for User {user_id}:\n{collaborative_filtering}")

# Create a DataFrame to format the output as a table
recommendations_df = pd.DataFrame({'Collaborative': collaborative_filtering, 'Content-Based': content_based})

# Print the DataFrame
print(recommendations_df)


                  Collaborative                Content-Based
1                     Toy Story                      Jumanji
2                       Jumanji             Grumpier Old Men
3              Grumpier Old Men            Waiting to Exhale
4             Waiting to Exhale  Father of the Bride Part II
5   Father of the Bride Part II                         Heat
6                          Heat                      Sabrina
7                       Sabrina                 Tom and Huck
8                  Tom and Huck                 Sudden Death
9                  Sudden Death                    GoldenEye
10                    GoldenEye       The American President




---





---

