Objective 1: Top-rated movies in a genre (with a minimum number of reviews)

In [None]:
import pandas as pd

# Load the movie catalogue and the individual user ratings.
movies_df = pd.read_csv('movies.csv')
ratings_df = pd.read_csv('ratings.csv')

# One row per (movie, rating) pair; movies without any rating drop out of
# this inner join.
movie_ratings_df = pd.merge(movies_df, ratings_df, on='movieId')

# Aggregate per movie in a single pass: mean rating AND number of reviews.
# The count has to be taken here, on the un-aggregated rows. Counting after
# the mean has been computed sees exactly one row per movie, so every movie
# would get num_reviews == 1 and the minimum-reviews filter below would
# reject everything.
movie_ratings_df = (
    movie_ratings_df
    .groupby(['movieId', 'title', 'genres'], as_index=False)
    .agg(rating=('rating', 'mean'), num_reviews=('rating', 'count'))
)

# Filter movies by genre and minimum number of reviews.
# regex=False: the genre is matched as literal text, not as a pattern.
genre = 'Comedy'
min_reviews = 100
genre_movies_df = movie_ratings_df[
    movie_ratings_df['genres'].str.contains(genre, regex=False)
    & (movie_ratings_df['num_reviews'] >= min_reviews)
]

# Sort movies by average rating in descending order
genre_movies_df = genre_movies_df.sort_values(by=['rating'], ascending=False)

# Select top N movies
N = 5
top_N_movies = genre_movies_df.head(N)

# Display the results
print(top_N_movies[['title', 'rating', 'num_reviews']])


Objective 2: Content-based recommendations from genre similarity

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Read the catalogue; a missing genre string becomes '' so the vectorizer
# only ever receives text.
movies_df = pd.read_csv('movies.csv').assign(
    genres=lambda frame: frame['genres'].fillna('')
)

# Turn each movie's genre string into a TF-IDF vector (English stop words
# are dropped by the vectorizer).
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_df['genres'])

# Pairwise cosine similarity of every movie's genre vector with every other.
cosine_sim = cosine_similarity(X=tfidf_matrix, Y=tfidf_matrix)
# Define a function to get movie recommendations based on similar genres
def get_recommendations(title, N):
    """Return the N movies whose genre vector is most similar to `title`.

    Reads the module-level `movies_df` (catalogue) and `cosine_sim`
    (movie-by-movie similarity matrix, addressed by row position).

    Note: a later cell in this notebook defines another function with the
    same name; once that cell has run, it replaces this one.

    Args:
        title: Exact value of the `title` column to look up. If several rows
            share the title, the first one is used.
        N: Maximum number of recommendations; values <= 0 give an empty frame.

    Returns:
        DataFrame with columns `title` and `genres`, most similar first.
        The queried movie itself is never included.

    Raises:
        IndexError: if no row of `movies_df` has the given title.
    """
    # Row *position* of the requested movie: cosine_sim and .iloc below are
    # both positional, so do not rely on index labels here.
    matches = (movies_df['title'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"title {title!r} not found in movies_df")
    idx = int(matches[0])

    # Rank every OTHER movie by similarity, highest first. The query movie is
    # removed explicitly: many movies share an identical genre string and tie
    # with it at the top score, so "skip the first sorted entry" can discard
    # a different movie and leave the query movie in its own recommendations.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Positions of the N best matches (sorted() is stable, so ties keep
    # catalogue order).
    movie_indices = [pos for pos, _ in ranked[:max(N, 0)]]

    return movies_df.iloc[movie_indices][['title', 'genres']]
# Example usage: the five titles closest in genre to Toy Story.
title, N = 'Toy Story (1995)', 5
print(get_recommendations(title, N))


                                               title  \
1815                                     Antz (1998)   
2496                              Toy Story 2 (1999)   
2967  Adventures of Rocky and Bullwinkle, The (2000)   
3166                Emperor's New Groove, The (2000)   
3811                           Monsters, Inc. (2001)   

                                           genres  
1815  Adventure|Animation|Children|Comedy|Fantasy  
2496  Adventure|Animation|Children|Comedy|Fantasy  
2967  Adventure|Animation|Children|Comedy|Fantasy  
3166  Adventure|Animation|Children|Comedy|Fantasy  
3811  Adventure|Animation|Children|Comedy|Fantasy  


Objective 3: User-based collaborative filtering (KNN over similar users)

In [None]:
pip install surprise


Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3163327 sha256=77e81e07e1a4128e87848b7fdeb85c497aed3077fcccdcc507f8ca52c213c9b5
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.3 surprise-0.1


In [None]:
import pandas as pd
from surprise import Reader, Dataset, KNNBasic
from surprise.model_selection import train_test_split

# Load the dataset
ratings_df = pd.read_csv('ratings.csv')

# Reader tells Surprise the bounds of the rating scale (0.5 to 5.0 here).
reader = Reader(rating_scale=(0.5, 5.0))

# Load the dataset into the Surprise format (columns must be in the order
# user, item, rating).
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

# Split the data into training and testing sets.
# The split is random; a fixed seed keeps the training set -- and therefore
# the neighbours and recommendations derived from it -- identical on every run.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# KNN model: cosine similarity computed between users (not items),
# considering up to 100 neighbours.
sim_options = {'name': 'cosine', 'user_based': True}
model = KNNBasic(k=100, sim_options=sim_options)

# Train the model on the training set
model.fit(trainset)

# Define a function to get movie recommendations for a target user based on similar users
def get_recommendations(user_id, N):
    """Recommend up to N unseen movies for `user_id` from similar users' ratings.

    Reads the module-level fitted `model`, plus `ratings_df` and `movies_df`.

    Args:
        user_id: Raw user id, i.e. a value of the `userId` column.
        N: Maximum number of movies to return.

    Returns:
        DataFrame with a single `title` column, ordered by the neighbours'
        mean rating (highest first). Only movies the target user has not
        rated are considered.

    Raises:
        ValueError: propagated from Surprise's `to_inner_uid` when `user_id`
            is not part of the set the model was trained on.
    """
    fitted_trainset = model.trainset

    # Surprise identifies users by *inner* ids internally. get_neighbors()
    # expects an inner id and returns inner ids, so translate the raw userId
    # on the way in and translate the neighbours back on the way out --
    # otherwise the neighbours found belong to a different user and are then
    # matched against the wrong rows of ratings_df.
    inner_uid = fitted_trainset.to_inner_uid(user_id)
    neighbor_inner_ids = model.get_neighbors(inner_uid, k=model.k)
    similar_users = [fitted_trainset.to_raw_uid(inner) for inner in neighbor_inner_ids]

    # Ratings given by the neighbours to movies the target user has not rated.
    seen_movies = ratings_df.loc[ratings_df['userId'] == user_id, 'movieId']
    similar_ratings = ratings_df[
        ratings_df['userId'].isin(similar_users)
        & ~ratings_df['movieId'].isin(seen_movies)
    ]

    # Calculate the average rating for each movie
    movie_ratings = similar_ratings.groupby(['movieId'], as_index=False)['rating'].mean()

    # Sort movies by average rating in descending order
    movie_ratings = movie_ratings.sort_values(by=['rating'], ascending=False)

    # Select top N movies
    top_N_movies = movie_ratings.head(N)

    # Merge with movies dataframe to get movie titles and genres
    top_N_movies = pd.merge(top_N_movies, movies_df[['movieId', 'title', 'genres']], on='movieId')

    return top_N_movies[['title']]

# Example usage: five picks for the user whose userId is 1.
user_id, N = 1, 5

print(get_recommendations(user_id, N))


Computing the cosine similarity matrix...
Done computing similarity matrix.
                                               title
0                               Iron Eagle II (1988)
1  Sympathy for Mr. Vengeance (Boksuneun naui geo...
2                               Interstate 60 (2002)
3                         Crank: High Voltage (2009)
4  Fast & Furious (Fast and the Furious 4, The) (...


Interface: Interactive controls for the genre-based recommender

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Input controls: a free-text genre box plus two integer sliders.
genre = widgets.Text(value='Comedy', description='Genre:')
threshold = widgets.IntSlider(
    value=100,
    min=0,
    max=1000,
    step=10,
    description='Minimum Reviews Threshold:',
)
num_recommendations = widgets.IntSlider(
    value=5,
    min=1,
    max=10,
    step=1,
    description='Num Recommendations:',
)

# Output area used by the click handler for everything it prints.
output = widgets.Output()

# Define the function to handle button click events
def on_button_click(b):
    """Handle a click: echo the chosen settings and print matching movies.

    Reads the module-level widgets `genre`, `threshold`,
    `num_recommendations` and `output`, and the data frames `movies_df` and
    `ratings_df`. Everything is printed into `output`.

    Args:
        b: The clicked button, as passed by the widget callback; unused.
    """
    # Drop the previous click's text so results do not pile up.
    output.clear_output()
    with output:
        print('Recommendations for Genre:', genre.value)
        print('Minimum Reviews Threshold:', threshold.value)
        print('Num Recommendations:', num_recommendations.value)

        # Mean rating and review count per movie, taken from the raw ratings.
        rating_stats = (
            ratings_df.groupby('movieId')['rating']
            .agg(rating='mean', num_reviews='count')
            .reset_index()
        )
        candidates = movies_df.merge(rating_stats, on='movieId')

        # Literal, case-insensitive genre match; rows with a missing genre
        # never match.
        genre_match = candidates['genres'].str.contains(
            genre.value, case=False, regex=False, na=False
        )
        enough_reviews = candidates['num_reviews'] >= threshold.value

        top_movies = (
            candidates[genre_match & enough_reviews]
            .sort_values(by='rating', ascending=False)
            .head(num_recommendations.value)
        )

        if top_movies.empty:
            print('No movies match these settings.')
        else:
            print(top_movies[['title', 'rating', 'num_reviews']].to_string(index=False))

# Create the trigger button and hook it up to the click handler.
button = widgets.Button(description='Get Recommendations')
button.on_click(on_button_click)

# Render the inputs, the button, and the output area, top to bottom.
for widget in (genre, threshold, num_recommendations, button, output):
    display(widget)


Text(value='Comedy', description='Genre:')

IntSlider(value=100, description='Minimum Reviews Threshold:', max=1000, step=10)

IntSlider(value=5, description='Num Recommendations:', max=10, min=1)

Button(description='Get Recommendations', style=ButtonStyle())

Output()