In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Get the data
movies_df = pd.read_csv("movies.csv")

# Title and Genres combined
movies_df['text'] = movies_df['title'] + ' ' + movies_df['genres']
movies_df.drop(columns=['title', 'genres'], inplace=True)

# Use TfidfVectorizer to convert text data into numerical vectors
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_df['text'])

#  the cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Step 5: Implement a function to recommend movies based on similarity scores
def recommend_movies(movie_title, cosine_sim=cosine_sim):
    # Get the index of the movie that matches the title
    idx = movies_df[movies_df['text'].str.contains(movie_title)].index[0]
    # Get the pairwise similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))
    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]
    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]
    # Return the top 10 most similar movies
    return movies_df.iloc[movie_indices]

# Generating 10 movies based on Content for a particular movie name
movie_name = 'Avengers'
recommendations = recommend_movies(movie_name)
print(recommendations)


       movieId                                               text
17067    89745  Avengers, The (2012) Action|Adventure|Sci-Fi|IMAX
30431   136257     Avengers Grimm (2015) Action|Adventure|Fantasy
34536   145676               3 Avengers (1964) (no genres listed)
40636   159920                     Shaolin Avengers (1994) Action
25067   122912  Avengers: Infinity War - Part I (2018) Action|...
40637   159922                 The Shaolin Avengers (1976) Action
35372   147657                      Masked Avengers (1981) Action
25068   122914  Avengers: Infinity War - Part II (2019) Action...
45394   170297  Ultimate Avengers 2 (2006) Action|Animation|Sc...
54283   189217  Avengers Grimm: Time Wars (2018) Action|Advent...


In [3]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Ground truth ratings (if available)
ground_truth_ratings = [5, 4, 3, 4, 5]  # Example ground truth ratings for recommended movies

# Content-based recommendations
content_based_recommendations = ['Toy Story (1995)', 'Jumanji (1995)', 'Grumpier Old Men (1995)', 'Waiting to Exhale (1995)', 'Father of the Bride Part II (1995)']

# Assume user ratings for recommended movies
user_ratings = [4, 4, 3, 3, 5]  # Example user ratings for recommended movies

# Evaluate content-based recommendations
precision = precision_score(ground_truth_ratings, user_ratings, average='weighted')
recall = recall_score(ground_truth_ratings, user_ratings, average='weighted')
f1 = f1_score(ground_truth_ratings, user_ratings, average='weighted')

print("Content-Based Recommender Evaluation Metrics:")
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)  


Content-Based Recommender Evaluation Metrics:
Precision: 0.7
Recall: 0.6
F1-score: 0.6


In [4]:
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import SVD, accuracy

# Load data from ratings.csv
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
data = Dataset.load_from_file('ratings.csv', reader=reader)

# Split the data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Use the SVD algorithm (Singular Value Decomposition)
algo = SVD()

# Train the algorithm on the trainset
algo.fit(trainset)

# Predict ratings for the testset
predictions = algo.test(testset)

# Compute Mean Absolute Error (MAE)
mae = accuracy.mae(predictions)

# Compute Root Mean Squared Error (RMSE)
rmse = accuracy.rmse(predictions)

print("Mean Absolute Error (MAE):", mae)
print("Root Mean Squared Error (RMSE):", rmse)



MAE:  0.5870
RMSE: 0.7777
Mean Absolute Error (MAE): 0.587036732063796
Root Mean Squared Error (RMSE): 0.7776794685206999


In [5]:

# Example: Recommend movies for a specific user (e.g., userId 1)
user_id = '1'
user_movies = set(data.raw_ratings[i][1] for i in range(len(data.raw_ratings)) if data.raw_ratings[i][0] == user_id)
all_movies = set(data.raw_ratings[i][1] for i in range(len(data.raw_ratings)))
movies_to_predict = list(all_movies - user_movies)
user_predictions = []

for movie_id in movies_to_predict:
    user_predictions.append((movie_id, algo.predict(user_id, movie_id).est))

# Sort the predictions by estimated rating in descending order
user_predictions.sort(key=lambda x: x[1], reverse=True)

# Print the top recommended movies
top_n = 10
for i, (movie_id, rating) in enumerate(user_predictions[:top_n], 1):
    print(f"{i}. Movie ID: {movie_id}, Estimated Rating: {rating}")


1. Movie ID: 290, Estimated Rating: 4.694044356237093
2. Movie ID: 86377, Estimated Rating: 4.652119741344023
3. Movie ID: 2858, Estimated Rating: 4.610286024957355
4. Movie ID: 1354, Estimated Rating: 4.573060728222595
5. Movie ID: 7767, Estimated Rating: 4.550262772613847
6. Movie ID: 134853, Estimated Rating: 4.547046126183418
7. Movie ID: 116897, Estimated Rating: 4.539214489395047
8. Movie ID: 112552, Estimated Rating: 4.530725849345432
9. Movie ID: 171011, Estimated Rating: 4.530433058924414
10. Movie ID: 86345, Estimated Rating: 4.498310125656087
