# Libraries <br>

- **!pip install scikit-surprise** <br> <br>

- **pip install sentence-transformers** <br> <br>

- **pip install keras or pip install tensorflow depending on the tensorflow version**  <br>


In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt

# Read data

In [2]:
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")
tags = pd.read_csv("tags.csv")
links = pd.read_csv("links.csv")

# View data

In [3]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
links.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [5]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
tags.head() 

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200


In [7]:
print(tags.shape, ratings.shape, movies.shape)

(3683, 4) (100836, 4) (9742, 3)


# Exploratory Data Analysis (EDA)

# Profile data

# Pandas profiling 

In [8]:
import ydata_profiling
from ydata_profiling import ProfileReport

movie_profile = ProfileReport(movies)
ratings_profile = ProfileReport(ratings)
tags_profile = ProfileReport(tags)
# Save the profile report to an HTML file
movie_profile.to_file("movies_profile_report.html")
ratings_profile.to_file("ratings_report.html")
tags_profile.to_file("tags_report.html")


Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

# Observations from pandas profiling

**Ratings Distribution** <br>
They have a mean value around 3.5 according to pandas profiling
![Rating Distribution](distribution_ratings.png)


# Most popular genres in movie dataframe

In [9]:
genre_counts = movies['genres'].value_counts()

In [10]:
print(genre_counts)

Drama                                                  1053
Comedy                                                  946
Comedy|Drama                                            435
Comedy|Romance                                          363
Drama|Romance                                           349
                                                       ... 
Action|Crime|Horror|Mystery|Thriller                      1
Adventure|Animation|Children|Comedy|Musical|Romance       1
Action|Adventure|Animation|Comedy|Crime|Mystery           1
Children|Comedy|Fantasy|Sci-Fi                            1
Action|Animation|Comedy|Fantasy                           1
Name: genres, Length: 951, dtype: int64


**Word Cloud genres** <br>
Drama comedy Romance are the most common genres 
![Genre Counts](cloud_word_genres.png)

**Word Cloud tags** <br>
Comedy netflix dark atmospheric are the common tags that users comment 
![tags Counts](word_cloud_tags.png)

# High rating users and Top rated movies 

In [11]:
# Calculate average rating for each user
average_ratings = ratings.groupby('userId')['rating'].mean()

# Identify users with high average ratings
high_rating_users = average_ratings[average_ratings >= 4.5]

# Print the user IDs of users with high average ratings
print(high_rating_users.index.tolist())

[25, 30, 43, 53, 122, 171, 251, 348, 371, 400, 441, 452, 515, 523]


In [12]:


# Calculate average rating for each movie
average_ratings = ratings.groupby('movieId')['rating'].mean()

# Sort movies by average rating in descending order
sorted_movies = average_ratings.sort_values(ascending=False)

# Select top-rated movies (top 5 here)
top_rated_movies = sorted_movies.head(5)



In [13]:
top_rated_movies 

movieId
88448     5.0
100556    5.0
143031    5.0
143511    5.0
143559    5.0
Name: rating, dtype: float64

In [14]:
movies[movies['movieId'] == 88448]

Unnamed: 0,movieId,title,genres
7656,88448,Paper Birds (Pájaros de papel) (2010),Comedy|Drama


In [15]:
# Merge the two DataFrames based on the "movieId" column
top_rated_movies_titles = pd.merge(top_rated_movies, movies, on='movieId')

In [16]:
top_rated_movies_titles

Unnamed: 0,movieId,rating,title,genres
0,88448,5.0,Paper Birds (Pájaros de papel) (2010),Comedy|Drama
1,100556,5.0,"Act of Killing, The (2012)",Documentary
2,143031,5.0,Jump In! (2007),Comedy|Drama|Romance
3,143511,5.0,Human (2015),Documentary
4,143559,5.0,L.A. Slasher (2015),Comedy|Crime|Fantasy


In [17]:
#merged_df.head()

# User - Movie rating dataframe

In [18]:
# Pivot table to create the user-movie rating matrix
rating_matrix = ratings.pivot_table(values='rating', index='userId', columns='movieId')

In [19]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [20]:
rating_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


# Sparsity of the user item matrix

In [21]:
rating_matrix.shape

(610, 9724)

In [22]:
rating_matrix.size

5931640

In [23]:
# Calculate the number of missing values (NaNs)
missing_values = rating_matrix.isnull().sum().sum()
print(missing_values)

5830804


In [24]:
rated_movies = rating_matrix.size - missing_values
rated_movies

100836

In [25]:
# Calculate the total number of possible values
total_possible_values = rating_matrix.size

# Calculate the number of missing values (NaNs)
missing_values = rating_matrix.isnull().sum().sum()

# Calculate sparsity
sparsity = missing_values / total_possible_values

# Print the sparsity
print(f"Sparsity: {sparsity:.4f}")

Sparsity: 0.9830


**User item matrix is a sparse matrix , 2% of the matrix is filled with ratings**

In [26]:
import os
from surprise import Dataset, SVD
from surprise import BaselineOnly, Dataset, Reader
from surprise.model_selection import cross_validate


# Drop timestamp

In [27]:
ratings = ratings.drop("timestamp",axis=1)

In [28]:

reader = Reader(rating_scale=(0.5, 5))


data = Dataset.load_from_df(ratings, reader)



In [29]:
from surprise import KNNBaseline,KNNBasic,KNNWithMeans,KNNWithZScore,NormalPredictor,BaselineOnly


# Baseline Model <br>
# Normal Predictor 

In [30]:
algo3 = NormalPredictor()
scores = cross_validate(algo3, data, measures=["RMSE", "MAE"], cv=5, verbose=True)


#RMSE MAE Evaluation
mean_rmse = np.mean(scores.get("test_rmse"))
mean_mae = np.mean(scores.get("test_mae"))
mean_fit_time = np.mean(scores.get("fit_time"))
mean_test_time = np.mean(scores.get("test_time"))
overall_time = mean_fit_time + mean_test_time
print(f"Mean RMSE is {mean_rmse:.3f}")
print(f"Mean MAE is {mean_mae:.3f}")   
print(f"Mean fit_time is {mean_fit_time:.3f}")   
print(f"Mean test_time is {mean_test_time:.3f}")   
print(f"Mean overall_time is {overall_time:.3f}") 

Evaluating RMSE, MAE of algorithm NormalPredictor on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.4222  1.4202  1.4269  1.4249  1.4318  1.4252  0.0040  
MAE (testset)     1.1386  1.1351  1.1382  1.1360  1.1448  1.1385  0.0034  
Fit time          0.08    0.10    0.09    0.09    0.09    0.09    0.01    
Test time         0.23    0.06    0.06    0.06    0.19    0.12    0.07    
Mean RMSE is 1.425
Mean MAE is 1.139
Mean fit_time is 0.092
Mean test_time is 0.121
Mean overall_time is 0.212


# Collaborative Filtering using Matrix Factorization techniques from surprise library<br>
- **SVD**
- **Probabilistic Matrix Factorization**
- **Non-Negative Matrix Factorization** 
- **BaselineOnly**

In [31]:
from surprise import NMF

In [32]:
bsl_options = {
    "method": "sgd"
}
algorithms = [SVD(), SVD(biased=False), NMF(), BaselineOnly(bsl_options=bsl_options)]

rmse_results = []
mae_results = []
fit_time = []
test_time = []
overall_times = []

for algo in algorithms:
    # Run cross-validation and get scores
    scores = cross_validate(algo, data, measures=["RMSE", "MAE"], cv=5, verbose=True)
    ####### Calculate scores  for evaluation ########
    mean_rmse = np.mean(scores.get("test_rmse"))
    mean_mae = np.mean(scores.get("test_mae"))
    
    mean_fit_time = np.mean(scores.get("fit_time"))
    mean_test_time = np.mean(scores.get("test_time"))
    overall_time = mean_fit_time + mean_test_time
    
    ####### Add elements to the lists ########
    rmse_results.append(mean_rmse)
    mae_results.append(mean_mae)
    fit_time.append(mean_fit_time)
    test_time.append(mean_test_time)
    overall_times.append(overall_time)
    
# Evaluation RMSE MAE and time 
for i, algo in enumerate(algorithms):
    curr_rmse = rmse_results[i]
    curr_mae = mae_results[i]
    curr_fit_time = fit_time[i]
    curr_test_time = test_time[i]
    curr_overall_time = overall_times[i]
    
    print("\n\n\n")
    print(f"{algo.__class__.__name__} RMSE {curr_rmse:.3f} \t MAE {curr_mae:.3f} \t Fit time {curr_fit_time:.3f} \t Test time {curr_test_time:.3f} \t Overall time {curr_overall_time:.3f}")






Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8763  0.8776  0.8839  0.8692  0.8606  0.8735  0.0079  
MAE (testset)     0.6737  0.6745  0.6776  0.6706  0.6606  0.6714  0.0058  
Fit time          0.88    1.00    0.93    0.85    0.87    0.90    0.05    
Test time         0.11    0.10    0.10    0.11    0.10    0.10    0.00    
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9966  0.9867  0.9833  0.9803  0.9810  0.9856  0.0060  
MAE (testset)     0.7662  0.7572  0.7567  0.7517  0.7531  0.7570  0.0051  
Fit time          0.84    0.87    0.85    0.86    0.85    0.86    0.01    
Test time         0.09    0.09    0.09    0.09    0.09    0.09    0.00    
Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset

# Plot metrics for each method

In [33]:

import matplotlib.pyplot as plt

metrics = {
        "RMSE": rmse_results,
        "MAE": mae_results,
        "Fit_time": fit_time,
        "Test_time": test_time,
        "Overall_time": overall_times
}
    
metrics_names = list(metrics.keys())

SVD_metrics = {key: value[0] for key, value in metrics.items()}
PMF_metrics = {key: value[1] for key, value in metrics.items()}
NMF_metrics = {key: value[2] for key, value in metrics.items()}
Bsl_metrics = {key: value[3] for key, value in metrics.items()}

SVD_values = list(SVD_metrics.values())
PMF_values = list(PMF_metrics.values())
NMF_values = list(NMF_metrics.values())
Bsl_values = list(Bsl_metrics.values())

###### SVD Metrics #######

colors = ['skyblue', 'red', 'lightblue', 'magenta', 'brown']

plt.figure(figsize=(12, 6))
bars1 = plt.bar(metrics_names, SVD_values, color=colors)
plt.xlabel('Metrics')
plt.title('SVD Metrics')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend(bars1, metrics_names, loc='upper center', bbox_to_anchor=(0.7, 1))

for i, value in enumerate(SVD_values):
    plt.text(i, value + 0.005, f'{value:.3f}', ha='center', va='bottom')

plt.show()

###### PMF Metrics #######
plt.figure(figsize=(12, 6))
bars2 = plt.bar(metrics_names, PMF_values, color=colors)
plt.legend(bars2, metrics_names, loc='upper center', bbox_to_anchor=(0.7, 1))
plt.xlabel('Metrics')
plt.title('PMF Metrics')
plt.grid(axis='y', linestyle='--', alpha=0.7)

for i, value in enumerate(PMF_values):
    plt.text(i, value + 0.005, f'{value:.3f}', ha='center', va='bottom')

plt.show()

###### NMF Metrics #######
plt.figure(figsize=(12, 6))
bars3 = plt.bar(metrics_names, NMF_values, color = colors)
plt.legend(bars3, metrics_names, loc='upper left')
plt.xlabel('Metrics')
plt.title('NMF Metrics')
plt.grid(axis='y', linestyle='--', alpha=0.7)

for i, value in enumerate(NMF_values):
    plt.text(i, value + 0.005, f'{value:.3f}', ha='center', va='bottom')

plt.show()

###### Matrix Factorization loss function using SGD to minimize it  ######
plt.figure(figsize=(12, 6))
bars9 = plt.bar(metrics_names, Bsl_values, color=colors)
plt.xlabel('Metrics')
plt.title('BaselineOnly metrics')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend(bars9, metrics_names, loc='upper center', bbox_to_anchor=(0.7, 1))

for i, value in enumerate(Bsl_values):
    plt.text(i, value + 0.005, f'{value:.3f}', ha='center', va='bottom')

plt.show()

  plt.show()
  plt.show()
  plt.show()
  plt.show()


# Compare metrics between SVD, PMF, NMF, Bsl

In [34]:
models = ["SVD", "PMF", "NMF", "Baseline"]

colors2 = ['blue', 'orange', 'green', "gray"]

# Plotting the bar chart
plt.figure(figsize=(12, 8))
bars = plt.bar(models, rmse_results, color=colors2)
plt.title('Comparison of RMSE Values for SVD, NMF, PMF and Baseline')
plt.xlabel('Matrix Factorization Models')
plt.ylabel('Root Mean Squared Error (RMSE)')
plt.ylim(0, max(rmse_results) + 0.1)  # Adjust the y-axis limit if needed
plt.grid(axis='y', linestyle='--', alpha=0.7)

###legend
plt.legend(bars, models, loc='upper right')

# Adding text labels with RMSE values on top of the bars
for i, value in enumerate(rmse_results):
    plt.text(i, value + 0.005, f'{value:.3f}', ha='center', va='bottom')
    
# Show the plot
plt.show()



  plt.show()


# MAE

In [35]:
plt.figure(figsize=(12, 8))
bars4 = plt.bar(models, mae_results, color=colors2)
plt.title('Comparison of MAE Values for SVD, NMF, PMF and Baseline')
plt.xlabel('Matrix Factorization Models')
plt.ylabel('Mean Squared Error (MAE)')
plt.ylim(0, max(mae_results) + 0.1)  # Adjust the y-axis limit if needed
plt.grid(axis='y', linestyle='--', alpha=0.7)

###legend
plt.legend(bars4, models, loc='upper right')

# Adding text labels with RMSE values on top of the bars
for i, value in enumerate(mae_results):
    plt.text(i, value + 0.005, f'{value:.3f}', ha='center', va='bottom')
    
# Show the plot
plt.show()

  plt.show()


# Fit time

In [36]:
plt.figure(figsize=(12, 8))
bars5 = plt.bar(models, fit_time, color=colors2)
plt.title('Comparison of fit time for SVD, NMF, PMF and Baseline')
plt.xlabel('Matrix Factorization Models')
plt.ylabel('Fit time (seconds)')
plt.grid(axis='y', linestyle='--', alpha=0.7)

###legend
plt.legend(bars5, models, loc='upper left')

# Adding text labels with RMSE values on top of the bars
for i, value in enumerate(fit_time):
    plt.text(i, value + 0.005, f'{value:.3f}', ha='center', va='bottom')
    
# Show the plot
plt.show()

  plt.show()


# Test time

In [37]:
plt.figure(figsize=(12, 8))
bars6 = plt.bar(models, test_time, color=colors2)
plt.title('Comparison of test time for SVD, NMF, PMF and Baseline')
plt.xlabel('Matrix Factorization Models')
plt.ylabel('Test time (seconds)')
plt.ylim(0, max(test_time) + 0.1)  # Adjust the y-axis limit if needed
plt.grid(axis='y', linestyle='--', alpha=0.7)

###legend
plt.legend(bars6, models, loc='upper right')

# Adding text labels with RMSE values on top of the bars
for i, value in enumerate(test_time):
    plt.text(i, value + 0.005, f'{value:.3f}', ha='center', va='bottom')
    
# Show the plot
plt.show()

  plt.show()


# Overall time

In [38]:
plt.figure(figsize=(12, 8))
bars7 = plt.bar(models, overall_times, color=colors2)
plt.title('Comparison of overall time for SVD, NMF, PMF and Baseline')
plt.xlabel('Matrix Factorization Models')
plt.ylabel('Overall time (seconds)')
plt.grid(axis='y', linestyle='--', alpha=0.7)

###legend
plt.legend(bars7, models, loc='upper left')

# Adding text labels with RMSE values on top of the bars
for i, value in enumerate(overall_times):
    plt.text(i, value + 0.005, f'{value:.3f}', ha='center', va='bottom')
    
# Show the plot
plt.show()

  plt.show()


In [39]:
from surprise.model_selection import train_test_split

In [40]:
movies_and_ratings = pd.merge(movies, ratings, on='movieId')
#final_df = pd.merge(movies_and_ratings, tags, on=['movieId', 'userId'], how='left')
#final_df = final_df.drop("timestamp", axis=1)

# Get top 10 rated movies from a user

In [41]:

while True:
    user_id = input("Enter a user id (1-610): ")
    
    if not user_id.isdigit() or int(user_id) not in range(1, 611):
        print("Invalid user id. Please enter a valid id\n")
    else:
        user_id = int(user_id)
        break
    
    
    
print(f"Top 10 rated movies from user {user_id}")
# Sort the ratings in descending order for the specific user
user_sorted_ratings = movies_and_ratings[movies_and_ratings['userId'] == user_id].sort_values(by='rating', ascending = False)

# Specify the top N movies you want to retrieve
top_n = 10  

# Get the top N movies based on sorted ratings
top_movies_for_user = user_sorted_ratings.head(top_n)
top_movies_for_user

Enter a user id (1-610): 10
Top 10 rated movies from user 10


Unnamed: 0,movieId,title,genres,userId,rating
99259,140110,The Intern (2015),Comedy,10,5.0
77147,8869,First Daughter (2004),Comedy|Romance,10,5.0
94672,96079,Skyfall (2012),Action|Adventure|Thriller|IMAX,10,5.0
93816,91529,"Dark Knight Rises, The (2012)",Action|Adventure|Crime|IMAX,10,5.0
75192,7458,Troy (2004),Action|Adventure|Drama|War,10,5.0
92100,81845,"King's Speech, The (2010)",Drama,10,5.0
76341,8533,"Notebook, The (2004)",Drama|Romance,10,5.0
91318,79091,Despicable Me (2010),Animation|Children|Comedy|Crime,10,5.0
90172,71579,"Education, An (2009)",Drama|Romance,10,5.0
80014,33794,Batman Begins (2005),Action|Crime|IMAX,10,5.0


# Top 10 predicted ratings for a specific user 

## Fit an algorithm from algorithms in dataset

In [42]:
#####  algorithms = [SVD(), SVD(biased=False), NMF(), BaselineOnly(bsl_options=bsl_options)] ####

bsl_options = {
    "method": "sgd"
}
algorithms = [SVD(), SVD(biased=False), NMF(), BaselineOnly(bsl_options=bsl_options)]

while True:
    print("Algorithms to fit:")
    print("1.  SVD")
    print("2.  PMF")
    print("3.  NMF")
    print("4.  BaselineOnly")
    
    print("\n")
    algo = input("Enter a number (1-4): ")
    
    if not algo.isdigit() or int(algo) not in range(1, len(algorithms) + 1):
        print("Invalid choice. Please enter a valid number\n")
    else:
        algo = int(algo)
        break

trainset, testset = train_test_split(data, test_size=0.25)

algorithms[algo-1].fit(trainset)
predictions = algorithms[algo-1].test(testset)


Algorithms to fit:
1.  SVD
2.  PMF
3.  NMF
4.  BaselineOnly


Enter a number (1-4): 4
Estimating biases using sgd...


## Create testset with the ratings user didnt rate

In [43]:
####### Get unique ids #########
unique_ids = movies["movieId"].unique()
####### Get the movies that a user rated #########
ids_rated = ratings.loc[ratings["userId"] == user_id, "movieId"]
###### Movies to predict #######
movies_to_pred = np.setdiff1d(unique_ids,ids_rated)
###### Rated movies #######
rated_movies = unique_ids.shape[0] - movies_to_pred.shape[0]


testset_user = [[user_id, mv_id, None] for mv_id in movies_to_pred]
predictions_user = algorithms[algo-1].test(testset_user)
predictions_user

[Prediction(uid=10, iid=1, r_ui=None, est=3.484547181119235, details={'was_impossible': False}),
 Prediction(uid=10, iid=2, r_ui=None, est=3.0323104084418433, details={'was_impossible': False}),
 Prediction(uid=10, iid=3, r_ui=None, est=2.9641347183066697, details={'was_impossible': False}),
 Prediction(uid=10, iid=4, r_ui=None, est=2.5514081797044588, details={'was_impossible': False}),
 Prediction(uid=10, iid=5, r_ui=None, est=2.627816060792158, details={'was_impossible': False}),
 Prediction(uid=10, iid=6, r_ui=None, est=3.5866652121000255, details={'was_impossible': False}),
 Prediction(uid=10, iid=7, r_ui=None, est=2.81145371167914, details={'was_impossible': False}),
 Prediction(uid=10, iid=8, r_ui=None, est=2.762310783893719, details={'was_impossible': False}),
 Prediction(uid=10, iid=9, r_ui=None, est=2.963379754651043, details={'was_impossible': False}),
 Prediction(uid=10, iid=10, r_ui=None, est=3.1825308973390327, details={'was_impossible': False}),
 Prediction(uid=10, iid=1

## Sort the predicted ratings in top 10 

In [44]:
sorted_predictions_user = sorted(predictions_user, key=lambda x: x.est, reverse=True)

top_10_predictions_user = sorted_predictions_user[:10]
top_10_predictions_user

[Prediction(uid=10, iid=1204, r_ui=None, est=3.9765353246133452, details={'was_impossible': False}),
 Prediction(uid=10, iid=750, r_ui=None, est=3.975747033883747, details={'was_impossible': False}),
 Prediction(uid=10, iid=318, r_ui=None, est=3.974798966428555, details={'was_impossible': False}),
 Prediction(uid=10, iid=1223, r_ui=None, est=3.9573047670174026, details={'was_impossible': False}),
 Prediction(uid=10, iid=904, r_ui=None, est=3.9018937208855626, details={'was_impossible': False}),
 Prediction(uid=10, iid=1136, r_ui=None, est=3.891416099355222, details={'was_impossible': False}),
 Prediction(uid=10, iid=50, r_ui=None, est=3.8900893100040244, details={'was_impossible': False}),
 Prediction(uid=10, iid=2019, r_ui=None, est=3.8719647491416023, details={'was_impossible': False}),
 Prediction(uid=10, iid=3275, r_ui=None, est=3.8719537804564936, details={'was_impossible': False}),
 Prediction(uid=10, iid=5618, r_ui=None, est=3.868923684061274, details={'was_impossible': False})]

## Create a dataframe that maps movieId to titles genres tags

In [45]:
#### Get top 10 movie ids ####
movie_ids = [prediction.iid for prediction in top_10_predictions_user]


# Movie_ids to dataframe 
movie_ids_df = pd.DataFrame({'movieId': movie_ids})

# Find corresponding titles and genres from the movies DataFrame
movies_pred = pd.merge(movie_ids_df, movies, on='movieId', how='left')

estimated_ratings = [prediction.est for prediction in top_10_predictions_user]
movies_pred['est_rating'] = estimated_ratings

##### change columns ######
movies_pred = movies_pred[['movieId', 'title', 'genres', 'est_rating']]
movies_pred

Unnamed: 0,movieId,title,genres,est_rating
0,1204,Lawrence of Arabia (1962),Adventure|Drama|War,3.976535
1,750,Dr. Strangelove or: How I Learned to Stop Worr...,Comedy|War,3.975747
2,318,"Shawshank Redemption, The (1994)",Crime|Drama,3.974799
3,1223,"Grand Day Out with Wallace and Gromit, A (1989)",Adventure|Animation|Children|Comedy|Sci-Fi,3.957305
4,904,Rear Window (1954),Mystery|Thriller,3.901894
5,1136,Monty Python and the Holy Grail (1975),Adventure|Comedy|Fantasy,3.891416
6,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,3.890089
7,2019,Seven Samurai (Shichinin no samurai) (1954),Action|Adventure|Drama,3.871965
8,3275,"Boondock Saints, The (2000)",Action|Crime|Drama|Thriller,3.871954
9,5618,Spirited Away (Sen to Chihiro no kamikakushi) ...,Adventure|Animation|Fantasy,3.868924


# NEURAL

In [46]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import keras
from keras import layers
import time




In [47]:
ratings_nn = pd.read_csv("ratings.csv")

# Encode users and movies as integer indices.

In [48]:
user_ids = ratings_nn["userId"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}
movie_ids = ratings_nn["movieId"].unique().tolist()
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
movie_encoded2movie = {i: x for i, x in enumerate(movie_ids)}
ratings_nn["user"] = ratings_nn["userId"].map(user2user_encoded)
ratings_nn["movie"] = ratings_nn["movieId"].map(movie2movie_encoded)

num_users = len(user2user_encoded)
num_movies = len(movie_encoded2movie)
ratings_nn["rating"] = ratings_nn["rating"].values.astype(np.float32)

min_rating = min(ratings_nn["rating"])
max_rating = max(ratings_nn["rating"])

# Normalization of ratings

In [49]:
ratings_nn = ratings_nn.sample(frac=1, random_state=42)
x = ratings_nn[["user", "movie"]].values
# Normalize the targets between 0 and 1. Makes it easy to train.
y = ratings_nn["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
# Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * ratings_nn.shape[0])
x_train, x_val, y_train, y_val = (
    x[:train_indices],
    x[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

# We embed both users and movies in to 50-dimensional vectors.

**The model computes a match score between user and movie embeddings via a dot product, and adds a per-movie and per-user bias. The match score is scaled to the [0, 1] interval via a sigmoid**

In [50]:
import tensorflow as tf
from tensorflow.keras import layers

EMBEDDING_SIZE = 50

class RecommenderNet(tf.keras.Model):
    def __init__(self, num_users, num_movies, embedding_size, **kwargs):
        super().__init__(**kwargs)
        self.num_users = num_users
        self.num_movies = num_movies
        self.embedding_size = embedding_size
        self.user_embedding = layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6),
        )
        self.user_bias = layers.Embedding(num_users, 1)
        self.movie_embedding = layers.Embedding(
            num_movies,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6),
        )
        self.movie_bias = layers.Embedding(num_movies, 1)

    def call(self, inputs):
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        movie_vector = self.movie_embedding(inputs[:, 1])
        movie_bias = self.movie_bias(inputs[:, 1])
        dot_user_movie = tf.reduce_sum(user_vector * movie_vector, axis=1, keepdims=True)
        # Add all the components (including bias)
        x = dot_user_movie + user_bias + movie_bias
        # The sigmoid activation forces the rating to between 0 and 1
        return tf.nn.sigmoid(x)
    
    
start_time = time.time()
model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

####### Train model ######
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
    validation_data=(x_val, y_val),
)
end_time = time.time()
total_time = end_time - start_time
print(f"Total time taken: {total_time} seconds")


Epoch 1/5

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Total time taken: 51.39318513870239 seconds


# Plot training and validation loss (binary cross entropy)

In [51]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="whitegrid")  

plt.figure(figsize=(10, 6))  

# Plot training and validation loss
sns.lineplot(x=range(1, len(history.history["loss"]) + 1), y=history.history["loss"], label="Train")
sns.lineplot(x=range(1, len(history.history["val_loss"]) + 1), y=history.history["val_loss"], label="Test")

plt.title("Binary Cross Entropy Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(loc="upper right")
plt.show()


  plt.show()


**We can see that while train test is getting lower every epoch, the validation set is increasing his loss after epoch 2**

In [52]:
mv_df = pd.read_csv("movies.csv")

# Top 10 movie recommendation 

In [53]:
movies_watched_by_user = ratings_nn[ratings_nn.userId == user_id]
movies_not_watched = mv_df[
    ~mv_df["movieId"].isin(movies_watched_by_user.movieId.values)
]["movieId"]
movies_not_watched = list(
    set(movies_not_watched).intersection(set(movie2movie_encoded.keys()))
)
movies_not_watched = [[movie2movie_encoded.get(x)] for x in movies_not_watched]
user_encoder = user2user_encoded.get(user_id)
user_movie_array = np.hstack(
    ([[user_encoder]] * len(movies_not_watched), movies_not_watched)
)
pred_ratings_nn = model.predict(user_movie_array).flatten()
top_ratings_indices = pred_ratings_nn.argsort()[-10:][::-1]
recommended_movie_ids = [
    movie_encoded2movie.get(movies_not_watched[x][0]) for x in top_ratings_indices
]



In [54]:

# Movie_ids to dataframe 
movie_ids_nn = pd.DataFrame({'movieId': recommended_movie_ids})

# Find corresponding titles and genres from the movies DataFrame
movies_pred_nn = pd.merge(movie_ids_nn, movies, on='movieId', how='left')

##### change columns ######
movies_pred_nn = movies_pred_nn
movies_pred_nn

Unnamed: 0,movieId,title,genres
0,168,First Knight (1995),Action|Drama|Romance
1,150,Apollo 13 (1995),Adventure|Drama|IMAX
2,3753,"Patriot, The (2000)",Action|Drama|War
3,3825,Coyote Ugly (2000),Comedy|Drama|Romance
4,736,Twister (1996),Action|Adventure|Romance|Thriller
5,802,Phenomenon (1996),Drama|Romance
6,318,"Shawshank Redemption, The (1994)",Crime|Drama
7,11,"American President, The (1995)",Comedy|Drama|Romance
8,595,Beauty and the Beast (1991),Animation|Children|Fantasy|Musical|Romance|IMAX
9,225,Disclosure (1994),Drama|Thriller


# Content based filtering using titles and genres as features in item profile

In [55]:
import pandas as pd
import numpy as np
import matplotlib as plt
from ast import literal_eval
#pip install sentence-transformers

# Copy of movies in order not to affect the movies dataframe

In [56]:
movies2 = pd.read_csv("movies.csv")

In [57]:
movies_content = movies2.copy()

In [58]:
movies_content

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


# Check in format title to see how many movies do not have year. Format title has either 'title (year) ' or 'title (year1-year2) '

In [59]:
titles_without_year = movies_content[~movies_content['title'].str.contains(r'\(\d{4}(?:–\d{4})?\)')]
titles_without_year

Unnamed: 0,movieId,title,genres
6059,40697,Babylon 5,Sci-Fi
9031,140956,Ready Player One,Action|Sci-Fi|Thriller
9091,143410,Hyena Road,(no genres listed)
9138,147250,The Adventures of Sherlock Holmes and Doctor W...,(no genres listed)
9179,149334,Nocturnal Animals,Drama|Thriller
9259,156605,Paterson,(no genres listed)
9367,162414,Moonlight,Drama
9448,167570,The OA,(no genres listed)
9514,171495,Cosmos,(no genres listed)
9515,171631,Maria Bamford: Old Baby,(no genres listed)


# Extract year from title and add a new column where we store year

In [60]:
movies_content['year'] = movies_content['title'].str.extract(r'\((\d{4}(?:–\d{4})?)\)')

In [61]:
movies_content.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,1995
2,3,Grumpier Old Men (1995),Comedy|Romance,1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,1995
4,5,Father of the Bride Part II (1995),Comedy,1995


# Remove year from the title column

In [62]:
movies_content['title'] = movies_content['title'].str.replace(r'\s*\(\d{4}(?:–\d{4})?\)', '')

  movies_content['title'] = movies_content['title'].str.replace(r'\s*\(\d{4}(?:–\d{4})?\)', '')


In [63]:
movies_content.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
1,2,Jumanji,Adventure|Children|Fantasy,1995
2,3,Grumpier Old Men,Comedy|Romance,1995
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995
4,5,Father of the Bride Part II,Comedy,1995


In [64]:
### Check if a movie doesnt have title ###
movies_content["title"].isnull().sum()

0

# Remove movies that doesnt have genres

In [65]:
movies_content[movies_content["genres"] == '(no genres listed)']

Unnamed: 0,movieId,title,genres,year
8517,114335,La cravate,(no genres listed),1957
8684,122888,Ben-hur,(no genres listed),2016
8687,122896,Pirates of the Caribbean: Dead Men Tell No Tales,(no genres listed),2017
8782,129250,Superfast!,(no genres listed),2015
8836,132084,Let It Be Me,(no genres listed),1995
8902,134861,Trevor Noah: African American,(no genres listed),2013
9033,141131,Guardians,(no genres listed),2016
9053,141866,Green Room,(no genres listed),2015
9070,142456,The Brand New Testament,(no genres listed),2015
9091,143410,Hyena Road,(no genres listed),


In [66]:
movies_content = movies_content[movies_content["genres"] != '(no genres listed)']

# Reset index because some rows deleted

In [67]:
movies_content= movies_content.reset_index(drop=True)

In [68]:
movies_content

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
1,2,Jumanji,Adventure|Children|Fantasy,1995
2,3,Grumpier Old Men,Comedy|Romance,1995
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995
4,5,Father of the Bride Part II,Comedy,1995
...,...,...,...,...
9703,193581,Black Butler: Book of the Atlantic,Action|Animation|Comedy|Fantasy,2017
9704,193583,No Game No Life: Zero,Animation|Comedy|Fantasy,2017
9705,193585,Flint,Drama,2017
9706,193587,Bungo Stray Dogs: Dead Apple,Action|Animation,2018


# Initialize model and remove | from genres in order to process them

In [69]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-mpnet-base-v2')

In [70]:
movies_content['genres_list'] = movies_content['genres'].str.split('|')

In [71]:
movies_content.head()

Unnamed: 0,movieId,title,genres,year,genres_list
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995,"[Adventure, Animation, Children, Comedy, Fantasy]"
1,2,Jumanji,Adventure|Children|Fantasy,1995,"[Adventure, Children, Fantasy]"
2,3,Grumpier Old Men,Comedy|Romance,1995,"[Comedy, Romance]"
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995,"[Comedy, Drama, Romance]"
4,5,Father of the Bride Part II,Comedy,1995,[Comedy]


In [72]:
movies_content["title_list"] = movies_content['title'].apply(lambda x: [x]).tolist()

In [73]:
movies_content.head()

Unnamed: 0,movieId,title,genres,year,genres_list,title_list
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995,"[Adventure, Animation, Children, Comedy, Fantasy]",[Toy Story]
1,2,Jumanji,Adventure|Children|Fantasy,1995,"[Adventure, Children, Fantasy]",[Jumanji]
2,3,Grumpier Old Men,Comedy|Romance,1995,"[Comedy, Romance]",[Grumpier Old Men]
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995,"[Comedy, Drama, Romance]",[Waiting to Exhale]
4,5,Father of the Bride Part II,Comedy,1995,[Comedy],[Father of the Bride Part II]


In [74]:
movies_content['metadata'] = movies_content['title_list'] + movies_content['genres_list']

In [75]:
movies_content.head()

Unnamed: 0,movieId,title,genres,year,genres_list,title_list,metadata
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995,"[Adventure, Animation, Children, Comedy, Fantasy]",[Toy Story],"[Toy Story, Adventure, Animation, Children, Co..."
1,2,Jumanji,Adventure|Children|Fantasy,1995,"[Adventure, Children, Fantasy]",[Jumanji],"[Jumanji, Adventure, Children, Fantasy]"
2,3,Grumpier Old Men,Comedy|Romance,1995,"[Comedy, Romance]",[Grumpier Old Men],"[Grumpier Old Men, Comedy, Romance]"
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995,"[Comedy, Drama, Romance]",[Waiting to Exhale],"[Waiting to Exhale, Comedy, Drama, Romance]"
4,5,Father of the Bride Part II,Comedy,1995,[Comedy],[Father of the Bride Part II],"[Father of the Bride Part II, Comedy]"


# Sentence transformer expects a list of strings <br>
## These strings are the title of the movie and their genres

In [76]:
movies_content['metadata'] = movies_content['metadata'].apply(lambda x: ' '.join(x))

In [77]:
movies_content.head()

Unnamed: 0,movieId,title,genres,year,genres_list,title_list,metadata
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995,"[Adventure, Animation, Children, Comedy, Fantasy]",[Toy Story],Toy Story Adventure Animation Children Comedy ...
1,2,Jumanji,Adventure|Children|Fantasy,1995,"[Adventure, Children, Fantasy]",[Jumanji],Jumanji Adventure Children Fantasy
2,3,Grumpier Old Men,Comedy|Romance,1995,"[Comedy, Romance]",[Grumpier Old Men],Grumpier Old Men Comedy Romance
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995,"[Comedy, Drama, Romance]",[Waiting to Exhale],Waiting to Exhale Comedy Drama Romance
4,5,Father of the Bride Part II,Comedy,1995,[Comedy],[Father of the Bride Part II],Father of the Bride Part II Comedy


In [78]:
meta_data = np.array(movies_content['metadata'])
meta_data

array(['Toy Story Adventure Animation Children Comedy Fantasy',
       'Jumanji Adventure Children Fantasy',
       'Grumpier Old Men Comedy Romance', ..., 'Flint Drama',
       'Bungo Stray Dogs: Dead Apple Action Animation',
       'Andrew Dice Clay: Dice Rules Comedy'], dtype=object)

In [79]:
embeddings = model.encode(meta_data, show_progress_bar=True)

Batches:   0%|          | 0/304 [00:00<?, ?it/s]

In [80]:
embeddings.shape

(9708, 768)

In [81]:
from sklearn.metrics.pairwise import cosine_similarity

In [82]:
sim_matrix = cosine_similarity(embeddings)
sim_matrix

array([[1.0000001 , 0.5237799 , 0.23131318, ..., 0.14419204, 0.3691797 ,
        0.26906765],
       [0.5237799 , 0.99999994, 0.17015618, ..., 0.15566926, 0.24296936,
        0.12710135],
       [0.23131318, 0.17015618, 0.9999999 , ..., 0.14382234, 0.13827826,
        0.24506736],
       ...,
       [0.14419204, 0.15566926, 0.14382234, ..., 0.99999994, 0.09317985,
        0.23325431],
       [0.3691797 , 0.24296936, 0.13827826, ..., 0.09317985, 1.0000002 ,
        0.14965002],
       [0.26906765, 0.12710135, 0.24506736, ..., 0.23325431, 0.14965002,
        1.0000001 ]], dtype=float32)

# Get top 10 similar movies with the one that we prompt. <br>
## The display will be on the movies2 dataframe and not on the processed df.

In [83]:
movies2 = movies2[movies2["genres"] != '(no genres listed)']

In [84]:
movies2= movies2.reset_index(drop=True)

In [85]:
movies2

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9703,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9704,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9705,193585,Flint (2017),Drama
9706,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [86]:
top_movies_for_user

Unnamed: 0,movieId,title,genres,userId,rating
99259,140110,The Intern (2015),Comedy,10,5.0
77147,8869,First Daughter (2004),Comedy|Romance,10,5.0
94672,96079,Skyfall (2012),Action|Adventure|Thriller|IMAX,10,5.0
93816,91529,"Dark Knight Rises, The (2012)",Action|Adventure|Crime|IMAX,10,5.0
75192,7458,Troy (2004),Action|Adventure|Drama|War,10,5.0
92100,81845,"King's Speech, The (2010)",Drama,10,5.0
76341,8533,"Notebook, The (2004)",Drama|Romance,10,5.0
91318,79091,Despicable Me (2010),Animation|Children|Comedy|Crime,10,5.0
90172,71579,"Education, An (2009)",Drama|Romance,10,5.0
80014,33794,Batman Begins (2005),Action|Crime|IMAX,10,5.0


# User 10 liked Dispicable Me and we want to get top 10 movies similar to Dispicable Me that he will probably like too

In [87]:
def get_recommendations(movie_title, similarity_matrix, df):
    idx = df[df['title'] == movie_title].index[0]
    sim_scores = list(enumerate(similarity_matrix[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Get top 10 similar movies

    movie_indices = [i[0] for i in sim_scores]
    return movies2.loc[movie_indices, ['movieId','title',"genres"]]


movie_title = 'Despicable Me'
recommendations = get_recommendations(movie_title, sim_matrix, movies_content)
recommendations

Unnamed: 0,movieId,title,genres
9508,172547,Despicable Me 3 (2017),Adventure|Animation|Children|Comedy
8200,103335,Despicable Me 2 (2013),Animation|Children|Comedy|IMAX
868,1148,Wallace & Gromit: The Wrong Trousers (1993),Animation|Children|Comedy|Crime
7180,72226,Fantastic Mr. Fox (2009),Adventure|Animation|Children|Comedy|Crime
6112,42734,Hoodwinked! (2005),Animation|Children|Comedy
1517,2048,"Great Mouse Detective, The (1986)",Action|Animation|Children|Crime
1081,1405,Beavis and Butt-Head Do America (1996),Adventure|Animation|Comedy|Crime
4859,7262,Catch That Kid (2004),Action|Adventure|Children|Comedy|Crime
6212,45517,Cars (2006),Animation|Children|Comedy
8918,135887,Minions (2015),Adventure|Animation|Children|Comedy
