# Install and Set Up Environment

In [None]:
# Downgrade NumPy to avoid compatibility issues with scikit-surprise
!pip install numpy==1.24.4

# Install the scikit-surprise library
!pip install scikit-surprise


Collecting numpy==1.24.4
  Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m97.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jaxlib 0.5.1 requires numpy>=1.25, but you have numpy 1.24.4 which is incompatible.
xarray-einstats 0.9.1 requires numpy>=1.25, but you have numpy 1.24.4 which is incompatible.
treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.24.4 which is incompatible.
pymc 5.23.0 requires 

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2469549 sha256=dedeb3f4bf05288445e88445d2f3cba6ca352e87f3a32b4785bbf19d1617ff28
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Succes

In [None]:
# Install Kaggle API
!pip install -q kaggle

# Upload your kaggle.json (API token)
from google.colab import files
files.upload()  # Upload kaggle.json file here

# Move kaggle.json to the correct folder and set permissions
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json



Saving kaggle.json to kaggle.json


In [None]:
# Download Anime dataset
!kaggle datasets download -d cooperunion/anime-recommendations-database
!unzip -q anime-recommendations-database.zip

Dataset URL: https://www.kaggle.com/datasets/cooperunion/anime-recommendations-database
License(s): CC0-1.0
Downloading anime-recommendations-database.zip to /content
  0% 0.00/25.0M [00:00<?, ?B/s]
100% 25.0M/25.0M [00:00<00:00, 867MB/s]


# Load the Data

In [None]:
import pandas as pd

# Read the anime catalogue and the user ratings from the extracted CSV files
anime, ratings = (
    pd.read_csv(csv_path)
    for csv_path in ("/content/anime.csv", "/content/rating.csv")
)

# Show the first rows of each table, catalogue first
for preview in (anime.head(), ratings.head()):
    print(preview)


   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  
   user_id  anime_id  rating
0        1        20      -1
1        1        24      -1
2        1        79      -1
3        1       226      -1
4

# Clean the Data

In [None]:
import html

# Remove unrated entries (rating = -1)
ratings_cleaned = ratings[ratings['rating'] != -1]

# Drop rows with missing anime names
anime = anime.dropna(subset=['name'])

# Some titles carry HTML character references (for example the escaped
# apostrophe visible in the Gintama titles). Decode them once here so that the
# names used as item ids and shown in the recommendations are the real titles.
# .assign() builds a new frame instead of writing into the dropna() result.
anime = anime.assign(name=anime['name'].map(html.unescape))

# Merge anime names into the ratings dataset using anime_id
# (default inner join: ratings for anime without a catalogue row are dropped)
ratings_merged = pd.merge(ratings_cleaned, anime[['anime_id', 'name']], on='anime_id')

# Display basic statistics
print(f"Number of users: {ratings_merged['user_id'].nunique()}")
print(f"Number of anime: {ratings_merged['name'].nunique()}")
print(ratings_merged.head())


Number of users: 69600
Number of anime: 9926
   user_id  anime_id  rating                    name
0        1      8074      10  Highschool of the Dead
1        1     11617      10         High School DxD
2        1     11757      10        Sword Art Online
3        1     15451      10     High School DxD New
4        2     11771      10        Kuroko no Basket


# Prepare the Data for the SVD Model

In [None]:
from surprise import Dataset, Reader

# Surprise reads the frame positionally as (user, item, rating), so select the
# three columns in exactly that order; the anime name serves as the item id.
rating_triples = ratings_merged[['user_id', 'name', 'rating']]

# Ratings are on a 1-10 scale
reader = Reader(rating_scale=(1, 10))

# Wrap the DataFrame in a Surprise dataset
data = Dataset.load_from_df(rating_triples, reader)


# Split Data and Train the SVD Model

In [None]:
from surprise import SVD
from surprise.model_selection import train_test_split

# One seed for both the split and the model so a re-run reproduces the metrics
RANDOM_STATE = 42

# Split the dataset into training (80%) and testing (20%) sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=RANDOM_STATE)

# Initialize the SVD model; without random_state the factor initialisation and
# epoch shuffling differ from run to run.
model = SVD(random_state=RANDOM_STATE)

# Train the model (trailing ';' hides the fitted-object repr that fit() returns)
model.fit(trainset);


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7908ecb95dd0>

# Evaluate the Model

In [None]:
from surprise import accuracy

# Predict on the test set
predictions = model.test(testset)

# Print evaluation metrics.
# accuracy.rmse / accuracy.mae print their value and also return it; the
# results are stored so the last call is not echoed a second time as the
# cell's output, and so the numbers can be reused later.
print("Model Evaluation:")
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)


Model Evaluation:
RMSE: 1.1320
MAE:  0.8443


0.8442800708070825

# Recommend Top Anime for a Specific User

In [None]:
# Choose a user ID
user_id = 5000

# Only the first MAX_CANDIDATES unrated titles (in catalogue order) are scored,
# for speed; raise this to widen the search.
MAX_CANDIDATES = 1000

# Get all anime names the user has already rated
rated_anime = ratings_merged.loc[ratings_merged['user_id'] == user_id, 'name'].tolist()

# Set copy for O(1) membership tests; checking every catalogue title against
# the list would scan the user's whole history each time.
rated_lookup = set(rated_anime)

# Get all anime names
all_anime = anime['name'].unique()

# Get list of anime not rated by the user (catalogue order preserved)
unrated_anime = [name for name in all_anime if name not in rated_lookup]

# Predict ratings for the candidate subset as (name, estimated rating) pairs
recommendations = [
    (name, model.predict(user_id, name).est)
    for name in unrated_anime[:MAX_CANDIDATES]
]

# Sort by predicted rating, highest first, and keep the best five
top_5 = sorted(recommendations, key=lambda x: x[1], reverse=True)[:5]

# Display top 5 recommendations
print(f"\nTop 5 anime recommendations for user {user_id}:")
for name, rating in top_5:
    print(f"{name} - Predicted Rating: {rating:.2f}")



Top 5 anime recommendations for user 5000:
Ginga Eiyuu Densetsu - Predicted Rating: 10.00
Evangelion: 2.0 You Can (Not) Advance - Predicted Rating: 10.00
Kimi no Na wa. - Predicted Rating: 9.98
Gintama&#039;: Enchousen - Predicted Rating: 9.91
Gintama&#039; - Predicted Rating: 9.90
