# Machine Learning Models

## Importing Libraries 📚

In [543]:
import pandas as pd  
import numpy as np
from tabulate import tabulate
import html

import warnings
warnings.filterwarnings('ignore')

## Loading Data 📕

In [544]:
# Read the raw anime catalogue from disk
anime_df = pd.read_csv('anime.csv')

# Separate id -> genre lookup taken from the freshly loaded frame; later cells
# overwrite anime_df['genre'] with label-encoded integers, while this frame is
# what the display helpers read the genre text from.
anime_df_id_genre = anime_df.loc[:, ['anime_id', 'genre']]

## Functions 📝

In [545]:
# Looks up the display name that belongs to an anime id
def anime_id_to_name(anime_id):
    """Return the ``name`` of the first ``anime_df`` row matching ``anime_id``.

    Args:
        anime_id: value compared against the ``anime_id`` column of the
            module-level ``anime_df`` frame.

    Returns:
        The ``name`` entry of the first matching row.

    Raises:
        IndexError: if no row of ``anime_df`` has that id.
    """
    matching_names = anime_df.loc[anime_df['anime_id'] == anime_id, 'name']
    return matching_names.values[0]

# Recommending unwatched anime, ranked by the rating the model predicts for them
def recommend_new_anime(watched_anime_ids, model, recommendation_amount = 10):
    """Return the ids of the unwatched anime that ``model`` rates highest.

    Previously the model's predictions were computed and then thrown away, so
    every model produced the same "top by stored rating" list. The candidates
    are now ranked by ``model.predict`` instead.

    Args:
        watched_anime_ids: ids to exclude from the recommendations. The list
            is only used for exclusion; it does not personalise the ranking.
        model: fitted regressor exposing ``predict``; it is called with the
            ``genre``, ``type``, ``episodes`` and ``members`` columns of the
            module-level ``anime_df``.
        recommendation_amount: maximum number of ids to return (passed to
            ``DataFrame.head``).

    Returns:
        numpy array of ``anime_id`` values, best predicted rating first.
        Empty when every title in ``anime_df`` has been watched.

    Note:
        The feature columns are read from ``anime_df`` as they are. This
        assumes the preprocessing cells have already encoded/scaled them in
        place, i.e. they are in the same feature space the model was trained
        on, so no additional scaling is applied here.
    """
    feature_columns = ['genre', 'type', 'episodes', 'members']

    candidates = anime_df[~anime_df['anime_id'].isin(watched_anime_ids)]
    if candidates.empty:
        # Estimators reject zero-sample input, so short-circuit here
        return candidates['anime_id'].values

    # assign() returns a new frame, so anime_df itself is never modified
    ranked = candidates.assign(predicted_rating=model.predict(candidates[feature_columns]))
    # Stable sort keeps the original row order among equal predictions
    ranked = ranked.sort_values(by='predicted_rating', ascending=False, kind='mergesort')
    return ranked.head(recommendation_amount)['anime_id'].values


def _anime_table_rows(anime_ids):
    """Build one ``[id, name, rating, genre]`` row per id for the printed tables."""
    rows = []
    for anime_id in anime_ids:
        anime_name = anime_id_to_name(anime_id)
        anime_rating = anime_df[anime_df['anime_id'] == anime_id]['rating'].values[0]
        # Genre is read from anime_df_id_genre (the id/genre frame captured at
        # load time) rather than from anime_df
        anime_genre = anime_df_id_genre[anime_df_id_genre['anime_id'] == anime_id]['genre'].values[0]
        rows.append([anime_id, anime_name, anime_rating, anime_genre])
    return rows


def get_anime_recommendation(list, model, recommendation_amount = 10):
    """Print a table of the watched anime followed by a table of recommendations.

    Args:
        list: iterable of watched anime ids. The name shadows the builtin and
            is kept only so the signature stays unchanged for existing callers.
        model: fitted model forwarded to ``recommend_new_anime``.
        recommendation_amount: number of recommendations to request.

    Returns:
        None. Both tables are written to stdout in ``tabulate``'s 'pipe' format.

    Raises:
        IndexError: if an id is not present in ``anime_df`` /
            ``anime_df_id_genre`` (raised by the row lookups).
    """
    # Use the argument itself. The previous body iterated over the global
    # `watched_anime_ids`, so the watched table could disagree with the ids
    # that were actually passed in.
    watched_ids = list

    # Printing watched anime table
    watched_anime_data = _anime_table_rows(watched_ids)
    print("Watched anime:")
    print(tabulate(watched_anime_data, headers=['Anime ID', 'Anime Name', 'Rating', 'Genere'], tablefmt='pipe'))

    # Getting recommended anime data
    recommended_anime_ids = recommend_new_anime(watched_ids, model, recommendation_amount)
    recommended_anime_data = _anime_table_rows(recommended_anime_ids)

    # Printing recommended anime table
    print("\nRecommended anime:")
    print(tabulate(recommended_anime_data, headers=['Anime ID', 'Anime Name', 'Rating', 'Genere'], tablefmt='pipe'))

## Preprocessing Data 🔬

### Decoding HTML Characters

In [546]:
# Snapshot of the frame taken before decoding, so the still-escaped names can be shown
defective_list = anime_df.copy()

# Decode HTML entities (e.g. &#039;) in every title
anime_df['name'] = anime_df['name'].map(html.unescape)

# Before/after comparison for the rows labelled 0..9
unescape_data = [
    [defective_list['name'][row], anime_df['name'][row]]
    for row in range(10)
]

print(tabulate(unescape_data, headers=['Before', 'After'], tablefmt='orgtbl'))

| Before                                                    | After                                                     |
|-----------------------------------------------------------+-----------------------------------------------------------|
| Kimi no Na wa.                                            | Kimi no Na wa.                                            |
| Fullmetal Alchemist: Brotherhood                          | Fullmetal Alchemist: Brotherhood                          |
| Gintama°                                                  | Gintama°                                                  |
| Steins;Gate                                               | Steins;Gate                                               |
| Gintama&#039;                                             | Gintama'                                                  |
| Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou | Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou |
| Hunter x Hunter (2011)

### Filtering N/A, Unknown, and Empty Values

In [547]:
# Treat the literal string "Unknown" as a missing value
anime_df.replace("Unknown", np.nan, inplace=True)

# Fill gaps in the numeric columns with that column's mean.
# numeric_only=True is needed because the frame also holds text columns
# (name, genre, type); without it pandas >= 2.0 raises a TypeError instead of
# skipping them.
anime_df.fillna(anime_df.mean(numeric_only=True), inplace=True)

# Rows that still have a missing value (i.e. in a non-numeric column) are dropped
anime_df = anime_df.dropna()

# Keep only TV series, movies and OVAs
anime_df = anime_df[anime_df['type'].isin(['TV', 'Movie', 'OVA'])]

# Keep only titles with more than 100 members
anime_df = anime_df[anime_df['members'] > 100]

## K-Nearest Neighbors (KNN) 🏃

In [548]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [549]:
# Scaling numerical variables.
# This is done exactly once: the cell used to contain a second, copy-pasted
# scaler block that re-fitted on the already standardised columns, which left
# `scaler` holding the statistics of the scaled data rather than the raw data.
scaler = StandardScaler()
anime_df[['episodes', 'members']] = scaler.fit_transform(anime_df[['episodes', 'members']])

# Encoding categorical variables as integer labels.
# The same encoder object is re-fitted for each column, so after this cell
# `le` only holds the classes of 'type'.
le = LabelEncoder()
anime_df['genre'] = le.fit_transform(anime_df['genre'])
anime_df['type'] = le.fit_transform(anime_df['type'])

# Splitting the dataset into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(anime_df[['genre', 'type', 'episodes', 'members']], anime_df['rating'], test_size=0.2, random_state=42)

### Training Model 💪

In [550]:
# Fit a 5-nearest-neighbours regressor on the training split
knn_model = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Show the fitted estimator as the cell output
knn_model

### Testing Model 🧪

#### Random Selection

In [551]:
# "Random Selection" case: an arbitrary list of watched anime ids,
# printed together with the recommendations obtained with the KNN model
watched_anime_ids = [15335, 15417, 4181, 28851, 918, 2904, 28891, 199, 23273, 24701, 12355, 1575, 263]
get_anime_recommendation(watched_anime_ids, knn_model)

Watched anime:
|   Anime ID | Anime Name                                          |   Rating | Genere                                                                       |
|-----------:|:----------------------------------------------------|---------:|:-----------------------------------------------------------------------------|
|      15335 | Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare |     9.1  | Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen                 |
|      15417 | Gintama': Enchousen                                 |     9.11 | Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen                 |
|       4181 | Clannad: After Story                                |     9.06 | Drama, Fantasy, Romance, Slice of Life, Supernatural                         |
|      28851 | Koe no Katachi                                      |     9.05 | Drama, School, Shounen                                                       |
|        918 | Gintama         

#### Action Anime Selection

In [552]:
# "Action Anime Selection" case: watched ids chosen from action titles
# (per the section heading), run through the KNN model
watched_anime_ids = [20, 4155, 13601, 18115, 205, 6594, 136, 30503, 467, 486]
get_anime_recommendation(watched_anime_ids, knn_model)

Watched anime:
|   Anime ID | Anime Name                              |   Rating | Genere                                                  |
|-----------:|:----------------------------------------|---------:|:--------------------------------------------------------|
|         20 | Naruto                                  |     7.81 | Action, Comedy, Martial Arts, Shounen, Super Power      |
|       4155 | One Piece Film: Strong World            |     8.42 | Action, Adventure, Comedy, Drama, Fantasy, Shounen      |
|      13601 | Psycho-Pass                             |     8.5  | Action, Police, Psychological, Sci-Fi                   |
|      18115 | Magi: The Kingdom of Magic              |     8.5  | Action, Adventure, Fantasy, Magic, Shounen              |
|        205 | Samurai Champloo                        |     8.5  | Action, Adventure, Comedy, Historical, Samurai, Shounen |
|       6594 | Katanagatari                            |     8.49 | Action, Adventure, Historical, Mart

## Decision Tree 🌳

In [553]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

### Training Model 💪

In [554]:
# Scaling numerical variables
scaler = StandardScaler()
anime_df[['episodes', 'members']] = scaler.fit_transform(anime_df[['episodes', 'members']])

# Encoding categorical variables (one encoder object, re-fitted per column)
le = LabelEncoder()
anime_df['genre'] = le.fit_transform(anime_df['genre'])
anime_df['type'] = le.fit_transform(anime_df['type'])

# Splitting the dataset into training and testing sets (50% / 50%)
X_train, X_test, y_train, y_test = train_test_split(anime_df[['genre', 'type', 'episodes', 'members']], anime_df['rating'], test_size=0.5, random_state=48)

# Training the Decision Tree model.
# The tree is seeded so that repeated runs of the notebook build the same
# tree; an unseeded DecisionTreeRegressor may break ties between equally good
# splits differently from run to run.
dt_model = DecisionTreeRegressor(random_state=48)
dt_model.fit(X_train, y_train)

### Testing Model 🧪

#### Random Selection

In [561]:
# "Random Selection" case: an arbitrary list of watched anime ids,
# printed together with the recommendations obtained with the Decision Tree model
watched_anime_ids = [12355, 1575, 263, 28851, 918, 2904, 28891, 199, 23273, 24701]
get_anime_recommendation(watched_anime_ids, dt_model)

Watched anime:
|   Anime ID | Anime Name                         |   Rating | Genere                                                                       |
|-----------:|:-----------------------------------|---------:|:-----------------------------------------------------------------------------|
|      12355 | Ookami Kodomo no Ame to Yuki       |     8.84 | Fantasy, Slice of Life                                                       |
|       1575 | Code Geass: Hangyaku no Lelouch    |     8.83 | Action, Mecha, Military, School, Sci-Fi, Super Power                         |
|        263 | Hajime no Ippo                     |     8.83 | Comedy, Drama, Shounen, Sports                                               |
|      28851 | Koe no Katachi                     |     9.05 | Drama, School, Shounen                                                       |
|        918 | Gintama                            |     9.04 | Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen          

#### Adventure Anime Selection

In [556]:
# "Adventure Anime Selection" case: watched ids chosen from adventure titles
# (per the section heading), run through the Decision Tree model
watched_anime_ids = [47, 589, 17265, 535, 9963, 32551]
get_anime_recommendation(watched_anime_ids, dt_model)

Watched anime:
|   Anime ID | Anime Name                                   |   Rating | Genere                                                    |
|-----------:|:---------------------------------------------|---------:|:----------------------------------------------------------|
|         47 | Akira                                        |     8.15 | Action, Adventure, Horror, Military, Sci-Fi, Supernatural |
|        589 | Ginga Nagareboshi Gin                        |     8.14 | Action, Adventure, Drama, Shounen                         |
|      17265 | Log Horizon                                  |     8.14 | Action, Adventure, Fantasy, Game, Magic, Shounen          |
|        535 | Slayers Next                                 |     8.14 | Adventure, Comedy, Demons, Fantasy, Magic                 |
|       9963 | Detective Conan Movie 15: Quarter of Silence |     8.13 | Adventure, Comedy, Mystery, Police, Shounen               |
|      32551 | Digimon Adventure tri. 3: Kokuhaku     

## Support Vector Machine (SVM) 🤖

In [557]:
from sklearn.svm import SVR
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

### Training Model 💪

In [558]:
# Standardise the episode and member counts
scaler = StandardScaler()
scaled_values = scaler.fit_transform(anime_df[['episodes', 'members']])
anime_df[['episodes', 'members']] = scaled_values

# Label-encode the categorical columns (one encoder object, re-fitted per column)
le = LabelEncoder()
genre_codes = le.fit_transform(anime_df['genre'])
anime_df['genre'] = genre_codes
type_codes = le.fit_transform(anime_df['type'])
anime_df['type'] = type_codes

# Splitting the dataset into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    anime_df[['genre', 'type', 'episodes', 'members']],
    anime_df['rating'],
    test_size=0.2,
    random_state=58,
)

# Fit a support vector regressor with its default settings
svm_model = SVR()
svm_model.fit(X_train, y_train)

### Testing Model 🧪

#### Random Selection

In [559]:
# "Random Selection" case: an arbitrary list of watched anime ids,
# printed together with the recommendations obtained with the SVM model
watched_anime_ids = [15335, 15417, 4181, 28851, 918, 2904, 28891, 199, 23273, 24701, 12355, 1575, 263]
get_anime_recommendation(watched_anime_ids, svm_model)

Watched anime:
|   Anime ID | Anime Name                                          |   Rating | Genere                                                                       |
|-----------:|:----------------------------------------------------|---------:|:-----------------------------------------------------------------------------|
|      15335 | Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare |     9.1  | Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen                 |
|      15417 | Gintama': Enchousen                                 |     9.11 | Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen                 |
|       4181 | Clannad: After Story                                |     9.06 | Drama, Fantasy, Romance, Slice of Life, Supernatural                         |
|      28851 | Koe no Katachi                                      |     9.05 | Drama, School, Shounen                                                       |
|        918 | Gintama         

#### Supernatural Anime Selection

In [560]:
# "Supernatural Anime Selection" case: watched ids chosen from supernatural
# titles (per the section heading), run through the SVM model
watched_anime_ids = [21855, 2154, 861, 12113, 122, 1594, 3588]
get_anime_recommendation(watched_anime_ids, svm_model)

Watched anime:
|   Anime ID | Anime Name                                     |   Rating | Genere                                                                            |
|-----------:|:-----------------------------------------------|---------:|:----------------------------------------------------------------------------------|
|      21855 | Hanamonogatari                                 |     8.11 | Comedy, Mystery, Supernatural                                                     |
|       2154 | Tekkon Kinkreet                                |     8.11 | Action, Adventure, Psychological, Supernatural                                    |
|        861 | xxxHOLiC                                       |     8.11 | Comedy, Drama, Mystery, Psychological, Supernatural                               |
|      12113 | Berserk: Ougon Jidai-hen II - Doldrey Kouryaku |     8.09 | Action, Adventure, Demons, Drama, Fantasy, Horror, Military, Seinen, Supernatural |
|        122 | Full Moon wo Sag