<a href="https://colab.research.google.com/github/TeeshynaJ/-anime_forecasting/blob/main/anime_clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#first we import libraries to use
import pandas as pd
from google.colab import auth
import pandas_gbq
import math
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import random as random
from scipy import stats

# Will collect credentials
auth.authenticate_user()

In [None]:
query = "SELECT * FROM `anime-forecasting.main_table_joined_not_exploaded.left_join_mega_table_exclude_eps_not_exploaded`"

In [None]:
table = pandas_gbq.read_gbq(query, project_id="anime-forecasting")

Downloading: 100%|[32m██████████[0m|


#Machine Learning - Clustering

In [None]:
#scalers
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler
#encoders
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

In [None]:
#neighbors
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.impute import KNNImputer
from sklearn.metrics import classification_report
from sklearn.cluster import KMeans

In [None]:
#validating
from sklearn.model_selection import cross_validate, cross_val_score, cross_val_predict

In [None]:
table.drop_duplicates(subset='idMal',inplace=True)

In [None]:
table.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2521 entries, 0 to 2537
Data columns (total 27 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   idMal                      2521 non-null   Int64  
 1   title                      2521 non-null   object 
 2   format                     2521 non-null   object 
 3   episodes                   2521 non-null   Int64  
 4   duration                   2521 non-null   Int64  
 5   status                     2521 non-null   object 
 6   startDay                   2521 non-null   Int64  
 7   startMonth                 2521 non-null   Int64  
 8   startYear                  2521 non-null   Int64  
 9   meanScore                  2521 non-null   float64
 10  favourites                 2521 non-null   Int64  
 11  genres                     2521 non-null   object 
 12  scoreDistribution_score    2521 non-null   Int64  
 13  statusDistribution_score   2521 non-null   Int64  
 1

In [None]:
table['idMal'].nunique()

2521

In [None]:
table.columns

Index(['idMal', 'title', 'format', 'episodes', 'duration', 'status',
       'startDay', 'startMonth', 'startYear', 'meanScore', 'favourites',
       'genres', 'scoreDistribution_score', 'statusDistribution_score',
       'statusDistribution_status', 'rankings_context', 'tags', 'start_date',
       'external_sites', 'Score', 'Premiered', 'Producers', 'Licensors',
       'Studios', 'Popularity', 'Favorites', 'Scored_by'],
      dtype='object')

In [None]:
table = table.drop(['format','startDay', 'startMonth', 'startYear','scoreDistribution_score', 'statusDistribution_score',
       'statusDistribution_status', 'rankings_context','start_date',
       'external_sites', 'Score', 'Premiered', 'Producers', 'Licensors',
       'Studios','Scored_by'], axis=1)

In [None]:
table = table.drop(['Popularity', 'Favorites'], axis=1)

In [None]:
table.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2521 entries, 0 to 2537
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   idMal       2521 non-null   Int64  
 1   title       2521 non-null   object 
 2   episodes    2521 non-null   Int64  
 3   duration    2521 non-null   Int64  
 4   status      2521 non-null   object 
 5   meanScore   2521 non-null   float64
 6   favourites  2521 non-null   Int64  
 7   genres      2521 non-null   object 
 8   tags        2521 non-null   object 
dtypes: Int64(4), float64(1), object(4)
memory usage: 206.8+ KB


In [None]:
table['genres'] = table['genres'].apply(lambda x: x.strip('][').split(', '))

In [None]:
table.explode('genres')

Unnamed: 0,idMal,title,episodes,duration,status,meanScore,favourites,genres,tags
0,55848,Suicide Squad ISEKAI,10,24,FINISHED,63.0,841,'Action',"['Isekai', 'Anti-Hero', 'Crime', 'Crossover', ..."
0,55848,Suicide Squad ISEKAI,10,24,FINISHED,63.0,841,'Adventure',"['Isekai', 'Anti-Hero', 'Crime', 'Crossover', ..."
0,55848,Suicide Squad ISEKAI,10,24,FINISHED,63.0,841,'Fantasy',"['Isekai', 'Anti-Hero', 'Crime', 'Crossover', ..."
0,55848,Suicide Squad ISEKAI,10,24,FINISHED,63.0,841,'Sci-Fi',"['Isekai', 'Anti-Hero', 'Crime', 'Crossover', ..."
1,35333,Hitori no Shita - The Outcast 2,24,24,FINISHED,71.0,218,'Action',"['Super Power', 'Urban Fantasy', 'Martial Arts..."
...,...,...,...,...,...,...,...,...,...
2536,960,Legend of the Condor Hero II,26,20,FINISHED,68.0,6,'Adventure',['Historical']
2536,960,Legend of the Condor Hero II,26,20,FINISHED,68.0,6,'Drama',['Historical']
2536,960,Legend of the Condor Hero II,26,20,FINISHED,68.0,6,'Romance',['Historical']
2537,50185,Salaryman's Club,12,23,FINISHED,72.0,338,'Drama',"['Badminton', 'Work', 'Primarily Male Cast', '..."


In [None]:
from collections import Counter
genres_table = (table
 .set_index('idMal')
 .genres
 .map(Counter)
 .apply(pd.Series)
 .fillna(0, downcast='infer')
 .reset_index()
)

  genres_table = (table


In [None]:
table['tags'] = table['tags'].apply(lambda x: x.strip('][').split(', '))

In [None]:
tags_table = (table
 .set_index('idMal')
 .tags
 .map(Counter)
 .apply(pd.Series)
 .fillna(0, downcast='infer')
 .reset_index()
)

  tags_table = (table


In [None]:
genres_table

Unnamed: 0,idMal,'Action','Adventure','Fantasy','Sci-Fi','Supernatural','Drama','Mystery','Comedy','Sports','Horror','Ecchi','Romance','Psychological','Music','Mahou Shoujo','Slice of Life','Mecha','Thriller',Unnamed: 20
0,55848,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,35333,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,54492,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
3,43989,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,44863,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2516,257,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2517,1142,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0
2518,16353,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0
2519,960,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0


In [None]:
tags_table

Unnamed: 0,idMal,'Isekai','Anti-Hero','Crime','Crossover','Female Protagonist','Super Power','War','Magic','Primarily Adult Cast',...,'Irrumatio','Virginity','Jazz Music','Nakadashi','Anal Sex','Defloration','Flat Chest','Blackmail','Asphyxiation','Sex Toys'
0,55848,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,35333,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,54492,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,43989,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,44863,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2516,257,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2517,1142,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2518,16353,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2519,960,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
tags_table.value_counts()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0,Unnamed: 19_level_0,Unnamed: 20_level_0,Unnamed: 21_level_0,Unnamed: 22_level_0,Unnamed: 23_level_0,Unnamed: 24_level_0,Unnamed: 25_level_0,Unnamed: 26_level_0,Unnamed: 27_level_0,Unnamed: 28_level_0,Unnamed: 29_level_0,Unnamed: 30_level_0,Unnamed: 31_level_0,Unnamed: 32_level_0,Unnamed: 33_level_0,Unnamed: 34_level_0,Unnamed: 35_level_0,Unnamed: 36_level_0,Unnamed: 37_level_0,Unnamed: 38_level_0,Unnamed: 39_level_0,Unnamed: 40_level_0,Unnamed: 41_level_0,Unnamed: 42_level_0,Unnamed: 43_level_0,Unnamed: 44_level_0,Unnamed: 45_level_0,Unnamed: 46_level_0,Unnamed: 47_level_0,Unnamed: 48_level_0,Unnamed: 49_level_0,Unnamed: 50_level_0,Unnamed: 51_level_0,Unnamed: 52_level_0,Unnamed: 53_level_0,Unnamed: 54_level_0,Unnamed: 55_level_0,Unnamed: 56_level_0,Unnamed: 57_level_0,Unnamed: 58_level_0,Unnamed: 59_level_0,Unnamed: 60_level_0,Unnamed: 61_level_0,Unnamed: 62_level_0,Unnamed: 63_level_0,Unnamed: 64_level_0,Unnamed: 65_level_0,Unnamed: 66_level_0,Unnamed: 67_level_0,Unnamed: 68_level_0,Unnamed: 69_level_0,Unnamed: 70_level_0,Unnamed: 71_level_0,Unnamed: 72_level_0,Unnamed: 73_level_0,Unnamed: 74_level_0,Unnamed: 75_level_0,Unnamed: 76_level_0,Unnamed: 77_level_0,Unnamed: 78_level_0,Unnamed: 79_level_0,Unnamed: 80_level_0,Unnamed: 81_level_0,Unnamed: 82_level_0,Unnamed: 83_level_0,Unnamed: 84_level_0,Unnamed: 85_level_0,Unnamed: 86_level_0,Unnamed: 87_level_0,Unnamed: 88_level_0,Unnamed: 89_level_0,Unnamed: 90_level_0,Unnamed: 91_level_0,Unnamed: 92_level_0,Unnamed: 93_level_0,Unnamed: 94_level_0,Unnamed: 95_level_0,Unnamed: 96_level_0,Unnamed: 97_level_0,Unnamed: 98_level_0,Unnamed: 99_level_0,Unnamed: 100_level_0,Unnamed: 101_level_0,Unnamed: 102_level_0,Unnamed: 103_level_0,Unnamed: 104_level_0,Unnamed: 105_level_0,Unnamed: 106_level_0,Unnamed: 107_level_0,Unnamed: 108_level_0,Unnamed: 109_level_0,Unnamed: 110_level_0,Unnamed: 111_level_0,Unnamed: 112_level_0,Unnamed: 113_level_0,Unnamed: 114_level_0,Unnamed: 115_level_0,Unnamed: 116_level_0,Unnamed: 117_level_0,Unnamed: 118_level_0,Unnamed: 119_level_0,Unnamed: 120_level_0,Unnamed: 121_level_0,Unnamed: 122_level_0,Unnamed: 123_level_0,Unnamed: 124_level_0,Unnamed: 125_level_0,Unnamed: 126_level_0,Unnamed: 127_level_0,Unnamed: 128_level_0,Unnamed: 129_level_0,Unnamed: 130_level_0,Unnamed: 131_level_0,Unnamed: 132_level_0,Unnamed: 133_level_0,Unnamed: 134_level_0,Unnamed: 135_level_0,Unnamed: 136_level_0,Unnamed: 137_level_0,Unnamed: 138_level_0,Unnamed: 139_level_0,Unnamed: 140_level_0,Unnamed: 141_level_0,Unnamed: 142_level_0,Unnamed: 143_level_0,Unnamed: 144_level_0,Unnamed: 145_level_0,Unnamed: 146_level_0,Unnamed: 147_level_0,Unnamed: 148_level_0,Unnamed: 149_level_0,Unnamed: 150_level_0,Unnamed: 151_level_0,Unnamed: 152_level_0,Unnamed: 153_level_0,Unnamed: 154_level_0,Unnamed: 155_level_0,Unnamed: 156_level_0,Unnamed: 157_level_0,Unnamed: 158_level_0,Unnamed: 159_level_0,Unnamed: 160_level_0,Unnamed: 161_level_0,Unnamed: 162_level_0,Unnamed: 163_level_0,Unnamed: 164_level_0,Unnamed: 165_level_0,Unnamed: 166_level_0,Unnamed: 167_level_0,Unnamed: 168_level_0,Unnamed: 169_level_0,Unnamed: 170_level_0,Unnamed: 171_level_0,Unnamed: 172_level_0,Unnamed: 173_level_0,Unnamed: 174_level_0,Unnamed: 175_level_0,Unnamed: 176_level_0,Unnamed: 177_level_0,Unnamed: 178_level_0,Unnamed: 179_level_0,Unnamed: 180_level_0,Unnamed: 181_level_0,Unnamed: 182_level_0,Unnamed: 183_level_0,Unnamed: 184_level_0,Unnamed: 185_level_0,Unnamed: 186_level_0,Unnamed: 187_level_0,Unnamed: 188_level_0,Unnamed: 189_level_0,Unnamed: 190_level_0,Unnamed: 191_level_0,Unnamed: 192_level_0,Unnamed: 193_level_0,Unnamed: 194_level_0,Unnamed: 195_level_0,Unnamed: 196_level_0,Unnamed: 197_level_0,Unnamed: 198_level_0,Unnamed: 199_level_0,Unnamed: 200_level_0,Unnamed: 201_level_0,Unnamed: 202_level_0,Unnamed: 203_level_0,Unnamed: 204_level_0,Unnamed: 205_level_0,Unnamed: 206_level_0,Unnamed: 207_level_0,Unnamed: 208_level_0,Unnamed: 209_level_0,Unnamed: 210_level_0,Unnamed: 211_level_0,Unnamed: 212_level_0,Unnamed: 213_level_0,Unnamed: 214_level_0,Unnamed: 215_level_0,Unnamed: 216_level_0,Unnamed: 217_level_0,Unnamed: 218_level_0,Unnamed: 219_level_0,Unnamed: 220_level_0,Unnamed: 221_level_0,Unnamed: 222_level_0,Unnamed: 223_level_0,Unnamed: 224_level_0,Unnamed: 225_level_0,Unnamed: 226_level_0,Unnamed: 227_level_0,Unnamed: 228_level_0,Unnamed: 229_level_0,Unnamed: 230_level_0,Unnamed: 231_level_0,Unnamed: 232_level_0,Unnamed: 233_level_0,Unnamed: 234_level_0,Unnamed: 235_level_0,Unnamed: 236_level_0,Unnamed: 237_level_0,Unnamed: 238_level_0,Unnamed: 239_level_0,Unnamed: 240_level_0,Unnamed: 241_level_0,Unnamed: 242_level_0,Unnamed: 243_level_0,Unnamed: 244_level_0,Unnamed: 245_level_0,Unnamed: 246_level_0,Unnamed: 247_level_0,Unnamed: 248_level_0,Unnamed: 249_level_0,Unnamed: 250_level_0,Unnamed: 251_level_0,Unnamed: 252_level_0,Unnamed: 253_level_0,Unnamed: 254_level_0,Unnamed: 255_level_0,Unnamed: 256_level_0,Unnamed: 257_level_0,Unnamed: 258_level_0,Unnamed: 259_level_0,Unnamed: 260_level_0,Unnamed: 261_level_0,Unnamed: 262_level_0,Unnamed: 263_level_0,Unnamed: 264_level_0,Unnamed: 265_level_0,Unnamed: 266_level_0,Unnamed: 267_level_0,Unnamed: 268_level_0,Unnamed: 269_level_0,Unnamed: 270_level_0,Unnamed: 271_level_0,Unnamed: 272_level_0,Unnamed: 273_level_0,Unnamed: 274_level_0,Unnamed: 275_level_0,Unnamed: 276_level_0,Unnamed: 277_level_0,Unnamed: 278_level_0,Unnamed: 279_level_0,Unnamed: 280_level_0,Unnamed: 281_level_0,Unnamed: 282_level_0,Unnamed: 283_level_0,Unnamed: 284_level_0,Unnamed: 285_level_0,Unnamed: 286_level_0,Unnamed: 287_level_0,Unnamed: 288_level_0,Unnamed: 289_level_0,Unnamed: 290_level_0,Unnamed: 291_level_0,Unnamed: 292_level_0,Unnamed: 293_level_0,Unnamed: 294_level_0,Unnamed: 295_level_0,Unnamed: 296_level_0,Unnamed: 297_level_0,Unnamed: 298_level_0,Unnamed: 299_level_0,Unnamed: 300_level_0,Unnamed: 301_level_0,Unnamed: 302_level_0,Unnamed: 303_level_0,Unnamed: 304_level_0,Unnamed: 305_level_0,Unnamed: 306_level_0,Unnamed: 307_level_0,Unnamed: 308_level_0,Unnamed: 309_level_0,Unnamed: 310_level_0,Unnamed: 311_level_0,Unnamed: 312_level_0,Unnamed: 313_level_0,Unnamed: 314_level_0,Unnamed: 315_level_0,Unnamed: 316_level_0,Unnamed: 317_level_0,Unnamed: 318_level_0,Unnamed: 319_level_0,Unnamed: 320_level_0,Unnamed: 321_level_0,Unnamed: 322_level_0,Unnamed: 323_level_0,Unnamed: 324_level_0,Unnamed: 325_level_0,Unnamed: 326_level_0,Unnamed: 327_level_0,Unnamed: 328_level_0,Unnamed: 329_level_0,Unnamed: 330_level_0,Unnamed: 331_level_0,Unnamed: 332_level_0,Unnamed: 333_level_0,Unnamed: 334_level_0,Unnamed: 335_level_0,Unnamed: 336_level_0,Unnamed: 337_level_0,Unnamed: 338_level_0,Unnamed: 339_level_0,Unnamed: 340_level_0,Unnamed: 341_level_0,Unnamed: 342_level_0,Unnamed: 343_level_0,Unnamed: 344_level_0,Unnamed: 345_level_0,Unnamed: 346_level_0,Unnamed: 347_level_0,Unnamed: 348_level_0,Unnamed: 349_level_0,Unnamed: 350_level_0,Unnamed: 351_level_0,Unnamed: 352_level_0,Unnamed: 353_level_0,Unnamed: 354_level_0,Unnamed: 355_level_0,Unnamed: 356_level_0,Unnamed: 357_level_0,Unnamed: 358_level_0,Unnamed: 359_level_0,Unnamed: 360_level_0,Unnamed: 361_level_0,Unnamed: 362_level_0,Unnamed: 363_level_0,Unnamed: 364_level_0,Unnamed: 365_level_0,count
idMal,'Isekai','Anti-Hero','Crime','Crossover','Female Protagonist','Super Power','War','Magic','Primarily Adult Cast','Conspiracy','Guns','Prison','Language Barrier','Shapeshifting','Meta','Swordplay','Primarily Male Cast','Henshin','Dragons','CGI','Necromancy','Skeleton','Elf','Heterosexual','Nudity','Urban Fantasy','Martial Arts','Male Protagonist','Wuxia','Medicine','Historical','Detective','Foreign','Politics','Ancient China','Drugs','Work','Royal Affairs','Adoption','Unrequited Love','Alchemy','Maids','Tragedy','Food','Slavery','Chibi','Asexual','Episodic','Go',Unnamed: 50_level_1,'Cars','Classic Literature','Motorcycles','Kids','Time Manipulation','Travel','Ships','Ghost','Primarily Child Cast','School','Youkai','Afterlife','Parody','Demons','Animals','Ensemble Cast','Tanned Skin','Found Family','Military','Robots','Aliens','Teacher','Seinen','Time Skip','Space','Environmental','Slapstick','Rural','Outdoor','Super Robot','Proxy Battle','Denpa','Dissociative Identities','Cosmic Horror','Rehabilitation','Cult','Mythology','Bullying','Philosophy','Love Triangle','Kuudere','Card Battle','Idol','Cute Girls Doing Cute Things','Amnesia','Gore','Survival','Body Horror','Dancing','Vampire','Coming of Age','Incest','Feet','Snowscape','Primarily Female Cast','Achronological Order','Gods','Primarily Teen Cast','Post-Apocalyptic','School Club','Memory Manipulation','Religion','Drawing','Photography','Filmmaking','Archery','Cute Boys Doing Cute Things','Tsundere','Twins','Anachronism','Bisexual','Video Games','Full CGI','Urban','Mopeds','Disability','Dungeon','Kemonomimi','Zombie','Veterinarian','Reincarnation','Anthology','Anthropomorphism','Shounen','Cultivation','Superhero','Gender Bending','Revenge','Cyborg','Dinosaurs','Body Swapping','Battle Royale','Police','Wrestling','Surreal Comedy','Dystopian','Real Robot','Shoujo','Tomboy','Tokusatsu','LGBTQ+ Themes','Age Gap','Yuri','Alternate Universe','Shrine Maiden','Makeup','Gambling','Volleyball','Female Harem','College','Death Game','Fairy Tale','Terrorism','MILF','Boarding School','Arranged Marriage','Band','Hip-hop Music','Bar','Rock Music','Fugitive','Monster Boy','Estranged Family','Kingdom Management','Primarily Animal Cast','Kaiju','Artificial Intelligence','Delinquents','Cyberpunk','Monster Girl','Yandere','Rape','Family Life','Coastal','Iyashikei','Mafia','Trains','Noir','Vore','Samurai','Cannibalism','Mermaid','Watersports','Bondage','Masochism','Tentacles','Gyaru','Chuunibyou','Musical Theater','Goblin','Rugby','Educational','Orphan','Ninja','Transgender','Clone','Lost Civilization','Pandemic','Angels','Aviation','Classical Music','Ice Skating','Athletics','Butler','Psychosexual','Board Game','Villainess','Josei','Ojou-sama','Shogi','Parkour','Achromatic','Homeless','Espionage','Suicide','Nun','Stop Motion','Otaku Culture','Astronomy',"""Boys' Love""",'Virtual World','Crossdressing','Restaurant','Torture','Time Loop','Witch','Assassins','Large Breasts','Fitness','Agriculture','Fairy','Nekomimi','Fake Relationship','Parenthood','Tennis','Football','Boxing','Femboy','Natural Disaster','Space Opera','Desert','Steampunk','Fashion','Pirates','Male Harem','Class Struggle','Matriarchy','Cheerleading','Age Regression','Werewolf','Satire','Baseball','Spearplay','Sadism','Poker','Lacrosse','Acting','Polyamorous','Chimera','Economics','Gangs','Succubus','Fishing','Aromantic','Writing','Calligraphy','Agender','Dullahan','Basketball','Mixed Gender Harem','Hikikomori','Cosplay','Yakuza','Circus','Inn','Advertisement','POV','Badminton','Marriage','Criminal Organization','Swimming','Human Pet','Tanks','Femdom','Rotoscoping','Rakugo','Masturbation','Office Lady','Office','Autobiographical','Konbini','E-Sports','Exhibitionism','Handjob','Threesome','Judo','Biographical','Triads','Software Development','Mahjong','Oiran','Vikings','Skateboarding','Cowboys','Acrobatics','Group Sex','Airsoft','Inseki','Augmented Reality','Karuta','Puppetry','Netorare','Mountaineering','Ahegao','Horticulture','Hypersexuality','Bowling','Firefighters','VTuber','Ero Guro','Cycling','Centaur','Sumo','Fencing','Surfing','Golf','Table Tennis','American Football','Pregnant','Boobjob','Fellatio','Squirting','Metal Music','Elderly Protagonist','Scuba Diving','Prostitution','Scat','Omegaverse','Eco-Horror','Cunnilingus','Handball','Lactation','Amputation','Facial','Futanari','Irrumatio','Virginity','Jazz Music','Nakadashi','Anal Sex','Defloration','Flat Chest','Blackmail','Asphyxiation','Sex Toys',Unnamed: 366_level_1
1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
37029,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
36906,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
36934,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
36949,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6512,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
6547,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
6573,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
6574,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [None]:
tags_remove = pd.DataFrame(tags_table.sum(axis=0))

In [None]:
tags_remove

Unnamed: 0,0
idMal,60398653
'Isekai',263
'Anti-Hero',216
'Crime',165
'Crossover',16
...,...
'Defloration',1
'Flat Chest',1
'Blackmail',1
'Asphyxiation',1


In [None]:
col_remove =list(tags_remove[tags_remove[0] < 6].index)

In [None]:
tags_table=tags_table.drop(columns=col_remove)

In [None]:
tags_table

Unnamed: 0,idMal,'Isekai','Anti-Hero','Crime','Crossover','Female Protagonist','Super Power','War','Magic','Primarily Adult Cast',...,'Airsoft','Inseki','Augmented Reality','Karuta','Netorare','VTuber','Cycling','Centaur','American Football','Elderly Protagonist'
0,55848,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,35333,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,54492,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,43989,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,44863,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2516,257,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2517,1142,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2518,16353,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2519,960,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
joined_table = pd.concat([table,genres_table,tags_table],axis=1)

In [None]:
joined_table = joined_table.drop(['genres','tags'], axis=1)

In [None]:
joined_table

Unnamed: 0,idMal,title,episodes,duration,status,meanScore,favourites,idMal.1,'Action','Adventure',...,'Airsoft','Inseki','Augmented Reality','Karuta','Netorare','VTuber','Cycling','Centaur','American Football','Elderly Protagonist'
0,55848,Suicide Squad ISEKAI,10,24,FINISHED,63.0,841,55848,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,35333,Hitori no Shita - The Outcast 2,24,24,FINISHED,71.0,218,35333,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,54492,The Apothecary Diaries,24,23,FINISHED,88.0,10978,54492,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,43989,Pleasant Goat and Big Big Wolf: Paddi the Amaz...,52,25,FINISHED,35.0,0,43989,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,44863,Magic Wonderland,26,22,FINISHED,51.0,0,44863,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1685,,,,,,,,50346,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2154,,,,,,,,38853,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2155,,,,,,,,39535,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2468,,,,,,,,48555,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
model_table = joined_table.drop(['idMal','duration','title','status','favourites'], axis=1)

In [None]:
new_table = table.groupby('idMal').count()

In [None]:
new_table[new_table['title']>1]

Unnamed: 0_level_0,title,episodes,duration,status,meanScore,favourites,genres,tags
idMal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


In [None]:
model_table.isnull().sum()

Unnamed: 0,0
episodes,17
meanScore,17
'Action',17
'Adventure',17
'Fantasy',17
...,...
'VTuber',17
'Cycling',17
'Centaur',17
'American Football',17


In [None]:
joined_table.isnull().sum()

Unnamed: 0,0
idMal,17
title,17
episodes,17
duration,17
status,17
...,...
'VTuber',17
'Cycling',17
'Centaur',17
'American Football',17


In [None]:
new_table.isnull().sum()

Unnamed: 0,0
title,0
episodes,0
duration,0
status,0
meanScore,0
favourites,0
genres,0
tags,0


In [None]:
model_table.dropna(inplace=True)

In [None]:
joined_table.dropna(inplace=True)

In [None]:
table.reset_index(inplace=True)

In [None]:
table[table['idMal']== 2521]

Unnamed: 0,index,idMal,title,episodes,duration,status,meanScore,favourites,genres,tags
902,908,2521,Great Dangaioh,13,23,FINISHED,52.0,9,"['Action', 'Mecha', 'Sci-Fi']","['Robots', 'Male Protagonist', 'Female Protago..."


#Predicting

In [None]:
model_table

Unnamed: 0,episodes,meanScore,'Action','Adventure','Fantasy','Sci-Fi','Supernatural','Drama','Mystery','Comedy',...,'Airsoft','Inseki','Augmented Reality','Karuta','Netorare','VTuber','Cycling','Centaur','American Football','Elderly Protagonist'
0,10,63.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,24,71.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,24,88.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,52,35.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,26,51.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2516,12,68.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2517,22,56.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2518,22,61.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2519,26,62.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
joined_table

Unnamed: 0,idMal,title,episodes,duration,status,meanScore,favourites,idMal.1,'Action','Adventure',...,'Airsoft','Inseki','Augmented Reality','Karuta','Netorare','VTuber','Cycling','Centaur','American Football','Elderly Protagonist'
0,55848,Suicide Squad ISEKAI,10,24,FINISHED,63.0,841,55848,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,35333,Hitori no Shita - The Outcast 2,24,24,FINISHED,71.0,218,35333,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,54492,The Apothecary Diaries,24,23,FINISHED,88.0,10978,54492,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,43989,Pleasant Goat and Big Big Wolf: Paddi the Amaz...,52,25,FINISHED,35.0,0,43989,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,44863,Magic Wonderland,26,22,FINISHED,51.0,0,44863,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2516,56838,"Studio Apartment, Good Lighting, Angel Included",12,24,FINISHED,68.0,497,257,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2517,1537,Ryu the Primitive Boy,22,25,FINISHED,56.0,8,1142,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2518,2703,Super YoYo,22,23,FINISHED,61.0,11,16353,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2519,2501,Papuwa,26,25,FINISHED,62.0,5,960,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Define X and y
X = model_table #the genres and tags
y = model_table['meanScore'] #"meanScore", "Popularity", "favourites" -- possible y columns?

# Scale the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

knn_model = KNeighborsRegressor().fit(X_scaled,y) # Instanciate and train model

In [None]:
knn_stored = {}
knn_list = []

In [None]:
def find_nearest(idMal):
    index = np.where(joined_table['idMal'] == idMal)[0][0]
    test_item = joined_table.iloc[index:(index+1)]
    X_new = test_item.drop(['idMal','duration','title','status','favourites'], axis=1)
    X_new_scaled = scaler.transform(X_new)
    ind_list = list(knn_model.kneighbors(X_new_scaled,n_neighbors=10)[1][0])[1:]
    ids = joined_table.iloc[ind_list]
    knn_list.append(ids)
    return ids


In [None]:
new_joined_table = joined_table.loc[:, ~joined_table.columns.duplicated()]

In [None]:
new_joined_table[new_joined_table['idMal'] == 38084]

Unnamed: 0,idMal,title,episodes,duration,status,meanScore,favourites,'Action','Adventure','Fantasy',...,'Airsoft','Inseki','Augmented Reality','Karuta','Netorare','VTuber','Cycling','Centaur','American Football','Elderly Protagonist'
879,38084,Fate/Grand Order Absolute Demonic Front: Babyl...,21,24,FINISHED,78.0,1808,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
find_nearest(38084) #20 - naruto, 1535 - death note

Unnamed: 0,idMal,title,episodes,duration,status,meanScore,favourites,idMal.1,'Action','Adventure',...,'Airsoft','Inseki','Augmented Reality','Karuta','Netorare','VTuber','Cycling','Centaur','American Football','Elderly Protagonist'
504,11837,Zetman,13,24,FINISHED,63.0,180,32608,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1655,50307,TONIKAWA: Over The Moon For You Season 2,12,24,FINISHED,76.0,1430,4415,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1061,52990,Our Dating Story: The Experienced You and The ...,12,24,FINISHED,66.0,1183,2826,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1698,50346,Call of the Night,13,23,FINISHED,79.0,6690,2980,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
468,1161,MAZE (TV),25,24,FINISHED,62.0,6,1005,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
60,31427,Zinba,52,22,FINISHED,63.0,11,31427,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1303,50204,Tokyo 24th Ward,12,24,FINISHED,61.0,194,56420,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1528,30895,Haruchika - Haruta & Chika,12,24,FINISHED,58.0,79,37576,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1854,25283,Sky Wizards Academy,12,24,FINISHED,58.0,347,51064,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
