## Importing Required Modules

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt     
import seaborn as sns
from google.colab import files
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
import tensorflow as tf

print(tf.__version__)

2.3.0


## Fetching the Dataset as a Pandas DataFrame

In [2]:
anime_df = pd.read_csv('anime.csv')
anime_df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


Analysing all the types in dataset , replacing the nan type is required

In [3]:
types = anime_df['type'].unique().tolist()
print("Number of Types : {} \nTypes: \n".format(len(types)) , types)

Number of Types : 7 
Types: 
 ['Movie', 'TV', 'OVA', 'Special', 'Music', 'ONA', nan]


## Tuning the Dataset
First , We will fill all the empty/'Unknown data' data cells with data individually.
Like for the type movie , the episodes should be 1 so I will fill all the empty episode cells of type movie with 1
Then , for Hentai also it has mostly 1 ep so I will do the same thing for hentai as well.

Getting the number of None type datacells in each column

In [4]:
anime_df.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [5]:
anime_df.loc[(anime_df['genre'] == 'Hentai') & (anime_df['episodes'] == 'Unknown')] = 1

In [6]:
anime_df.loc[(anime_df['genre'] == 'Movie') & (anime_df['episodes'] == 'Unknown')] = 1

As you can see below songs also have only one episode so we will replace the empty datacells in episodes column of Music type by 1

In [7]:
anime_df[anime_df['type'] == 'Music'].head(3)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
169,34240,Shelter,"Music, Sci-Fi",Music,1,8.38,71136
336,731,Interstella5555: The 5tory of The 5ecret 5tar ...,"Adventure, Drama, Music, Sci-Fi",Music,1,8.17,31464
533,17949,The Everlasting Guilty Crown,Music,Music,1,8.0,11663


In [8]:
anime_df.loc[(anime_df['type'] == 'Music') & (anime_df['episodes'] == 'Unknown')] = 1

#### Replacing the remaining 'Unknown' data cells in episodes by nan type

In [9]:
anime_df['episodes'] = anime_df['episodes'].map(lambda x: np.nan if x == 'Unknown' else x)

Filling all the nan types by the episodes column's median

In [10]:
anime_df['episodes'].fillna(anime_df['episodes'].median() , inplace = True)

In [11]:
anime_df['episodes'].isnull().sum()

0

In [12]:
anime_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12069 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


Changing data types of columns

In [13]:
anime_df['name'] == anime_df['name'].replace(['Itadaki! Seieki♥'] , 'Itadaki! Seieki')

0        True
1        True
2        True
3        True
4        True
         ... 
12289    True
12290    True
12291    True
12292    True
12293    True
Name: name, Length: 12294, dtype: bool

In [14]:
anime_df['members'] = anime_df['members'].astype(float)

In [15]:
anime_df['rating'].fillna(anime_df['rating'].median() , inplace = True)

In [16]:
anime_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12294 non-null  float64
 6   members   12294 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 672.5+ KB


In [17]:
anime_df.replace(to_replace ="Itadaki! Seieki♥", 
                 value ='Itadaki! Seieki' , inplace = True) 

## Creating another dataset with Relevant Features and one hot encoding using get_dummies on genre and type column

In [18]:
anime_data = pd.concat([anime_df['genre'].str.get_dummies(sep = ','),
                         pd.get_dummies(anime_df['type']),
                         anime_df['episodes'],
                         anime_df['rating'],
                         anime_df['members'],
                         ] , axis = 1)

## How the Dataset looks after one hot encoding 

In [19]:
anime_data.head()

Unnamed: 0,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,Harem,Hentai,Historical,Horror,Josei,Kids,Magic,Martial Arts,Mecha,Military,Music,Mystery,Parody,Police,Psychological,Romance,Samurai,School,Sci-Fi,Seinen,Shoujo,Shoujo Ai,Shounen,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,...,Harem.1,Hentai.1,Historical.1,Horror.1,Josei.1,Kids.1,Magic.1,Martial Arts.1,Mecha.1,Military.1,Music.1,Mystery.1,Parody.1,Police.1,Psychological.1,Romance.1,Samurai.1,School.1,Sci-Fi.1,Seinen.1,Shoujo.1,Shounen.1,Slice of Life.1,Space.1,Sports.1,Super Power.1,Supernatural.1,Thriller.1,Vampire.1,Yaoi,1,Movie,Music.2,ONA,OVA,Special,TV,episodes,rating,members
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,9.37,200630.0
1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,64,9.26,793665.0
2,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,51,9.25,114262.0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,24,9.17,673572.0
4,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,51,9.16,151266.0


In [20]:
scaler = MinMaxScaler(feature_range=(0,1))

anime_data_scaled = scaler.fit_transform(anime_data)
np.round(anime_data_scaled , decimals = 2)
anime_data_scaled

array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 9.30000000e-01, 1.97875366e-01],
       [1.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        3.46725371e-02, 9.17777778e-01, 7.82770959e-01],
       [0.00000000e+00, 0.00000000e+00, 1.00000000e+00, ...,
        2.75178866e-02, 9.16666667e-01, 1.12692767e-01],
       ...,
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        1.65107320e-03, 4.31111111e-01, 2.15007949e-04],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 4.42222222e-01, 1.71611850e-04],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 4.95555556e-01, 1.39064775e-04]])

In [21]:
anime_data_scaled.shape

(12294, 93)

## Using Nearest Neighbours unsupervised learning with ball_tree algorithm and 6 nearest neighbors

Here the nearest neighbours will represent the animes that are similar to an anime

### Fitting Data

In [22]:
nn_bt = NearestNeighbors(n_neighbors=6 , algorithm='ball_tree').fit(anime_data_scaled)

### Taking Distances and Indices of the 5 closest Animes and itself (5+1 = 6) from it

In [23]:
distances , indices = nn_bt.kneighbors(anime_data_scaled)

In [24]:
print("Distances shape : {} \nIndices Shape: {} \nDistances data overview : {} \nIndices data overview : {}".format(distances.shape , indices.shape , distances[0], indices[0]))

Distances shape : (12294, 6) 
Indices Shape: (12294, 6) 
Distances data overview : [0.         1.01633857 1.03484164 1.03547556 1.41673596 1.43504703] 
Indices data overview : [   0  208 1494 1959   60  894]


### Creating a function that would return the index of the anime when provided its name

In [25]:
def get_index(name):
  try:
    index = anime_df[anime_df['name'] == name].index.tolist()[0]
  except:
    return "Could not find the Anime"
  return index

get_index('Steins;Gate')

3

### Creating a function that would return the name of the anime when provided its index

In [26]:
def get_name(id):
  try:
    name = anime_df[anime_df.index == id]['name'].tolist()[0]
  except:
    return "Could not find the Anime"
  return name

get_name(2)

'Gintama°'

## Creating a function that prints all the relevant data about the anime

In [27]:
def get_info(id):
  print("Name :" , anime_df[anime_df.index == id]['name'].tolist()[0])
  print("Rating :" , anime_df[anime_df.index == id]['rating'].tolist()[0])
  print("Number of Episodes :" , anime_df[anime_df.index == id]['episodes'].tolist()[0])
  print("Genre :" , anime_df[anime_df.index == id]['genre'].tolist()[0])
  print("Type :" , anime_df[anime_df.index == id]['type'].tolist()[0])
  print("Number of Members : " , anime_df[anime_df.index == id]['members'].tolist()[0])

get_info(3)

Name : Steins;Gate
Rating : 9.17
Number of Episodes : 24
Genre : Sci-Fi, Thriller
Type : TV
Number of Members :  673572.0


## Creating a function that will reccomend the user anime based on an anime the user likes

In [28]:
def reccomend_me(name = None , id = None):
  if name != None:
    id = get_index(name)
  print("Here are some of the Animes you would like to watch :")

  for index in indices[id][1:]:   #the first index in indices will be the anime itself so we have to print [1:] i.e. the other animes
    print("------------------------------------------------------------------")
    get_info(index)

reccomend_me('Shingeki no Kyojin')

Here are some of the Animes you would like to watch :
------------------------------------------------------------------
Name : Shingeki no Kyojin Season 2
Rating : 6.56
Number of Episodes : 2.0
Genre : Action, Drama, Fantasy, Shounen, Super Power
Type : TV
Number of Members :  170054.0
------------------------------------------------------------------
Name : One Piece
Rating : 8.58
Number of Episodes : 2.0
Genre : Action, Adventure, Comedy, Drama, Fantasy, Shounen, Super Power
Type : TV
Number of Members :  504862.0
------------------------------------------------------------------
Name : Shingeki no Kyojin OVA
Rating : 7.88
Number of Episodes : 3
Genre : Action, Drama, Fantasy, Shounen, Super Power
Type : OVA
Number of Members :  121063.0
------------------------------------------------------------------
Name : Utawarerumono: Itsuwari no Kamen
Rating : 7.35
Number of Episodes : 25
Genre : Action, Drama, Fantasy
Type : TV
Number of Members :  55851.0
----------------------------------

## Run this cell if you want Reccomendations

In [29]:
anime_name = input("Enter Name of an anime you like : ")

try:
  anime_df[anime_df['name'] == anime_name]
except:
  print("NO SUCH ANIME FOUND")

reccomend_me(anime_name)

Enter Name of an anime you like : Kimi no Na wa.
Here are some of the Animes you would like to watch :
------------------------------------------------------------------
Name : Kokoro ga Sakebitagatterunda.
Rating : 8.32
Number of Episodes : 1
Genre : Drama, Romance, School
Type : Movie
Number of Members :  59652.0
------------------------------------------------------------------
Name : Harmonie
Rating : 7.52
Number of Episodes : 1
Genre : Drama, School, Supernatural
Type : Movie
Number of Members :  29029.0
------------------------------------------------------------------
Name : Air Movie
Rating : 7.39
Number of Episodes : 1
Genre : Drama, Romance, Supernatural
Type : Movie
Number of Members :  44179.0
------------------------------------------------------------------
Name : Hotarubi no Mori e
Rating : 8.61
Number of Episodes : 1
Genre : Drama, Romance, Shoujo, Supernatural
Type : Movie
Number of Members :  197439.0
------------------------------------------------------------------


## Trying the reccomendation system on Dragon Ball Z (shounen , action)

In [30]:
reccomend_me('Dragon Ball Z')

Here are some of the Animes you would like to watch :
------------------------------------------------------------------
Name : Dragon Ball Kai
Rating : 7.95
Number of Episodes : 97
Genre : Action, Adventure, Comedy, Fantasy, Martial Arts, Shounen, Super Power
Type : TV
Number of Members :  116832.0
------------------------------------------------------------------
Name : Dragon Ball Super
Rating : 7.4
Number of Episodes : 2.0
Genre : Action, Adventure, Comedy, Fantasy, Martial Arts, Shounen, Super Power
Type : TV
Number of Members :  111443.0
------------------------------------------------------------------
Name : Dragon Ball Kai (2014)
Rating : 8.01
Number of Episodes : 61
Genre : Action, Adventure, Comedy, Fantasy, Martial Arts, Shounen, Super Power
Type : TV
Number of Members :  42666.0
------------------------------------------------------------------
Name : One Piece
Rating : 8.58
Number of Episodes : 2.0
Genre : Action, Adventure, Comedy, Drama, Fantasy, Shounen, Super Power
Ty

## Trying reccomendation system on Haikyuu!! (Sports)

In [31]:
reccomend_me('Haikyuu!!')

Here are some of the Animes you would like to watch :
------------------------------------------------------------------
Name : Haikyuu!! Second Season
Rating : 8.93
Number of Episodes : 25
Genre : Comedy, Drama, School, Shounen, Sports
Type : TV
Number of Members :  179342.0
------------------------------------------------------------------
Name : Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou
Rating : 9.15
Number of Episodes : 10
Genre : Comedy, Drama, School, Shounen, Sports
Type : TV
Number of Members :  93351.0
------------------------------------------------------------------
Name : Slam Dunk
Rating : 8.56
Number of Episodes : 101
Genre : Comedy, Drama, School, Shounen, Sports
Type : TV
Number of Members :  82570.0
------------------------------------------------------------------
Name : Kuroko no Basket 2nd Season
Rating : 8.58
Number of Episodes : 25
Genre : Comedy, School, Shounen, Sports
Type : TV
Number of Members :  243325.0
--------------------------------------

## Trying reccomendation system on Hentai genre

In [32]:
reccomend_me('Itadaki! Seieki')

Here are some of the Animes you would like to watch :
------------------------------------------------------------------
Name : Brandish
Rating : 6.89
Number of Episodes : 2
Genre : Hentai, Supernatural
Type : OVA
Number of Members :  6742.0
------------------------------------------------------------------
Name : Bible Black Gaiden
Rating : 6.89
Number of Episodes : 2
Genre : Hentai, Supernatural
Type : OVA
Number of Members :  14478.0
------------------------------------------------------------------
Name : Aku no Onna Kanbu: Full Moon Night
Rating : 6.85
Number of Episodes : 1
Genre : Hentai, Supernatural
Type : OVA
Number of Members :  3613.0
------------------------------------------------------------------
Name : Megachu!
Rating : 6.61
Number of Episodes : 3
Genre : Hentai, Supernatural
Type : OVA
Number of Members :  3991.0
------------------------------------------------------------------
Name : Hachishaku Hachiwa Keraku Meguri: Igyou Kaikitan The Animation
Rating : 6.59
Number