# Anime Analysis: What to Watch Next? 🧐
- [ ] Clean up the one hot encoded dataframe to support multilevel indexing
- [ ] Find a way to fetch ratings for anime whose rating value is NaN

In [53]:
import ast
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [174]:
# Read the raw catalogue, then discard the "Unnamed: 0" column
# (presumably a row index written out when the CSV was saved — TODO confirm).
anime_raw = pd.read_csv("./data/anime_world.csv")
anime_df = anime_raw.drop(columns=["Unnamed: 0"])
anime_df.head()

Unnamed: 0,Anime,Genre,Description,Studio,Year,Rating
0,Re:Zero kara Hajimeru Isekai Seikatsu 2nd Season,"['Drama', 'Fantasy', 'Suspense']",A reunion that was supposed to spell the arriv...,White Fox,"Jul 8, 2020",8.37
1,"Kanojo, Okarishimasu","['Comedy', 'Romance']",Kazuya Kinoshita is a 20-year-old college stud...,TMS Entertainment,"Jul 11, 2020",
2,The God of High School,"['Action', 'Fantasy', 'Sci-Fi', 'Supernatural']","The ""God of High School"" tournament has begun,...",MAPPA,"Jul 6, 2020",
3,Maou Gakuin no Futekigousha: Shijou Saikyou no...,"['Action', 'Fantasy']","In the distant past, a war between humans and ...",SILVER LINK.,"Jul 4, 2020",
4,Enen no Shouboutai: Ni no Shou,"['Action', 'Supernatural']",After his confrontation in the Nether with his...,David Production,"Jul 4, 2020",


## Let's Fix Up Those Genres
The `Genre` column stores each anime's genres as the string representation of a list (e.g. `"['Drama', 'Fantasy']"`). Let's one-hot encode this column. To do that, we first convert each row's string into an actual list of genres, then collect the set of unique genres across the dataframe.

In [175]:
UNKNOWN_GENRE = "misc"

def process_genre(g):
    """Convert one raw ``Genre`` cell into a list of genre names.

    Parameters
    ----------
    g : str, list, tuple, or missing value
        Usually the string form of a list (``"['Drama', 'Fantasy']"``), but
        a bare genre name (``"Comedy"``), an already-parsed list, or a
        missing value (``NaN`` / ``None``) are accepted as well.

    Returns
    -------
    list
        The genre names. ``[UNKNOWN_GENRE]`` is returned when the cell is
        missing, blank, or an empty list.
    """
    # Already parsed (e.g. the cell was re-run): copy instead of re-wrapping.
    if isinstance(g, (list, tuple)):
        return list(g) or [UNKNOWN_GENRE]

    # Missing cells are read by pandas as float NaN, which has no string methods.
    if not isinstance(g, str):
        return [UNKNOWN_GENRE]

    text = g.strip()
    if not text:
        return [UNKNOWN_GENRE]

    # Try to parse the text as a Python literal regardless of how many commas
    # it contains, so a single-element list such as "['Comedy']" is unpacked
    # too. Bare names like "Sci-Fi" are not valid literals and fall through.
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError):
        parsed = None

    if isinstance(parsed, (list, tuple)):
        return list(parsed) or [UNKNOWN_GENRE]

    # Anything else is treated as a single genre label.
    return [text]

In [176]:
# Parse every raw Genre cell into a list of genre names. Cells that already
# hold a list are left untouched, so re-running this cell does not feed
# parsed lists back through process_genre.
anime_df["Genre"] = anime_df["Genre"].map(
    lambda cell: cell if isinstance(cell, list) else process_genre(cell)
)

# Distinct genre names across all rows, sorted, then paired with the "Genre"
# label so each entry can be used as a two-part column key.
unique_genres = sorted({name for genres in anime_df["Genre"] for name in genres})
unique_genres = [("Genre", ug) for ug in unique_genres]

In [181]:
# Snapshot each row's genre list, keyed by row label, before adding columns.
genres_by_row = list(anime_df["Genre"].items())

# Add one indicator column per ("Genre", <name>) label, initialised to 0.
genres_index = pd.MultiIndex.from_tuples(unique_genres)
anime_df[genres_index] = 0

# Set the indicator to 1 for every genre listed on a row.
for row_label, row_genres in genres_by_row:
    for genre_name in row_genres:
        anime_df.at[row_label, ("Genre", genre_name)] = 1

# The list-valued Genre column is no longer needed once it has been encoded.
anime_df = anime_df.drop(columns=["Genre"])

Unnamed: 0,Anime,Description,Studio,Year,Rating,"(Genre, Action)","(Genre, Adventure)","(Genre, Avant Garde)","(Genre, Boys Love)","(Genre, Comedy)",...,"(Genre, Gourmet)","(Genre, Horror)","(Genre, Mystery)","(Genre, Romance)","(Genre, Sci-Fi)","(Genre, Slice of Life)","(Genre, Sports)","(Genre, Supernatural)","(Genre, Suspense)","(Genre, misc)"
0,Re:Zero kara Hajimeru Isekai Seikatsu 2nd Season,A reunion that was supposed to spell the arriv...,White Fox,"Jul 8, 2020",8.37,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,"Kanojo, Okarishimasu",Kazuya Kinoshita is a 20-year-old college stud...,TMS Entertainment,"Jul 11, 2020",,0,0,0,0,1,...,0,0,0,1,0,0,0,0,0,0
2,The God of High School,"The ""God of High School"" tournament has begun,...",MAPPA,"Jul 6, 2020",,1,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
3,Maou Gakuin no Futekigousha: Shijou Saikyou no...,"In the distant past, a war between humans and ...",SILVER LINK.,"Jul 4, 2020",,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Enen no Shouboutai: Ni no Shou,After his confrontation in the Nether with his...,David Production,"Jul 4, 2020",,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
