In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

In [36]:
# set categorical types
category_cols = ['Rating','Type','Status','Source','Premiered']
number_cols = ['Score','Rank','Episodes','Scored By']

types = dict([(i,'category') for i in category_cols])

df = pd.read_csv('anime-dataset-2023.csv')
df = df.astype(types)
df.dtypes

anime_id           int64
Name              object
English name      object
Other name        object
Score             object
Genres            object
Synopsis          object
Type            category
Episodes          object
Aired             object
Premiered       category
Status          category
Producers         object
Licensors         object
Studios           object
Source          category
Duration          object
Rating          category
Rank              object
Popularity         int64
Favorites          int64
Scored By         object
Members            int64
Image URL         object
dtype: object

In [37]:
for cat in category_cols:
    print(cat)
    print(df[cat].cat.categories)

Rating
Index(['G - All Ages', 'PG - Children', 'PG-13 - Teens 13 or older',
       'R - 17+ (violence & profanity)', 'R+ - Mild Nudity', 'Rx - Hentai',
       'UNKNOWN'],
      dtype='object')
Type
Index(['Movie', 'Music', 'ONA', 'OVA', 'Special', 'TV', 'UNKNOWN'], dtype='object')
Status
Index(['Currently Airing', 'Finished Airing', 'Not yet aired'], dtype='object')
Source
Index(['4-koma manga', 'Book', 'Card game', 'Game', 'Light novel', 'Manga',
       'Mixed media', 'Music', 'Novel', 'Original', 'Other', 'Picture book',
       'Radio', 'Unknown', 'Visual novel', 'Web manga', 'Web novel'],
      dtype='object')
Premiered
Index(['UNKNOWN', 'fall 1963', 'fall 1965', 'fall 1966', 'fall 1967',
       'fall 1968', 'fall 1969', 'fall 1970', 'fall 1971', 'fall 1972',
       ...
       'winter 2016', 'winter 2017', 'winter 2018', 'winter 2019',
       'winter 2020', 'winter 2021', 'winter 2022', 'winter 2023',
       'winter 2024', 'winter 2025'],
      dtype='object', length=244)


## Clean Numeric Columns

In [38]:
for col in number_cols:
    df[col] = pd.to_numeric(df[col],errors='coerce').fillna(-1).astype('int64')
df.dtypes

anime_id           int64
Name              object
English name      object
Other name        object
Score              int64
Genres            object
Synopsis          object
Type            category
Episodes           int64
Aired             object
Premiered       category
Status          category
Producers         object
Licensors         object
Studios           object
Source          category
Duration          object
Rating          category
Rank               int64
Popularity         int64
Favorites          int64
Scored By          int64
Members            int64
Image URL         object
dtype: object

In [39]:
df.head()

Unnamed: 0,anime_id,Name,English name,Other name,Score,Genres,Synopsis,Type,Episodes,Aired,...,Studios,Source,Duration,Rating,Rank,Popularity,Favorites,Scored By,Members,Image URL
0,1,Cowboy Bebop,Cowboy Bebop,カウボーイビバップ,8,"Action, Award Winning, Sci-Fi","Crime is timeless. By the year 2071, humanity ...",TV,26,"Apr 3, 1998 to Apr 24, 1999",...,Sunrise,Original,24 min per ep,R - 17+ (violence & profanity),41,43,78525,914193,1771505,https://cdn.myanimelist.net/images/anime/4/196...
1,5,Cowboy Bebop: Tengoku no Tobira,Cowboy Bebop: The Movie,カウボーイビバップ 天国の扉,8,"Action, Sci-Fi","Another day, another bounty—such is the life o...",Movie,1,"Sep 1, 2001",...,Bones,Original,1 hr 55 min,R - 17+ (violence & profanity),189,602,1448,206248,360978,https://cdn.myanimelist.net/images/anime/1439/...
2,6,Trigun,Trigun,トライガン,8,"Action, Adventure, Sci-Fi","Vash the Stampede is the man with a $$60,000,0...",TV,26,"Apr 1, 1998 to Sep 30, 1998",...,Madhouse,Manga,24 min per ep,PG-13 - Teens 13 or older,328,246,15035,356739,727252,https://cdn.myanimelist.net/images/anime/7/203...
3,7,Witch Hunter Robin,Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),7,"Action, Drama, Mystery, Supernatural",Robin Sena is a powerful craft user drafted in...,TV,26,"Jul 3, 2002 to Dec 25, 2002",...,Sunrise,Original,25 min per ep,PG-13 - Teens 13 or older,2764,1795,613,42829,111931,https://cdn.myanimelist.net/images/anime/10/19...
4,8,Bouken Ou Beet,Beet the Vandel Buster,冒険王ビィト,6,"Adventure, Fantasy, Supernatural",It is the dark century and the people are suff...,TV,52,"Sep 30, 2004 to Sep 29, 2005",...,Toei Animation,Manga,23 min per ep,PG - Children,4240,5126,14,6413,15001,https://cdn.myanimelist.net/images/anime/7/215...
