In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))


/kaggle/input/myanimelist/UserAnimeList.csv
/kaggle/input/myanimelist/animelists_cleaned.csv
/kaggle/input/myanimelist/users_filtered.csv
/kaggle/input/myanimelist/anime_filtered.csv
/kaggle/input/myanimelist/AnimeList.csv
/kaggle/input/myanimelist/users_cleaned.csv
/kaggle/input/myanimelist/UserList.csv
/kaggle/input/myanimelist/animelists_filtered.csv
/kaggle/input/myanimelist/anime_cleaned.csv


In [2]:
import cudf

In [3]:
# Remove pandas' cap on displayed columns so wide frames are shown in full.
pd.set_option('display.max_columns', None)

In [4]:
%%time
# Load the cleaned user table with cuDF's CSV reader; %%time reports the cost.
users_df = cudf.read_csv('/kaggle/input/myanimelist/users_cleaned.csv')

CPU times: user 726 ms, sys: 503 ms, total: 1.23 s
Wall time: 2.49 s


In [5]:
%%time
# Load the cleaned per-user anime lists (one row per user/anime pair) with
# cuDF's CSV reader; this is the largest of the three inputs read here.
lists_df = cudf.read_csv('/kaggle/input/myanimelist/animelists_cleaned.csv')

CPU times: user 1.43 s, sys: 948 ms, total: 2.38 s
Wall time: 26 s


In [6]:
%%time
# Load the cleaned anime catalogue with cuDF's CSV reader.
anime_df = cudf.read_csv('/kaggle/input/myanimelist/anime_cleaned.csv')

CPU times: user 482 ms, sys: 3.75 ms, total: 485 ms
Wall time: 562 ms


In [7]:
# Attach each anime's first-airing year to every list entry.
# A left join keeps list entries whose anime_id is missing from the catalogue.
lists_df = lists_df.merge(
    anime_df[['anime_id', 'aired_from_year']],
    on='anime_id',
    how='left',
)

In [8]:
%%time
group_by_username = lists_df.groupby(by='username')
agg_by_username = group_by_username.agg({'aired_from_year': ['std', 'mean'], 'my_score': ['std', 'mean']})

CPU times: user 4.68 s, sys: 81.8 ms, total: 4.77 s
Wall time: 4.97 s


## Reindex user IDs in users_df and lists_df

In [9]:
# Aggregating with several functions per column yields two-level column
# labels such as ('my_score', 'mean'); flatten them to 'my_score_mean'.
mi = agg_by_username.columns
ind = cudf.Index(["_".join(levels) for levels in mi.tolist()])
agg_by_username.columns = ind

# Turn the 'username' group key back into a regular column, then attach the
# per-user aggregates to the user table (left join keeps every user).
agg_by_username = agg_by_username.reset_index()
users_df = cudf.merge(users_df, agg_by_username, how='left', on='username')

In [10]:
# Order the users by their raw user_id.
users_df = users_df.sort_values(by='user_id')

In [11]:
# Discard list entries and users that have no username.
lists_df = lists_df[lists_df.username.notnull()]
users_df = users_df[users_df.username.notnull()]

# Keep only users whose mean score, rewatch count and episode count are all
# non-zero: the product of the three columns is zero as soon as any one is.
users_df = users_df[
    (
        users_df['stats_mean_score']
        * users_df['stats_rewatched']
        * users_df['stats_episodes']
    )
    != 0
]

In [12]:
# Sanity checks after filtering: the first two shapes should match the row
# count of users_df if user_id and username are both unique per row.
print(users_df.user_id.unique().shape)
print(users_df.username.unique().shape)
# Range of the raw (sparse) user ids.
print(users_df.user_id.min(), users_df.user_id.max())
print(users_df.shape)

# lists_df has not yet been restricted to the surviving users at this point.
print(lists_df.username.unique().shape)
print(lists_df.shape)

(54997,)
(54997,)
1 7235844
(54997, 21)
(108709,)
(31283787, 12)


In [13]:
# After the filtering above, users_df holds 54,997 users whose raw user_id
# values are sparse (1 .. 7,235,844), while lists_df still references
# 108,709 usernames. Re-label the surviving users with a dense 0..N-1 index
# and keep only the list entries that belong to them.

# index_to_usersid_mapping: dense index (0..N-1) -> raw user_id
# usersid_to_index_mapping: raw user_id -> dense index (0..N-1)
index_to_usersid_mapping = users_df.user_id.unique().to_pandas().to_dict()
usersid_to_index_mapping = {v: k for k, v in index_to_usersid_mapping.items()}

# True when the inversion lost no entries, i.e. the raw ids are unique.
print(len(usersid_to_index_mapping) == len(index_to_usersid_mapping))

# users_df['user_id_reindexed']: dense index in 0..N-1
users_df['user_id_reindexed'] = users_df.user_id.replace(usersid_to_index_mapping)

# Build both username lookups from one three-column host copy instead of
# converting the entire user frame to a nested dict twice.
user_keys = (
    users_df[['username', 'user_id', 'user_id_reindexed']]
    .to_pandas()
    .set_index('username')
)
username_to_userid_mapping = user_keys['user_id'].to_dict()
username_to_index_mapping = user_keys['user_id_reindexed'].to_dict()

# Keep only list entries whose username survived the user filtering.
lists_df = lists_df[lists_df.username.isin(username_to_userid_mapping.keys())]

# Downstream code wants ids rather than usernames, so translate each username
# into its dense index (0..N-1).
lists_df['user_id_reindexed'] = lists_df.username.replace(username_to_index_mapping)
lists_df['user_id_reindexed'] = lists_df['user_id_reindexed'].astype('int64')

# Should print N-1, the largest dense index.
print(lists_df['user_id_reindexed'].max())

True
54996


## Simple feature engineering (FE)

In [14]:
# Inspect column dtypes and memory footprint of lists_df after reindexing.
lists_df.info()

<class 'cudf.core.dataframe.DataFrame'>
Int64Index: 18149481 entries, 32 to 31284017
Data columns (total 13 columns):
 #   Column               Dtype
---  ------               -----
 0   username             object
 1   anime_id             int64
 2   my_watched_episodes  int64
 3   my_start_date        object
 4   my_finish_date       object
 5   my_score             int64
 6   my_status            int64
 7   my_rewatching        float64
 8   my_rewatching_ep     int64
 9   my_last_updated      object
 10  my_tags              object
 11  aired_from_year      float64
 12  user_id_reindexed    int64
dtypes: float64(2), int64(6), object(5)
memory usage: 2.4+ GB


In [15]:
# Inspect column dtypes and non-null counts of users_df after reindexing.
users_df.info()

<class 'cudf.core.dataframe.DataFrame'>
Int64Index: 54997 entries, 54166 to 67452
Data columns (total 22 columns):
 #   Column                    Non-Null Count  Dtype
---  ------                    --------------  -----
 0   username                  54997 non-null  object
 1   user_id                   54997 non-null  int64
 2   user_watching             54997 non-null  int64
 3   user_completed            54997 non-null  int64
 4   user_onhold               54997 non-null  int64
 5   user_dropped              54997 non-null  int64
 6   user_plantowatch          54997 non-null  int64
 7   user_days_spent_watching  54997 non-null  float64
 8   gender                    54997 non-null  object
 9   location                  54994 non-null  object
 10  birth_date                54997 non-null  object
 11  access_rank               0 non-null      int8
 12  join_date                 54997 non-null  object
 13  last_online               54997 non-null  object
 14  stats_mean_score         

In [16]:
# Year against which ages are computed; named here so it can be changed in
# one place instead of being a bare literal in the formula.
REFERENCE_YEAR = 2021

# Parse birth dates, then derive an approximate age. Only the year is used,
# so the result can be off by one depending on the birthday.
users_df['birth_date'] = cudf.to_datetime(users_df['birth_date'])
users_df['user_age'] = REFERENCE_YEAR - users_df['birth_date'].dt.year

In [17]:
# Encode gender on a 0..1 scale, with 'Non-Binary' placed at the midpoint.
gender_map = {'Female':0, 'Male':1, 'Non-Binary':0.5}

# Assign the replaced column back explicitly: an in-place replace() on a
# Series obtained through attribute access only reaches the parent frame if
# the library happens to share the underlying column.
# Values absent from gender_map are left as-is by replace(), so the float
# cast fails loudly on an unexpected label instead of hiding it.
users_df['gender'] = users_df['gender'].replace(gender_map).astype('float')

In [18]:
# User columns that are not fed to the model: identifiers, free text and raw
# dates ('birth_date' has already been turned into 'user_age').
useless_columns = ['username', 'location', 'birth_date', 'join_date', 'last_online', 'user_id', 'access_rank']

# Rebind instead of mutating in place, so the cell reads as producing a new frame.
users_df = users_df.drop(columns=useless_columns)

In [19]:
# List-entry columns that are not fed to the model. 'username' is replaced by
# 'user_id_reindexed'; 'aired_from_year' was only needed for the per-user
# aggregates. Note that this rebinds `useless_columns` from the previous cell.
useless_columns = ['username', 'my_start_date', 'my_finish_date', 'my_rewatching_ep', 'my_last_updated', 'my_tags', 'aired_from_year']

# NOTE(review): the drop is done in place on this multi-GB frame; a
# non-inplace drop may allocate a second copy on the GPU - confirm before
# changing it.
lists_df.drop(columns=useless_columns, inplace=True)

In [20]:
# Confirm which user columns remain after the drop, with their non-null counts.
users_df.info()

<class 'cudf.core.dataframe.DataFrame'>
Int64Index: 54997 entries, 54166 to 67452
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype
---  ------                    --------------  -----
 0   user_watching             54997 non-null  int64
 1   user_completed            54997 non-null  int64
 2   user_onhold               54997 non-null  int64
 3   user_dropped              54997 non-null  int64
 4   user_plantowatch          54997 non-null  int64
 5   user_days_spent_watching  54997 non-null  float64
 6   gender                    54997 non-null  float64
 7   stats_mean_score          54997 non-null  float64
 8   stats_rewatched           54997 non-null  float64
 9   stats_episodes            54997 non-null  int64
 10  aired_from_year_std       54932 non-null  float64
 11  aired_from_year_mean      54997 non-null  float64
 12  my_score_std              54932 non-null  float64
 13  my_score_mean             54997 non-null  float64
 14  user_id_reindexe

In [21]:
# Confirm which list-entry columns remain after the drop, and the memory they use.
lists_df.info()

<class 'cudf.core.dataframe.DataFrame'>
Int64Index: 18149481 entries, 32 to 31284017
Data columns (total 6 columns):
 #   Column               Dtype
---  ------               -----
 0   anime_id             int64
 1   my_watched_episodes  int64
 2   my_score             int64
 3   my_status            int64
 4   my_rewatching        float64
 5   user_id_reindexed    int64
dtypes: float64(1), int64(5)
memory usage: 971.5 MB


In [22]:
def reduce_mem_usage(df):
    """Downcast the columns of ``df`` in place to shrink its memory footprint.

    Conversion rules, applied column by column:

    * ``object`` / ``str`` columns become ``category``.
    * Integer columns wider than 32 bits become ``int32`` (``uint32`` when
      unsigned), but only if every non-null value fits in the target type;
      otherwise the column is left untouched so values can never wrap around.
    * Floating-point columns wider than 32 bits become ``float32``. This
      trades precision for memory.
    * Everything else (booleans, datetimes, categories, and numeric columns
      that are already 32 bits or narrower) is left unchanged, so the function
      never increases a column's width.

    The memory usage before and after, and the relative saving, are printed.

    Args:
        df: A pandas- or cuDF-style DataFrame. It is modified in place.

    Returns:
        The same ``df`` object, so the call can be used in an assignment.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    for col in df.columns:
        column = df[col]
        col_type = column.dtype

        if col_type == object or col_type == str:
            df[col] = column.astype('category')
            continue

        # Extension dtypes may lack these attributes; treat them as "skip".
        kind = getattr(col_type, 'kind', None)
        itemsize = getattr(col_type, 'itemsize', 0)
        if itemsize <= 4:
            # Already 32 bits or narrower: casting would not save anything.
            continue

        if kind in ('i', 'u'):
            target = 'int32' if kind == 'i' else 'uint32'
            bounds = np.iinfo(target)
            # count() ignores nulls; an all-null/empty column trivially fits.
            fits = column.count() == 0 or (
                column.min() >= bounds.min and column.max() <= bounds.max
            )
            if fits:
                df[col] = column.astype(target)
        elif kind == 'f':
            df[col] = column.astype('float32')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard against a zero-byte frame, which would otherwise divide by zero.
    saved_pct = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(saved_pct))

    return df

In [23]:
# Shrink users_df by downcasting its columns; the helper prints the saving.
users_df = reduce_mem_usage(users_df)

Memory usage of dataframe is 6.94 MB
Memory usage after optimization is: 3.79 MB
Decreased by 45.4%


In [24]:
# Shrink lists_df by downcasting its columns; the helper prints the saving.
lists_df = reduce_mem_usage(lists_df)

Memory usage of dataframe is 971.45 MB
Memory usage after optimization is: 556.04 MB
Decreased by 42.8%


In [25]:
# One row per list entry, enriched with the features of the user who owns it.
# The left join keeps every list entry.
rating_df = lists_df.merge(users_df, on='user_id_reindexed', how='left')

In [26]:
# Preview the first rows of the merged entry + user feature table.
rating_df.head()

Unnamed: 0,anime_id,my_watched_episodes,my_score,my_status,my_rewatching,user_id_reindexed,user_watching,user_completed,user_onhold,user_dropped,user_plantowatch,user_days_spent_watching,gender,stats_mean_score,stats_rewatched,stats_episodes,aired_from_year_std,aired_from_year_mean,my_score_std,my_score_mean,user_age
0,9367,12,9,2,0.0,33850,12,259,0,0,14,80.509262,1.0,8.96,5.0,4861,2.949448,2008.533569,2.910566,8.108209,23
1,11319,12,9,2,0.0,33850,12,259,0,0,14,80.509262,1.0,8.96,5.0,4861,2.949448,2008.533569,2.910566,8.108209,23
2,10076,2,8,2,0.0,33850,12,259,0,0,14,80.509262,1.0,8.96,5.0,4861,2.949448,2008.533569,2.910566,8.108209,23
3,22689,2,3,2,0.0,33406,14,525,23,21,141,210.357635,1.0,6.12,1.0,12482,7.688925,2007.827759,3.384025,3.432706,25
4,9331,1,9,6,0.0,33850,12,259,0,0,14,80.509262,1.0,8.96,5.0,4861,2.949448,2008.533569,2.910566,8.108209,23


In [27]:
# Check the dtypes of the merged table (they should reflect the downcasting above).
rating_df.info()

<class 'cudf.core.dataframe.DataFrame'>
RangeIndex: 18149481 entries, 0 to 18149480
Data columns (total 21 columns):
 #   Column                    Dtype
---  ------                    -----
 0   anime_id                  int32
 1   my_watched_episodes       int32
 2   my_score                  int32
 3   my_status                 int32
 4   my_rewatching             float32
 5   user_id_reindexed         int32
 6   user_watching             int32
 7   user_completed            int32
 8   user_onhold               int32
 9   user_dropped              int32
 10  user_plantowatch          int32
 11  user_days_spent_watching  float32
 12  gender                    float32
 13  stats_mean_score          float32
 14  stats_rewatched           float32
 15  stats_episodes            int32
 16  aired_from_year_std       float32
 17  aired_from_year_mean      float32
 18  my_score_std              float32
 19  my_score_mean             float32
 20  user_age                  int32
dtypes: float3

In [28]:
# Keep only entries with a non-zero score.
# NOTE(review): a score of 0 presumably means "not rated" - confirm.
rating_df = rating_df[rating_df['my_score'].ne(0)]

In [29]:
# Row count and dtypes after removing the zero-score entries.
rating_df.info()

<class 'cudf.core.dataframe.DataFrame'>
Int64Index: 11559808 entries, 0 to 18149480
Data columns (total 21 columns):
 #   Column                    Dtype
---  ------                    -----
 0   anime_id                  int32
 1   my_watched_episodes       int32
 2   my_score                  int32
 3   my_status                 int32
 4   my_rewatching             float32
 5   user_id_reindexed         int32
 6   user_watching             int32
 7   user_completed            int32
 8   user_onhold               int32
 9   user_dropped              int32
 10  user_plantowatch          int32
 11  user_days_spent_watching  float32
 12  gender                    float32
 13  stats_mean_score          float32
 14  stats_rewatched           float32
 15  stats_episodes            int32
 16  aired_from_year_std       float32
 17  aired_from_year_mean      float32
 18  my_score_std              float32
 19  my_score_mean             float32
 20  user_age                  int32
dtypes: float3

In [30]:
# Summary statistics of every numeric column, to eyeball ranges and outliers.
rating_df.describe()

Unnamed: 0,anime_id,my_watched_episodes,my_score,my_status,my_rewatching,user_id_reindexed,user_watching,user_completed,user_onhold,user_dropped,user_plantowatch,user_days_spent_watching,gender,stats_mean_score,stats_rewatched,stats_episodes,aired_from_year_std,aired_from_year_mean,my_score_std,my_score_mean,user_age
count,11559810.0,11559810.0,11559810.0,11559810.0,8255373.0,11559810.0,11559810.0,11559810.0,11559810.0,11559810.0,11559810.0,11559810.0,11559810.0,11559810.0,11559810.0,11559810.0,11559740.0,11559810.0,11559740.0,11559810.0,11559810.0
mean,11355.67,17.56306,7.520859,2.088284,0.001527,27894.83,24.7861,492.6323,23.12436,28.05222,148.4212,127.5673,0.750732,7.477248,46.58499,7892.247,5.707382,2008.907,3.198062,5.472729,28.6147
std,11052.58,41.2086,1.728516,0.515968,0.039042,15823.38,45.28359,489.143,51.92553,64.23506,329.0654,97.13303,0.430768,0.893066,150.3749,6193.61,1.459668,2.843697,0.772082,1.5736,4.687518
min,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.500694,0.0,1.0,1.0,7.0,0.0,1977.0,0.0,0.001629,13.0
25%,1569.0,2.0,7.0,2.0,0.0,14288.0,6.0,195.0,3.0,2.0,22.0,63.51597,0.5,6.98,5.0,3862.0,4.699649,2007.223,2.702725,4.439286,25.0
50%,7791.0,12.0,8.0,2.0,0.0,28458.0,13.0,350.0,10.0,10.0,66.0,104.5736,1.0,7.53,16.0,6401.0,5.574741,2009.403,3.334392,5.627397,28.0
75%,18767.0,20.0,9.0,2.0,0.0,41586.0,27.0,617.0,27.0,30.0,160.0,163.5142,1.0,8.03,46.0,10062.0,6.561675,2011.011,3.783448,6.612805,31.0
max,37860.0,6724.0,10.0,33.0,1.0,54996.0,1567.0,5121.0,2016.0,2457.0,11729.0,952.6546,1.0,10.0,9404.0,58658.0,16.98405,2016.327,7.071068,10.0,52.0


In [31]:
# Detach the score column: pop() removes it from rating_df and returns it.
# Not idempotent - re-running this cell fails because the column is gone.
rating_target = rating_df.pop('my_score')

In [32]:
# Detach the anime id column (removed from rating_df, kept separately).
# Not idempotent - re-running this cell fails because the column is gone.
anime_id_col = rating_df.pop('anime_id')

In [33]:
# Detach the dense user index column (removed from rating_df, kept separately).
# Not idempotent - re-running this cell fails because the column is gone.
user_id_col = rating_df.pop('user_id_reindexed')

In [34]:
# Remaining columns once the score and the two id columns have been popped.
rating_df.info()

<class 'cudf.core.dataframe.DataFrame'>
Int64Index: 11559808 entries, 0 to 18149480
Data columns (total 18 columns):
 #   Column                    Dtype
---  ------                    -----
 0   my_watched_episodes       int32
 1   my_status                 int32
 2   my_rewatching             float32
 3   user_watching             int32
 4   user_completed            int32
 5   user_onhold               int32
 6   user_dropped              int32
 7   user_plantowatch          int32
 8   user_days_spent_watching  float32
 9   gender                    float32
 10  stats_mean_score          float32
 11  stats_rewatched           float32
 12  stats_episodes            int32
 13  aired_from_year_std       float32
 14  aired_from_year_mean      float32
 15  my_score_std              float32
 16  my_score_mean             float32
 17  user_age                  int32
dtypes: float32(9), int32(9)
memory usage: 886.1 MB


In [35]:
# Preview the anime catalogue columns before engineering genre features.
anime_df.head()

Unnamed: 0,anime_id,title,title_english,title_japanese,title_synonyms,image_url,type,source,episodes,status,airing,aired_string,aired,duration,rating,score,scored_by,rank,popularity,members,favorites,background,premiered,broadcast,related,producer,licensor,studio,genre,opening_theme,ending_theme,duration_min,aired_from_year
0,11013,Inu x Boku SS,Inu X Boku Secret Service,妖狐×僕SS,Youko x Boku SS,https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,12,Finished Airing,False,"Jan 13, 2012 to Mar 30, 2012","{'from': '2012-01-13', 'to': '2012-03-30'}",24 min. per ep.,PG-13 - Teens 13 or older,7.63,139250,1274.0,231,283882,2809,Inu x Boku SS was licensed by Sentai Filmworks...,Winter 2012,Fridays at Unknown,"{'Adaptation': [{'mal_id': 17207, 'type': 'man...","Aniplex, Square Enix, Mainichi Broadcasting Sy...",Sentai Filmworks,David Production,"Comedy, Supernatural, Romance, Shounen","['""Nirvana"" by MUCC']","['#1: ""Nirvana"" by MUCC (eps 1, 11-12)', '#2: ...",24.0,2012.0
1,2104,Seto no Hanayome,My Bride is a Mermaid,瀬戸の花嫁,The Inland Sea Bride,https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,26,Finished Airing,False,"Apr 2, 2007 to Oct 1, 2007","{'from': '2007-04-02', 'to': '2007-10-01'}",24 min. per ep.,PG-13 - Teens 13 or older,7.89,91206,727.0,366,204003,2579,,Spring 2007,Unknown,"{'Adaptation': [{'mal_id': 759, 'type': 'manga...","TV Tokyo, AIC, Square Enix, Sotsu",Funimation,Gonzo,"Comedy, Parody, Romance, School, Shounen","['""Romantic summer"" by SUN&LUNAR']","['#1: ""Ashita e no Hikari (明日への光)"" by Asuka Hi...",24.0,2007.0
2,5262,Shugo Chara!! Doki,Shugo Chara!! Doki,しゅごキャラ！！どきっ,"Shugo Chara Ninenme, Shugo Chara! Second Year",https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,51,Finished Airing,False,"Oct 4, 2008 to Sep 25, 2009","{'from': '2008-10-04', 'to': '2009-09-25'}",24 min. per ep.,PG - Children,7.55,37129,1508.0,1173,70127,802,,Fall 2008,Unknown,"{'Adaptation': [{'mal_id': 101, 'type': 'manga...","TV Tokyo, Sotsu",,Satelight,"Comedy, Magic, School, Shoujo","['#1: ""Minna no Tamago (みんなのたまご)"" by Shugo Cha...","['#1: ""Rottara Rottara (ロッタラ ロッタラ)"" by Buono! ...",24.0,2008.0
3,721,Princess Tutu,Princess Tutu,プリンセスチュチュ,,https://myanimelist.cdn-dena.com/images/anime/...,TV,Original,38,Finished Airing,False,"Aug 16, 2002 to May 23, 2003","{'from': '2002-08-16', 'to': '2003-05-23'}",16 min. per ep.,PG-13 - Teens 13 or older,8.21,36501,307.0,916,93312,3344,Princess Tutu aired in two parts. The first pa...,Summer 2002,Fridays at Unknown,"{'Adaptation': [{'mal_id': 1581, 'type': 'mang...","Memory-Tech, GANSIS, Marvelous AQL",ADV Films,Hal Film Maker,"Comedy, Drama, Magic, Romance, Fantasy","['""Morning Grace"" by Ritsuko Okazaki']","['""Watashi No Ai Wa Chiisaikeredo"" by Ritsuko ...",16.0,2002.0
4,12365,Bakuman. 3rd Season,Bakuman.,バクマン。,Bakuman Season 3,https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,25,Finished Airing,False,"Oct 6, 2012 to Mar 30, 2013","{'from': '2012-10-06', 'to': '2013-03-30'}",24 min. per ep.,PG-13 - Teens 13 or older,8.67,107767,50.0,426,182765,2082,,Fall 2012,Unknown,"{'Adaptation': [{'mal_id': 9711, 'type': 'mang...","NHK, Shueisha",,J.C.Staff,"Comedy, Drama, Romance, Shounen","['#1: ""Moshimo no Hanashi (もしもの話)"" by nano.RIP...","['#1: ""Pride on Everyday"" by Sphere (eps 1-13)...",24.0,2012.0


In [36]:
# Preview only (result is not stored): split the ', '-separated genre string
# into one column per position to see how many genres a title can carry.
anime_df.genre.str.split(', ', expand=True)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,Comedy,Supernatural,Romance,Shounen,,,,,,,,,
1,Comedy,Parody,Romance,School,Shounen,,,,,,,,
2,Comedy,Magic,School,Shoujo,,,,,,,,,
3,Comedy,Drama,Magic,Romance,Fantasy,,,,,,,,
4,Comedy,Drama,Romance,Shounen,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6663,Hentai,,,,,,,,,,,,
6664,Slice of Life,Drama,Romance,,,,,,,,,,
6665,Music,Kids,,,,,,,,,,,
6666,Kids,,,,,,,,,,,,


In [37]:
from sklearn.preprocessing import MultiLabelBinarizer

In [38]:
# Give titles without any genre the placeholder label 'NA' so they can still
# be split and binarised below. Assign the result back explicitly rather than
# relying on an in-place fill of a Series obtained through attribute access
# reaching the parent frame.
anime_df['genre'] = anime_df['genre'].fillna('NA')

In [39]:
# One-hot encode the multi-valued genre field: each title's ', '-separated
# genre string becomes a list of labels, and the binariser produces one 0/1
# column per distinct label (exposed afterwards as mlb.classes_).
mlb = MultiLabelBinarizer()
# Only the genre column is copied to the host, not the whole frame.
# fillna('NA') is repeated here so this cell does not depend on an earlier
# cell having patched missing genres; it is a no-op if that already happened.
genre_mlb = mlb.fit_transform(
    anime_df['genre'].fillna('NA').to_pandas().str.split(', ')
)

In [40]:
# Append the one-hot genre flags to the catalogue as 'genre_<label>' columns.
anime_pdf = anime_df.to_pandas()
# Rows of genre_mlb are positional with respect to anime_df, so give the flag
# frame the catalogue's own index; otherwise concat would align on labels and
# mis-pair rows whenever anime_df does not carry a default 0..N-1 index.
genre_flags = pd.DataFrame(
    genre_mlb, columns=mlb.classes_, index=anime_pdf.index
).add_prefix('genre_')
# Drop flag columns left over from a previous run so that re-executing this
# cell does not create duplicate column names.
anime_pdf = anime_pdf.drop(columns=genre_flags.columns, errors='ignore')
anime_df = cudf.from_pandas(pd.concat([anime_pdf, genre_flags], axis=1))

In [41]:
# Spot-check the new flags: first three titles with genre_Hentai set.
anime_df[anime_df['genre_Hentai']==1].head(3)

Unnamed: 0,anime_id,title,title_english,title_japanese,title_synonyms,image_url,type,source,episodes,status,airing,aired_string,aired,duration,rating,score,scored_by,rank,popularity,members,favorites,genre_Police,genre_Psychological,genre_Romance,genre_Samurai,genre_School,genre_Sci-Fi,genre_Seinen,genre_Shoujo,genre_Shoujo Ai,genre_Shounen,genre_Shounen Ai,genre_Slice of Life,genre_Space,genre_Sports,genre_Super Power,genre_Supernatural,genre_Thriller,genre_Vampire,genre_Yaoi,genre_Yuri
72,9721,Aisai Nikki,,愛妻日記,,https://myanimelist.cdn-dena.com/images/anime/...,OVA,Visual novel,1,Finished Airing,False,"Dec 22, 2010","{'from': '2010-12-22', 'to': '2010-12-22'}",22 min.,Rx - Hentai,6.54,2749,,4398,6450,39,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
85,36755,Juvenile Pornography The Animation,,ジュヴナイル・ポルノグラフィ THE ANIMATION,,https://myanimelist.cdn-dena.com/images/anime/...,OVA,Manga,1,Finished Airing,False,"Dec 1, 2017","{'from': '2017-12-01', 'to': '2017-12-01'}",16 min.,Rx - Hentai,6.49,682,,7015,1872,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
108,11827,Kuroinu: Kedakaki Seijo wa Hakudaku ni Somaru,,黒獣 [クロイヌ] ～気高き聖女は白濁に染まる～,,https://myanimelist.cdn-dena.com/images/anime/...,OVA,Visual novel,4,Finished Airing,False,"Jan 27, 2012 to Jan 31, 2014","{'from': '2012-01-27', 'to': '2014-01-31'}",30 min. per ep.,Rx - Hentai,7.56,6748,,3031,15385,437,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [42]:
# Spot-check the new flags: first three titles with genre_Ecchi set.
anime_df[anime_df['genre_Ecchi']==1].head(3)

Unnamed: 0,anime_id,title,title_english,title_japanese,title_synonyms,image_url,type,source,episodes,status,airing,aired_string,aired,duration,rating,score,scored_by,rank,popularity,members,favorites,genre_Police,genre_Psychological,genre_Romance,genre_Samurai,genre_School,genre_Sci-Fi,genre_Seinen,genre_Shoujo,genre_Shoujo Ai,genre_Shounen,genre_Shounen Ai,genre_Slice of Life,genre_Space,genre_Sports,genre_Super Power,genre_Supernatural,genre_Thriller,genre_Vampire,genre_Yaoi,genre_Yuri
15,59,Chobits,Chobits,ちょびっツ,,https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,26,Finished Airing,False,"Apr 3, 2002 to Sep 25, 2002","{'from': '2002-04-03', 'to': '2002-09-25'}",24 min. per ep.,PG-13 - Teens 13 or older,7.53,175388,1546.0,188,317641,3271,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
30,7817,B-gata H-kei,Yamada&#039;s First Time: B Gata H Kei,B型H系,,https://myanimelist.cdn-dena.com/images/anime/...,TV,4-koma manga,12,Finished Airing,False,"Apr 2, 2010 to Jun 18, 2010","{'from': '2010-04-02', 'to': '2010-06-18'}",24 min. per ep.,R+ - Mild Nudity,7.1,142827,3193.0,285,244791,1641,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
31,966,Crayon Shin-chan,Shin Chan,クレヨンしんちゃん,,https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,0,Currently Airing,True,"Apr 13, 1992 to ?","{'from': '1992-04-13', 'to': None}",21 min.,PG - Children,7.73,17683,1032.0,1937,34788,600,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0


In [43]:
# Largest anime_id in the catalogue.
# NOTE(review): presumably used to size an id space further down - confirm.
anime_df.anime_id.max()

37896