# Import libraries


In [1]:
import pandas as pd
from datetime import datetime
import json

# Opening raw data

In [2]:
# Load the raw anime dump. The encoding is given explicitly so the read does
# not depend on the platform's default locale encoding (which is not UTF-8 on
# every system) when the file contains non-ASCII text.
with open('../dataset/raw/anime_jikan.json', 'r', encoding='utf-8') as f:
    raw_data = json.load(f)

In [3]:
# Display the first element of raw_data to inspect the structure of a record.
raw_data[0]

{'mal_id': 1,
 'url': 'https://myanimelist.net/anime/1/Cowboy_Bebop',
 'images': {'jpg': {'image_url': 'https://cdn.myanimelist.net/images/anime/4/19644.jpg',
   'small_image_url': 'https://cdn.myanimelist.net/images/anime/4/19644t.jpg',
   'large_image_url': 'https://cdn.myanimelist.net/images/anime/4/19644l.jpg'},
  'webp': {'image_url': 'https://cdn.myanimelist.net/images/anime/4/19644.webp',
   'small_image_url': 'https://cdn.myanimelist.net/images/anime/4/19644t.webp',
   'large_image_url': 'https://cdn.myanimelist.net/images/anime/4/19644l.webp'}},
 'trailer': {'youtube_id': 'gY5nDXOtv_o',
  'url': 'https://www.youtube.com/watch?v=gY5nDXOtv_o',
  'embed_url': 'https://www.youtube.com/embed/gY5nDXOtv_o?enablejsapi=1&wmode=opaque&autoplay=1',
  'images': {'image_url': 'https://img.youtube.com/vi/gY5nDXOtv_o/default.jpg',
   'small_image_url': 'https://img.youtube.com/vi/gY5nDXOtv_o/sddefault.jpg',
   'medium_image_url': 'https://img.youtube.com/vi/gY5nDXOtv_o/mqdefault.jpg',
   'la

# Extracting only the required fields

- mal_id
- title
- title_english
- type
- source
- episodes
- status
- airing
- rating
- score
- rank
- synopsis
- season
- year
- studios
- genres

In [4]:
def _extract_anime_info(anime):
    """Flatten one raw anime record into the flat dict kept for the dataset.

    Falsy values (None, '', 0) are replaced with a per-column default so the
    resulting table has no missing cells.

    Args:
        anime: One element of ``raw_data`` (a dict parsed from the raw JSON).

    Returns:
        A dict with the selected fields. ``studios`` and ``genres`` are the
        entry names joined with ';' (e.g. ``a;b;c``). ``rank`` is copied
        as-is and may still be falsy; the caller fills it in.
    """
    aired = anime['aired']
    return {
        'id': anime['mal_id'],
        'title': anime['title'],
        'title_english': anime['title_english'] or '',
        'type': anime['type'] or 'unknown',
        'source': anime['source'],
        'episodes': anime['episodes'] or 0,
        'status': anime['status'],
        'airing': anime['airing'],
        'rating': anime['rating'] or 'no_rating',
        'score': anime['score'] or 0,
        'rank': anime['rank'],
        'synopsis': anime['synopsis'] or '',
        'season': anime['season'] or '',
        # Prefer the top-level year, fall back to the year of the airing start
        # date, and finally to the integer 0 so the column keeps a single
        # numeric type (previously the string '0' was mixed in with ints).
        'year': anime['year'] or aired['prop']['from']['year'] or 0,
        'start': aired['from'] or '',
        'ending': aired['to'] or '',
        'studios': ';'.join(studio['name'] for studio in anime['studios']),
        'genres': ';'.join(genre['name'] for genre in anime['genres']),
    }


combined = []
total_anime = len(raw_data)
# Records without a rank receive sequential placeholder ranks, starting at the
# total number of records.
# NOTE(review): a placeholder can coincide with a real rank if any ranked
# record already has a rank >= total_anime -- confirm against the data.
last_ranking = total_anime

print(f'Total anime: {total_anime}')
for anime in raw_data:
    anime_info = _extract_anime_info(anime)

    if not anime_info['rank']:
        anime_info['rank'] = last_ranking
        last_ranking += 1

    combined.append(anime_info)

Total anime: 27970


In [5]:
# Build the DataFrame in one step from the list of per-anime dicts; the dict
# keys become the column names.
df = pd.DataFrame(combined)

In [6]:
# Per-column count of missing cells; the bare last expression is rendered as
# the cell output.
print('Missing values: ')
df.isna().sum(axis=0)

Missing values: 


id               0
title            0
title_english    0
type             0
source           0
episodes         0
status           0
airing           0
rating           0
score            0
rank             0
synopsis         0
season           0
year             0
start            0
ending           0
studios          0
genres           0
dtype: int64

In [7]:
def format_time(iso_date):
    """Convert an ISO-format date/time string to a ``YYYY-MM-DD`` string.

    Args:
        iso_date: A string accepted by ``datetime.fromisoformat``; may be
            empty or None when the date is unknown.

    Returns:
        The date formatted as ``YYYY-MM-DD``, or an empty string when
        ``iso_date`` is falsy.
    """
    if not iso_date:
        # Return the same '' placeholder used for unknown dates instead of an
        # implicit None, so no missing values are reintroduced.
        return ''
    return datetime.fromisoformat(iso_date).strftime("%Y-%m-%d")

# Run format_time over both airing-date columns, replacing each in place.
for date_column in ('start', 'ending'):
    df[date_column] = df[date_column].apply(format_time)

In [8]:
# Show the first row as a quick check of the resulting columns and values.
df.head(1)

Unnamed: 0,id,title,title_english,type,source,episodes,status,airing,rating,score,rank,synopsis,season,year,start,ending,studios,genres
0,1,Cowboy Bebop,Cowboy Bebop,TV,Original,26,Finished Airing,False,R - 17+ (violence & profanity),8.75,46,"Crime is timeless. By the year 2071, humanity ...",spring,1998,1998-04-03,1999-04-24,Sunrise,Action;Award Winning;Sci-Fi


# Export data

In [9]:
# Write the table to CSV without the DataFrame index, then report the row count.
output_path = '../dataset/anime.csv'
df.to_csv(output_path, index=False)
print(f'Exported {len(df)} Animes!')

Exported 27970 Animes!
