## Process JSON files to extract the relevant features of anime / animated movies

In [1]:
# Relative path to the JSON configuration file; the cells below read the
# MyAnimeList and MongoDB settings from it.
config_file = '../config.json'

Import the required libraries and read the config file

In [2]:
import os
import json
import pymongo
import bson.json_util as json_util

# Parse the JSON config file. The encoding is given explicitly so the result
# does not depend on the platform's locale default.
with open(config_file, encoding='utf-8') as f:
    config = json.load(f)

# Pull out the settings once the file has been closed:
# the MyAnimeList client id and the whole MongoDB section.
client_id = config['MyAnimeList']['client_id']
mongo_cred = config['MongoDB']

Create a MongoDB connection to read data from the `src_anime_details` collection of the `MyAnimeList` database

In [3]:
# Fill the SRV URI template with the 'username', 'password', 'database' and
# 'cluster' entries of the MongoDB config section.
# NOTE(review): the credentials are inserted verbatim - presumably they contain
# no reserved URI characters or are already percent-encoded; confirm.
connection_string = (
    "mongodb+srv://{username}:{password}@{database}.{cluster}.mongodb.net/"
    .format_map(mongo_cred)
)

# Open the client and select the database used by the following cells.
client = pymongo.MongoClient(connection_string)
db = client['MyAnimeList']

Extract the relevant features from each anime

In [None]:
# Build one flat staging record per source document.
#   anime_details    - list of staging records, one dict per kept anime
#   count_eliminated - number of source documents skipped by the filter below
anime_details = []
count_eliminated = 0

src_jsons = db.src_anime_details.find({})
for anime_detail in src_jsons:
    # Keep only documents that carry a score ('mean') and a non-empty title.
    if 'mean' not in anime_detail or not anime_detail.get('title'):
        count_eliminated += 1
        continue

    # Prefer the English alternative title when one is present and non-empty.
    alternative_titles = anime_detail.get('alternative_titles') or {}
    title = alternative_titles.get('en') or anime_detail['title']

    # Per-status user counts; indexed strictly, so a document without
    # 'statistics' still raises KeyError as before.
    status_counts = anime_detail['statistics']['status']

    # A brand-new dict is created for every anime. Reusing a single dict
    # across iterations would make every list element alias the same object,
    # leaving the list filled with copies of the last anime processed.
    anime_info = {
        '_id': anime_detail['id'],
        'title': title,
        'start_date': anime_detail.get('start_date', "1900-01-01"),
        'end_date': anime_detail.get('end_date', "1900-01-01"),
        'vote': anime_detail['mean'],
        'rank': anime_detail.get('rank', -1),
        'num_scoring_users': anime_detail['num_scoring_users'],
        'updated_at': anime_detail['updated_at'],
        'media_type': anime_detail['media_type'],
        'status': anime_detail['status'],
        'synopsis': anime_detail['synopsis'],
        # Genres and studios are reduced to plain lists of names.
        'genres': [genre['name'] for genre in anime_detail.get('genres', [])],
        'num_episode': anime_detail['num_episodes'],
        'source': anime_detail.get('source', None),
        'audience_rating': anime_detail.get('rating', "ur"),
        'studios': [studio['name'] for studio in anime_detail.get('studios', [])],
        'watching': status_counts['watching'],
        'completed': status_counts['completed'],
        'on_hold': status_counts['on_hold'],
        'dropped': status_counts['dropped'],
        'plan_to_watch': status_counts['plan_to_watch'],
    }
    anime_details.append(anime_info)

Save the staging data to MongoDB, in the `stg_anime_details` collection

In [None]:
# insert_many rejects an empty document list, so only write when at least one
# anime passed the filter; otherwise report that nothing was staged.
if anime_details:
    db.stg_anime_details.insert_many(anime_details)
else:
    print('No anime details to insert into stg_anime_details')

Save the staging data locally (in the `data/stg/anime_details` folder) as `anime_[anime_id].json`

In [None]:
# Dump every staging record to its own JSON file, anime_<id>.json, inside
# the local staging folder.
anime_details_folder = '../data/stg/anime_details/'

# exist_ok=True creates the folder when missing and is a no-op otherwise.
os.makedirs(anime_details_folder, exist_ok=True)

for anime in anime_details:
    # Staging records keep the MyAnimeList id under '_id'; they have no 'id'
    # key, so looking up 'id' here would raise KeyError.
    file_name = os.path.join(anime_details_folder, f'anime_{anime["_id"]}.json')
    with open(file_name, 'w', encoding='utf-8') as f:
        # json_util (already used by this cell) serialises the record.
        print(json_util.dumps(anime, indent=4), file=f)