In [4]:
!pip install PyMySQL



In [5]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.exc import SQLAlchemyError

# Connection settings come from the environment (or may be assigned directly
# here). os.getenv returns None for any variable that is not set.
DB_HOST = os.getenv('MYSQL_HOST')
DB_USER = os.getenv('MYSQL_USER')
DB_PASSWORD = os.getenv('MYSQL_PASSWORD')
DB_NAME = 'anime-atlas'

# Fail fast with a clear message instead of building a URL that contains the
# literal text "None" and only failing later, at the first query.
_missing_settings = [
    name
    for name, value in (
        ('MYSQL_HOST', DB_HOST),
        ('MYSQL_USER', DB_USER),
        ('MYSQL_PASSWORD', DB_PASSWORD),
    )
    if value is None
]
if _missing_settings:
    raise RuntimeError(
        f"Missing database settings: {', '.join(_missing_settings)}"
    )

# Establishing the database connection.
# User and password are percent-encoded so that characters such as '@', ':'
# or '/' in the credentials cannot be misread as URL delimiters.
# NOTE(review): this assumes the environment holds the raw (not already
# URL-encoded) credentials — confirm before relying on it.
engine = create_engine(
    f'mysql+pymysql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}@{DB_HOST}/{DB_NAME}'
)

In [6]:
def fetch_animelists_chunk(username_gt, limit=100_000):
    """Fetch one page of rows from `mal-user-animelists`, ordered by username.

    Args:
        username_gt: Only rows whose username sorts strictly after this value
            are returned. Pass None or "" to start from the first row.
        limit: Maximum number of rows to return; must be convertible to int.

    Returns:
        The DataFrame produced by ``pd.read_sql`` for the page, holding at
        most ``limit`` rows.

    Raises:
        SQLAlchemyError: If connecting or querying fails. The error is printed
            and then re-raised, so callers never receive None in place of a
            DataFrame.
    """
    # LIMIT is interpolated into the SQL text, so force it to an int first.
    limit = int(limit)
    try:
        with engine.connect() as connection:
            if username_gt == "" or username_gt is None:
                # First page: no lower bound, and therefore no bind parameters.
                query = f"SELECT * FROM `mal-user-animelists` ORDER BY username LIMIT {limit};"
                params = None
            else:
                query = f"SELECT * FROM `mal-user-animelists` WHERE username > %(username_gt)s ORDER BY username LIMIT {limit};"
                params = {'username_gt': username_gt}
            print(f"Pulling up to {username_gt}...")
            return pd.read_sql(query, connection, params=params)
    except SQLAlchemyError as e:
        print(f"Error fetching data: {e}")
        # Propagate the failure rather than implicitly returning None.
        raise

def fetch_animelists(filepath = './work/data/mal-user-animelists.csv', limit = 1_000):
    """Export `mal-user-animelists` to a CSV file one page at a time.

    Pages are requested from ``fetch_animelists_chunk`` and written to disk as
    they arrive, since the full result might not fit in memory. The first page
    truncates ``filepath`` and writes the header; later pages are appended
    without a header. If the first page is empty, nothing is written and any
    existing file at ``filepath`` is left untouched.

    NOTE(review): the cursor for the next page is the last username of the
    current page (``username > last``). If several rows can share a username,
    rows that straddle a page boundary would be skipped — confirm the table
    has one row per username.

    Args:
        filepath: Destination CSV path.
        limit: Number of rows requested per page.

    Raises:
        RuntimeError: If a page fetch returns None instead of a DataFrame.
    """
    username_gt = None
    # write chunks to file as they come in since they might not fit in memory
    chunk_ix = 0
    total_rows = 0
    while True:
        data = fetch_animelists_chunk(username_gt, limit)
        if data is None:
            # A None page means the fetch failed; stop with a clear error
            # rather than a TypeError from len(None).
            raise RuntimeError(
                f"Fetching animelists after username {username_gt!r} failed; "
                f"{total_rows} rows were written to {filepath} before the error"
            )
        if data.empty:
            break
        total_rows += len(data)
        # truncate if first chunk, append otherwise
        mode = 'w' if chunk_ix == 0 else 'a'
        data.to_csv(filepath, mode=mode, header=chunk_ix == 0, index=False)
        # The last username of this page is the lower bound for the next one.
        username_gt = data.iloc[-1]['username']
        chunk_ix += 1
        print(f"Processed {total_rows} animelists")

    print(f"Successfully wrote {total_rows} animelists to {filepath}")

# Run the export with the default output path when this cell executes.
fetch_animelists()


Pulling up to None...
Processed 1000 animelists
Pulling up to __se__b...
Processed 2000 animelists
Pulling up to _Angel0_...
Processed 3000 animelists
Pulling up to _carbonteddy...
Processed 4000 animelists
Pulling up to _Eli_...
Processed 5000 animelists
Pulling up to _Hitoriy...
Processed 6000 animelists
Pulling up to _Kel_...
Processed 7000 animelists
Pulling up to _maaliik_...
Processed 8000 animelists
Pulling up to _navis_...
Processed 9000 animelists
Pulling up to _Rampo_san_...
Processed 10000 animelists
Pulling up to _sHy...
Processed 11000 animelists
Pulling up to _TripleSeven_...
Processed 12000 animelists
Pulling up to _Zanco_...
Processed 13000 animelists
Pulling up to -Asteria...
Processed 14000 animelists
Pulling up to -hotaru009-...
Processed 15000 animelists
Pulling up to -Navjeet-...
Processed 16000 animelists
Pulling up to -Tavares-...
Processed 17000 animelists
Pulling up to 00coco31...
Processed 18000 animelists
Pulling up to 08176496590...
Processed 19000 animelist