# Obtain reviews from the MyAnimeList website through the jikanpy API

In [1]:
import os
import re
import time

import numpy as np
import pandas as pd
from jikanpy import Jikan

In [2]:
# Shared Jikan API client; the scraping helper below issues its requests through it.
jikan = Jikan()

### Helper function to scrape the reviews of one anime (given its ID)

In [3]:
def get_anime_reviews_per_page(anime_id, last_page=1, delay=4):
    """Fetch review pages 1..last_page of one anime through the Jikan client.

    Parameters
    ----------
    anime_id : int
        ID of the anime to query; must be positive.
    last_page : int, default 1
        Number of the last review page to request (pages are numbered from 1).
    delay : int or float, default 4
        Seconds to pause after every request. Presumably this keeps the
        notebook under the API's rate limit -- confirm against the Jikan
        documentation before lowering it.

    Returns
    -------
    reviews : list
        One element per requested page; each element is the "reviews"
        payload returned for that page.
    review_count : int
        Total number of reviews over all fetched pages.

    Raises
    ------
    AssertionError
        If ``anime_id`` is not positive or ``last_page`` is below 1.
    """
    assert(anime_id > 0)
    assert(last_page >= 1)

    reviews = []

    for page in range(1, last_page + 1):
        print("Getting page " + str(page) + " of " + str(last_page) + "...")

        page_reviews = jikan.anime(id=anime_id, extension="reviews", page=page)["reviews"]
        reviews.append(page_reviews)

        # Pause after the final page as well: the caller may request the next
        # anime immediately after this function returns.
        time.sleep(delay)

    review_count = sum(len(page_reviews) for page_reviews in reviews)
    return reviews, review_count

### Helper function to convert the reviews into a DataFrame

In [4]:
def convert_reviews_into_df(reviews):
    """Combine per-page review payloads into a single DataFrame.

    Parameters
    ----------
    reviews : list
        One element per page, each accepted by ``pd.DataFrame.from_dict``
        (e.g. a list of review dicts).

    Returns
    -------
    pandas.DataFrame
        All pages stacked in order with a fresh 0..n-1 index. An empty
        DataFrame is returned when ``reviews`` is empty.
    """
    print("Converting reviews into df...")

    page_frames = [pd.DataFrame.from_dict(page) for page in reviews]
    if not page_frames:
        # pd.concat raises on an empty list, so return an empty frame instead.
        return pd.DataFrame()

    # One concat instead of DataFrame.append in a loop: append was removed in
    # pandas 2.0 and copied the accumulated frame on every iteration.
    # ignore_index=True gives a clean RangeIndex and avoids an "index" column.
    return pd.concat(page_frames, ignore_index=True)

### Helper function to save a DataFrame of reviews to file

In [5]:
def save_to_file(df, filename, path="data/", to_csv=True, to_pickle=True):
    """Write a DataFrame to ``<path>/<filename>.csv`` and/or ``.pkl``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to persist.
    filename : str
        File name without extension.
    path : str, default "data/"
        Target directory, with or without a trailing separator. It is created
        if it does not exist yet.
    to_csv : bool, default True
        Write ``<filename>.csv`` (the index is written as the first column).
    to_pickle : bool, default True
        Write ``<filename>.pkl``.
    """
    if not (to_csv or to_pickle):
        return

    if path:
        # Avoid a failure on the first write when the directory is missing.
        os.makedirs(path, exist_ok=True)

    # os.path.join tolerates a missing trailing separator, unlike path + filename.
    base = os.path.join(path, filename)

    if to_csv:
        print("Saving " + str(len(df)) + " reviews into " + base + ".csv...")
        df.to_csv(base + ".csv")
    if to_pickle:
        print("Pickling " + str(len(df)) + " reviews into " + base + ".pkl...")
        df.to_pickle(base + ".pkl")

### Create a DataFrame of reviews per anime and save it to CSV and/or pickle files

In [11]:
def _title_to_filename(title):
    """Turn an anime title into a lowercase, dash-separated file stem."""
    return "-".join(re.sub(r"[^\w\d]+", " ", title.lower()).split())


def scrape_and_save(anime_list, path="data/", to_csv=True, to_pickle=True):
    """Scrape, tag and save the reviews of every anime in ``anime_list``.

    Parameters
    ----------
    anime_list : list of dict
        Each entry needs "id", "title" and "last" (last review page to
        fetch). "season" and "done" are optional; when absent the matching
        column is filled with None. Entries whose "last" is not positive
        are skipped.
    path : str, default "data/"
        Directory handed to ``save_to_file``.
    to_csv, to_pickle : bool, default True
        Output formats handed to ``save_to_file``.

    Returns
    -------
    list of dict
        One dict per scraped anime with the keys "id", "df", "title",
        "filename" and "count".
    """
    df_list = []
    used_filenames = set()

    for anime in anime_list:
        # Rows without review pages (e.g. unfilled template rows) are skipped.
        if not anime["last"] > 0:
            continue

        season = anime.get("season")
        heading = anime["title"]
        if season is not None:
            heading = heading + ", Season " + str(season)
        print(heading + ":")

        reviews, count = get_anime_reviews_per_page(anime["id"], anime["last"])
        df = convert_reviews_into_df(reviews)
        df["Anime"] = anime["title"]
        df["Season"] = season
        df["Done"] = anime.get("done")

        filename = _title_to_filename(anime["title"])
        if not filename:
            # A title made only of punctuation would yield an empty stem.
            filename = str(anime["id"])
        elif filename in used_filenames:
            # Titles differing only in punctuation ("Gintama", "Gintama'",
            # "Gintama.") collapse to the same stem; add the ID so a later
            # anime does not overwrite an earlier one's files.
            filename = filename + "-" + str(anime["id"])
        used_filenames.add(filename)

        save_to_file(df, filename, path, to_csv, to_pickle)

        df_list.append({"id": anime["id"],
                        "df": df,
                        "title": anime["title"],
                        "filename": filename,
                        "count": count})

    return df_list

# ! ! ! Start editing here
# Replace the anime list below depending on your assignment



### Declare the list of anime to scrape, where to save the produced DataFrames, and in what format
### Make sure that the path you declare is an existing directory on your machine

In [16]:
# Replace the items in this list with your assigned anime, keeping the format below.
# Keys of each entry:
#   "id"     - anime ID handed to the Jikan client
#   "title"  - stored in the "Anime" column and used to build the output file name
#   "season" - stored in the "Season" column
#   "done"   - stored in the "Done" column
#   "last"   - number of the last review page to fetch; entries with 0 are skipped
anime_list = [
    {"id": 11061, "title": "Hunter x Hunter (2011)", "season": 1, "done": "Done", "last": 34 },
    {"id": 918, "title": "Gintama", "season": 1, "done": "Done", "last": 12 },
    {"id": 9969, "title": "Gintama'", "season": 2, "done": "Done", "last": 2 },
    {"id": 15417, "title": "Gintama': Enchousen", "season": 3, "done": "Done", "last": 1 },
    {"id": 28977, "title": "Gintama°", "season": 4, "done": "Done", "last": 3 },
    {"id": 34096, "title": "Gintama.", "season": 5, "done": "Done", "last": 2 },
    {"id": 121, "title": "Fullmetal Alchemist", "season": 1, "done": "Done", "last": 12 },
    {"id": 5114, "title": "Fullmetal Alchemist: Brotherhood", "season": 2, "done": "Done", "last": 40 },
    {"id": 31646, "title": "3-gatsu no Lion", "season": 1, "done": "Done", "last": 9 },
    {"id": 35180, "title": "3-gatsu no Lion 2nd Season", "season": 2, "done": "Done", "last": 6 },
    {"id": 2167, "title": "Clannad", "season": 1, "done": "Done", "last": 23 },
    {"id": 4181, "title": "Clannad: After Story", "season": 2, "done": "Done", "last": 33 },
    {"id": 32182, "title": "Mob Psycho 100", "season": 1, "done": "Done", "last": 10 },
    {"id": 37510, "title": "Mob Psycho 100 II", "season": 2, "done": "Done", "last": 9 },
    {"id": 45, "title": "Rurouni Kenshin: Meiji Kenkaku Romantan", "season": 1, "done": "Done", "last": 5 },
    {"id": 31181, "title": "Owarimonogatari", "season": 1, "done": "Done", "last": 2 },
    {"id": 35247, "title": "Owarimonogatari 2nd Season", "season": 2, "done": "Done", "last": 3 },
    {"id": 0, "title": "", "season": 1, "done": "Done", "last": 0 },
    {"id": 0, "title": "", "season": 1, "done": "Not Done", "last": 0 },
]

# Output directory; make sure to create it before running the next cell
path = "new-data/"
# Output formats: here only the pickle file is written
to_csv = False
to_pickle = True

In [17]:
# Scrape every entry of anime_list and save one file per anime.
# Slow: the scraping helper pauses after each requested review page.
df_list = scrape_and_save(anime_list, path, to_csv, to_pickle)

Hunter x Hunter (2011), Season 1:
Getting page 1 of 34...
Getting page 2 of 34...
Getting page 3 of 34...
Getting page 4 of 34...
Getting page 5 of 34...
Getting page 6 of 34...
Getting page 7 of 34...
Getting page 8 of 34...
Getting page 9 of 34...
Getting page 10 of 34...
Getting page 11 of 34...
Getting page 12 of 34...
Getting page 13 of 34...
Getting page 14 of 34...
Getting page 15 of 34...
Getting page 16 of 34...
Getting page 17 of 34...
Getting page 18 of 34...
Getting page 19 of 34...
Getting page 20 of 34...
Getting page 21 of 34...
Getting page 22 of 34...
Getting page 23 of 34...
Getting page 24 of 34...
Getting page 25 of 34...
Getting page 26 of 34...
Getting page 27 of 34...
Getting page 28 of 34...
Getting page 29 of 34...
Getting page 30 of 34...
Getting page 31 of 34...
Getting page 32 of 34...
Getting page 33 of 34...
Getting page 34 of 34...
Converting reviews into df...
Pickling 677 reviews into new-data/hunter-x-hunter-2011.pkl...
Gintama, Season 1:
Getting page 

In [18]:
# Scraped vs. requested counts; they differ when entries with "last" of 0 were skipped.
len(df_list), len(anime_list)

(17, 19)

In [24]:
# Stack every per-anime DataFrame into one frame with a fresh 0..n-1 index.
# A single pd.concat replaces DataFrame.append in a loop: append was removed
# in pandas 2.0 and copied the accumulated frame on every iteration.
merged_df = pd.concat([entry["df"] for entry in df_list], ignore_index=True)
merged_df

Unnamed: 0,mal_id,url,type,helpful_count,date,reviewer,content,Anime,Season,Done
0,163333,https://myanimelist.net/reviews.php?id=163333,,2900,2014-10-02T17:27:00+00:00,{'url': 'https://myanimelist.net/profile/Cresh...,The lack of popularity surrounding Hunter x Hu...,Hunter x Hunter (2011),1,Done
1,161751,https://myanimelist.net/reviews.php?id=161751,,1320,2014-09-23T12:33:00+00:00,{'url': 'https://myanimelist.net/profile/_juno...,---The review contains spoilers---\r\n\r\n“You...,Hunter x Hunter (2011),1,Done
2,189776,https://myanimelist.net/reviews.php?id=189776,,917,2015-06-07T12:15:00+00:00,{'url': 'https://myanimelist.net/profile/RedIn...,"At first, when I read the synopsis of hunter x...",Hunter x Hunter (2011),1,Done
3,162739,https://myanimelist.net/reviews.php?id=162739,,886,2014-09-28T10:56:00+00:00,{'url': 'https://myanimelist.net/profile/Kyoum...,"""I see now... Yes... I see... You had me in ch...",Hunter x Hunter (2011),1,Done
4,156637,https://myanimelist.net/reviews.php?id=156637,,505,2014-08-15T04:51:00+00:00,{'url': 'https://myanimelist.net/profile/DarkD...,After reading the many overwhelmingly positive...,Hunter x Hunter (2011),1,Done
...,...,...,...,...,...,...,...,...,...,...
3910,290795,https://myanimelist.net/reviews.php?id=290795,,1,2018-10-03T09:57:00+00:00,{'url': 'https://myanimelist.net/profile/Kiryu...,I did not think that this story turned out to ...,Owarimonogatari 2nd Season,2,Done
3911,274566,https://myanimelist.net/reviews.php?id=274566,,1,2018-03-22T11:32:00+00:00,{'url': 'https://myanimelist.net/profile/Valeo...,Such a great year for Anime!\r\n\r\nUnfortunat...,Owarimonogatari 2nd Season,2,Done
3912,267112,https://myanimelist.net/reviews.php?id=267112,,1,2018-01-01T12:50:00+00:00,{'url': 'https://myanimelist.net/profile/Umaig...,The monogatari series is known for having an u...,Owarimonogatari 2nd Season,2,Done
3913,371280,https://myanimelist.net/reviews.php?id=371280,,0,2021-01-02T14:18:00+00:00,{'url': 'https://myanimelist.net/profile/Mixal...,"Finally, a conclusion to the adventure of arar...",Owarimonogatari 2nd Season,2,Done


In [25]:
# Persist the merged reviews as a pickle only (CSV export is switched off).
save_to_file(merged_df, filename="new-merged", path="new-data/", to_csv=False, to_pickle=True)

Pickling 3915 reviews into new-data/new-merged.pkl...


# Run the following code if you have an existing df_list already
### (e.g. you still have this notebook running but you decide to scrape additional anime)

In [122]:
# Additional anime to scrape. Each entry carries the same keys as the main
# anime list, including "season" and "done", which scrape_and_save reads.
new_anime_list = [
    {"id": 11061, "title": "Hunter x Hunter (2011)", "season": 1, "done": "Done", "last": 34 },
]

# Output directory and formats for this additional run.
path = "data/"
to_csv = True
to_pickle = True

In [123]:
# Scrape only the additional anime; results are kept separate until merged into df_list.
new_df_list = scrape_and_save(new_anime_list, path, to_csv, to_pickle)

Hunter x Hunter (2011):
Getting page 1 of 34...
Getting page 2 of 34...
Getting page 3 of 34...
Getting page 4 of 34...
Getting page 5 of 34...
Getting page 6 of 34...
Getting page 7 of 34...
Getting page 8 of 34...
Getting page 9 of 34...
Getting page 10 of 34...
Getting page 11 of 34...
Getting page 12 of 34...
Getting page 13 of 34...
Getting page 14 of 34...
Getting page 15 of 34...
Getting page 16 of 34...
Getting page 17 of 34...
Getting page 18 of 34...
Getting page 19 of 34...
Getting page 20 of 34...
Getting page 21 of 34...
Getting page 22 of 34...
Getting page 23 of 34...
Getting page 24 of 34...
Getting page 25 of 34...
Getting page 26 of 34...
Getting page 27 of 34...
Getting page 28 of 34...
Getting page 29 of 34...
Getting page 30 of 34...
Getting page 31 of 34...
Getting page 32 of 34...
Getting page 33 of 34...
Getting page 34 of 34...
Converting reviews into df...
Saving 675 reviews into data/hunter-x-hunter-2011.csv...
Pickling 675 reviews into data/hunter-x-hunter-2

In [126]:
# NOTE: not idempotent -- running this cell again appends new_df_list a second time.
df_list = df_list + new_df_list

In [127]:
# Total number of scraped anime so far vs. the number just added.
len(df_list), len(new_anime_list)

(9, 1)





# Run the following code if you need to save the DF into a certain format again

In [128]:
# Write every scraped DataFrame again, as both CSV and pickle, under `path`.
for entry in df_list:
    title, frame, stem = entry["title"], entry["df"], entry["filename"]
    print(title + ":")
    save_to_file(frame, stem, path, to_csv=True, to_pickle=True)

Durarara!!:
Saving 350 reviews into data/durarara.csv...
Pickling 350 reviews into data/durarara.pkl...
Durarara!!x2 Ten:
Saving 25 reviews into data/durarara-x2-ten.csv...
Pickling 25 reviews into data/durarara-x2-ten.pkl...
Durarara!!x2 Shou:
Saving 35 reviews into data/durarara-x2-shou.csv...
Pickling 35 reviews into data/durarara-x2-shou.pkl...
Durarara!!x2 Ketsu:
Saving 20 reviews into data/durarara-x2-ketsu.csv...
Pickling 20 reviews into data/durarara-x2-ketsu.pkl...
Boku no Hero Academia:
Saving 363 reviews into data/boku-no-hero-academia.csv...
Pickling 363 reviews into data/boku-no-hero-academia.pkl...
Boku no Hero Academia 2nd Season:
Saving 148 reviews into data/boku-no-hero-academia-2nd-season.csv...
Pickling 148 reviews into data/boku-no-hero-academia-2nd-season.pkl...
Boku no Hero Academia 3rd Season:
Saving 112 reviews into data/boku-no-hero-academia-3rd-season.csv...
Pickling 112 reviews into data/boku-no-hero-academia-3rd-season.pkl...
Boku no Hero Academia 4th Season

In [129]:
# Read the first saved CSV back to check the export; index_col=0 restores the
# index that to_csv wrote as the first column.
csv_df = pd.read_csv("data/" + df_list[0]["filename"] + ".csv", encoding="utf-8", index_col=0)
csv_df

Unnamed: 0,mal_id,url,type,helpful_count,date,reviewer,content
0,27162,https://myanimelist.net/reviews.php?id=27162,,1164,2010-06-14T22:19:00+00:00,{'url': 'https://myanimelist.net/profile/Alayn...,"This anime is... for lack of a better term, th..."
1,34917,https://myanimelist.net/reviews.php?id=34917,,454,2011-03-05T18:00:00+00:00,{'url': 'https://myanimelist.net/profile/daimi...,"I finally finished ""Durarara!!"" and while ther..."
2,171215,https://myanimelist.net/reviews.php?id=171215,,366,2014-12-06T09:36:00+00:00,{'url': 'https://myanimelist.net/profile/Aerel...,TL;DR watch Baccano instead\r\n\r\nDurarara li...
3,36712,https://myanimelist.net/reviews.php?id=36712,,278,2011-05-03T10:24:00+00:00,{'url': 'https://myanimelist.net/profile/Andri...,The world of anime never ceases to come up wit...
4,27619,https://myanimelist.net/reviews.php?id=27619,,182,2010-06-26T05:41:00+00:00,{'url': 'https://myanimelist.net/profile/Lindl...,"From the very get-go, even from the opening ti..."
...,...,...,...,...,...,...,...
345,110315,https://myanimelist.net/reviews.php?id=110315,,0,2013-08-19T00:42:00+00:00,{'url': 'https://myanimelist.net/profile/Jerbe...,"This anime may seem slow at first, but it take..."
346,373325,https://myanimelist.net/reviews.php?id=373325,,0,2021-01-13T16:52:00+00:00,{'url': 'https://myanimelist.net/profile/kakeg...,This anime is confusing I'm telling you. Liter...
347,131081,https://myanimelist.net/reviews.php?id=131081,,0,2014-02-01T20:48:00+00:00,{'url': 'https://myanimelist.net/profile/boyso...,I'm usually not that good when it comes to wri...
348,125147,https://myanimelist.net/reviews.php?id=125147,,0,2013-12-22T22:52:00+00:00,{'url': 'https://myanimelist.net/profile/lambo...,"Before going into Durarara, i honestly didn't ..."


In [130]:
# Read the first saved pickle back to check the export.
# NOTE(review): unpickling can execute arbitrary code -- only load files this notebook produced.
pkl_df = pd.read_pickle("data/" + df_list[0]["filename"] + ".pkl")
pkl_df

Unnamed: 0,mal_id,url,type,helpful_count,date,reviewer,content
0,27162,https://myanimelist.net/reviews.php?id=27162,,1164,2010-06-14T22:19:00+00:00,{'url': 'https://myanimelist.net/profile/Alayn...,"This anime is... for lack of a better term, th..."
1,34917,https://myanimelist.net/reviews.php?id=34917,,454,2011-03-05T18:00:00+00:00,{'url': 'https://myanimelist.net/profile/daimi...,"I finally finished ""Durarara!!"" and while ther..."
2,171215,https://myanimelist.net/reviews.php?id=171215,,366,2014-12-06T09:36:00+00:00,{'url': 'https://myanimelist.net/profile/Aerel...,TL;DR watch Baccano instead\r\n\r\nDurarara li...
3,36712,https://myanimelist.net/reviews.php?id=36712,,278,2011-05-03T10:24:00+00:00,{'url': 'https://myanimelist.net/profile/Andri...,The world of anime never ceases to come up wit...
4,27619,https://myanimelist.net/reviews.php?id=27619,,182,2010-06-26T05:41:00+00:00,{'url': 'https://myanimelist.net/profile/Lindl...,"From the very get-go, even from the opening ti..."
...,...,...,...,...,...,...,...
345,110315,https://myanimelist.net/reviews.php?id=110315,,0,2013-08-19T00:42:00+00:00,{'url': 'https://myanimelist.net/profile/Jerbe...,"This anime may seem slow at first, but it take..."
346,373325,https://myanimelist.net/reviews.php?id=373325,,0,2021-01-13T16:52:00+00:00,{'url': 'https://myanimelist.net/profile/kakeg...,This anime is confusing I'm telling you. Liter...
347,131081,https://myanimelist.net/reviews.php?id=131081,,0,2014-02-01T20:48:00+00:00,{'url': 'https://myanimelist.net/profile/boyso...,I'm usually not that good when it comes to wri...
348,125147,https://myanimelist.net/reviews.php?id=125147,,0,2013-12-22T22:52:00+00:00,{'url': 'https://myanimelist.net/profile/lambo...,"Before going into Durarara, i honestly didn't ..."
