In [1]:
import pandas as pd
import networkx as nx
import itertools
import collections
import tqdm

### Load the Dataset

In [2]:
animes_df = pd.read_csv("Anime.csv")
animes_df

Unnamed: 0,ID,Title,Synonyms,Japanese,English,Synopsis,Type,Episodes,Status,Start_Aired,...,Themes,Demographics,Duration_Minutes,Rating,Score,Scored_Users,Ranked,Popularity,Members,Favorites
0,16498,Shingeki no Kyojin,"AoT, SnK",進撃の巨人,Attack on Titan,"Centuries ago, mankind was slaughtered to near...",TV,25.0,Finished Airing,"Apr 7, 2013",...,"Gore, Military, Survival",Shounen,24.0,R - 17+ (violence & profanity),8.531,519803.0,1002.0,1,3524109,155695
1,1535,Death Note,DN,デスノート,Death Note,"Brutal murders, petty thefts, and senseless vi...",TV,37.0,Finished Airing,"Oct 4, 2006",...,Psychological,Shounen,23.0,R - 17+ (violence & profanity),8.621,485487.0,732.0,2,3504535,159701
2,5114,Fullmetal Alchemist: Brotherhood,"Hagane no Renkinjutsushi Fullmetal Alchemist, ...",鋼の錬金術師 FULLMETAL ALCHEMIST,Fullmetal Alchemist Brotherhood,After a horrific alchemy experiment goes wrong...,TV,64.0,Finished Airing,"Apr 5, 2009",...,Military,Shounen,24.0,R - 17+ (violence & profanity),9.131,900398.0,12.0,3,2978455,207772
3,30276,One Punch Man,"One Punch-Man, One-Punch Man, OPM",ワンパンマン,One Punch Man,The seemingly unimpressive Saitama has a rathe...,TV,12.0,Finished Airing,"Oct 5, 2015",...,"Parody, Super Power",Seinen,24.0,R - 17+ (violence & profanity),8.511,19066.0,1112.0,4,2879907,59651
4,11757,Sword Art Online,"S.A.O, SAO",ソードアート・オンライン,Sword Art Online,Ever since the release of the innovative Nerve...,TV,25.0,Finished Airing,"Jul 8, 2012",...,"Love Polygon, Video Game",Unknown,23.0,PG-13 - Teens 13 or older,7.201,990254.0,29562.0,5,2813565,64997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21455,45064,Xiao Gongzhu Ai Wei La Yu Shenmi Wangguo,Unknown,小公主艾薇拉与神秘王国,Princess and the Kingdom,Unknown,Movie,1.0,Finished Airing,"May 12, 2018",...,Unknown,Kids,78.0,G - All Ages,,,160892.0,21487,7,0
21456,45065,Xiao Gongzhu Ai Wei La Yu Shenmi Wangguo 2,Unknown,小公主艾薇拉与神秘王国2,Avera and the Mystical Kingdom 2,Unknown,Movie,1.0,Finished Airing,"Aug 22, 2020",...,Unknown,Kids,80.0,G - All Ages,,,160902.0,21488,6,0
21457,45059,Yongzhe Chuang Mo Cheng,Unknown,勇者闯魔城,Brave City,Unknown,Movie,1.0,Finished Airing,"Mar 16, 2018",...,Unknown,Unknown,80.0,G - All Ages,,,164032.0,21489,6,0
21458,45063,Yu Shen Chuan: Wangzhe Shaonian,Unknown,禹神传之王者少年,The Juvenile of King Yu,Unknown,Movie,1.0,Finished Airing,"Aug 15, 2018",...,Unknown,Unknown,90.0,G - All Ages,,,164462.0,21490,6,0


In [3]:
animes_df.columns

Index(['ID', 'Title', 'Synonyms', 'Japanese', 'English', 'Synopsis', 'Type',
       'Episodes', 'Status', 'Start_Aired', 'End_Aired', 'Premiered',
       'Broadcast', 'Producers', 'Licensors', 'Studios', 'Source', 'Genres',
       'Themes', 'Demographics', 'Duration_Minutes', 'Rating', 'Score',
       'Scored_Users', 'Ranked', 'Popularity', 'Members', 'Favorites'],
      dtype='object')

### Producers Network

In [4]:
# split the producers
animes_producers_df = animes_df.assign(Producers=animes_df["Producers"].str.split(", ")).explode("Producers")
# remove the unknown producer rows
animes_producers_df = animes_producers_df[
    (animes_producers_df["Producers"]!="Unknown") & 
    (animes_producers_df["Producers"]!="Ltd.")
    ]
# find the producer cooperation pairs， weight refers to the number of animes that producer pairs cooperates
all_producer_pairs = []
animes_producers_only_df = animes_producers_df.groupby("ID")["Producers"].agg([lambda x: set(x)]).reset_index().rename(columns={"<lambda>":"Producer Set"})
for i in list(animes_producers_only_df["Producer Set"]):
    all_producer_pairs.extend([frozenset(j) for j in itertools.combinations(i, 2)])
all_producer_pairs_weight_dict = dict(collections.Counter(all_producer_pairs))
# create the producer dataframe used for network
producers_weighted_df = pd.DataFrame.from_dict(all_producer_pairs_weight_dict, orient="index").reset_index().rename(columns={"index":"tuple",0:"weight"})
producers_weighted_df[["Src", "Dest"]] = pd.DataFrame(producers_weighted_df["tuple"].tolist(), index=producers_weighted_df.index)
# create the network
producers_network = nx.from_pandas_edgelist(producers_weighted_df, "Src", "Dest", ["weight"])
for producer in animes_producers_df["Producers"]:
    if producer not in producers_network.nodes():
        producers_network.add_node(producer)
print(nx.info(producers_network))
nx.write_gml(producers_network, "networks/producers_network.gml")

Name: 
Type: Graph
Number of nodes: 1464
Number of edges: 13430
Average degree:  18.3470


### Themes Network

In [5]:
# split the themes
animes_themes_df = animes_df.assign(Themes=animes_df["Themes"].str.split(", ")).explode("Themes")
# remove the unknown producer rows
animes_themes_df = animes_themes_df[animes_themes_df["Themes"]!="Unknown"]
# find the theme pairs
all_theme_pairs = []
animes_themes_only_df = animes_themes_df.groupby("ID")["Themes"].agg([lambda x: set(x)]).reset_index().rename(columns={"<lambda>":"Theme Set"})
for i in list(animes_themes_only_df["Theme Set"]):
    all_theme_pairs.extend([frozenset(j) for j in itertools.combinations(i, 2)])
all_theme_pairs_weight_dict = dict(collections.Counter(all_theme_pairs))
# create the theme dataframe used for network
themes_weighted_df = pd.DataFrame.from_dict(all_theme_pairs_weight_dict, orient="index").reset_index().rename(columns={"index":"tuple",0:"weight"})
themes_weighted_df[["Src", "Dest"]] = pd.DataFrame(themes_weighted_df["tuple"].tolist(), index=themes_weighted_df.index)
# create the network
themes_network = nx.from_pandas_edgelist(themes_weighted_df, "Src", "Dest", ["weight"])
print(nx.info(themes_network))
nx.write_gml(themes_network, "networks/themes_network.gml")

### Animes Network (theme weight)

In [None]:
animes_themes_only_df.head()

Unnamed: 0,ID,Theme Set
0,1,"{Adult Cast, Space}"
1,5,"{Adult Cast, Space}"
2,6,{Adult Cast}
3,7,{Detective}
4,15,{Team Sports}


In [None]:
# find the animes pairs
all_animes_theme_weight_pairs = []
animes_theme_weight_only_df = animes_themes_df.groupby("Themes")["Title"].agg([lambda x: set(x)]).reset_index().rename(columns={"<lambda>":"Anime Set"})
for i in tqdm.tqdm(list(animes_theme_weight_only_df["Anime Set"])):
    all_animes_theme_weight_pairs.extend([frozenset(j) for j in itertools.combinations(i, 2)])
    all_animes_theme_weight_pairs_weight_dict = dict(collections.Counter(all_animes_theme_weight_pairs))
# create the anime theme weight dataframe used for network
animes_theme_weighted_df = pd.DataFrame.from_dict(all_animes_theme_weight_pairs_weight_dict, orient="index").reset_index().rename(columns={"index":"tuple",0:"weight"})
animes_theme_weighted_df[["Src", "Dest"]] = pd.DataFrame(animes_theme_weighted_df["tuple"].tolist(), index=animes_theme_weighted_df.index)
# create the network
animes_theme_weight_network = nx.from_pandas_edgelist(animes_theme_weighted_df, "Src", "Dest", ["weight"])
print(nx.info(animes_theme_weight_network))
nx.write_gml(animes_theme_weight_network, "networks/animes_theme_weight_network.gml")

100%|██████████| 50/50 [01:53<00:00,  2.27s/it]


Name: 
Type: Graph
Number of nodes: 11812
Number of edges: 8861742
Average degree: 1500.4643


### Animes Network (producer weight)

In [None]:
# find the animes pairs
all_animes_producer_weight_pairs = []
animes_producer_weight_only_df = animes_producers_df.groupby("Producers")["Title"].agg([lambda x: set(x)]).reset_index().rename(columns={"<lambda>":"Producer Set"})
for i in tqdm.tqdm(list(animes_producer_weight_only_df["Producer Set"])):
    all_animes_producer_weight_pairs.extend([frozenset(j) for j in itertools.combinations(i, 2)])
    all_animes_producer_weight_pairs_weight_dict = dict(collections.Counter(all_animes_producer_weight_pairs))
# create the anime theme weight dataframe used for network
animes_producer_weighted_df = pd.DataFrame.from_dict(all_animes_producer_weight_pairs_weight_dict, orient="index").reset_index().rename(columns={"index":"tuple",0:"weight"})
animes_producer_weighted_df[["Src", "Dest"]] = pd.DataFrame(animes_producer_weighted_df["tuple"].tolist(), index=animes_producer_weighted_df.index)
# create the network
animes_producer_weight_network = nx.from_pandas_edgelist(animes_producer_weighted_df, "Src", "Dest", ["weight"])
print(nx.info(animes_producer_weight_network))
nx.write_gml(animes_producer_weight_network, "networks/animes_producer_weight_network.gml")

100%|██████████| 1465/1465 [04:58<00:00,  4.90it/s]


Name: 
Type: Graph
Number of nodes: 10717
Number of edges: 1788620
Average degree: 333.7912
