In [1]:
# Restrictions: ONLY named individuals

import pandas as pd

json_file_path = './ice-and-fire.json'
df = pd.read_json(json_file_path)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1345 entries, 0 to 1344
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           1345 non-null   object
 1   allegiance     1186 non-null   object
 2   father         942 non-null    object
 3   predecessor    201 non-null    object
 4   successor      194 non-null    object
 5   url            1345 non-null   object
 6   fatherRumored  11 non-null     object
 7   mother         490 non-null    object
 8   spouse         524 non-null    object
 9   motherRumored  2 non-null      object
dtypes: object(10)
memory usage: 105.2+ KB


In [2]:
# Remove datapoints that have no relation at all.
# Old code and technically not necessarily anymore, already done in node.js
data_without_relation = df[(df['father'].isnull()) \
                           & (df['mother'].isnull()) \
                           & (df['successor'].isnull()) \
                           & (df['spouse'].isnull())]

data_with_relation = df[~df['name'].isin(data_without_relation['name'])]
data_with_relation.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1316 entries, 0 to 1344
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           1316 non-null   object
 1   allegiance     1173 non-null   object
 2   father         942 non-null    object
 3   predecessor    178 non-null    object
 4   successor      194 non-null    object
 5   url            1316 non-null   object
 6   fatherRumored  11 non-null     object
 7   mother         490 non-null    object
 8   spouse         524 non-null    object
 9   motherRumored  2 non-null      object
dtypes: object(10)
memory usage: 113.1+ KB


In [3]:
duplicate_names = df[df['name'].duplicated()]
duplicate_names['name'].value_counts()

Series([], Name: name, dtype: int64)

In [4]:
# Weed out double fathers.
# Some are actually double, because they are rumoured
multiple_fathers = df[df['father'].apply(lambda x: isinstance(x, list) and len(x) > 1)]
multiple_fathers['father']

Series([], Name: father, dtype: object)

In [5]:
# Weed out double moms.
# Some are actually double, because they are rumoured
multiple_mothers = df[df['mother'].apply(lambda x: isinstance(x, list) and len(x) > 1)]
multiple_mothers['mother']

Series([], Name: mother, dtype: object)

In [6]:
# Weed out erratic spouses.
multiple_spouses = df[df['spouse'].apply(lambda x: isinstance(x, list) and len(x) > 1)]
multiple_spouses[['name', 'spouse']]

Unnamed: 0,name,spouse
5,Adrack Humble,"[Rock wife, Salt wife]"
82,Alys Karstark,"[Sigorn, Thenns]"
103,Alyssa Velaryon,"[Aenys I Targaryen, Rogar Baratheon]"
108,Amerei Frey,"[Pate of the Blue Fork, Lancel Lannister]"
290,Craster,"[Dyah, Ferny, Nella, Gilly]"
293,Cregan Stark,"[Arra Norrey, Alysanne Blackwood, Lynara Stark]"
310,Daemon Targaryen,"[Rhea Royce, Laena Velaryon, Rhaenyra Targaryen]"
328,Dalton Greyjoy,"[Tess, Kayce, Lysa Farman]"
342,Davos Dayne,"[Nymeria, Ny Sar]"
369,Donella Hornwood,"[Halys Hornwood, Ramsay Snow]"


In [7]:
# Datapoints with children
data_with_children = data_with_relation[
    (~data_with_relation['successor'].isnull()) 
    | (data_with_relation['name'].isin(data_with_relation['mother']))
    | (data_with_relation['name'].isin(data_with_relation['father']))]

data_with_children.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 462 entries, 0 to 1343
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           462 non-null    object
 1   allegiance     374 non-null    object
 2   father         255 non-null    object
 3   predecessor    140 non-null    object
 4   successor      194 non-null    object
 5   url            462 non-null    object
 6   fatherRumored  2 non-null      object
 7   mother         148 non-null    object
 8   spouse         290 non-null    object
 9   motherRumored  0 non-null      object
dtypes: object(10)
memory usage: 39.7+ KB


In [8]:
# Datapoints for whom spouse is the only relationship

data_with_spouses = data_with_relation[(~data_with_relation['spouse'].isnull()) 
                                      & (df['father'].isnull()) 
                                       & (df['mother'].isnull()) 
                                       & (df['successor'].isnull())]

data_with_spouses.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 257 entries, 5 to 1343
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           257 non-null    object
 1   allegiance     242 non-null    object
 2   father         0 non-null      object
 3   predecessor    2 non-null      object
 4   successor      0 non-null      object
 5   url            257 non-null    object
 6   fatherRumored  0 non-null      object
 7   mother         0 non-null      object
 8   spouse         257 non-null    object
 9   motherRumored  0 non-null      object
dtypes: object(10)
memory usage: 22.1+ KB


  data_with_spouses = data_with_relation[(~data_with_relation['spouse'].isnull())


In [9]:
data_with_spouses.head()

Unnamed: 0,name,allegiance,father,predecessor,successor,url,fatherRumored,mother,spouse,motherRumored
5,Adrack Humble,"[House Humble, House Greyjoy]",,,,http://awoiaf.westeros.org/index.php/Adrack_Hu...,,,"[Rock wife, Salt wife]",
22,Aelinor Penrose,"[House Penrose, House Targaryen]",,,,http://awoiaf.westeros.org/index.php/Aelinor_P...,,,[Aerys I Targaryen],
47,Alarra Massey,"[House Massey, House Velaryon]",,,,http://awoiaf.westeros.org/index.php/Alarra_Ma...,,,[Aethan Velaryon],
50,Alayne,[House Baelish],,,,http://awoiaf.westeros.org/index.php/Alayne_Ba...,,,[Baelish (lord)],
74,Alyn Tarbeck,[House Tarbeck],,,,http://awoiaf.westeros.org/index.php/Alyn_Tarbeck,,,[Jeyne Westerling (wife of Maegor I)],


In [10]:
# Get spouses that have a spouse present in the dataset with all 
# parent-child relationships

parent_child_spouses = df.explode('spouse').dropna(subset=['spouse'])['spouse'].unique()
data_spouse_family = data_with_spouses[data_with_spouses['name'].isin(parent_child_spouses)]
data_spouse_family.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 192 entries, 47 to 1343
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           192 non-null    object
 1   allegiance     182 non-null    object
 2   father         0 non-null      object
 3   predecessor    2 non-null      object
 4   successor      0 non-null      object
 5   url            192 non-null    object
 6   fatherRumored  0 non-null      object
 7   mother         0 non-null      object
 8   spouse         192 non-null    object
 9   motherRumored  0 non-null      object
dtypes: object(10)
memory usage: 16.5+ KB


In [11]:
data_spouse_family.head()

Unnamed: 0,name,allegiance,father,predecessor,successor,url,fatherRumored,mother,spouse,motherRumored
47,Alarra Massey,"[House Massey, House Velaryon]",,,,http://awoiaf.westeros.org/index.php/Alarra_Ma...,,,[Aethan Velaryon],
74,Alyn Tarbeck,[House Tarbeck],,,,http://awoiaf.westeros.org/index.php/Alyn_Tarbeck,,,[Jeyne Westerling (wife of Maegor I)],
78,Alys Arryn (wife of Rhaegel),"[House Arryn, House Targaryen]",,,,http://awoiaf.westeros.org/index.php/Alys_Arry...,,,[Rhaegel Targaryen],
79,Alys Beesbury,"[House Beesbury, House Tyrell]",,,,http://awoiaf.westeros.org/index.php/Alys_Bees...,,,[Leo Tyrell (son of Victor)],
80,Alys Frey,[House Frey],,,,http://awoiaf.westeros.org/index.php/Alys_Frey,,,[Jared Frey],


In [12]:
all_usable_data = pd.concat([data_with_children, data_spouse_family])
len(all_usable_data)

654

In [13]:
all_usable_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 654 entries, 0 to 1343
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           654 non-null    object
 1   allegiance     556 non-null    object
 2   father         255 non-null    object
 3   predecessor    142 non-null    object
 4   successor      194 non-null    object
 5   url            654 non-null    object
 6   fatherRumored  2 non-null      object
 7   mother         148 non-null    object
 8   spouse         482 non-null    object
 9   motherRumored  0 non-null      object
dtypes: object(10)
memory usage: 56.2+ KB


In [14]:
first_layer = all_usable_data[(all_usable_data['father'].isnull()) & (all_usable_data['mother'].isnull()) & (all_usable_data['predecessor'].isnull())]
first_layer.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 362 entries, 47 to 1343
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           362 non-null    object
 1   allegiance     324 non-null    object
 2   father         0 non-null      object
 3   predecessor    0 non-null      object
 4   successor      54 non-null     object
 5   url            362 non-null    object
 6   fatherRumored  0 non-null      object
 7   mother         0 non-null      object
 8   spouse         316 non-null    object
 9   motherRumored  0 non-null      object
dtypes: object(10)
memory usage: 31.1+ KB


In [15]:
first_layer[first_layer['successor'].notna()]['successor'].head(100)

58       [Alekyne Florent, Davos Seaworth]
107                         [Lord Hayford]
131                    [Arlan V Durrandon]
180                  [Benedict II Justman]
186         [Rickon Stark (son of Benjen)]
196                        [Catelyn Stark]
220               [Brandon Stark (Burner)]
296                                [Pylos]
374                      [Damon Hightower]
377                     [Gerold Hightower]
381                  [Durran II Durrandon]
383                  [Monfryd I Durrandon]
481                     [Merle I Gardener]
482                     [Mern VI Gardener]
496                      [Barristan Selmy]
499                    [Joffrey Lannister]
534                     [Donella Hornwood]
539                          [Theo Tyrell]
543                     [Harmund II Hoare]
555                         [Criston Cole]
566                      [Eddara Tallhart]
569                     [Ormund Hightower]
592                    [Humfrey II Teague]
616        

In [None]:
# Save final dataset
all_usable_data.to_json('./ice_and_fire_final.json', orient='records')