In [1]:
# Restrictions: ONLY named individuals

import pandas as pd

json_file_path = './ice-and-fire.json'
df = pd.read_json(json_file_path)

# Really only focus on family relations
df.drop('predecessor', axis=1, inplace=True)
df.drop('successor', axis=1, inplace=True)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1344 entries, 0 to 1343
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           1344 non-null   object
 1   allegiance     1185 non-null   object
 2   father         941 non-null    object
 3   url            1344 non-null   object
 4   fatherRumored  11 non-null     object
 5   mother         490 non-null    object
 6   spouse         523 non-null    object
 7   motherRumored  2 non-null      object
dtypes: object(8)
memory usage: 84.1+ KB


In [2]:
# Remove datapoints that have no relation at all.
# Old code and technically not necessarily anymore, already done in node.js
data_without_relation = df[(df['father'].isnull()) \
                           & (df['mother'].isnull()) \
                           & (df['spouse'].isnull())]

data_with_relation = df[~df['name'].isin(data_without_relation['name'])]
data_with_relation.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1246 entries, 0 to 1343
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           1246 non-null   object
 1   allegiance     1138 non-null   object
 2   father         941 non-null    object
 3   url            1246 non-null   object
 4   fatherRumored  11 non-null     object
 5   mother         490 non-null    object
 6   spouse         523 non-null    object
 7   motherRumored  2 non-null      object
dtypes: object(8)
memory usage: 87.6+ KB


In [3]:
duplicate_names = df[df['name'].duplicated()]
duplicate_names['name'].value_counts()

Series([], Name: name, dtype: int64)

In [4]:
# Weed out double fathers.
# Some are actually double, because they are rumoured
multiple_fathers = df[df['father'].apply(lambda x: isinstance(x, list) and len(x) > 1)]
multiple_fathers['father']

Series([], Name: father, dtype: object)

In [5]:
# Weed out double moms.
# Some are actually double, because they are rumoured
multiple_mothers = df[df['mother'].apply(lambda x: isinstance(x, list) and len(x) > 1)]
multiple_mothers['mother']

Series([], Name: mother, dtype: object)

In [6]:
# Weed out erratic spouses.
multiple_spouses = df[df['spouse'].apply(lambda x: isinstance(x, list) and len(x) > 1)]
multiple_spouses[['name', 'spouse']]

Unnamed: 0,name,spouse
81,Alys Karstark,"[Sigorn, Thenns]"
102,Alyssa Velaryon,"[Aenys I Targaryen, Rogar Baratheon]"
107,Amerei Frey,"[Pate of the Blue Fork, Lancel Lannister]"
289,Craster,"[Dyah, Ferny, Nella, Gilly]"
292,Cregan Stark,"[Arra Norrey, Alysanne Blackwood, Lynara Stark]"
309,Daemon Targaryen,"[Rhea Royce, Laena Velaryon, Rhaenyra Targaryen]"
327,Dalton Greyjoy,"[Tess, Kayce, Lysa Farman]"
341,Davos Dayne,"[Nymeria, Ny Sar]"
368,Donella Hornwood,"[Halys Hornwood, Ramsay Snow]"
400,Elaena Targaryen,"[Ossifer Plumm, Ronnel Penrose, Michael Manwoody]"


In [7]:
# Datapoints with children
data_with_children = data_without_relation[
    (data_without_relation['name'].isin(data_with_relation['mother']))
    | (data_without_relation['name'].isin(data_with_relation['father']))]

data_with_children.head()

Unnamed: 0,name,allegiance,father,url,fatherRumored,mother,spouse,motherRumored
72,Alyn Stokeworth,[House Stokeworth],,http://awoiaf.westeros.org/index.php/Alyn_Stok...,,,,
130,Arrec Durrandon,,,http://awoiaf.westeros.org/index.php/Arrec_Dur...,,,,
179,Benedict I Justman,,,http://awoiaf.westeros.org/index.php/Benedict_...,,,,
219,Brandon Stark (Shipwright),,,http://awoiaf.westeros.org/index.php/Brandon_S...,,,,
222,Brandon Stark (father of Walton),[House Stark],,http://awoiaf.westeros.org/index.php/Brandon_S...,,,,


In [8]:
# data_with_relation[data_with_relation['name'] == 'Aelor Targaryen']
data_with_relation['father']

0       Manfred Hightower (Aegon's Conquest)
1                             Damon Marbrand
2                             Eustace Osgrey
3                            Laenor Velaryon
6                         Daemon I Blackfyre
                        ...                 
1339                                     NaN
1340                              Yohn Royce
1341                              Tytos Frey
1342                                     NaN
1343                              Tytos Frey
Name: father, Length: 1246, dtype: object

In [9]:
# Datapoints for whom spouse is the only relationship

data_with_spouses = data_with_relation[(~data_with_relation['spouse'].isnull()) 
                                      & (df['father'].isnull()) 
                                       & (df['mother'].isnull())]

data_with_spouses.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 268 entries, 21 to 1342
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           268 non-null    object
 1   allegiance     252 non-null    object
 2   father         0 non-null      object
 3   url            268 non-null    object
 4   fatherRumored  0 non-null      object
 5   mother         0 non-null      object
 6   spouse         268 non-null    object
 7   motherRumored  0 non-null      object
dtypes: object(8)
memory usage: 18.8+ KB


  data_with_spouses = data_with_relation[(~data_with_relation['spouse'].isnull())


In [10]:
data_with_spouses.head()

Unnamed: 0,name,allegiance,father,url,fatherRumored,mother,spouse,motherRumored
21,Aelinor Penrose,"[House Penrose, House Targaryen]",,http://awoiaf.westeros.org/index.php/Aelinor_P...,,,[Aerys I Targaryen],
46,Alarra Massey,"[House Massey, House Velaryon]",,http://awoiaf.westeros.org/index.php/Alarra_Ma...,,,[Aethan Velaryon],
49,Alayne,[House Baelish],,http://awoiaf.westeros.org/index.php/Alayne_Ba...,,,[Baelish (lord)],
57,Alester Florent,"[House Florent, House Tyrell, House Baratheon ...",,http://awoiaf.westeros.org/index.php/Alester_F...,,,[Melara Crane],
73,Alyn Tarbeck,[House Tarbeck],,http://awoiaf.westeros.org/index.php/Alyn_Tarbeck,,,[Jeyne Westerling (wife of Maegor I)],


In [11]:
# Get spouses that have a spouse present in the dataset with all 
# parent-child relationships

parent_child_spouses = df.explode('spouse').dropna(subset=['spouse'])['spouse'].unique()
data_spouse_family = data_with_spouses[data_with_spouses['name'].isin(parent_child_spouses)]
data_spouse_family.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 202 entries, 46 to 1342
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           202 non-null    object
 1   allegiance     192 non-null    object
 2   father         0 non-null      object
 3   url            202 non-null    object
 4   fatherRumored  0 non-null      object
 5   mother         0 non-null      object
 6   spouse         202 non-null    object
 7   motherRumored  0 non-null      object
dtypes: object(8)
memory usage: 14.2+ KB


In [12]:
data_spouse_family.head()

Unnamed: 0,name,allegiance,father,url,fatherRumored,mother,spouse,motherRumored
46,Alarra Massey,"[House Massey, House Velaryon]",,http://awoiaf.westeros.org/index.php/Alarra_Ma...,,,[Aethan Velaryon],
57,Alester Florent,"[House Florent, House Tyrell, House Baratheon ...",,http://awoiaf.westeros.org/index.php/Alester_F...,,,[Melara Crane],
73,Alyn Tarbeck,[House Tarbeck],,http://awoiaf.westeros.org/index.php/Alyn_Tarbeck,,,[Jeyne Westerling (wife of Maegor I)],
77,Alys Arryn (wife of Rhaegel),"[House Arryn, House Targaryen]",,http://awoiaf.westeros.org/index.php/Alys_Arry...,,,[Rhaegel Targaryen],
78,Alys Beesbury,"[House Beesbury, House Tyrell]",,http://awoiaf.westeros.org/index.php/Alys_Bees...,,,[Leo Tyrell (son of Victor)],


In [13]:
all_usable_data = pd.concat([data_with_children, data_spouse_family, data_with_relation])
len(all_usable_data)

1484

In [14]:
all_usable_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1484 entries, 72 to 1343
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           1484 non-null   object
 1   allegiance     1345 non-null   object
 2   father         941 non-null    object
 3   url            1484 non-null   object
 4   fatherRumored  11 non-null     object
 5   mother         490 non-null    object
 6   spouse         725 non-null    object
 7   motherRumored  2 non-null      object
dtypes: object(8)
memory usage: 104.3+ KB


In [15]:
first_layer = all_usable_data[(all_usable_data['father'].isnull()) & (all_usable_data['mother'].isnull())]
first_layer.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 506 entries, 72 to 1342
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           506 non-null    object
 1   allegiance     459 non-null    object
 2   father         0 non-null      object
 3   url            506 non-null    object
 4   fatherRumored  0 non-null      object
 5   mother         0 non-null      object
 6   spouse         470 non-null    object
 7   motherRumored  0 non-null      object
dtypes: object(8)
memory usage: 35.6+ KB


In [16]:
all_usable_data.explode('allegiance').dropna(subset=['allegiance']).value_counts('allegiance').head(50)

allegiance
House Frey                        163
House Targaryen                   142
House Stark                       110
House Lannister                    82
Blacks                             53
House Tyrell                       52
House Hightower                    45
House Baratheon                    36
House Martell                      36
House Greyjoy                      32
House Velaryon                     28
House Arryn                        28
Greens                             23
House Blackwood                    18
House Tully                        18
House Manderly                     17
House Royce                        17
House Bracken                      17
House Redwyne                      16
House Florent                      16
Night's Watch                      16
House Tarbeck                      14
House Blackfyre                    14
House Waynwood                     14
House Baratheon of Dragonstone     13
Citadel                            13
H

In [17]:
# first_layer[first_layer['successor'].notna()]['successor'].head(100)

In [18]:
# Transform nodes into partnerships (= single or couple), 
# so that they will be easier to use in a hierarchical structure

all_with_spouse = all_usable_data[all_usable_data['spouse'].notnull()]
all_with_spouse.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 725 entries, 46 to 1342
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           725 non-null    object
 1   allegiance     695 non-null    object
 2   father         251 non-null    object
 3   url            725 non-null    object
 4   fatherRumored  2 non-null      object
 5   mother         149 non-null    object
 6   spouse         725 non-null    object
 7   motherRumored  0 non-null      object
dtypes: object(8)
memory usage: 51.0+ KB


In [19]:
all_no_spouse = all_usable_data[all_usable_data['spouse'].isna()]
all_no_spouse.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 759 entries, 72 to 1343
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           759 non-null    object
 1   allegiance     650 non-null    object
 2   father         690 non-null    object
 3   url            759 non-null    object
 4   fatherRumored  9 non-null      object
 5   mother         341 non-null    object
 6   spouse         0 non-null      object
 7   motherRumored  2 non-null      object
dtypes: object(8)
memory usage: 53.4+ KB


In [20]:
all_relations = all_with_spouse.explode('spouse').dropna(subset=['spouse'])
all_relations.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 761 entries, 46 to 1342
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           761 non-null    object
 1   allegiance     731 non-null    object
 2   father         291 non-null    object
 3   url            761 non-null    object
 4   fatherRumored  2 non-null      object
 5   mother         171 non-null    object
 6   spouse         761 non-null    object
 7   motherRumored  0 non-null      object
dtypes: object(8)
memory usage: 53.5+ KB


In [21]:
# all_relations.reset_index(inplace=True)
all_relations_with_spouse = pd.merge(all_relations, all_relations, how='left', left_on='spouse', right_on='name', suffixes=('', '_spouse'))
all_relations_with_spouse = all_relations_with_spouse.reset_index()
all_relations_with_spouse = pd.merge(all_relations_with_spouse, all_no_spouse, how='left', left_on='spouse', right_on='name', suffixes=('', '_no-spouse'))

all_relations_with_spouse.info(20)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1336 entries, 0 to 1335
Data columns (total 25 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   index                    1336 non-null   int64 
 1   name                     1336 non-null   object
 2   allegiance               1241 non-null   object
 3   father                   478 non-null    object
 4   url                      1336 non-null   object
 5   fatherRumored            2 non-null      object
 6   mother                   273 non-null    object
 7   spouse                   1336 non-null   object
 8   motherRumored            0 non-null      object
 9   name_spouse              1213 non-null   object
 10  allegiance_spouse        1123 non-null   object
 11  father_spouse            564 non-null    object
 12  url_spouse               1213 non-null   object
 13  fatherRumored_spouse     2 non-null      object
 14  mother_spouse            282 non-null   

In [22]:
all_relations_with_spouse = all_relations_with_spouse.drop_duplicates(subset=['name', 'father', 'mother', 'spouse', 'name_spouse', 'father_spouse', 'mother_spouse'], ignore_index=True)
all_relations_with_spouse.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 549 entries, 0 to 548
Data columns (total 25 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   index                    549 non-null    int64 
 1   name                     549 non-null    object
 2   allegiance               532 non-null    object
 3   father                   291 non-null    object
 4   url                      549 non-null    object
 5   fatherRumored            2 non-null      object
 6   mother                   171 non-null    object
 7   spouse                   549 non-null    object
 8   motherRumored            0 non-null      object
 9   name_spouse              444 non-null    object
 10  allegiance_spouse        430 non-null    object
 11  father_spouse            227 non-null    object
 12  url_spouse               444 non-null    object
 13  fatherRumored_spouse     2 non-null      object
 14  mother_spouse            141 non-null    o

In [23]:
# all_relations_with_spouse[all_relations_with_spouse['name_spouse'].isna()].info()

In [24]:
# all_relations_with_spouse[all_relations_with_spouse['name_no-spouse'].notna()].info()

In [25]:
# all_relations_with_spouse[all_relations_with_spouse['name_spouse'].isna()].sort_values('name').head(20)

In [26]:
# all_relations_with_spouse.reset_index(inplace=True)
# all_no_spouse.reset_index(inplace=True)
final_data = pd.concat([all_relations_with_spouse, all_no_spouse])
final_data.info()



<class 'pandas.core.frame.DataFrame'>
Int64Index: 1308 entries, 0 to 1343
Data columns (total 25 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   index                    549 non-null    float64
 1   name                     1308 non-null   object 
 2   allegiance               1182 non-null   object 
 3   father                   981 non-null    object 
 4   url                      1308 non-null   object 
 5   fatherRumored            11 non-null     object 
 6   mother                   512 non-null    object 
 7   spouse                   549 non-null    object 
 8   motherRumored            2 non-null      object 
 9   name_spouse              444 non-null    object 
 10  allegiance_spouse        430 non-null    object 
 11  father_spouse            227 non-null    object 
 12  url_spouse               444 non-null    object 
 13  fatherRumored_spouse     2 non-null      object 
 14  mother_spouse           

In [27]:
all_names = final_data['name']


mothers_no_primary_name = final_data[(final_data['mother'].notna() & ~final_data['mother'].isin(all_names))]['mother']
fathers_no_primary_name = final_data[(final_data['father'].notna() & ~final_data['father'].isin(all_names))]['father']
spouses_no_primary_name = final_data[(final_data['spouse'].notna() & ~final_data['spouse'].isin(all_names))]['spouse']

mothers_no_primary_name.drop_duplicates()

212                 Daenys Targaryen
231                  Marilda of Hull
255                    Catelyn Tully
272                Rohanne of Tyrosh
325              Goddess of the wind
364                   Rohanne Webber
388                      Dorna Swyft
411                   Olenna Redwyne
445                   Alayne Baelish
462                 Shaera Targaryen
471                 Valaena Velaryon
487                 Delonne Allyrion
524                  House Targaryen
27                    House Charlton
48                           Chataya
59                     Dorna Sarwyck
87                             Cissy
90                           Megette
127     House Fossoway of New Barrel
144                   Arwyn Oakheart
158                Bellegere Otherys
167                          Mhaegen
172                            Whore
174                Bellonara Otherys
177                         Dothraki
239                Melissa Blackwood
267                     Prostitution
4

In [44]:
fathers_no_primary_name.drop_duplicates().head(50)

219                    Jaehaerys I Targaryen
224                          Daemon Velaryon
225                             House Harlaw
232                             Jasper Arryn
233                           Lucas Harroway
234                         Rickard Karstark
236                           House Oakheart
237                            Lyonel Strong
238                         House Stackspear
239                          House Blackwood
261                           Rodrik Ryswell
262                             Raymar Royce
269              Rodrik Stark (son of Beron)
272                       Daemon I Blackfyre
278                                 Gulltown
280                            House Corbray
281                Lord Costayne (Daeron II)
287    Baelon Targaryen (son of Jaehaerys I)
295                         Vaemond Velaryon
297                            House Greyjoy
305                            Colin Florent
310                                   Bharbo
322       

In [35]:
spouses_no_primary_name.drop_duplicates().tail(16)

432                        Corpse queen
434                         House Uller
435                            Hellholt
448                       House Grafton
453                         Edmyn Tully
457                       Dorna Sarwyck
462                  Aerys II Targaryen
464    Aegon Targaryen (son of Aenys I)
480                  Daemon I Blackfyre
485                       House Belmore
499                 Arlan III Durrandon
510                      Roland I Arryn
523                                Jarl
526                        House Farman
540                      House Charlton
542                         House Rowan
Name: spouse, dtype: object

In [31]:
# Save final dataset
# all_usable_data.to_json('./ice_and_fire_final.json', orient='records')
final_data[['name', 'allegiance', 'url', 'mother', 'father', 'spouse', 'motherRumored', 'fatherRumored']].to_json('./ice_and_fire_final.json', orient='records')