In [1]:
import json
import pandas as pd

from folium import plugins
import folium
from datetime import datetime


In [2]:
# Load the descendant counts; this table is joined onto the records by `id` further down.
descendants = pd.read_csv("data/descendants.csv")

In [3]:
# Peek at the `id` column of the descendants table.
descendants["id"]

0             11
1          19930
2              4
3          19939
4              1
           ...  
256764     33501
256765     74611
256766     33507
256767    235565
256768    164293
Name: id, Length: 256769, dtype: int64

In [4]:
# Load the genealogy records (the preview below shows id, name, thesis, school,
# country, year, subject, advisors and students columns).
data = pd.read_csv("data/data.csv")

In [5]:
# Preview the first rows of the genealogy records.
data.head()

Unnamed: 0,id,name,thesis,school,country,year,subject,advisors,students
0,11,Alfred E. Engelbrecht,Coupled Bending and Torsional Free Vibration o...,Iowa State University,UnitedStates,1949.0,,[1],[]
1,4,Charles Joseph Thorne,The Approximate Solution of Linear Differentia...,Iowa State University,UnitedStates,1941.0,,[239],"[19930, 19939]"
2,1,Ernest Willard Anderson,Statics of Special Types of Homogeneous Elasti...,Iowa State University,UnitedStates,1933.0,74—Mechanics of deformable solids,[258],"[11, 28, 31]"
3,7,Carl Eric Langenhop,Properties of Kernels of Integral Equations Wh...,Iowa State University,UnitedStates,1948.0,,[281],"[48, 104689, 38, 33, 3026, 39, 32, 35]"
4,6,William B. Stiles,Solutions of Clamped Plated Problems by Means ...,Iowa State University,UnitedStates,1945.0,,[258],[]


In [6]:
# Load the lookup table that maps a school name to its latitude/longitude.
coords = pd.read_csv("data/coordinates.csv")

In [7]:
# Preview the first rows of the coordinates table.
coords.head()

Unnamed: 0,school,lat,lon
0,Iowa State University,42.027961,-93.644737
1,University of Tennessee - Knoxville,35.951635,-83.930882
2,University of Illinois at Urbana-Champaign,40.099118,-88.238516
3,"University of California, Berkeley",37.8755,-122.239069
4,University of Wisconsin-Madison,43.080274,-89.430959


In [8]:
# Size of the coordinates table as loaded, i.e. before rows with missing values are dropped.
coords.shape

(3907, 3)

In [9]:
# Keep only the rows of the coordinates table that have no missing values.
coords = coords.dropna()

# Merging the descendants, coordinates and the genealogy data

In [10]:
# Left-join the coordinates onto every genealogy record by school name; records
# whose school is absent from `coords` are kept with missing lat/lon.
merged = data.merge(coords, on="school", how="left")
merged.shape


(256769, 11)

In [11]:
# Attach the descendant count to each record via `id`; the left join keeps
# every row of `merged` even when no count is available.
full_data = merged.merge(descendants, on="id", how="left")
full_data.columns

Index(['id', 'name', 'thesis', 'school', 'country', 'year', 'subject',
       'advisors', 'students', 'lat', 'lon', 'descendants'],
      dtype='object')

In [12]:
# Shape of the table after both joins.
full_data.shape

(256769, 12)

In [13]:
# Preview the joined table (records + coordinates + descendant counts).
full_data.head()

Unnamed: 0,id,name,thesis,school,country,year,subject,advisors,students,lat,lon,descendants
0,11,Alfred E. Engelbrecht,Coupled Bending and Torsional Free Vibration o...,Iowa State University,UnitedStates,1949.0,,[1],[],42.027961,-93.644737,1
1,4,Charles Joseph Thorne,The Approximate Solution of Linear Differentia...,Iowa State University,UnitedStates,1941.0,,[239],"[19930, 19939]",42.027961,-93.644737,3
2,1,Ernest Willard Anderson,Statics of Special Types of Homogeneous Elasti...,Iowa State University,UnitedStates,1933.0,74—Mechanics of deformable solids,[258],"[11, 28, 31]",42.027961,-93.644737,4
3,7,Carl Eric Langenhop,Properties of Kernels of Integral Equations Wh...,Iowa State University,UnitedStates,1948.0,,[281],"[48, 104689, 38, 33, 3026, 39, 32, 35]",42.027961,-93.644737,38
4,6,William B. Stiles,Solutions of Clamped Plated Problems by Means ...,Iowa State University,UnitedStates,1945.0,,[258],[],42.027961,-93.644737,1


## Influential universities

In [14]:
# Total `descendants` per school. reset_index() turns `school` back into a
# regular column next to the aggregated `sum` column.
univ_descendants = (
    full_data[["school", "lat", "lon", "descendants"]]
    .groupby(["school"])["descendants"]
    .agg(["sum"])
    .reset_index()
)
univ_descendants.shape


(3911, 2)

In [15]:
# Preview the per-school totals.
univ_descendants.head()

Unnamed: 0,school,sum
0,(Candidate of Sciences) Novosibirsk State Univ...,4
1,(Candidate of Sciences) Sobolev Institute of M...,1
2,(Doctorat 3e cycle) École Normale Supérie,1
3,(Doctorat d'Etat) Université Paris Diderot - P...,1
4,(Doctorat d'Etat) École Polytechnique,1


In [16]:
# Put each school's total next to its coordinates; schools missing from
# `coords` stay in the result with missing lat/lon.
inf_uni = univ_descendants.merge(coords, on="school", how="left")
inf_uni.shape

(3911, 4)

In [17]:
# Preview the per-school totals together with their coordinates.
inf_uni.head()

Unnamed: 0,school,sum,lat,lon
0,(Candidate of Sciences) Novosibirsk State Univ...,4,54.846317,83.09375
1,(Candidate of Sciences) Sobolev Institute of M...,1,55.049999,82.949997
2,(Doctorat 3e cycle) École Normale Supérie,1,48.821259,2.315226
3,(Doctorat d'Etat) Université Paris Diderot - P...,1,48.829723,2.380833
4,(Doctorat d'Etat) École Polytechnique,1,48.713268,2.210165


In [None]:
# Keep only the schools that have a latitude, a longitude and a total.
# dropna() is chained so that it returns a new frame; calling it with
# inplace=True on a column slice of `inf_uni` makes pandas emit
# SettingWithCopyWarning.
X = inf_uni[["lat","lon","sum"]].dropna()
X

## Plotting

In [18]:
def plot_inf_univ(full_data, t0 = 1950, t1 = 1960, ignore_ones = False, show_map = True):
    """Build, save and optionally display a heat map of per-school descendant totals.

    Rows of ``full_data`` whose ``year`` lies in the half-open interval
    ``t0 <= year < t1`` are grouped by ``school`` and their ``descendants`` are
    summed. Each school is then positioned using the notebook-level ``coords``
    table (read as a global, not passed in); schools without coordinates are
    left off the map.

    Parameters
    ----------
    full_data : pandas.DataFrame
        Must provide the ``year``, ``school``, ``lat``, ``lon`` and
        ``descendants`` columns.
    t0 : int
        Inclusive lower bound on ``year``.
    t1 : int
        Exclusive upper bound on ``year``.
    ignore_ones : bool
        When True, schools whose summed descendants equal exactly 1 are left
        out and the output file name gains an ``ignore_`` infix.
    show_map : bool
        When True, the map is also rendered through ``display``.

    Returns
    -------
    folium.Map
        The map carrying the heat-map layer.

    Side effects
    ------------
    Writes ``heat_map_{t0}_to_{t1}.html`` (or
    ``heat_map_ignore_{t0}_to_{t1}.html`` when ``ignore_ones`` is True) to the
    current working directory.
    """
    in_period = (full_data["year"] >= t0) & (full_data["year"] < t1)
    period = full_data[in_period]

    totals = (
        period[["school", "lat", "lon", "descendants"]]
        .groupby(["school"])["descendants"]
        .agg(["sum"])
        .reset_index()
    )
    located = pd.merge(totals, coords, on="school", how="left")

    # Chain dropna() so a new frame is returned; the in-place variant on this
    # column slice made pandas emit SettingWithCopyWarning on every call.
    points = located[["lat", "lon", "sum"]].dropna()
    if ignore_ones:
        points = points[points["sum"] != 1]

    # Both modes draw the same map; only the filter above and the file name differ.
    infix = "ignore_" if ignore_ones else ""

    # Local is named `heat_map` so the builtin `map` is not shadowed in here.
    heat_map = folium.Map(location=[5, 30], tiles="Cartodb dark_matter", zoom_start=2)
    # Each row is [lat, lon, weight], with the summed descendants as the weight.
    plugins.HeatMap(points.values.tolist()).add_to(heat_map)

    heat_map.save(f"heat_map_{infix}{t0}_to_{t1}.html")
    if show_map:
        display(heat_map)
    return heat_map


In [19]:
# Heat map for the default window (1950 <= year < 1960), leaving out schools
# whose total is exactly 1. The function already displays the map itself
# (show_map defaults to True), and the returned map is kept in a variable.
# NOTE(review): the name `map` shadows Python's builtin `map` in this notebook.
map = plot_inf_univ(full_data, ignore_ones= True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.dropna(inplace=True)


# Heatmap with time experiments

In [21]:

# Build one heat-map frame per 5-year window starting at 1950, 1955, ..., 2015.
# Each frame is a list of [lat, lon, weight] rows, the weight being the summed
# `descendants` of a school's records whose year falls in [t0, t0 + 5).
L = []
for t0 in range(1950, 2020, 5):
    period = full_data[(full_data["year"] >= t0) & (full_data["year"] < t0 + 5)]
    p = period[["school", "lat", "lon", "descendants"]].groupby(["school"])
    p = p["descendants"].agg(["sum"]).reset_index()
    p = pd.merge(p, coords, on="school", how="left")

    # Chain dropna() so a new frame is returned; the in-place call on this
    # column slice made pandas emit SettingWithCopyWarning on every iteration.
    X = p[["lat", "lon", "sum"]].dropna()
    L.append(X.values.tolist())

# NOTE(review): `map` shadows the builtin of the same name. The name is kept
# because it is notebook-level state that other cells may refer to.
map = folium.Map(location=[5, 30], tiles="Cartodb dark_matter", zoom_start=2)

# One animation step per entry of L, in chronological order.
plugins.HeatMapWithTime(L, auto_play=True, max_opacity=0.3).add_to(map)
map.save("heat_time.html")
map

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the c