## Foundations and Grantees Network Generation

In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import altair as alt
import seaborn as sns

Read in data -- original is https://indiana-my.sharepoint.com/:x:/r/personal/fulton_iu_edu/Documents/Funder-Grantee%20Network/Data/2019%20Data%20for%20Visualizations%20and%20Network%20Analyses/2019%20Matches%20for%20Data%20Visualizations%20and%20Network%20Analyses_2022.9.29.xlsx?d=w157b25b52b5c46edaabd7ad48ad31177&csf=1&web=1&e=N4wcXo
"2019 Matches for Data Visualizations and Network Analyses_2022.9.29"

**Suggested:** download the workbook as a CSV file before running the next cell.

Each row of this datasource denotes a funding action from a foundation to a grantee.

In [2]:
# Location of the CSV export of the source workbook. Edit this one constant to
# point at your local copy.
RAW_MATCHES_CSV = "/Users/lelee1/Desktop/D592/Foundations/2019 Matches for Data Visualizations and Network Analyses_2022.9.29.csv"

# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so no column ends up holding a mix of types.
# NOTE(review): the warning echoed under this cell looks like a DtypeWarning
# for mixed-type columns -- confirm it disappears with this option.
foundations_raw_all = pd.read_csv(RAW_MATCHES_CSV, low_memory=False)
print(foundations_raw_all.shape)
foundations_raw_all.head()

  foundations_raw_all = pd.read_csv("/Users/lelee1/Desktop/D592/Foundations/2019 Matches for Data Visualizations and Network Analyses_2022.9.29.csv")


(558319, 120)


Unnamed: 0,Amount,Grantee City,Grantee City_grantee,Grantee Name,Grantee Name_grantee,Grantee State,Grantee State_grantee,Grantee Zip,Match Type,ntee_full2,...,f_amt_assets_total_boy,f_amt_assets_total_3,f_amt_liabilities_total_boy,f_amt_liabilities_total,f_amt_assets_net,f_amt_assets_net_2,f_operating_fdtn,f_operating_fdtn_2,f_website_address,f_amt_assets_total_2
0,10000.0,hanover,hanover,hampshire cooperative nursery school,hampshire cooperative nursery school,nh,nh,3755.0,Exact Zip,T20,...,46182976.0,75610714.0,7618.0,0.0,75610714.0,75610714.0,False,False,,0.0
1,2550.0,gorham,gorham,north country education services agency,north country education services agency,nh,nh,3581.0,Exact Zip,T20,...,46182976.0,75610714.0,7618.0,0.0,75610714.0,75610714.0,False,False,,0.0
2,10000.0,lebanon,lebanon,friends of norris cotton cancer center,friends of norris cotton cancer center,nh,nh,3756.0,Exact Zip,T20,...,46182976.0,75610714.0,7618.0,0.0,75610714.0,75610714.0,False,False,,0.0
3,5000.0,lebanon,lebanon,friends of norris cotton cancer center,friends of norris cotton cancer center,nh,nh,3756.0,Exact Zip,T20,...,46182976.0,75610714.0,7618.0,0.0,75610714.0,75610714.0,False,False,,0.0
4,5000.0,lebanon,lebanon,friends of norris cotton cancer center,friends of norris cotton cancer center,nh,nh,3756.0,Exact Zip,T20,...,46182976.0,75610714.0,7618.0,0.0,75610714.0,75610714.0,False,False,,0.0


In [3]:
## Put every column name into one format: underscores instead of spaces, all lowercase
normalised_names = [
    original_name.replace(" ", "_").lower()
    for original_name in foundations_raw_all.columns
]
foundations_raw_all.columns = normalised_names
foundations_raw_all.head()

Unnamed: 0,amount,grantee_city,grantee_city_grantee,grantee_name,grantee_name_grantee,grantee_state,grantee_state_grantee,grantee_zip,match_type,ntee_full2,...,f_amt_assets_total_boy,f_amt_assets_total_3,f_amt_liabilities_total_boy,f_amt_liabilities_total,f_amt_assets_net,f_amt_assets_net_2,f_operating_fdtn,f_operating_fdtn_2,f_website_address,f_amt_assets_total_2
0,10000.0,hanover,hanover,hampshire cooperative nursery school,hampshire cooperative nursery school,nh,nh,3755.0,Exact Zip,T20,...,46182976.0,75610714.0,7618.0,0.0,75610714.0,75610714.0,False,False,,0.0
1,2550.0,gorham,gorham,north country education services agency,north country education services agency,nh,nh,3581.0,Exact Zip,T20,...,46182976.0,75610714.0,7618.0,0.0,75610714.0,75610714.0,False,False,,0.0
2,10000.0,lebanon,lebanon,friends of norris cotton cancer center,friends of norris cotton cancer center,nh,nh,3756.0,Exact Zip,T20,...,46182976.0,75610714.0,7618.0,0.0,75610714.0,75610714.0,False,False,,0.0
3,5000.0,lebanon,lebanon,friends of norris cotton cancer center,friends of norris cotton cancer center,nh,nh,3756.0,Exact Zip,T20,...,46182976.0,75610714.0,7618.0,0.0,75610714.0,75610714.0,False,False,,0.0
4,5000.0,lebanon,lebanon,friends of norris cotton cancer center,friends of norris cotton cancer center,nh,nh,3756.0,Exact Zip,T20,...,46182976.0,75610714.0,7618.0,0.0,75610714.0,75610714.0,False,False,,0.0


In [4]:
# Split the edge list into two node tables -- grantees and foundations -- and
# rename their source columns to a shared set of attribute names.
# Each mapping is {raw column: attribute name}; its key order is also the
# column order of the resulting dataframe.
GRANTEE_COLUMNS = {
    'ein': 'ein',
    'g_business_name': 'name',
    'g_ntee_full': 'ntee',
    'g_name_990': 'g_name_990',
    'g_name_line2_990': 'g_name_line2_990',
    'g_formation_yr_990': 'formation_yr',
    'g_country_990': 'country',
    'g_city_990': 'city',
    'g_state_990': 'state',
    'g_zipcode_990': 'zip',
    'g_taxexempt_type': 'taxexempt_type',
    'g_website_address_990': 'website',
    'g_amt_exp_total': 'amt_exp_total',
    'g_amt_assets_total': 'amt_assets_total',
    'g_amt_rev_total': 'amt_rev_total',
    'g_amt_rev_contrib_total': 'amt_rev_contrib_total',
    'g_num_volunteers': 'num_volunteers',
}

FOUNDATION_COLUMNS = {
    'foundation_ein': 'ein',
    'f_business_name': 'name',
    'f_ntee_full': 'ntee',
    'f_city': 'city',
    'f_state': 'state',
    'f_zip': 'zip',
    'f_website_address': 'website',
    'f_amt_exp_total': 'amt_exp_total',
    'f_amt_assets_total': 'amt_assets_total',
    'f_amt_rev_total': 'amt_rev_total',
}

# Grantee table: select the grantee columns, rename, then tag the rows.
grantees = foundations_raw_all[list(GRANTEE_COLUMNS)].rename(columns=GRANTEE_COLUMNS)
grantees.insert(0, 'entity_type', 'grantee')

# Foundation table: same recipe with the foundation columns.
foundations = foundations_raw_all[list(FOUNDATION_COLUMNS)].rename(columns=FOUNDATION_COLUMNS)
foundations.insert(0, 'entity_type', 'foundation')

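To attach attributes to each node in the network, we need a table of entities rather than a table of funding actions. The raw data is wide-form: every row carries both the foundation's and the grantee's attributes side by side. We therefore stack the two sets of columns into one long-form entity dataframe and reduce it to the unique attributes of each entity.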

In [118]:
# Stack the grantee and foundation tables into one entity table, then discard
# rows that are exact copies of an earlier row
stacked_entities = pd.concat([grantees, foundations], ignore_index=True)
all_entities = stacked_entities.drop_duplicates()

# Display every EIN that still has more than one row, i.e. the same identifier
# with differing attribute values, ordered by EIN
all_entities.groupby('ein').filter(lambda ein_rows: len(ein_rows) > 1).sort_values(by='ein')

Unnamed: 0,entity_type,ein,name,ntee,g_name_990,g_name_line2_990,formation_yr,country,city,state,zip,taxexempt_type,website,amt_exp_total,amt_assets_total,amt_rev_total,amt_rev_contrib_total,num_volunteers
300,grantee,10211565,Waynflete School,B20,,,1935.0,,Portland,ME,4102.0,False,Www.Waynflete.Org,21123544.0,56687644.0,20348490.0,1273174.0,50.0
639,grantee,10211565,,,,,,,,,,,,,,,,
684,grantee,10278395,Woodfords Family Services,B28,,,1977.0,,Westbrook,ME,4092.0,False,Woodfords.Org,31196565.0,9157848.0,28585212.0,368291.0,11.0
208,grantee,10278395,,,,,,,,,,,,,,,,
194748,grantee,10280225,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1109834,foundation,954692410,Terasaki Nibei Foundation Fka Nibei Foundation,T20,,,,,Los Angeles,CA,90024.0,,terasaki.org/nibei,80841.0,6040712.0,6112989.0,,
151005,grantee,954804431,,,,,,,,,,,,,,,,
1110899,foundation,954804431,The Lawrence Foundation,T20,,,,,Santa Monica,CA,90408.0,,www.thelawrencefoundation.org,310005.0,3855161.0,287504.0,,
187559,grantee,990268061,,,,,,,,,,,,,,,,


Utilize EIN as a unique identifier for each entity. 

In [125]:
# For each EIN, gather the distinct values seen in every attribute column.
# All values are cast to str first, so a missing value becomes the string 'nan'
# inside the resulting sets.
entities_as_text = all_entities.astype(str)
all_entities_grouped = (
    entities_as_text
    .groupby(by=['ein'])
    .agg(set)
    .reset_index()
)
all_entities_grouped.head()

Unnamed: 0,ein,entity_type,name,ntee,g_name_990,g_name_line2_990,formation_yr,country,city,state,zip,taxexempt_type,website,amt_exp_total,amt_assets_total,amt_rev_total,amt_rev_contrib_total,num_volunteers
0,1,{grantee},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan}
1,100000085,{grantee},{Greater Rochester Enterprise Foundation Inc},{S31},{nan},{nan},{2002.0},{nan},{Rochester},{NY},{14604.0},{False},{Www.Rochesterbiz.Com},{1607806.0},{196436.0},{1497294.0},{1497001.0},{44.0}
2,100000573,{foundation},{The Couch Family Foundation C/O Mott Philanth...,{T20},{nan},{nan},{nan},{nan},{Boston},{MA},{2199.0},{nan},{nan},{3294710.0},{94849383.0},{34816817.0},{nan},{nan}
3,100000800,{grantee},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan},{nan}
4,100002102,{grantee},{Everybody Wins Vermont Inc},{O50},{nan},{nan},{2001.0},{nan},{Montpelier},{VT},{5601.0},{False},{Www.Everybodywinsvermont.Org},{314926.0},{50647.0},{232940.0},{232249.0},{600.0}


In [126]:
# Iterate through attribute columns & get unique, non-null values from the sets
def _collapse_value_set(values):
    """Reduce one aggregated cell to a single value, NaN, or a sorted list.

    Parameters
    ----------
    values : set of str, or an already-collapsed value
        The distinct string values collected for one EIN in one column.
        The string 'nan' marks a missing value.

    Returns
    -------
    The only non-'nan' value if there is exactly one; ``np.nan`` if there is
    none; otherwise a list of all non-'nan' values. The list is sorted because
    set iteration order for strings is not stable between Python processes, so
    an unsorted list would differ from one kernel run to the next.
    """
    if not isinstance(values, (set, frozenset)):
        # Not a set: this cell was already collapsed by an earlier run.
        # Return it unchanged so re-running the cell is harmless.
        return values
    present = sorted(value for value in values if value != 'nan')
    if not present:
        return np.nan
    if len(present) == 1:
        return present[0]
    return present


# Every column except the first ('ein', the grouping key) holds sets to collapse.
for c in all_entities_grouped.columns.tolist()[1:]:
    all_entities_grouped[c] = all_entities_grouped[c].apply(_collapse_value_set)

# Restore the dtypes of the pre-aggregation table, then force entity_type to str
# so an EIN that is both grantee and foundation holds one string, not a list.
all_entities_grouped = all_entities_grouped.astype(all_entities.dtypes.to_dict()).astype({'entity_type':str})

In [140]:
# Sanity check: list any EIN that occupies more than one row of the grouped
# table (an empty result means each EIN appears once)
rows_by_ein = all_entities_grouped.groupby(['ein'])
rows_by_ein.filter(lambda ein_rows: ein_rows.shape[0] > 1)

Unnamed: 0,ein,entity_type,name,ntee,g_name_990,g_name_line2_990,formation_yr,country,city,state,zip,taxexempt_type,website,amt_exp_total,amt_assets_total,amt_rev_total,amt_rev_contrib_total,num_volunteers


Each EIN now appears in exactly one row, so there are no longer conflicting attribute records for the same entity.

In [131]:
# Create an edgelist for networkx
#
# Each raw row is one funding action, so the same foundation -> grantee pair
# can occur on several rows. The graph built from this edge list is a DiGraph,
# which keeps one edge per ordered pair; a repeated pair overwrites the edge's
# attributes, so only the last row's amount would survive. Total the amounts
# per pair first so each edge carries the full funding between the two.
#   sort=False    -> pairs stay in first-appearance order (same node/edge order)
#   dropna=False  -> rows with a missing EIN are kept rather than dropped
#   min_count=1   -> a pair whose amounts are all missing stays NaN, not 0
edgelist = (
    foundations_raw_all
    .groupby(['foundation_ein', 'ein'], sort=False, dropna=False)['amount']
    .sum(min_count=1)
    .reset_index()
    .loc[:, ['amount', 'ein', 'foundation_ein']]
)

In [196]:
# Build the directed funding network from the edge list: edges point from the
# foundation EIN to the grantee EIN and carry 'amount' as an edge attribute
Graph_f_g = nx.from_pandas_edgelist(
    edgelist,
    source='foundation_ein',
    target='ein',
    edge_attr=['amount'],
    create_using=nx.DiGraph(),
)

In [193]:
# Attach the entity attributes to the graph nodes, keyed by EIN
ent_props = all_entities_grouped.drop_duplicates().reset_index(drop=True)

# One {attribute: value} dict per EIN; attributes that are missing for an
# entity are left out of its dict rather than stored as NaN
xattrs = {
    ein: attributes.dropna().to_dict()
    for ein, attributes in ent_props.set_index('ein').iterrows()
}
nx.set_node_attributes(Graph_f_g, pd.Series(xattrs))

In [199]:
# Get network summary info
# nx.info() is deprecated and was removed in networkx 3.0; str() of a graph
# returns the same one-line "<type> with N nodes and M edges" summary.
str(Graph_f_g)


  nx.info(Graph_f_g)


'DiGraph with 187181 nodes and 502456 edges'

In [200]:
# Export the network, with its node and edge attributes, to a GML file
GML_OUTPUT_PATH = '/Users/lelee1/Desktop/D592/Foundations/foundations_grantees_network.gml'
nx.write_gml(Graph_f_g, path=GML_OUTPUT_PATH)