In [10]:
import pandas as pd
import seaborn as sns
import networkx as nx
import numpy as np
from funding import core

In [11]:
# Relative path of the edge-list CSV that the next cell reads into `df`.
df_path = "../../data/NationalFunding/Data/DerivedData/Derived/dependence/pub_noforeign_fund_all2all.csv"

In [12]:
# Load the raw table; downstream cells use its `source`, `target` and `p` columns.
df = pd.read_csv(df_path)

In [13]:
# Keep only rows that link two different endpoints and carry a non-zero `p`,
# reduce the frame to the three edge columns, and expose `p` as `weight`.
keep_row = (df['source'] != df['target']) & (df['p'] != 0)
df = (
    df.loc[keep_row, ['source', 'target', 'p']]
    .rename(columns={'p': 'weight'})
)

In [16]:
# Build the directed graph straight from the edge list: one edge per row,
# running source -> target, with the row's `weight` stored as the edge
# attribute of the same name. This replaces a row-by-row `iterrows()` loop
# (slow, and its index variable was unused) with networkx's bulk constructor.
net = nx.from_pandas_edgelist(
    df,
    source='source',
    target='target',
    edge_attr='weight',
    create_using=nx.DiGraph,
)


In [9]:
# `core.disparity_filter` is project-local (imported from `funding`).
# Judging by how the result is used below, it returns a graph whose edges
# carry `alpha_in` / `alpha_out` values next to `weight` — confirm against
# the helper's source.
net_alpha = core.disparity_filter(net)

In [33]:
# Cut-off applied to each edge's `alpha_in`; only edges strictly below it are kept.
alpha = 0.005
# Ask the edge view for `alpha_in` directly. Edges that lack the attribute
# fall back to 0 and therefore pass the cut.
selected_edges = [
    (src, dst)
    for src, dst, alpha_in in net_alpha.edges(data='alpha_in', default=0)
    if alpha_in < alpha
]

In [34]:
# Restrict the original graph `net` (not `net_alpha`) to the selected edges.
# NOTE(review): per the networkx docs `edge_subgraph` returns a view onto
# `net` rather than a copy — confirm this is what is wanted.
selected_graph = net.edge_subgraph(selected_edges)

In [35]:
# Number of edges that survived the alpha cut.
selected_graph.number_of_edges()

508

In [36]:
# Fraction of the original graph's edges that survived the alpha cut.
selected_graph.number_of_edges()/net.number_of_edges()

0.036350626118067976

In [37]:
# Fraction of the original graph's nodes that still touch a surviving edge.
selected_graph.number_of_nodes()/net.number_of_nodes()

0.8647342995169082

In [41]:
# The output file name embeds the cut-off, e.g. "selected_graph_alpha0.005.gexf".
path = f"selected_graph_alpha{alpha}.gexf"
# Export the filtered graph as GEXF into the current working directory.
nx.write_gexf(selected_graph, path)

In [17]:
# Relative path of the per-funder CSV that the next cell reads into `fund_df`.
fund_frac_path="../../data/NationalFunding/Data/DerivedData/Derived/cntry_fund_frac.csv"

In [18]:
# Load the funder table and preview it; the next cell aggregates its `cnt`
# column per `funder`.
fund_df=pd.read_csv(fund_frac_path)
fund_df.head()

Unnamed: 0,funder,year,intcol,cnt
0,Afghanistan,2010,1,1.0
1,Afghanistan,2011,1,0.333333
2,Afghanistan,2012,1,0.333333
3,Afghanistan,2015,0,0.148352
4,Afghanistan,2015,1,0.279167


In [19]:
# Collapse the table to one row per funder by summing `cnt` over all of that
# funder's rows, and publish the total under the name `count`.
funder_totals = fund_df.groupby('funder', as_index=False)['cnt'].sum()
fund_df = funder_totals.rename(columns={'cnt': 'count'})
# Natural log of the total; this is the value later attached to graph nodes.
fund_df['cnt_log'] = np.log(fund_df['count'])
fund_df.head()

Unnamed: 0,funder,count,cnt_log
0,Afghanistan,3.089135,1.127891
1,Albania,17.032612,2.83513
2,Algeria,1824.735809,7.50919
3,Andorra,27.34127,3.308397
4,Angola,24.67152,3.20565


In [23]:
# Graph nodes that have no matching `funder` entry in the funding table.
set(net.nodes()).difference(fund_df['funder'].unique())

{'Antarctica',
 'Antigua & Barbuda',
 'Cape Verde',
 'Comoros',
 'Crimea',
 'Eritrea',
 'Federated States of Micronesia ',
 'Marshall Islands',
 'Nauru',
 'Netherlands-Antilles',
 'Niue',
 'Saint-Vincent-et-les-Grenadines',
 'Sao Tome & Principe',
 'St-Lucia',
 'Turkmenistan',
 'Tuvalu'}

In [31]:
# Spot check: every edge-list row that points at 'Antarctica'.
df.loc[df['target'] == 'Antarctica']

Unnamed: 0,source,target,weight
1003,EU,Antarctica,0.1
1012,France,Antarctica,0.3
1018,Germany,Antarctica,0.1
1037,Italy,Antarctica,0.2
1136,United States,Antarctica,0.1


In [43]:
# Spot check: attributes stored on the France -> Antarctica edge of the
# filtered-score graph.
net_alpha['France']['Antarctica']

{'weight': 0.3, 'alpha_out': 0.0073, 'alpha_in': 0.1526}

In [24]:
# Relative path of the country-to-region workbook that the next cell reads.
cntry_region_path = '../../data/NationalFunding/Data/AdditionalData/cntry_region.xlsx'

In [25]:
# In the `region2` column only, fold the two European labels into 'Europe';
# all other columns and values are left as read from the workbook.
region_aliases = {'EU': 'Europe', 'Other Europe': 'Europe'}
cntry_region = pd.read_excel(cntry_region_path).replace(
    to_replace={'region2': region_aliases}
)
cntry_region.head()

Unnamed: 0,cntry,region1,eulabel,region,region2
0,Algeria,Africa,,Africa,Africa
1,Angola,Africa,,Africa,Africa
2,Belgian Congo,Africa,,Africa,Africa
3,Benin,Africa,,Africa,Africa
4,Botswana,Africa,,Africa,Africa


In [26]:
# Lookup tables keyed by country name: funder -> log funding total, and
# country -> region label. These are used as node-attribute mappings below.
fund_dict = fund_df.set_index('funder')['cnt_log'].to_dict()
region_dict = cntry_region.set_index('cntry')['region2'].to_dict()

In [27]:
# For each attribute, first give every node the placeholder 0, then overwrite
# it with the real value wherever the lookup table has an entry for the node.
# Nodes missing from a table therefore keep 0 for that attribute.
for attr_name, attr_values in (('fund', fund_dict), ('region', region_dict)):
    nx.set_node_attributes(net, 0, attr_name)
    nx.set_node_attributes(net, attr_values, attr_name)

In [29]:
# Spot check on a node that is absent from the funding table: `fund` keeps
# the placeholder 0 while `region` comes from the region lookup.
net.nodes['Comoros']

{'fund': 0, 'region': 'Africa'}