In [4]:
import pandas as pd
import seaborn as sns
import networkx as nx
import numpy as np
from funding import core
import os

In [5]:
# All locations are relative paths into the NationalFunding data tree.
data_root = "../../data/NationalFunding/Data"
derived_dir = data_root + "/DerivedData/Derived"

# Inputs: all-to-all edge table, country -> region lookup, per-funder counts.
all2all_path = derived_dir + "/dependence/pub_noforeign_fund_all2all.csv"
cntry_region_path = data_root + "/AdditionalData/cntry_region.xlsx"
fund_frac_path = derived_dir + "/cntry_fund_frac.csv"
# Output: the backbone network; the alpha threshold is parsed from this file name.
backbone_net_path = derived_dir + "/all2all_backbone_alpha1.gexf"

In [6]:
# Load the all-to-all edge table; the next cell uses its `source`, `target` and `p` columns.
all2all_df = pd.read_csv(all2all_path)

In [7]:
# Keep only edges that connect two different endpoints and carry a non-zero `p`,
# then expose `p` under the name `weight`.
# Note: this cell rebinds `all2all_df`, so running it a second time fails
# because the `p` column no longer exists.
not_self_loop = all2all_df['source'] != all2all_df['target']
nonzero_weight = all2all_df['p'] != 0
all2all_df = (
    all2all_df
    .loc[not_self_loop & nonzero_weight, ['source', 'target', 'p']]
    .rename(columns={'p': 'weight'})
)

In [11]:
# Total `cnt` per funder, plus its natural log in `cnt_log`.
fund_df = (
    pd.read_csv(fund_frac_path)
    .groupby(['funder'])['cnt']
    .sum()
    .reset_index(name='count')
    .assign(cnt_log=lambda totals: np.log(totals['count']))
)
fund_df.head()

Unnamed: 0,funder,count,cnt_log
0,Afghanistan,3.089135,1.127891
1,Albania,17.032612,2.83513
2,Algeria,1824.735809,7.50919
3,Andorra,27.34127,3.308397
4,Angola,24.67152,3.20565


In [12]:
# Merge the 'EU' and 'Other Europe' labels of `region2` into a single 'Europe' label.
europe_labels = {'EU': 'Europe', 'Other Europe': 'Europe'}
cntry_region = pd.read_excel(cntry_region_path).replace({'region2': europe_labels})
cntry_region.head()

Unnamed: 0,cntry,region1,eulabel,region,region2
0,Algeria,Africa,,Africa,Africa
1,Angola,Africa,,Africa,Africa
2,Belgian Congo,Africa,,Africa,Africa
3,Benin,Africa,,Africa,Africa
4,Botswana,Africa,,Africa,Africa


## create network

In [8]:
# Build the directed, weighted network straight from the edge table.
# `from_pandas_edgelist` adds one edge per row (in row order) and copies the
# `weight` column onto each edge, replacing a slow row-by-row `iterrows()` loop.
net = nx.from_pandas_edgelist(
    all2all_df,
    source='source',
    target='target',
    edge_attr='weight',
    create_using=nx.DiGraph(),
)

## set the attributes of nodes

In [13]:
# Lookup tables keyed by country name: log funding count and region label.
fund_dict = fund_df.set_index('funder')['cnt_log'].to_dict()
region_dict = cntry_region.set_index('cntry')['region2'].to_dict()

In [14]:
# For each attribute, first give every node the default 0, then overwrite it
# for the nodes that appear in the corresponding lookup table.
for attr_name, attr_lookup in (('fund', fund_dict), ('region', region_dict)):
    nx.set_node_attributes(net, 0, attr_name)
    nx.set_node_attributes(net, attr_lookup, attr_name)

In [9]:
# Run the project's disparity filter (funding.core) on the full network.
# The edge-selection cell further down reads an `alpha_in` attribute from the
# edges of this result — presumably set by the filter; confirm in funding.core.
net_alpha = core.disparity_filter(net)

In [28]:
def extract_alpha(path):
    """Parse the alpha threshold encoded at the end of a file name.

    The base name of ``path`` (directory and extension removed) is split on
    ``"_"``; the last token is expected to look like ``alpha<digits>``. The
    digit string is decoded according to its length:

    * 1 digit  -> the integer itself            (``alpha1``    -> 1)
    * 3 digits -> the integer divided by 100    (``alpha005``  -> 0.05)
    * 4 digits -> the integer divided by 1000   (``alpha0005`` -> 0.005)

    Parameters
    ----------
    path : str
        Path or file name whose last ``_``-separated token carries the alpha.

    Returns
    -------
    int or float
        An ``int`` for the 1-digit form, a ``float`` otherwise.

    Raises
    ------
    ValueError
        If the digit string has an unsupported length or is not an integer.
    """
    # Use the argument, not the notebook-level `backbone_net_path` global.
    filename, _extension = os.path.splitext(os.path.basename(path))
    digits = filename.split("_")[-1].replace("alpha", "")

    divisor_by_length = {1: 1, 3: 100, 4: 1000}
    divisor = divisor_by_length.get(len(digits))
    if divisor is None:
        # Fail loudly instead of returning the raw string, which would only
        # break later when it is compared against numeric edge attributes.
        raise ValueError(
            "invalid alpha value {!r} in file name {!r}".format(digits, filename)
        )

    value = int(digits)
    # Keep the 1-digit form an int, as before.
    return value if divisor == 1 else value / divisor

In [33]:
# Threshold is taken from the output file name.
alpha = extract_alpha(backbone_net_path)
# Keep the (source, target) pairs whose `alpha_in` is below the threshold;
# edges without that attribute are treated as having alpha_in == 0.
selected_edges = [
    (src, dst)
    for src, dst, alpha_in in net_alpha.edges(data='alpha_in', default=0)
    if alpha_in < alpha
]
# Take those edges (and their endpoints) from the attributed network `net`.
selected_graph = net.edge_subgraph(selected_edges)

In [None]:
# Write `selected_graph` to `backbone_net_path` as a GEXF file.
nx.write_gexf(selected_graph, backbone_net_path)