**Project Network Analysis**

Step 3.4: Unipartite Graph (Node Embedding + Role + ISF)

In [None]:
# Mount Google Drive inside the Colab runtime; the data/output folders used below live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Required packages
!pip install fuzzywuzzy  &> /dev/null
!pip install Node2Vec &> /dev/null
!pip install graphrole &> /dev/null

In [None]:
from networkx.algorithms.isolate import isolates
import pandas as pd
import networkx as nx
from networkx.algorithms import bipartite
import os
import matplotlib.pyplot as plt
from fuzzywuzzy import fuzz
import numpy as np
import matplotlib as mlt
from node2vec import Node2Vec
from graphrole import RecursiveFeatureExtractor, RoleExtractor
from sklearn.ensemble import IsolationForest

In [None]:
# Project folders on the mounted Drive: `path` holds the input data, `output_path` the output files.
path = '/content/drive/MyDrive/3. IU Courses/Courses/5. Network Analysis/Project/Final/data'
output_path = '/content/drive/MyDrive/3. IU Courses/Courses/5. Network Analysis/Project/Final/output'

# Reading Procurement Data
# NOTE: WB_table.xlsx is read from the output folder (presumably written by an earlier step - confirm).
wb_table_file = os.path.join(output_path, "WB_table.xlsx")
sanctioned_list_file = os.path.join(path, "Sanctioned_List.xlsx")
wb_data = pd.read_excel(wb_table_file)
sanctioned_data = pd.read_excel(sanctioned_list_file)

In [None]:
# Preview the first rows of the procurement table to check that the expected columns were loaded.
wb_data.head()

Unnamed: 0,Fiscal Year,Borrower Country,Supplier,Total Contract Amount (USD)
0,2022,Serbia,ASSECO SEE D.O.O.,3333598
1,2022,Ethiopia,"EPTISA SERVICIOS DE INGENIERIA, S.L. IN ASSOCI...",1851919
2,2022,China,"SHENZHEN CITY DONGSHEN ENGINEERING CO., LTD",14986361
3,2022,Turkey,KOLTEK MUSAVIRLIK A.S.,777600
4,2022,Chad,UNICEF,19326986


Finding Roles

In [None]:
# Build the country-supplier bipartite network for one fiscal year.
year = 2018
wb_data_filter = wb_data[wb_data['Fiscal Year'] == year]

# Grouping repeated transactions between country and suppliers
wb_data_group = (
    wb_data_filter
    .groupby(['Fiscal Year', 'Borrower Country', 'Supplier'])['Total Contract Amount (USD)']
    .sum()
    .reset_index()
)

# Setting up the network
# NOTE: suppliers are added after countries, so a name that occurs in both columns
# ends up as a single node tagged 'Supplier'.
G = nx.Graph()
G.add_nodes_from(wb_data_group['Borrower Country'], bipartite='Country')
G.add_nodes_from(wb_data_group['Supplier'], bipartite='Supplier')

# Edge weight 'contract_amt' is the summed contract amount in millions of USD.
G.add_weighted_edges_from(
    zip(
        wb_data_group['Borrower Country'],
        wb_data_group['Supplier'],
        wb_data_group['Total Contract Amount (USD)'] / 1000000,
    ),
    weight='contract_amt',
)
# nx.Graph is already undirected; the former `G.to_undirected()` call only built a
# copy that was thrown away, so it has been dropped.

<networkx.classes.graph.Graph at 0x7fe894499e50>

In [None]:
# Conversion to projection network with sum of contract amount as weights
def my_weight(G, u, v, weight='contract_amt'):
    """Weight of the projected edge between ``u`` and ``v``.

    For every neighbour that ``u`` and ``v`` share in ``G``, the ``weight``
    attribute of the edge from ``u`` and of the edge from ``v`` to that
    neighbour are added up. An edge without the attribute counts as 1.

    Parameters
    ----------
    G : graph supporting ``G[node]`` and ``G.edges[a, b]`` lookups
    u, v : nodes of ``G``
    weight : name of the edge attribute to accumulate

    Returns
    -------
    The accumulated weight; 0 when ``u`` and ``v`` share no neighbour.
    """
    shared_neighbors = set(G[u]) & set(G[v])
    total = 0
    for shared in shared_neighbors:
        from_u = G.edges[u, shared].get(weight, 1)
        from_v = G.edges[v, shared].get(weight, 1)
        total += from_u + from_v
    return total

# Project the bipartite graph onto its supplier side; two suppliers are linked when
# they share at least one borrower country, weighted by my_weight.
supplier_nodes = [n for n in G.nodes() if G.nodes[n]['bipartite'] == 'Supplier']
Supplier_graph = bipartite.generic_weighted_projected_graph(G, nodes=supplier_nodes, weight_function=my_weight)

# nx.info() was removed in NetworkX 3.0, so the summary is printed explicitly.
n_supplier_nodes = Supplier_graph.number_of_nodes()
n_supplier_edges = Supplier_graph.number_of_edges()
avg_supplier_degree = 2 * n_supplier_edges / n_supplier_nodes if n_supplier_nodes else 0.0
print(f"Number of nodes: {n_supplier_nodes}")
print(f"Number of edges: {n_supplier_edges}")
print(f"Average degree: {avg_supplier_degree:.4f}")

Name: 
Type: Graph
Number of nodes: 1725
Number of edges: 31872
Average degree:  36.9530


In [None]:
# extract features
# Build a graphrole feature extractor on the supplier projection and compute the
# node feature table that the role-extraction step below consumes.
# NOTE(review): presumably one row per node with recursive structural features - confirm against graphrole docs.
feature_extractor = RecursiveFeatureExtractor(Supplier_graph)
features = feature_extractor.extract_features()

In [None]:
# assign node roles
# n_roles=None: presumably lets graphrole pick the number of roles itself - confirm in its docs.
# NOTE(review): the role labels are not guaranteed to be contiguous (a later output
# shows role_7 without role_5/role_6), so downstream code should not hard-code them.
role_extractor = RoleExtractor(n_roles=None)
role_extractor.extract_role_factors(features)
# `roles` is used as a node -> role-label mapping and stored on each node as the 'role' attribute.
node_roles = role_extractor.roles
nx.set_node_attributes(Supplier_graph, node_roles, 'role' )
# Optional diagnostics (left disabled):
#print('\nNode role assignments:')
#print(node_roles)

#print('\nNode role membership by percentage:')
#print(role_extractor.role_percentage.round(2))

In [None]:
# Display every node with its attribute dict to confirm the 'role' attribute was attached.
# NOTE: this prints the whole node view, which is large for a graph of this size.
Supplier_graph.nodes(data=True)

NodeDataView({'AADA': {'bipartite': 'Supplier', 'role': 'role_0'}, 'AGA KHAN DEVELOPMENT NETWORK': {'bipartite': 'Supplier', 'role': 'role_0'}, 'ASTER PRIVATE LTD. OF INDIA': {'bipartite': 'Supplier', 'role': 'role_0'}, 'ATLAS AFROOZ SHARGH CO': {'bipartite': 'Supplier', 'role': 'role_0'}, 'BAKHTAR DEVELOPMENT NETWORK BDN': {'bipartite': 'Supplier', 'role': 'role_0'}, 'BANGLADESH RURAL ADVANCEMENT COMMITTEE (BRAC)': {'bipartite': 'Supplier', 'role': 'role_0'}, 'BRAC AFGHANISTAN': {'bipartite': 'Supplier', 'role': 'role_0'}, 'CHA': {'bipartite': 'Supplier', 'role': 'role_0'}, 'CONCERN': {'bipartite': 'Supplier', 'role': 'role_0'}, 'COOPERATIVE FOR ASSISTANCE AND RELIEF EVERYWHERE (CARE)': {'bipartite': 'Supplier', 'role': 'role_0'}, 'COORDINATION FOR HUMANITARIAN ASSISTANCE (CHA)': {'bipartite': 'Supplier', 'role': 'role_0'}, 'COORDINATION OF HUMANITARIAN ASSISTANT': {'bipartite': 'Supplier', 'role': 'role_0'}, 'CORDAID/AHDS': {'bipartite': 'Supplier', 'role': 'role_0'}, 'EUROSUPPORT (F

In [None]:
# Tabulate the projection's nodes: one row per supplier with its attribute dict,
# plus the node's role label pulled out of that dict into its own column.
df = pd.DataFrame(Supplier_graph.nodes(data=True), columns= ['Supplier', 'Node_Info'])
df['role'] = pd.Series(
    [node_info['role'] for node_info in df['Node_Info']],
    index=df.index,
    dtype=object,
)

In [None]:
# Random-walk settings for the supplier projection: 20-dimensional embeddings,
# walks of length 8, 10 walks per node.
walk_settings = dict(dimensions=20, walk_length=8, num_walks=10)
node2vec = Node2Vec(Supplier_graph, **walk_settings)

Computing transition probabilities:   0%|          | 0/1725 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [00:07<00:00,  1.34it/s]


In [None]:
# Learn embeddings
# Fit the embedding model on the generated walks; the result exposes the vectors via `model.wv` (used below).
# window/min_count are forwarded to the underlying model - presumably gensim Word2Vec, confirm.
model = node2vec.fit(window=10, min_count=1)

In [None]:
# Build the embedding table: one row per supplier, columns E0..E{d-1}.
# gensim >= 4 exposes the vocabulary as `index_to_key`; older releases used
# `index2word` (which now raises AttributeError), so support both.
embedding_keys = getattr(model.wv, 'index_to_key', None)
if embedding_keys is None:
    embedding_keys = model.wv.index2word

# Column names follow the actual embedding width instead of a hard-coded 20 entries.
embedding_columns = [f'E{dim}' for dim in range(model.wv.vectors.shape[1])]
df1 = pd.DataFrame(model.wv.vectors, columns=embedding_columns)
df1.insert(0, 'Supplier', list(embedding_keys))

In [None]:
# Attach each supplier's role to its embedding row (left join keeps every embedded
# supplier), then one-hot encode the role into `assigned_<role>` indicator columns.
df_all = pd.get_dummies(
    df1.merge(df, how ="left", on = "Supplier"),
    columns=["role"],
    prefix="assigned",
)
df_all.head()

Unnamed: 0,Supplier,E0,E1,E2,E3,E4,E5,E6,E7,E8,...,E17,E18,E19,Node_Info,assigned_role_0,assigned_role_1,assigned_role_2,assigned_role_3,assigned_role_4,assigned_role_7
0,LARSEN TOUBRO LIMITED,1.590433,-0.43185,0.552511,0.044907,-1.323549,0.733911,-1.304529,-0.621773,0.181361,...,-0.253392,0.988759,1.063308,"{'bipartite': 'Supplier', 'role': 'role_3'}",0,0,0,1,0,0
1,THALES ESPANA GRP S.A.U,1.212893,-0.574359,0.007413,0.885817,-1.540424,0.801275,-0.797559,-1.010051,-0.174881,...,-0.526034,1.928375,1.727976,"{'bipartite': 'Supplier', 'role': 'role_0'}",1,0,0,0,0,0
2,ONUR TAAHHUT TASIMACILIK INSAAT TICARET VE SANAYI,0.659058,-0.406041,-0.53646,0.416689,-2.395493,0.715493,-1.584321,-0.273567,0.268433,...,0.337579,0.102912,0.947021,"{'bipartite': 'Supplier', 'role': 'role_0'}",1,0,0,0,0,0
3,"MOTA-ENGIL ENGEHARIA E CONSTRUCAO, AFRICA, SA",0.272854,-2.325122,0.200857,1.221961,-1.538408,0.653042,0.554232,1.24388,1.460231,...,2.067526,-0.679576,1.3238,"{'bipartite': 'Supplier', 'role': 'role_0'}",1,0,0,0,0,0
4,CHINA CIVIL ENGINEERING CONSTRUCTION CORPORATI...,2.300497,-1.647159,0.549183,-0.082708,-2.522454,-1.002814,-0.853858,-1.538851,1.063821,...,2.083966,1.310974,0.221251,"{'bipartite': 'Supplier', 'role': 'role_0'}",1,0,0,0,0,0


**Isolation Forest**

In [None]:
# Seeded NumPy RandomState handed to the IsolationForest below.
# NOTE: this is a stateful generator object; re-run this cell before re-fitting to get the same forest again.
random_state = np.random.RandomState(42)

In [None]:
# Feature matrix for the IsolationForest: all one-hot role indicators followed by
# all embedding dimensions. The columns are derived from df_all rather than typed
# out, because the set of role labels produced by the role extraction can change
# between runs and a hard-coded name such as 'assigned_role_7' would then raise a KeyError.
isf_role_cols = [c for c in df_all.columns if str(c).startswith('assigned_')]
isf_embedding_cols = [
    c for c in df_all.columns
    if str(c).startswith('E') and str(c)[1:].isdigit()
]
isf_feature_cols = isf_role_cols + isf_embedding_cols

# contamination=0.05: the model labels roughly 5% of suppliers as anomalies.
isf_model = IsolationForest(
    n_estimators=100,
    max_samples='auto',
    contamination=0.05,
    random_state=random_state,
)
isf_model.fit(df_all[isf_feature_cols])

print(isf_model.get_params())

{'bootstrap': False, 'contamination': 0.05, 'max_features': 1.0, 'max_samples': 'auto', 'n_estimators': 100, 'n_jobs': None, 'random_state': RandomState(MT19937) at 0x7FE8234557C0, 'verbose': 0, 'warm_start': False}


In [None]:
# Score every supplier with the fitted IsolationForest.
# Use exactly the columns the model was fitted on when scikit-learn recorded them;
# otherwise fall back to "role indicators, then embedding dimensions", which avoids
# repeating a hard-coded column list for each call.
scoring_cols = list(getattr(isf_model, 'feature_names_in_', []))
if not scoring_cols:
    scoring_cols = (
        [c for c in df_all.columns if str(c).startswith('assigned_')]
        + [c for c in df_all.columns if str(c).startswith('E') and str(c)[1:].isdigit()]
    )
scoring_features = df_all[scoring_cols]

# 'scores' holds the continuous decision-function value (negative = more anomalous);
# 'anomaly_score' holds the hard label: -1 for anomalies, 1 for inliers.
df_all['scores'] = isf_model.decision_function(scoring_features)
df_all['anomaly_score'] = isf_model.predict(scoring_features)

df_all[df_all['anomaly_score']==-1].head()

Unnamed: 0,Supplier,E0,E1,E2,E3,E4,E5,E6,E7,E8,...,E19,Node_Info,assigned_role_0,assigned_role_1,assigned_role_2,assigned_role_3,assigned_role_4,assigned_role_7,scores,anomaly_score
0,LARSEN TOUBRO LIMITED,1.590433,-0.43185,0.552511,0.044907,-1.323549,0.733911,-1.304529,-0.621773,0.181361,...,1.063308,"{'bipartite': 'Supplier', 'role': 'role_3'}",0,0,0,1,0,0,-0.000537,-1
4,CHINA CIVIL ENGINEERING CONSTRUCTION CORPORATI...,2.300497,-1.647159,0.549183,-0.082708,-2.522454,-1.002814,-0.853858,-1.538851,1.063821,...,0.221251,"{'bipartite': 'Supplier', 'role': 'role_0'}",1,0,0,0,0,0,-0.024433,-1
13,"KOLIN INSAAT, TURIZM SANAYI VE TICARET A.S.",0.550805,-1.441068,-1.028714,1.231882,-1.69679,2.681681,1.434417,-1.953973,2.479467,...,0.816706,"{'bipartite': 'Supplier', 'role': 'role_0'}",1,0,0,0,0,0,-0.022767,-1
16,GROUPEMENT ZECO-ECCOMAR,0.978149,-0.660167,1.604586,1.586061,-0.917066,-1.1281,-0.218129,-1.614033,1.037548,...,1.705699,"{'bipartite': 'Supplier', 'role': 'role_0'}",1,0,0,0,0,0,-0.01431,-1
17,"CONSTRUCTORA MECO, S.A.",2.840266,0.111236,0.562744,2.058171,-1.312122,1.059655,-1.03672,-1.37909,1.227057,...,1.516687,"{'bipartite': 'Supplier', 'role': 'role_0'}",1,0,0,0,0,0,-0.023296,-1


In [None]:
# Copy each supplier's IsolationForest label (-1 or 1) onto its graph node as 'anomaly'.
# .tolist() yields plain Python ints, matching what the row-wise iteration stored.
anomaly_flags = df_all['anomaly_score'].tolist()
for supplier_name, anomaly_flag in zip(df_all['Supplier'], anomaly_flags):
    Supplier_graph.nodes[supplier_name]['anomaly'] = anomaly_flag

Supplier_graph.nodes(data=True)

NodeDataView({'AADA': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'AGA KHAN DEVELOPMENT NETWORK': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'ASTER PRIVATE LTD. OF INDIA': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'ATLAS AFROOZ SHARGH CO': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'BAKHTAR DEVELOPMENT NETWORK BDN': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'BANGLADESH RURAL ADVANCEMENT COMMITTEE (BRAC)': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'BRAC AFGHANISTAN': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'CHA': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'CONCERN': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'COOPERATIVE FOR ASSISTANCE AND RELIEF EVERYWHERE (CARE)': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'COORDINATION FOR HUMANITARIAN ASSISTANCE (CHA)': {'bipartite': 'Supplier', 'role': 'role_0', 'anomaly': 1}, 'COORDINA

In [None]:
# Persist the supplier projection, including the node attributes set above, as GML in the output folder.
nx.write_gml(Supplier_graph, os.path.join(output_path, "0422_UNI_Role_Node2Vec_ISF_2018.gml"))

**Node2Vec To Find Most Similar Firms Which Are Sanctioned**

<font color = 'grey'>***Node embedding for each year***</font>

In [None]:
def clean(text):
  """Return ``text`` with spaces and the listed punctuation characters removed.

  Used to turn a firm name into a string that is safe to use in a file name.
  Characters dropped: ! # $ % & @ [ ] space _ / ( ) ' , - .
  """
  # characters that must not survive in the cleaned string
  removed_chars = ('!', '#', '$', '%', '&', '@', '[', ']', ' ', '_', '/', '(', ')', "'", ",", "-", ".")
  kept = [ch for ch in text if ch not in removed_chars]
  return ''.join(kept)

In [None]:
# Load the list of firm names to look up; the per-year loop below iterates over
# `node_list` and calls these names sanctioned nodes.
common_list_file = os.path.join(path, 'Common_List.xlsx')
Common_list = pd.read_excel(common_list_file)
node_list = Common_list['Common_list'].tolist()

In [None]:
# Doing the analysis by year on the supplier projection of the bipartite graph.
# For every fiscal year: rebuild the country-supplier network, project it onto
# suppliers, embed the projection with Node2Vec and, for each sanctioned firm that
# appears in that year, plot the 2-hop neighbourhood of its most similar supplier.
history = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]

for year in history:
  wb_data_filter = wb_data[wb_data['Fiscal Year'] == year]
  # Grouping repeated transactions between country and suppliers
  wb_data_group = wb_data_filter.groupby(['Fiscal Year', 'Borrower Country', 'Supplier'])['Total Contract Amount (USD)'].sum()
  wb_data_group = wb_data_group.reset_index()

  # Setting up the network (nx.Graph is already undirected, no conversion needed)
  G = nx.Graph()
  G.add_nodes_from(wb_data_group['Borrower Country'], bipartite='Country')
  G.add_nodes_from(wb_data_group['Supplier'], bipartite='Supplier')
  G.add_weighted_edges_from(
      zip(
          wb_data_group['Borrower Country'],
          wb_data_group['Supplier'],
          wb_data_group['Total Contract Amount (USD)'] / 1000000,
      ),
      weight='contract_amt',
  )

  # Making projection network
  supplier_nodes = [n for n in G.nodes() if G.nodes[n]['bipartite'] == 'Supplier']
  Supplier_graph = bipartite.generic_weighted_projected_graph(G, nodes=supplier_nodes, weight_function=my_weight)

  # Only sanctioned firms that actually had a contract this year can be looked up;
  # skip the (expensive) embedding entirely when there are none.
  sanctioned_present = [firm for firm in node_list if firm in Supplier_graph]
  if not sanctioned_present:
    continue

  # Node2Vec Embedding
  node2vec = Node2Vec(Supplier_graph, dimensions=20, walk_length=8, num_walks=1)
  # Learn embeddings
  model = node2vec.fit(window=10, min_count=1)

  for sanc_node in sanctioned_present:
    # Most similar suppliers for THIS sanctioned firm. The list is rebuilt for every
    # firm: previously it accumulated across firms, so the "top" node always
    # belonged to the first firm of the year. `model.wv.most_similar` is used
    # because `model.most_similar` no longer exists in gensim 4.
    similar_node = [node for node, _ in model.wv.most_similar(sanc_node)]
    if not similar_node:
      # Nothing to compare against (e.g. a one-node vocabulary).
      continue
    top_node = similar_node[0]  # Topmost similar node

    # Selected node together with its 1st and 2nd neighbours (a set avoids the
    # massive duplication a list would collect in a dense projection).
    neighborhood = {top_node}
    for n1 in Supplier_graph.neighbors(top_node):
      neighborhood.add(n1)
      neighborhood.update(Supplier_graph.neighbors(n1))
    closest_ngh_graph = Supplier_graph.subgraph(neighborhood)
    Gcc = closest_ngh_graph.subgraph(max(nx.connected_components(closest_ngh_graph), key=len))

    # The projection only contains supplier nodes, so there is no country colour:
    # highlight and label the top node, draw every other supplier in sky blue.
    node_clr = ['red' if node_name == top_node else 'skyblue' for node_name in Gcc.nodes()]
    labels = {top_node: top_node}

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 6))
    pos = nx.spring_layout(Gcc, seed=10396953, k=2)
    nx.draw_networkx_nodes(Gcc, pos, node_size=32, ax=ax, node_color=node_clr)
    nx.draw_networkx_edges(Gcc, pos, alpha=0.4, ax=ax)
    nx.draw_networkx_labels(Gcc, pos, labels, font_size=10,
                            font_color='red', ax=ax, alpha=.9,
                            horizontalalignment='center')
    ax.set_title(f"Graph of {top_node} in {year}: Similar to {sanc_node} ")
    ax.set_axis_off()
    fig.savefig(os.path.join(output_path, (clean(sanc_node) + "_" + str(year))))
    # Show the figure inline, then release it so figures do not pile up in memory
    # over the years x firms iterations.
    plt.show()
    plt.close(fig)



**Note**: The unipartite (supplier projection) graph did not yield very meaningful results. Hence, it is not discussed in the PPT or the project paper.