<a href="https://colab.research.google.com/github/ADRopentech/International_Trade_Network_Analysis/blob/main/Processing_by_quantity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install networkx



In [None]:
import pandas as pd
import numpy as np
import math
import time
import networkx as nx

# Importing data

In [None]:
# Read the 2022 export and import tables from CSV files in the working directory.
exports, imports = map(pd.read_csv, ('exports_2022.csv', 'imports_2022.csv'))

In [None]:
# Locations of the three lookup CSVs (commodity codes, partner areas, reporter areas).
url_hs_codes = "https://raw.githubusercontent.com/ADRopentech/Visualizing-Global-Trade-Networks/main/comtrade_codes/harmonized-system.csv"
url_partner_codes = "https://raw.githubusercontent.com/ADRopentech/Visualizing-Global-Trade-Networks/main/comtrade_codes/partnerAreas.csv"
url_reporter_codes = "https://raw.githubusercontent.com/ADRopentech/Visualizing-Global-Trade-Networks/main/comtrade_codes/reporterAreas.csv"

# Download each table in turn; the order matches the original three reads.
commodities, partners, reporters = (
    pd.read_csv(lookup_url)
    for lookup_url in (url_hs_codes, url_partner_codes, url_reporter_codes)
)

In [None]:
# Render the reporter lookup table (columns: id, text) for a quick visual check.
display(reporters)

Unnamed: 0,id,text
0,all,All
1,4,Afghanistan
2,8,Albania
3,12,Algeria
4,20,Andorra
...,...,...
252,876,Wallis and Futuna Isds
253,887,Yemen
254,894,Zambia
255,716,Zimbabwe


# Segregating and processing data and creating nodes.

In [None]:
# @title total exports and imports
# Keep only rows whose partnerCode is 0 (presumably the "World" aggregate --
# confirm against the Comtrade partner table) and sum netWgt per
# (reporterCode, partnerCode) pair.
flow_columns = ['reporterCode', 'partnerCode', 'netWgt']
group_keys = ['reporterCode', 'partnerCode']

total_exports = (
    exports.loc[exports['partnerCode'] == 0, flow_columns]
    .groupby(group_keys)
    .sum()
    .reset_index()
)
total_imports = (
    imports.loc[imports['partnerCode'] == 0, flow_columns]
    .groupby(group_keys)
    .sum()
    .reset_index()
)

In [None]:
# @title exports and imports by country
# Every row with a non-zero partnerCode is a reporter -> partner flow; sum
# netWgt per (reporterCode, partnerCode) pair.
flow_columns = ['reporterCode', 'partnerCode', 'netWgt']
group_keys = ['reporterCode', 'partnerCode']

exports_by_country = (
    exports.loc[exports['partnerCode'] != 0, flow_columns]
    .groupby(group_keys)
    .sum()
    .reset_index()
)
imports_by_country = (
    imports.loc[imports['partnerCode'] != 0, flow_columns]
    .groupby(group_keys)
    .sum()
    .reset_index()
)

In [None]:
# @title data cleaning and formatting
# Turn the reporter/partner codes into strings, left-padded with '0' so that
# one- and two-character codes become three characters long.
for dataset in (total_exports, total_imports, exports_by_country, imports_by_country):
    for col in ('reporterCode', 'partnerCode'):
        dataset[col] = dataset[col].astype(str).str.rjust(3, '0')

# Positional renames. In the export table the reporter is the exporter; in the
# import table the reporter is the importer, hence the swapped first two names.
total_exports.columns = ['reporterCode', 'partnerCode', 'export_quantity']
total_imports.columns = ['reporterCode', 'partnerCode', 'import_quantity']
exports_by_country.columns = ['exporter', 'importer', 'quantity']
imports_by_country.columns = ['importer', 'exporter', 'quantity']

In [None]:
# @title official exports and imports reported by a country
# One row per reporter code, holding its self-reported export and import totals.
# The outer join keeps reporters that appear in only one of the two tables.
reported_exports = total_exports.set_index('reporterCode')[['export_quantity']]
reported_imports = total_imports.set_index('reporterCode')[['import_quantity']]

total_trade = (
    reported_exports
    .join(reported_imports, how='outer')
    .rename_axis('country_code')
)

In [None]:
# Add a 0/1 companion column per quantity column: 1 where the reported value
# is missing (NaN after the outer join), 0 otherwise.
for col in ('export_quantity', 'import_quantity'):
    total_trade[col + '_note'] = total_trade[col].isna().astype('int64')

In [None]:
# @title each country's imports as reported by its exporting partners
# exports_by_country holds exporter-reported flows; grouping by 'importer'
# gives, for each country, the total quantity its partners say they shipped to it.
# Only 'quantity' is summed: summing the whole frame would also "sum" the string
# 'exporter' column, which pandas >= 2.0 does by concatenating the codes into a
# useless (and slow to build) column.
imports_to_add = exports_by_country.groupby('importer')[['quantity']].sum()

In [None]:
# @title each country's exports as reported by its importing partners
# imports_by_country holds importer-reported flows; grouping by 'exporter'
# gives, for each country, the total quantity its partners say they received from it.
# Only 'quantity' is summed: summing the whole frame would also "sum" the string
# 'importer' column, which pandas >= 2.0 does by concatenating the codes into a
# useless (and slow to build) column.
exports_to_add = imports_by_country.groupby('exporter')[['quantity']].sum()

In [None]:
# @title total trade as reported by importers and exporters
# Put the two partner-reported totals side by side, one row per country code.
partner_reported_exports = exports_to_add.rename(columns={'quantity': 'export_by_partners'})
partner_reported_imports = imports_to_add.rename(columns={'quantity': 'import_by_partners'})

trade_to_add = partner_reported_exports.join(partner_reported_imports, how='outer')

In [None]:
# @title total trade as officially and unofficially reported
# Combine self-reported and partner-reported totals per country, keep only the
# four quantity columns, and treat a missing report as zero.
node_columns = ['export_quantity', 'import_quantity', 'export_by_partners', 'import_by_partners']

df_nodes = total_trade.join(trade_to_add, how='outer')[node_columns].fillna(0)

In [None]:
# @title correcting actual trade values
# For each direction keep the self-reported figure unless the partner-reported
# figure is strictly larger, in which case the partner figure is used.
df_nodes['trade_exports'] = np.where(
    df_nodes['export_quantity'] >= df_nodes['export_by_partners'],
    df_nodes['export_quantity'],
    df_nodes['export_by_partners'],
)

df_nodes['trade_imports'] = np.where(
    df_nodes['import_quantity'] >= df_nodes['import_by_partners'],
    df_nodes['import_quantity'],
    df_nodes['import_by_partners'],
)

In [None]:
# @title actual total trade
# Total trade of a country = corrected exports + corrected imports.
df_nodes['trade'] = df_nodes['trade_exports'] + df_nodes['trade_imports']

# Drop countries whose total trade is zero or negative. The explicit .copy()
# makes the result an independent frame, so the columns added to it in later
# cells do not raise SettingWithCopyWarning or depend on view/copy semantics.
df_nodes = df_nodes[df_nodes['trade'] > 0].copy()

In [None]:
# @title rescaling nodes
# Scale node sizes so the largest trader gets 60, then raise anything at or
# below 0.15 up to the 0.15 floor.
scaled_trade = df_nodes['trade'] / df_nodes['trade'].max() * 60
df_nodes['trade_rescaled'] = scaled_trade.clip(lower=0.15)

In [None]:
# @title creating nodes for network
# The node identifiers are the index labels of df_nodes (the zero-padded
# country codes of every country that survived the trade > 0 filter).
nodes = df_nodes.index.values

# Creating Links

In [None]:
# @title Keep only countries with non-zero trade
# Keep a flow only when both of its endpoints are present in df_nodes.
# The index is looked up once instead of being rebuilt as a list four times,
# and .copy() makes each result an independent frame: later cells add columns
# to these frames, which on a bare boolean-mask slice triggers
# SettingWithCopyWarning.
active_countries = df_nodes.index

export_by_country = exports_by_country[
    exports_by_country['exporter'].isin(active_countries)
    & exports_by_country['importer'].isin(active_countries)
].copy()
import_by_country = imports_by_country[
    imports_by_country['exporter'].isin(active_countries)
    & imports_by_country['importer'].isin(active_countries)
].copy()

In [None]:
# For each flow table: tag every row with a direction-independent pair key,
# number the rows within each pair, and collect a per-pair frame with the
# first row's quantity as 'to' and the second row's quantity as 'back'.
# The 'source_target' and 'order' columns are added to the tables in place.
df_list = []

for dataset in (export_by_country, import_by_country):
    # 'A_B' is the same key for A->B and B->A because the two codes are sorted.
    pair_keys = []
    for exporter, importer in zip(dataset['exporter'], dataset['importer']):
        pair_keys.append('_'.join(sorted([exporter, importer])))
    dataset['source_target'] = pair_keys

    # 0 for the first row seen of a pair, 1 for the second, and so on.
    dataset['order'] = dataset.groupby('source_target').cumcount()

    first_rows = dataset[dataset['order'] == 0].set_index('source_target')
    second_rows = dataset[dataset['order'] == 1].set_index('source_target')

    forward = first_rows[['exporter', 'importer', 'quantity']].rename(columns={'quantity': 'to'})
    backward = second_rows[['quantity']].rename(columns={'quantity': 'back'})

    df = forward.join(backward)
    df_list.append(df)

In [None]:
# Order both flow tables by their pair key.
df_exp, df_imp = (
    flow_table.sort_values(by='source_target')
    for flow_table in (export_by_country, import_by_country)
)

In [None]:
# @title creating final dataframe for link information
# Align the exporter-reported and importer-reported rows of each directed flow.
# Non-key columns get the suffixes '_exporter' / '_importer'; a flow missing
# from one side is filled with 0.
link_keys = ['exporter', 'importer', 'source_target']

source_target_df = df_exp.merge(
    df_imp,
    on=link_keys,
    how='outer',
    suffixes=('_exporter', '_importer'),
).fillna(0)

In [None]:
# @title highest trade as weights for each link
# Pick one weight per link from the two reports:
#   both non-zero  -> the larger of the two (the exporter's value on a tie)
#   only one non-zero -> that one
#   both zero      -> the exporter's value (0)
reported_by_exporter = source_target_df['quantity_exporter'].to_numpy()
reported_by_importer = source_target_df['quantity_importer'].to_numpy()

exporter_nonzero = reported_by_exporter != 0
importer_nonzero = reported_by_importer != 0
both_nonzero = exporter_nonzero & importer_nonzero

source_target_df['final_trade_quantity'] = np.select(
    [
        both_nonzero & (reported_by_exporter > reported_by_importer),
        both_nonzero & (reported_by_exporter < reported_by_importer),
        exporter_nonzero & (reported_by_importer == 0),
        (reported_by_exporter == 0) & importer_nonzero,
    ],
    [
        reported_by_exporter,
        reported_by_importer,
        reported_by_exporter,
        reported_by_importer,
    ],
    default=reported_by_exporter,
)

In [None]:
# @title removing 0 trades
# Keep only links whose chosen weight is strictly positive.
has_positive_weight = source_target_df['final_trade_quantity'] > 0
source_target_df = source_target_df[has_positive_weight]

In [None]:
# @title final links table
# One row per directed link: exporter, importer and the chosen weight,
# ordered by exporter and then importer.
link_columns = ['exporter', 'importer', 'final_trade_quantity']
links_final = source_target_df[link_columns]
links_final = links_final.sort_values(by=['exporter', 'importer'])

In [None]:
# @title Edges
# (exporter, importer, weight) triples in links_final row order.
edges = list(
    zip(
        links_final['exporter'].values,
        links_final['importer'].values,
        links_final['final_trade_quantity'].values,
    )
)

# Creating the network

In [None]:
# Start from an empty directed graph; nodes and weighted edges are added to it
# in the following cells.
G = nx.DiGraph()

In [None]:
# Register every entry of `nodes` as a graph node.
G.add_nodes_from(nodes)

In [None]:
# Add one directed edge per (source, target, weight) triple in `edges`; the
# third element is stored as the edge's 'weight' attribute.
G.add_weighted_edges_from(edges)

In [None]:
# Basic size statistics of the network: node count, edge count and density.
node_count = G.number_of_nodes()
link_count = G.number_of_edges()
graph_density = nx.density(G)

print('Number of nodes:', node_count)
print('Number of links:', link_count)
print('Density:', graph_density)

Number of nodes: 231
Number of links: 4806
Density: 0.09045736871823828


In [None]:
# Report whether any node has no edges at all, then the strong-connectivity
# status of the directed graph.
has_isolated_node = any(True for _ in nx.isolates(G))
print('Is there any isolated node?', has_isolated_node)
print('\n')

strongly_connected = nx.is_strongly_connected(G)
strong_component_count = nx.number_strongly_connected_components(G)
print('Is the graph strongly connected?', strongly_connected)
print('Number of strongly connected components:', strong_component_count)

Is there any isolated node? False


Is the graph strongly connected? False
Number of strongly connected components: 51


In [None]:
# Weak connectivity ignores edge direction.
weakly_connected = nx.is_weakly_connected(G)
weak_component_count = nx.number_weakly_connected_components(G)

print('Is the graph weakly connected?', weakly_connected)
print('Number of weakly connected components:', weak_component_count)

Is the graph weakly connected? True
Number of weakly connected components: 1
