# Trade Under Pressure



## Import packages

In [2]:
import pandas as pa

## Data preprocessing
### Dist CEPII

In [3]:
# Loading and preprocessing Dist CEPII dataset
dist_cepii = pa.read_excel("./data/geographic/dist_cepii.xls",
                           index_col=None, header=0,
                           na_values=["."], verbose=True, decimal=",",
                           dtype={"contig": bool, "comlang_off": bool,
                                  "comlang_ethno": bool, "colony": bool,
                                  "smctry": bool}
                           )
dist_cepii = dist_cepii.drop(["comcol", "curcol", "col45"], axis=1)
dist_cepii = dist_cepii.rename(columns={"iso_o": "origin", "iso_d": "destination"})

Reading sheet 0


Inspect DataFrame properties

In [4]:
dist_cepii.head(n=50)

Unnamed: 0,origin,destination,contig,comlang_off,comlang_ethno,colony,smctry,dist,distcap,distw,distwces
0,ABW,ABW,False,False,False,False,False,5.225315,5.225315,25.09354,23.04723
1,ABW,AFG,False,False,False,False,False,13257.81,13257.81,13168.22,13166.37
2,ABW,AGO,False,False,False,False,False,9516.913,9516.913,9587.316,9584.193
3,ABW,AIA,False,False,True,False,False,983.2682,983.2682,976.8974,976.8916
4,ABW,ALB,False,False,False,False,False,9091.742,9091.742,9091.576,9091.466
5,ABW,AND,False,True,False,False,False,7572.788,7572.788,7570.084,7570.083
6,ABW,ANT,False,True,True,False,True,136.3848,136.3848,239.9064,142.8583
7,ABW,ARE,False,False,False,False,False,12735.01,12735.01,12773.08,12772.95
8,ABW,ARG,False,True,False,False,False,5396.22,5396.22,5187.788,5157.126
9,ABW,ARM,False,False,False,False,False,11107.78,11107.78,11106.96,11106.76


In [5]:
dist_cepii.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50176 entries, 0 to 50175
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   origin         50176 non-null  object 
 1   destination    50176 non-null  object 
 2   contig         50176 non-null  bool   
 3   comlang_off    50176 non-null  bool   
 4   comlang_ethno  50176 non-null  bool   
 5   colony         50176 non-null  bool   
 6   smctry         50176 non-null  bool   
 7   dist           50176 non-null  float64
 8   distcap        50176 non-null  float64
 9   distw          47961 non-null  float64
 10  distwces       47961 non-null  float64
dtypes: bool(5), float64(4), object(2)
memory usage: 2.5+ MB


In [6]:
dist_cepii.dtypes

origin            object
destination       object
contig              bool
comlang_off         bool
comlang_ethno       bool
colony              bool
smctry              bool
dist             float64
distcap          float64
distw            float64
distwces         float64
dtype: object

Check how many rows have NaN

In [7]:
dist_cepii.isna().sum()

origin              0
destination         0
contig              0
comlang_off         0
comlang_ethno       0
colony              0
smctry              0
dist                0
distcap             0
distw            2215
distwces         2215
dtype: int64

Count how many times a country has NaN in Origin and in Destination

In [8]:
na_rows = dist_cepii[dist_cepii.isna().any(axis=1)][["origin", "destination"]]
# Sanity check
na_rows.isna().sum()

origin         0
destination    0
dtype: int64

In [9]:
na_rows[["origin"]].value_counts()

origin
PCN       224
CCK       224
MAC       224
MSR       224
CXR       224
         ... 
GNQ         5
GRC         5
GRD         5
GRL         5
ZWE         5
Name: count, Length: 224, dtype: int64

Check how much % of rows are NaN from the total amount

In [10]:
print("Percentage of NaN rows: ", (na_rows.shape[0]/dist_cepii.shape[0])*100, "%")

Percentage of NaN rows:  4.414461096938775 %


Drop NaN values

In [11]:
dist_cepii = dist_cepii.dropna()

In [12]:
# Sanity check
dist_cepii.isna().any()

origin           False
destination      False
contig           False
comlang_off      False
comlang_ethno    False
colony           False
smctry           False
dist             False
distcap          False
distw            False
distwces         False
dtype: bool

Check how many unique countries there are in both origin and destination. Numbers should match.

In [13]:
print("Unique countries in origin column", dist_cepii["origin"].nunique())
print("Unique countries in destination column", dist_cepii["destination"].nunique())

Unique countries in origin column 219
Unique countries in destination column 219


### GSDB V3 Dyadic

Inspect the `GSDB_V3_Dyadic.dta` - this should be a Stata file. What is the difference to `GSDB_V3.xls`?

In [14]:
gsdb_stata = pa.read_stata("data/sanctions/GSDB_V3_Dyadic.dta")
gsdb_xls = pa.read_excel("data/sanctions/GSDB_V3.xls")

Retrieve rows that include `case_id` 471

In [15]:
gsdb_stata[gsdb_stata["case_id"].astype(str).str.contains("471")]

Unnamed: 0,case_id,sanctioning_state_iso3,sanctioning_state,sanctioned_state_iso3,sanctioned_state,year,arms,military,trade,descr_trade,financial,travel,other,target_mult,sender_mult,objective,success
0,471,AFG,Afghanistan,AGO,Angola,1993-01-01,1,1,1,exp_part,0,0,0,0,1,end_war,success_total
1,471,AFG,Afghanistan,AGO,Angola,1994-01-01,1,1,1,exp_part,0,0,0,0,1,end_war,success_total
2,471,AFG,Afghanistan,AGO,Angola,1995-01-01,1,1,1,exp_part,0,0,0,0,1,end_war,success_total
3,471,AFG,Afghanistan,AGO,Angola,1996-01-01,1,1,1,exp_part,0,0,0,0,1,end_war,success_total
4,471573,AFG,Afghanistan,AGO,Angola,1997-01-01,1,1,1,exp_part,0,1,1,0,1,"end_war,end_war","success_total,success_total"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150386,471594573,ZWE,Zimbabwe,AGO,Angola,1998-01-01,1,1,1,"exp_part,imp_part",1,1,1,0,1,"end_war,end_war,end_war","success_total,success_total,success_total"
150387,573471594,ZWE,Zimbabwe,AGO,Angola,1999-01-01,1,1,1,"exp_part,imp_part",1,1,1,0,1,"end_war,end_war,end_war","success_total,success_total,success_total"
150388,471594573,ZWE,Zimbabwe,AGO,Angola,2000-01-01,1,1,1,"exp_part,imp_part",1,1,1,0,1,"end_war,end_war,end_war","success_total,success_total,success_total"
150389,594471573,ZWE,Zimbabwe,AGO,Angola,2001-01-01,1,1,1,"exp_part,imp_part",1,1,1,0,1,"end_war,end_war,end_war","success_total,success_total,success_total"


In [16]:
gsdb_stata.head(500)

Unnamed: 0,case_id,sanctioning_state_iso3,sanctioning_state,sanctioned_state_iso3,sanctioned_state,year,arms,military,trade,descr_trade,financial,travel,other,target_mult,sender_mult,objective,success
0,471,AFG,Afghanistan,AGO,Angola,1993-01-01,1,1,1,exp_part,0,0,0,0,1,end_war,success_total
1,471,AFG,Afghanistan,AGO,Angola,1994-01-01,1,1,1,exp_part,0,0,0,0,1,end_war,success_total
2,471,AFG,Afghanistan,AGO,Angola,1995-01-01,1,1,1,exp_part,0,0,0,0,1,end_war,success_total
3,471,AFG,Afghanistan,AGO,Angola,1996-01-01,1,1,1,exp_part,0,0,0,0,1,end_war,success_total
4,471573,AFG,Afghanistan,AGO,Angola,1997-01-01,1,1,1,exp_part,0,1,1,0,1,"end_war,end_war","success_total,success_total"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,738869754,AFG,Afghanistan,SDN,Sudan,2010-01-01,1,1,0,,1,1,0,0,1,"human_rights,end_war,human_rights,end_war,huma...","ongoing,ongoing,ongoing,ongoing,ongoing,ongoing"
496,738754869,AFG,Afghanistan,SDN,Sudan,2011-01-01,1,1,0,,1,1,0,0,1,"human_rights,end_war,human_rights,end_war,huma...","ongoing,ongoing,ongoing,ongoing,ongoing,ongoing"
497,754738869,AFG,Afghanistan,SDN,Sudan,2012-01-01,1,1,0,,1,1,0,0,1,"human_rights,end_war,human_rights,end_war,huma...","ongoing,ongoing,ongoing,ongoing,ongoing,ongoing"
498,869754738,AFG,Afghanistan,SDN,Sudan,2013-01-01,1,1,0,,1,1,0,0,1,"human_rights,end_war,human_rights,end_war,huma...","ongoing,ongoing,ongoing,ongoing,ongoing,ongoing"


In [17]:
gsdb_xls.head(500)

Unnamed: 0,case_id,sanctioned_state,sanctioning_state,begin,end,trade,descr_trade,arms,military,financial,travel,other,target_mult,sender_mult,objective,success
0,1,German Democratic Republic,Germany,1949,1973,0,,0,0,0,0,1,0,0,territorial_conflict,success_total
1,2,Pakistan,India,1949,1951,1,"exp_compl, imp_compl",0,0,0,0,0,0,0,policy_change,nego_settlement
2,3,Bulgaria,United States,1950,1966,0,,0,0,0,0,1,0,0,destab_regime,failed
3,4,Bulgaria,United States,1950,1959,0,,0,0,0,1,0,0,0,destab_regime,success_part
4,5,Bulgaria,United States,1950,1963,0,,0,0,1,0,0,0,0,destab_regime,success_part
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,496,Russia,Ukraine,1993,1996,1,imp_part,0,1,0,0,0,0,0,policy_change,nego_settlement
496,497,Sudan,United States,1993,2020,0,,0,0,1,0,0,0,0,terrorism,success_part
497,498,Togo,EU,1993,2007,0,,0,0,1,0,0,0,1,"democracy,human_rights","success_total,success_total"
498,499,Togo,France,1993,2008,0,,0,0,1,0,0,0,0,"human_rights,democracy,end_war","success_total,success_total,success_total"


In [18]:
print("Shape for Stata data: ", gsdb_stata.shape)
print("Shape for XLS data:", gsdb_xls.shape)

Shape for Stata data:  (150875, 17)
Shape for XLS data: (1325, 16)


In [19]:
# Column names Stata
gsdb_stata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150875 entries, 0 to 150874
Data columns (total 17 columns):
 #   Column                  Non-Null Count   Dtype         
---  ------                  --------------   -----         
 0   case_id                 150875 non-null  object        
 1   sanctioning_state_iso3  150875 non-null  object        
 2   sanctioning_state       150875 non-null  object        
 3   sanctioned_state_iso3   150875 non-null  object        
 4   sanctioned_state        150875 non-null  object        
 5   year                    150875 non-null  datetime64[ns]
 6   arms                    150875 non-null  int8          
 7   military                150875 non-null  int8          
 8   trade                   150875 non-null  int8          
 9   descr_trade             150875 non-null  object        
 10  financial               150875 non-null  int8          
 11  travel                  150875 non-null  int8          
 12  other                   150875

In [20]:
gsdb_stata.dtypes

case_id                           object
sanctioning_state_iso3            object
sanctioning_state                 object
sanctioned_state_iso3             object
sanctioned_state                  object
year                      datetime64[ns]
arms                                int8
military                            int8
trade                               int8
descr_trade                       object
financial                           int8
travel                              int8
other                               int8
target_mult                         int8
sender_mult                         int8
objective                         object
success                           object
dtype: object

In [21]:
# Column names XLS
gsdb_xls.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1325 entries, 0 to 1324
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   case_id            1325 non-null   int64 
 1   sanctioned_state   1325 non-null   object
 2   sanctioning_state  1325 non-null   object
 3   begin              1325 non-null   int64 
 4   end                1325 non-null   int64 
 5   trade              1325 non-null   int64 
 6   descr_trade        513 non-null    object
 7   arms               1325 non-null   int64 
 8   military           1325 non-null   int64 
 9   financial          1325 non-null   int64 
 10  travel             1325 non-null   int64 
 11  other              1325 non-null   int64 
 12  target_mult        1325 non-null   int64 
 13  sender_mult        1325 non-null   int64 
 14  objective          1325 non-null   object
 15  success            1325 non-null   object
dtypes: int64(11), object(5)
memory usage: 165.

In [22]:
gsdb_xls.dtypes

case_id               int64
sanctioned_state     object
sanctioning_state    object
begin                 int64
end                   int64
trade                 int64
descr_trade          object
arms                  int64
military              int64
financial             int64
travel                int64
other                 int64
target_mult           int64
sender_mult           int64
objective            object
success              object
dtype: object

## GDP Data

In [23]:
gdp = pa.read_csv(filepath_or_buffer="data/economic/GDP(currentUSD)_1974-2023.csv", sep=",", na_values=[".."])

In [24]:
gdp = gdp.drop(["Series Name", "Series Code", "Country Name"], axis=1)

In [25]:
gdp = gdp.rename(columns=lambda x: x if not x.endswith("]") else x.split(" ")[0])

In [26]:
gdp.head(10)

Unnamed: 0,Country Code,1974,1975,1976,1977,1978,1979,1980,1981,1982,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,AFG,,,,,,,,,,...,20497130000.0,19134220000.0,18116570000.0,18753460000.0,18053220000.0,18799440000.0,19955930000.0,14260000000.0,14497240000.0,17233050000.0
1,ALB,,,,,,,1578102000.0,1808177000.0,1861163000.0,...,13228150000.0,11386850000.0,11861200000.0,13019730000.0,15379510000.0,15585110000.0,15241460000.0,18032010000.0,19017240000.0,23547180000.0
2,DZA,13209870000.0,15557900000.0,17728240000.0,20972110000.0,26364490000.0,33243710000.0,42345830000.0,44348590000.0,45207170000.0,...,238942700000.0,187493900000.0,180763800000.0,189880900000.0,194554500000.0,193459700000.0,164873400000.0,186231200000.0,225638500000.0,247626200000.0
3,ASM,,,,,,,,,,...,643000000.0,673000000.0,671000000.0,612000000.0,639000000.0,647000000.0,721000000.0,750000000.0,871000000.0,
4,AND,186557100.0,220112600.0,227283900.0,253997900.0,308020300.0,411548700.0,446377800.0,388983300.0,375914700.0,...,3271686000.0,2789881000.0,2896610000.0,3000162000.0,3218420000.0,3155149000.0,2891001000.0,3324648000.0,3380613000.0,3785067000.0
5,AGO,,,,,,,5930503000.0,5550483000.0,5550483000.0,...,135966800000.0,90496420000.0,52761620000.0,73690150000.0,79450690000.0,70897960000.0,48501560000.0,66505130000.0,104399700000.0,84824650000.0
6,ATG,,,,77496300.0,88033330.0,109585200.0,132440700.0,149377800.0,166425900.0,...,1378830000.0,1437756000.0,1489693000.0,1531152000.0,1661530000.0,1725352000.0,1410796000.0,1601367000.0,1867733000.0,2033085000.0
7,ARG,72436780000.0,52438650000.0,51169500000.0,56781000000.0,89049450000.0,69252330000.0,76961920000.0,78676840000.0,84307490000.0,...,526319700000.0,594749300000.0,557532300000.0,643628400000.0,524819900000.0,447754700000.0,385740500000.0,486564100000.0,632790100000.0,646075300000.0
8,ARM,,,,,,,,,,...,11609510000.0,10553340000.0,10546140000.0,11527460000.0,12457940000.0,13619290000.0,12641700000.0,13878910000.0,19513510000.0,24085750000.0
9,ABW,,,,,,,,,,...,2790850000.0,2962907000.0,2983635000.0,3092429000.0,3276184000.0,3395799000.0,2481857000.0,2929447000.0,3279344000.0,3648573000.0


In [27]:
gdp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 222 entries, 0 to 221
Data columns (total 51 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Country Code  217 non-null    object 
 1   1974          147 non-null    float64
 2   1975          150 non-null    float64
 3   1976          150 non-null    float64
 4   1977          153 non-null    float64
 5   1978          152 non-null    float64
 6   1979          153 non-null    float64
 7   1980          163 non-null    float64
 8   1981          163 non-null    float64
 9   1982          164 non-null    float64
 10  1983          164 non-null    float64
 11  1984          166 non-null    float64
 12  1985          168 non-null    float64
 13  1986          168 non-null    float64
 14  1987          174 non-null    float64
 15  1988          176 non-null    float64
 16  1989          176 non-null    float64
 17  1990          192 non-null    float64
 18  1991          193 non-null    

In [28]:
gdp.shape

(222, 51)

Check how many countries there are in the dataset.

In [29]:
gdp["Country Code"].nunique()

217

In [30]:
gdp["Country Code"].isna().sum()

np.int64(5)

Check countries that are in CEPII, but not in GDP dataset.

In [31]:
countries_unique_to_cepii = list(set(dist_cepii["origin"].unique()) - set(gdp["Country Code"].unique()))
countries_unique_to_cepii

['WLF',
 'NFK',
 'GUF',
 'ANT',
 'SHN',
 'COK',
 'REU',
 'AIA',
 'PAL',
 'TMP',
 'NIU',
 'TWN',
 'GLP',
 'TKL',
 'MTQ',
 'ROM',
 'SPM',
 'ZAR',
 'ESH',
 'FLK',
 'YUG']

Check countries that are in GDP, but not in CEPII dataset.

In [32]:
countries_unique_to_gdp = list(set(gdp["Country Code"].unique()) - set(dist_cepii["origin"].unique()))
countries_unique_to_gdp

[nan,
 'ROU',
 'PSE',
 'CUW',
 'MNE',
 'SSD',
 'IMN',
 'SRB',
 'GUM',
 'COD',
 'XKX',
 'TLS',
 'SXM',
 'MCO',
 'ASM',
 'CHI',
 'MAF',
 'VIR',
 'MAC',
 'LIE']

#### --- ENDE GELÄNDE ---