In [1]:
import pandas as pd
import numpy as np

These notebook cleans data from the 'raw' subfolder and produces 20 csv files (13 dyad level and 7 node level)
Data cleaning steps:
   > - a. Renaming the exporter and importer columns
   > - b. Removing self loops
   > - c. Removing duplicate rows and keeping the first instance only
   > - d. Keeping only the necessary columns
   > - e. Keeping only the countries common in all datassets
   > - f. Sorting values by country pair and time period
   > - g. Reseting index
   > - h. Saving the data in the cleanind folder

### GHGFP: Emissions embodied in bilateral trade

In [2]:
co2 = pd.read_csv('../data/raw/DF_TRADE.csv')

In [3]:
co2.head()

Unnamed: 0,STRUCTURE,STRUCTURE_ID,ACTION,FREQ,TIME_PERIOD,MEASURE,EXPORTER,IMPORTER,ACTIVITY,PRODUCT_CATEGORY,UNIT_MEASURE,UNIT_MULT,OBS_VALUE
0,DATAFLOW,OECD.STI.PIE:DSD_ICIO_GHG_TRADE@DF_ICIO_GHG_TR...,R,A,1995,TRADE_GHG,ARG,ARG,A01_02,FNL,T_CO2E,6,0.0
1,DATAFLOW,OECD.STI.PIE:DSD_ICIO_GHG_TRADE@DF_ICIO_GHG_TR...,R,A,1996,TRADE_GHG,ARG,ARG,A01_02,FNL,T_CO2E,6,0.0
2,DATAFLOW,OECD.STI.PIE:DSD_ICIO_GHG_TRADE@DF_ICIO_GHG_TR...,R,A,1997,TRADE_GHG,ARG,ARG,A01_02,FNL,T_CO2E,6,0.0
3,DATAFLOW,OECD.STI.PIE:DSD_ICIO_GHG_TRADE@DF_ICIO_GHG_TR...,R,A,1998,TRADE_GHG,ARG,ARG,A01_02,FNL,T_CO2E,6,0.0
4,DATAFLOW,OECD.STI.PIE:DSD_ICIO_GHG_TRADE@DF_ICIO_GHG_TR...,R,A,1999,TRADE_GHG,ARG,ARG,A01_02,FNL,T_CO2E,6,0.0


In [4]:
co2['TIME_PERIOD'].unique()

array([1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
       2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
       2017, 2018, 2019, 2020], dtype=int64)

In [5]:
co2.shape

(22145760, 13)

In [6]:
co2 = co2[co2['EXPORTER'] != co2['IMPORTER']]

In [7]:
co2 = co2.rename(columns = {'EXPORTER': 'iso_o'})
co2 = co2.rename(columns = {'IMPORTER': 'iso_d'})

In [8]:
co2['ACTIVITY'].unique()

array(['A01_02', 'A03', 'B05_06', 'B07_08', 'B09', 'C10T12', 'C13T15',
       'C16', 'C17_18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25',
       'C26', 'C27', 'C28', 'C29', 'C30', 'C31T33', 'D', 'E', 'F', 'G',
       'H49', 'H50', 'H51', 'H52', 'H53', 'I', 'J58T60', 'J61', 'J62_63',
       'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', '_T', 'A', 'B',
       'C', 'C16T18', 'C19T23', 'C20_21', 'C24_25', 'C26_27', 'C29_30',
       'D_E', 'GTN', 'GTI', 'H', 'J', 'M_N', 'OTT', 'OTQ', 'RTT', 'R_S',
       'BTE', 'GTT', 'JTN', 'FTT', 'INFO'], dtype=object)

In [9]:
co2['ACTIVITY'].nunique()

70

In [10]:
co2['PRODUCT_CATEGORY'].unique()

array(['FNL', 'TTL'], dtype=object)

In [11]:
co2[(co2['iso_o'] == 'BGD') & (co2['iso_d'] == 'IND') & (co2['TIME_PERIOD'] == 2000) & (co2['ACTIVITY'] == '_T') & (co2['PRODUCT_CATEGORY'] == 'TTL')]

Unnamed: 0,STRUCTURE,STRUCTURE_ID,ACTION,FREQ,TIME_PERIOD,MEASURE,iso_o,iso_d,ACTIVITY,PRODUCT_CATEGORY,UNIT_MEASURE,UNIT_MULT,OBS_VALUE
18256165,DATAFLOW,OECD.STI.PIE:DSD_ICIO_GHG_TRADE@DF_ICIO_GHG_TR...,R,A,2000,TRADE_GHG,BGD,IND,_T,TTL,T_CO2E,6,0.123


Product category `TTL` contains both intermediate and final products. 
Activity `_T` is the sum of non-overlapping industry demand. I am keeping these two only.

In [12]:
co2_T = co2[(co2['TIME_PERIOD'] >= 2000) & (co2['ACTIVITY'] == '_T') & (co2['PRODUCT_CATEGORY'] == 'TTL')]

In [13]:
co2.shape

(21861840, 13)

In [14]:
co2_T.shape

(126126, 13)

In [15]:
print(co2_T['iso_o'].nunique())
print(co2_T['iso_d'].nunique())
print(co2_T['ACTIVITY'].nunique())
print(co2_T['PRODUCT_CATEGORY'].nunique())
print(co2_T['TIME_PERIOD'].nunique())
print(co2_T.groupby(['iso_o', 'iso_d']).ngroups)

78
78
1
1
21
6006


In [16]:
co2_T['TIME_PERIOD'].unique()

array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],
      dtype=int64)

In [17]:
column = ['iso_o', 'iso_d', 'TIME_PERIOD']
dupli = co2_T.duplicated(subset = column, keep = 'last')

In [18]:
co2_uni = co2_T[~dupli]

In [19]:
co2_uni.shape #There is no duplicate.

(126126, 13)

In [20]:
co2_uni = co2_uni[['iso_o', 'iso_d', 'TIME_PERIOD', 'OBS_VALUE']]

In [21]:
co2_uni = co2_uni.reset_index(drop = True)

In [22]:
co2_uni = co2_uni[co2_uni['iso_o'] != 'W']
co2_uni = co2_uni[co2_uni['iso_d'] != 'W']

In [23]:
co2_uni.shape

(122892, 4)

In [24]:
print(co2_uni['iso_o'].nunique())
print(co2_uni['iso_d'].nunique())
print(co2_uni['TIME_PERIOD'].nunique())
print(co2_uni.groupby(['iso_o', 'iso_d']).ngroups)

77
77
21
5852


In [25]:
assert set(co2_uni['iso_o'].unique()) == set(co2_uni['iso_d'].unique()) #Exporters and importers have same set of countries.

In [26]:
countries = set(co2_uni['iso_o'].unique())

In [27]:
len(countries)

77

### Calculating Net Import of CO2 

In [28]:
co2_uni.head(2)

Unnamed: 0,iso_o,iso_d,TIME_PERIOD,OBS_VALUE
0,AUS,ARG,2000,0.393
1,AUS,ARG,2001,0.474


In [29]:
co2_export = co2_uni.groupby(['iso_o', 'TIME_PERIOD'])['OBS_VALUE'].sum().reset_index()

In [30]:
# sanity check
#co2_export[co2_export['iso_o'] == 'BGD']

In [31]:
print(co2_uni.shape)
print(co2_export.shape)
print(co2_uni['iso_o'].nunique())
print(co2_export['iso_o'].nunique())

(122892, 4)
(1617, 3)
77
77


In [32]:
co2_import = co2_uni.copy()

In [33]:
co2_import = co2_import.groupby(['iso_d', 'TIME_PERIOD'])['OBS_VALUE'].sum().reset_index()

In [34]:
co2_import.head(3)

Unnamed: 0,iso_d,TIME_PERIOD,OBS_VALUE
0,ARG,2000,44.666
1,ARG,2001,38.006
2,ARG,2002,17.889


In [35]:
co2_export.head(3)

Unnamed: 0,iso_o,TIME_PERIOD,OBS_VALUE
0,ARG,2000,48.636
1,ARG,2001,50.728
2,ARG,2002,87.26


In [39]:
co2_export[(co2_export['iso_o'] == 'ARG') & (co2_export['TIME_PERIOD'] == 2000)]

Unnamed: 0,iso_o,TIME_PERIOD,OBS_VALUE
0,ARG,2000,48.636


In [41]:
co2_import[(co2_import['iso_d'] == 'ARG') & (co2_import['TIME_PERIOD'] == 2000)]

Unnamed: 0,iso_d,TIME_PERIOD,OBS_VALUE
0,ARG,2000,44.666


In [609]:
#co2_import[(co2_import['iso_d'] == 'AUS') & (co2_import['TIME_PERIOD'] == 2008)]

In [42]:
co2_import = co2_import.rename(columns = {'iso_d': 'iso_o'})
co2_import = co2_import.rename(columns = {'OBS_VALUE': 'import'})
co2_export = co2_export.rename(columns = {'OBS_VALUE': 'export'})
nx = co2_export.merge(co2_import, on = ['iso_o', 'TIME_PERIOD'], how = 'left')
nx['net_imp'] = nx['import'] - nx['export'] 

In [43]:
nx.head()

Unnamed: 0,iso_o,TIME_PERIOD,export,import,net_imp
0,ARG,2000,48.636,44.666,-3.97
1,ARG,2001,50.728,38.006,-12.722
2,ARG,2002,87.26,17.889,-69.371
3,ARG,2003,81.623,23.903,-57.72
4,ARG,2004,83.397,33.429,-49.968


In [44]:
# sanity check
#nx[nx['iso_o'] == 'USA']

### CO2 edgelist for SNA

In [45]:
edgelist_co2 = co2_uni.pivot_table(index = ['iso_o', 'iso_d'], columns = 'TIME_PERIOD', values = 'OBS_VALUE', fill_value = 0).reset_index()

In [46]:
assert co2_uni.groupby(['iso_o', 'iso_d']).ngroups == edgelist_co2 .shape[0] 

### Dragging continent from CEPII country level data

In [47]:
node = pd.read_csv('../data/raw/geo_cepii.csv', encoding='latin1')

In [48]:
node.shape

(238, 34)

In [49]:
# renaming the exporter column name to match it with the dyad level fixed variable data
node = node.rename(columns = {'iso3': 'iso_o'})

In [50]:
node = node[['iso_o', 'country', 'continent']]

In [51]:
# removing duplicates
equivalent_columns = ['iso_o', 'country', 'continent']
duplicates = node.duplicated(subset = equivalent_columns, keep = 'last')
print(node[duplicates].shape)
#print(node[duplicates])

(13, 3)


In [52]:
node[duplicates]

Unnamed: 0,iso_o,country,continent
11,AUS,Australia,Pacific
23,BEN,Benin,Africa
27,BOL,Bolivia,America
29,BRA,Brazil,America
36,CAN,Canada,America
42,CIV,Côte d'Ivoire,Africa
55,DEU,Germany,Europe
117,KAZ,Kazakstan,Asia
153,NGA,Nigeria,Africa
211,TUR,Turkey,Europe


In [53]:
node_unique = node[~duplicates]
node_unique.shape

(225, 3)

In [54]:
node_unique[node_unique['iso_o'] == 'ZAF']

Unnamed: 0,iso_o,country,continent
234,ZAF,South Africa,Africa


In [55]:
node_unique['iso_o'].nunique()

225

### IMF

In [56]:
imf = pd.read_excel("../data/raw/IMF_classification.xlsx")

In [57]:
imf["development"] = 0

In [58]:
imf.loc[imf['category'] == 'Advanced Economies', 'development'] = 1

In [59]:
print(imf['category'].value_counts())
print(imf['development'].value_counts())

category
Emerging and Developing Economies    155
Advanced Economies                    41
Name: count, dtype: int64
development
0    155
1     41
Name: count, dtype: int64


In [60]:
imf

Unnamed: 0,country,category,development
0,Andorra,Advanced Economies,1
1,Australia,Advanced Economies,1
2,Austria,Advanced Economies,1
3,Belgium,Advanced Economies,1
4,Canada,Advanced Economies,1
...,...,...,...
191,Vietnam,Emerging and Developing Economies,0
192,West Bank and Gaza,Emerging and Developing Economies,0
193,Yemen,Emerging and Developing Economies,0
194,Zambia,Emerging and Developing Economies,0


In [61]:
q = set(imf['country'].unique()) - set(node_unique['country'].unique())

In [62]:
len(q)

36

In [63]:
q

{'Belgium',
 'Cabo Verde',
 'Democratic Republic of the Congo',
 'Eswatini',
 'Hong Kong SAR',
 'Kazakhstan',
 'Kosovo',
 'Kyrgyz Republic',
 'Lao P.D.R.',
 'Libya',
 'Macao SAR',
 'Micronesia',
 'Moldova',
 'Montenegro',
 'Myanmar',
 'North Macedonia',
 'Republic of Congo',
 'Russia',
 'Serbia',
 'Slovak Republic',
 'South Sudan',
 'St. Kitts and Nevis',
 'St. Lucia',
 'St. Vincent and the Grenadines',
 'Syria',
 'São Tomé and Príncipe',
 'Taiwan Province of China',
 'Tanzania',
 'The Bahamas',
 'The Gambia',
 'The Netherlands',
 'Timor-Leste',
 'Türkiye',
 'United States',
 'Vietnam',
 'West Bank and Gaza'}

In [64]:
node_unique.loc[node_unique['country'] == 'Bahamas', 'country'] = 'The Bahamas'
node_unique.loc[node_unique['country'] == 'Belgium and Luxembourg', 'country'] = 'Belgium'
node_unique.loc[node_unique['country'] ==  'Burma', 'country'] = 'Myanmar'
node_unique.loc[node_unique['country'] ==  'Cape Verde', 'country'] = 'Cabo Verde'
node_unique.loc[node_unique['country'] ==  'East Timor', 'country'] =  'Timor-Leste'
node_unique.loc[node_unique['country'] ==  'Gambia', 'country'] =  'The Gambia'
node_unique.loc[node_unique['country'] == 'Hong Kong', 'country'] = 'Hong Kong SAR'
node_unique.loc[node_unique['country'] == 'Kazakstan', 'country'] = 'Kazakhstan'
node_unique.loc[node_unique['country'] == "Korea, Dem. People's Rep. of", 'country'] = "Korea"
node_unique.loc[node_unique['country'] =='Kyrgyzstan', 'country'] = 'Kyrgyz Republic'
node_unique.loc[node_unique['country'] == "Lao People's Democratic Republic", 'country'] =  'Lao P.D.R.'
node_unique.loc[node_unique['country'] == 'Libyan Arab Jamahiriya', 'country'] = 'Libya'
node_unique.loc[node_unique['country'] == 'Macau (Aomen)', 'country'] = 'Macao SAR'
node_unique.loc[node_unique['country'] == 'Macedonia (the former Yugoslav Rep. of)', 'country'] = 'North Macedonia'
node_unique.loc[node_unique['country'] == 'Micronesia (Federated States of)', 'country'] = 'Micronesia'
node_unique.loc[node_unique['country'] == 'Moldova, Rep.of', 'country'] = 'Moldova'
node_unique.loc[node_unique['country'] == 'Netherlands', 'country'] = 'The Netherlands'
node_unique.loc[node_unique['country'] == 'Palestine', 'country'] = 'West Bank and Gaza'
node_unique.loc[node_unique['country'] == 'Russian Federation', 'country'] = 'Russia'
node_unique.loc[node_unique['country'] == 'Saint Kitts and Nevis', 'country'] = 'St. Kitts and Nevis'
node_unique.loc[node_unique['country'] == 'Saint Lucia', 'country'] = 'St. Lucia'
node_unique.loc[node_unique['country'] == 'Saint Vincent and the Grenadines', 'country'] = 'St. Vincent and the Grenadines'
node_unique.loc[node_unique['country'] == 'Sao Tome and Principe', 'country'] = 'São Tomé and Príncipe'
node_unique.loc[node_unique['country'] == 'Serbia and Montenegro', 'country'] = 'Serbia'
node_unique.loc[node_unique['country'] == 'Slovakia', 'country'] = 'Slovak Republic'
node_unique.loc[node_unique['country'] == 'Swaziland', 'country'] = 'Eswatini'
node_unique.loc[node_unique['country'] == 'Syrian Arab Republic', 'country'] =  'Syria'
node_unique.loc[node_unique['country'] ==  'Taiwan', 'country'] =   'Taiwan Province of China'
node_unique.loc[node_unique['country'] ==  'Tanzania, United Rep. of ', 'country'] =  'Tanzania'
node_unique.loc[node_unique['country'] ==  'Turkey', 'country'] =  'Türkiye'
node_unique.loc[node_unique['country'] ==  'United States of America', 'country'] =  'United States'
node_unique.loc[node_unique['country'] ==  'Viet Nam', 'country'] =  'Vietnam'
node_unique.loc[node_unique['country'] ==  'Congo', 'country'] =  'Republic of Congo'
node_unique.loc[node_unique['country'] ==  'Congo (Democratic Republic of the)', 'country'] =  'Democratic Republic of the Congo'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  node_unique.loc[node_unique['country'] == 'Bahamas', 'country'] = 'The Bahamas'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  node_unique.loc[node_unique['country'] == 'Belgium and Luxembourg', 'country'] = 'Belgium'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  node_unique.loc[node_unique['country'] ==  'Burma', 'country'] = 'Myanmar'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org

In [65]:
nodeUnique_imf = node_unique.merge(imf, on = 'country', how='left')

In [66]:
nodeUnique_imf = nodeUnique_imf[nodeUnique_imf['iso_o'].isin(countries)]

In [67]:
print(imf['country'].nunique())
print(node_unique['country'].nunique())
print(nodeUnique_imf['country'].nunique())

196
224
75


In [68]:
nodeUnique_imf['category'].value_counts()

category
Emerging and Developing Economies    38
Advanced Economies                   37
Name: count, dtype: int64

In [69]:
nodeUnique_imf['category'].isna().sum()

0

In [70]:
nodeUnique_imf[nodeUnique_imf['category'].isna()]

Unnamed: 0,iso_o,country,continent,category,development


In [71]:
print(nodeUnique_imf['country'].nunique())

75


In [72]:
print(co2_uni['iso_o'].nunique())
print(co2_uni['iso_d'].nunique())
print(edgelist_co2['iso_o'].nunique())
print(edgelist_co2['iso_d'].nunique())

77
77
77
77


In [73]:
set(co2_uni['iso_o'].unique()) - set(nodeUnique_imf['iso_o'].unique())

{'ROU', 'WXD'}

In [74]:
co2_uni[co2_uni['iso_o'] == 'WXD']

Unnamed: 0,iso_o,iso_d,TIME_PERIOD,OBS_VALUE
1575,WXD,ARG,2000,5.996
1576,WXD,ARG,2001,4.401
1577,WXD,ARG,2002,2.361
1578,WXD,ARG,2003,2.936
1579,WXD,ARG,2004,4.421
...,...,...,...,...
122866,WXD,ZAF,2016,11.112
122867,WXD,ZAF,2017,10.813
122868,WXD,ZAF,2018,13.556
122869,WXD,ZAF,2019,14.237


In [75]:
nodeUnique_imf[nodeUnique_imf['iso_o'] == 'ROU']

Unnamed: 0,iso_o,country,continent,category,development


In [76]:
Romania = ['ROU', 'Romania', 'Europe', 'Emerging and Developing Economies', 0]
nodeUnique_imf.loc[len(nodeUnique_imf)] = Romania

In [77]:
nodeUnique_imf['continent'].value_counts()

continent
Europe     34
Asia       23
America     9
Africa      8
Pacific     2
Name: count, dtype: int64

In [78]:
nodeUnique_imf['iso_o'].nunique()

76

In [79]:
nodeUnique_imf.head()

Unnamed: 0,iso_o,country,continent,category,development
9,ARG,Argentina,America,Emerging and Developing Economies,0.0
10,AUT,Austria,Europe,Advanced Economies,1.0
11,AUS,Australia,Pacific,Advanced Economies,1.0
16,BGD,Bangladesh,Asia,Emerging and Developing Economies,0.0
17,BEL,Belgium,Europe,Advanced Economies,1.0


# Time Varying Data

### Internet Penetration

In [80]:
internet = pd.read_csv('../data/raw/702e5907-a97c-4e0a-9f8f-511ea9b80ab0_Data.csv')

In [84]:
internet.isna().sum()

iso_o    0
1999     0
2000     0
2001     0
2002     0
2003     0
2004     0
2005     0
2006     0
2007     0
2008     0
2009     0
2010     0
2011     0
2012     0
2013     0
2014     0
2015     0
2016     0
2017     0
2018     0
2019     0
2020     0
dtype: int64

In [83]:
internet = internet.iloc[:265, :]
internet = internet.rename(columns = {'Country Code': 'iso_o'})
internet.columns = internet.columns.str.replace(r' \[YR\d+\]', '', regex=True)
internet.columns.values[4:] = pd.to_numeric(internet.columns[4:], errors='coerce').astype('Int64')
internet = internet.drop(columns= ['Country Name', 'Series Name', 'Series Code'])

In [85]:
internet.columns

Index(['iso_o',    1999,    2000,    2001,    2002,    2003,    2004,    2005,
          2006,    2007,    2008,    2009,    2010,    2011,    2012,    2013,
          2014,    2015,    2016,    2017,    2018,    2019,    2020],
      dtype='object')

In [86]:
year = list(range(1999, 2021))
internet[year] = internet[year].replace('..', np.nan)
internet[year] = internet[year].apply(lambda x: pd.to_numeric(x, errors='coerce')).round(2)
internet = internet[internet['iso_o'].isin(countries)]
internet = internet.reset_index(drop = True)

In [89]:
internet.isna().sum()

iso_o    0
1999     0
2000     0
2001     0
2002     0
2003     0
2004     0
2005     0
2006     0
2007     0
2008     0
2009     0
2010     0
2011     0
2012     0
2013     0
2014     0
2015     0
2016     0
2017     0
2018     0
2019     0
2020     0
dtype: int64

In [88]:
internet[year] = internet[year].fillna(internet[year].mean())

In [90]:
internet_long = internet.set_index('iso_o').stack().reset_index()

In [91]:
internet_long.head(2)

Unnamed: 0,iso_o,level_1,0
0,ARG,1999,3.28
1,ARG,2000,7.04


In [92]:
internet_long = internet_long.rename(columns= {'level_1': 'TIME_PERIOD'})
internet_long = internet_long.rename(columns= {0: 'internet'})

In [93]:
assert internet_long['iso_o'].nunique() == internet['iso_o'].nunique()

### GDP per capita

In [94]:
gdp_pct = pd.read_csv('../data/raw/ad57150c-c77e-4bee-bb87-4174bb32e6a0_Data.csv')

In [95]:
gdp_pct = gdp_pct.iloc[:265, :]
gdp_pct = gdp_pct.rename(columns = {'Country Code': 'iso_o'})
gdp_pct.columns = gdp_pct.columns.str.replace(r' \[YR\d+\]', '', regex=True)
gdp_pct.columns.values[4:] = pd.to_numeric(gdp_pct.columns[4:], errors='coerce').astype('Int64')
gdp_pct = gdp_pct.drop(columns= ['Country Name', 'Series Name', 'Series Code'])

In [96]:
gdp_pct = gdp_pct[gdp_pct['iso_o'].isin(countries)]
gdp_pct = gdp_pct.reset_index(drop = True)

In [97]:
gdp_pct[year] = gdp_pct[year].replace('..', np.nan)
gdp_pct[year] = gdp_pct[year].apply(lambda x: pd.to_numeric(x, errors='coerce')).round(2)

In [98]:
gdp_pct.isna().sum()

iso_o    0
1999     0
2000     0
2001     0
2002     0
2003     0
2004     0
2005     0
2006     0
2007     0
2008     0
2009     0
2010     0
2011     0
2012     0
2013     0
2014     0
2015     0
2016     0
2017     0
2018     0
2019     0
2020     0
dtype: int64

In [99]:
gdp_pct_long = gdp_pct.set_index('iso_o').stack().reset_index()

In [100]:
gdp_pct_long.head(3)

Unnamed: 0,iso_o,level_1,0
0,ARG,1999,10838.32
1,ARG,2000,10631.65
2,ARG,2001,10051.94


In [101]:
gdp_pct_long = gdp_pct_long.rename(columns= {'level_1': 'TIME_PERIOD'})
gdp_pct_long = gdp_pct_long.rename(columns= {0: 'gdp_pct'})

In [102]:
assert gdp_pct_long['iso_o'].nunique() == gdp_pct_long['iso_o'].nunique()

In [103]:
assert gdp_pct_long['iso_o'].nunique() == gdp_pct_long['iso_o'].nunique() == internet['iso_o'].nunique() == internet_long['iso_o'].nunique()

### Population density 

In [104]:
pop_den = pd.read_csv('../data/raw/d92eb14e-b603-4183-ac5b-acfd4c423b78_Data.csv')

In [105]:
pop_den = pop_den.iloc[:265, :]
pop_den = pop_den.rename(columns = {'Country Code': 'iso_o'})
pop_den.columns = pop_den.columns.str.replace(r' \[YR\d+\]', '', regex=True)
pop_den.columns.values[4:] = pd.to_numeric(pop_den.columns[4:], errors='coerce').astype('Int64')
pop_den = pop_den.drop(columns= ['Country Name', 'Series Name', 'Series Code'])
pop_den = pop_den[pop_den['iso_o'].isin(countries)]
pop_den = pop_den.reset_index(drop = True)
pop_den[year] = pop_den[year].replace('..', np.nan)
pop_den[year] = pop_den[year].apply(lambda x: pd.to_numeric(x, errors='coerce')).round(2)

In [106]:
pop_den.isna().sum()

iso_o    0
1999     2
2000     0
2001     0
2002     0
2003     0
2004     0
2005     0
2006     0
2007     0
2008     0
2009     0
2010     0
2011     0
2012     0
2013     0
2014     0
2015     0
2016     0
2017     0
2018     0
2019     0
2020     0
dtype: int64

In [107]:
pop_den[year] = pop_den[year].fillna(pop_den[year].mean())

In [108]:
pop_den_long = pop_den.set_index('iso_o').stack().reset_index()

In [109]:
pop_den_long.head()

Unnamed: 0,iso_o,level_1,0
0,ARG,1999,13.44
1,ARG,2000,13.6
2,ARG,2001,13.75
3,ARG,2002,13.9
4,ARG,2003,14.04


In [110]:
pop_den_long = pop_den_long.rename(columns= {'level_1': 'TIME_PERIOD'})
pop_den_long = pop_den_long.rename(columns= {0: 'pop_den'})

In [111]:
assert gdp_pct_long['iso_o'].nunique() == gdp_pct_long['iso_o'].nunique() == internet['iso_o'].nunique() == internet_long['iso_o'].nunique() == pop_den['iso_o'].nunique() == pop_den_long['iso_o'].nunique()

### CO2 emissions per capita

In [112]:
co2 = pd.read_csv('../data/raw/b1accbee-a743-40b0-b105-526ca6c03a1c_Data.csv')

In [113]:
co2 = co2.iloc[:265, :]
co2 = co2.rename(columns = {'Country Code': 'iso_o'})
co2.columns = co2.columns.str.replace(r' \[YR\d+\]', '', regex=True)
co2.columns.values[4:] = pd.to_numeric(co2.columns[4:], errors='coerce').astype('Int64')
co2 = co2.drop(columns= ['Country Name', 'Series Name', 'Series Code'])
co2 = co2[co2['iso_o'].isin(countries)]
co2 = co2.reset_index(drop = True)
co2[year] = co2[year].replace('..', np.nan)
co2[year] = co2[year].apply(lambda x: pd.to_numeric(x, errors='coerce')).round(2)

In [114]:
co2.isna().sum()

iso_o    0
1999     0
2000     0
2001     0
2002     0
2003     0
2004     0
2005     0
2006     0
2007     0
2008     0
2009     0
2010     0
2011     0
2012     0
2013     0
2014     0
2015     0
2016     0
2017     0
2018     0
2019     0
2020     0
dtype: int64

In [115]:
co2_long = co2.set_index('iso_o').stack().reset_index()

In [116]:
co2_long.head(2)

Unnamed: 0,iso_o,level_1,0
0,ARG,1999,3.8
1,ARG,2000,3.68


In [117]:
co2_long = co2_long.rename(columns= {'level_1': 'TIME_PERIOD'})
co2_long = co2_long.rename(columns= {0: 'co2'})

In [118]:
assert gdp_pct_long['iso_o'].nunique() == gdp_pct_long['iso_o'].nunique() == internet['iso_o'].nunique() == internet_long['iso_o'].nunique() == pop_den['iso_o'].nunique() == pop_den_long['iso_o'].nunique() == co2['iso_o'].nunique() == co2_long['iso_o'].nunique()

### Energy use

In [119]:
energy = pd.read_csv("../data/raw/a2c15875-4f14-40eb-bf3f-f4755d164953_Data.csv")

In [120]:
energy = energy.iloc[:265, :]
energy = energy.rename(columns = {'Country Code': 'iso_o'})
energy.columns = energy.columns.str.replace(r' \[YR\d+\]', '', regex=True)
energy.columns.values[4:] = pd.to_numeric(energy.columns[4:], errors='coerce').astype('Int64')
energy = energy.drop(columns= ['Country Name', 'Series Name', 'Series Code'])
energy = energy[energy['iso_o'].isin(countries)]
energy = energy.reset_index(drop = True)
energy[year] = energy[year].replace('..', np.nan)
energy[year] = energy[year].apply(lambda x: pd.to_numeric(x, errors='coerce')).round(2)

In [121]:
energy.isna().sum()

iso_o    0
1999     1
2000     0
2001     0
2002     0
2003     0
2004     0
2005     0
2006     0
2007     0
2008     0
2009     0
2010     0
2011     0
2012     0
2013     0
2014     0
2015     0
2016     0
2017     0
2018     0
2019     0
2020     0
dtype: int64

In [122]:
energy[year] = energy[year].fillna(energy[year].mean())

In [123]:
energy_long = energy.set_index('iso_o').stack().reset_index()

In [124]:
energy_long.head(3)

Unnamed: 0,iso_o,level_1,0
0,ARG,1999,1666.82
1,ARG,2000,1643.32
2,ARG,2001,1518.57


In [125]:
energy_long = energy_long.rename(columns= {'level_1': 'TIME_PERIOD'})
energy_long = energy_long.rename(columns= {0: 'energy'})

In [126]:
assert gdp_pct_long['iso_o'].nunique() == gdp_pct_long['iso_o'].nunique() == internet['iso_o'].nunique() == internet_long['iso_o'].nunique() == pop_den['iso_o'].nunique() == pop_den_long['iso_o'].nunique() == co2['iso_o'].nunique() == co2_long['iso_o'].nunique() == energy['iso_o'].nunique() == energy_long['iso_o'].nunique()

###  Trade Openness

In [127]:
trade_openness = pd.read_csv("../data/raw/35c7051d-c047-4d2a-b459-9b4e6ee01d98_Data.csv")

In [128]:
trade_openness = trade_openness.iloc[:265, :]
trade_openness = trade_openness.rename(columns = {'Country Code': 'iso_o'})
trade_openness.columns = trade_openness.columns.str.replace(r' \[YR\d+\]', '', regex=True)
trade_openness.columns.values[4:] = pd.to_numeric(trade_openness.columns[4:], errors='coerce').astype('Int64')
trade_openness = trade_openness.drop(columns= ['Country Name', 'Series Name', 'Series Code'])
trade_openness = trade_openness[trade_openness['iso_o'].isin(countries)]
trade_openness = trade_openness.reset_index(drop = True)
trade_openness[year] = trade_openness[year].replace('..', np.nan)
trade_openness[year] = trade_openness[year].apply(lambda x: pd.to_numeric(x, errors='coerce')).round(2)

In [129]:
trade_openness.isna().sum()

iso_o    0
1999     2
2000     2
2001     2
2002     2
2003     2
2004     2
2005     2
2006     2
2007     2
2008     2
2009     2
2010     2
2011     2
2012     2
2013     2
2014     2
2015     2
2016     2
2017     3
2018     3
2019     3
2020     3
dtype: int64

In [130]:
trade_openness[year] = trade_openness[year].fillna(trade_openness[year].mean())

In [131]:
trade_openness_long = trade_openness.set_index('iso_o').stack().reset_index()

In [132]:
trade_openness_long = trade_openness_long.rename(columns= {'level_1': 'TIME_PERIOD'})
trade_openness_long = trade_openness_long.rename(columns= {0: 'trade_openness'})

In [133]:
trade_openness_long.head(3)

Unnamed: 0,iso_o,TIME_PERIOD,trade_openness
0,ARG,1999,21.38
1,ARG,2000,22.62
2,ARG,2001,21.85


In [134]:
assert gdp_pct_long['iso_o'].nunique() == gdp_pct_long['iso_o'].nunique() == internet['iso_o'].nunique() == internet_long['iso_o'].nunique() == pop_den['iso_o'].nunique() == pop_den_long['iso_o'].nunique() == co2['iso_o'].nunique() == co2_long['iso_o'].nunique() == energy['iso_o'].nunique() == energy_long['iso_o'].nunique() == trade_openness['iso_o'].nunique() == trade_openness_long['iso_o'].nunique()

### Mean Year of Schooling 

In [135]:
schooling = pd.read_excel("../data/raw/hdr-data.xlsx")

In [136]:
schooling.tail()

Unnamed: 0,countryIsoCode,country,indexCode,index,dimension,indicatorCode,indicator,year,value,note
4179,ZWE,Zimbabwe,HDI,Human Development Index,,eys,Expected Years of Schooling (years),2016,10.899038,
4180,ZWE,Zimbabwe,HDI,Human Development Index,,eys,Expected Years of Schooling (years),2017,10.930538,
4181,ZWE,Zimbabwe,HDI,Human Development Index,,eys,Expected Years of Schooling (years),2018,10.96213,
4182,ZWE,Zimbabwe,HDI,Human Development Index,,eys,Expected Years of Schooling (years),2019,10.993813,
4183,ZWE,Zimbabwe,HDI,Human Development Index,,eys,Expected Years of Schooling (years),2020,11.025587,


In [137]:
schooling = schooling[['countryIsoCode', 'year', 'value']]

In [138]:
schooling = schooling.rename(columns = {'countryIsoCode': 'iso_o'})

In [139]:
schooling = schooling.rename(columns = {'year': 'TIME_PERIOD'})

In [140]:
schooling.dtypes

iso_o           object
TIME_PERIOD      int64
value          float64
dtype: object

In [141]:
schooling.isna().sum()

iso_o          0
TIME_PERIOD    0
value          0
dtype: int64

In [142]:
#mean_schooling_1999 = schooling[schooling['TIME_PERIOD'] == 1999].mean()
#schooling['value'] = schooling['value'].fillna(mean_schooling_1999)

In [143]:
schooling['iso_o'].nunique()

194

In [144]:
miss = countries - set(schooling['iso_o'])
print(miss)

{'TWN', 'WXD'}


In [145]:
schooling = schooling[schooling['iso_o'].isin(countries)].reset_index(drop= True)

In [146]:
schooling.shape

(1650, 3)

In [147]:
schooling['iso_o'].nunique()

75

In [148]:
schooling['TIME_PERIOD'].unique()

array([1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
       2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],
      dtype=int64)

In [149]:
schooling.head(3)

Unnamed: 0,iso_o,TIME_PERIOD,value
0,ARG,1999,15.17326
1,ARG,2000,15.68757
2,ARG,2001,16.30578


In [150]:
#print(type(next(iter(countries))))
#print(schooling['iso_o'].apply(repr).head())

In [151]:
schooling = schooling.rename(columns = {'value': 'schooling'})

In [152]:
assert internet['iso_o'].nunique() == gdp_pct['iso_o'].nunique() == schooling['iso_o'].nunique()

In [153]:
assert set(internet['iso_o'].unique()) == set(gdp_pct['iso_o'].unique()) == set(schooling['iso_o'].unique())

#### Merging internet penetration, GDP per capita and Mean year of schooling

In [154]:
set(nx['iso_o'].unique()) - set(energy['iso_o'].unique())

{'TWN', 'WXD'}

In [155]:
nx = nx[(nx['iso_o'] != 'TWN') & (nx['iso_o'] != 'WXD')]

In [156]:
nx['iso_o'].nunique()

75

In [157]:
merged = nx.merge(gdp_pct_long, on = ['iso_o','TIME_PERIOD'], how = 'left')\
           .merge(trade_openness_long, on = ['iso_o','TIME_PERIOD'], how = 'left')\
           .merge(schooling, on = ['iso_o','TIME_PERIOD'], how = 'left')\
           .merge(internet_long, on = ['iso_o','TIME_PERIOD'], how = 'left')\
           .merge(energy_long, on = ['iso_o','TIME_PERIOD'], how = 'left')\
           .merge(pop_den_long, on = ['iso_o','TIME_PERIOD'], how = 'left')\
           .merge(co2_long, on = ['iso_o','TIME_PERIOD'], how = 'left')

In [158]:
merged['iso_o'].nunique()

75

In [159]:
merged.isna().sum()

iso_o             0
TIME_PERIOD       0
export            0
import            0
net_imp           0
gdp_pct           0
trade_openness    0
schooling         0
internet          0
energy            0
pop_den           0
co2               0
dtype: int64

In [160]:
nodeUnique_imf

Unnamed: 0,iso_o,country,continent,category,development
9,ARG,Argentina,America,Emerging and Developing Economies,0.0
10,AUT,Austria,Europe,Advanced Economies,1.0
11,AUS,Australia,Pacific,Advanced Economies,1.0
16,BGD,Bangladesh,Asia,Emerging and Developing Economies,0.0
17,BEL,Belgium,Europe,Advanced Economies,1.0
...,...,...,...,...,...
207,UKR,Ukraine,Europe,Emerging and Developing Economies,0.0
209,USA,United States,America,Advanced Economies,1.0
215,VNM,Vietnam,Asia,Emerging and Developing Economies,0.0
221,ZAF,South Africa,Africa,Emerging and Developing Economies,0.0


In [161]:
merged_new = merged.merge(nodeUnique_imf, on = 'iso_o', how = 'left')

In [162]:
merged_new.head(4)

Unnamed: 0,iso_o,TIME_PERIOD,export,import,net_imp,gdp_pct,trade_openness,schooling,internet,energy,pop_den,co2,country,continent,category,development
0,ARG,2000,48.636,44.666,-3.97,10631.65,22.62,15.68757,7.04,1643.32,13.6,3.68,Argentina,America,Emerging and Developing Economies,0.0
1,ARG,2001,50.728,38.006,-12.722,10051.94,21.85,16.30578,9.78,1518.57,13.75,3.49,Argentina,America,Emerging and Developing Economies,0.0
2,ARG,2002,87.26,17.889,-69.371,8861.56,41.75,16.392509,10.9,1484.26,13.9,3.26,Argentina,America,Emerging and Developing Economies,0.0
3,ARG,2003,81.623,23.903,-57.72,9545.53,40.64,16.26639,11.9,1590.62,14.04,3.52,Argentina,America,Emerging and Developing Economies,0.0


### LTE

In [163]:
lte = pd.read_excel("../data/raw/LTE.xlsx")

In [164]:
lte.head(2)

Unnamed: 0,country,2000,2001,2002,2003,2004,2005,2006,2007,2008,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Andorra,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,Australia,0,0,0,0,0,0,0,0,0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [165]:
lte = lte.fillna(0)

In [166]:
lte.isna().sum()

country    0
2000       0
2001       0
2002       0
2003       0
2004       0
2005       0
2006       0
2007       0
2008       0
2009       0
2010       0
2011       0
2012       0
2013       0
2014       0
2015       0
2016       0
2017       0
2018       0
2019       0
2020       0
dtype: int64

In [167]:
lte_long = lte.set_index('country').stack().reset_index()

In [168]:
lte_long = lte_long.rename(columns= {'level_1': 'TIME_PERIOD'})
lte_long = lte_long.rename(columns= {0: 'LTE_year'})

In [169]:
m = list(set(merged_new['country'].unique()))

In [170]:
len(m)

75

In [171]:
lte_long = lte_long[lte_long['country'].isin(m)]

In [172]:
lte_long['country'].nunique()

75

In [173]:
# finding the minimum year for each country where LTE_year is 1
treatment_year = lte_long[lte_long['LTE_year'] == 1].groupby('country')['TIME_PERIOD'].min()

In [174]:
# mapping countries to their respective treatment year using a dictionary
treatment_dict = treatment_year.to_dict()

In [175]:
lte_long['adoption_year'] = lte_long['country'].map(treatment_dict)

In [176]:
# mapping the treatment years back to the dataframe
#lte_long['adoption_year'] = lte_long.apply(lambda row: treatment_dict.get(row['country']) if row['LTE_year'] == 1.0 else row['LTE_year'], axis=1)

In [177]:
# if any of the LTE_year column value is 1, the ever_adopted vector will have True for that entry
ever_adopted = lte_long.groupby('country')['LTE_year'].transform(lambda x: (x == 1.0).any())

In [178]:
# It changes the adopted column value to 1 if ever_adopted vector has True entry
lte_long['adopted'] = ever_adopted.astype(int)

In [179]:
lte_long = lte_long.reset_index(drop = True)

In [180]:
# sanity check 
#lte_long[lte_long['country'] == 'Bangladesh']

In [181]:
lte_long.head(2)

Unnamed: 0,country,TIME_PERIOD,LTE_year,adoption_year,adopted
0,Australia,2000,0.0,2011.0,1
1,Australia,2001,0.0,2011.0,1


In [182]:
merged_new.head(2)

Unnamed: 0,iso_o,TIME_PERIOD,export,import,net_imp,gdp_pct,trade_openness,schooling,internet,energy,pop_den,co2,country,continent,category,development
0,ARG,2000,48.636,44.666,-3.97,10631.65,22.62,15.68757,7.04,1643.32,13.6,3.68,Argentina,America,Emerging and Developing Economies,0.0
1,ARG,2001,50.728,38.006,-12.722,10051.94,21.85,16.30578,9.78,1518.57,13.75,3.49,Argentina,America,Emerging and Developing Economies,0.0


In [183]:
column = ['iso_o', 'iso_d', 'TIME_PERIOD']
dupli = co2_T.duplicated(subset = column, keep = 'last')

In [184]:
lte = merged_new.merge(lte_long, on = ['country', 'TIME_PERIOD'] , how = 'left')

In [185]:
lte.shape

(1575, 19)

In [186]:
lte.head(3)

Unnamed: 0,iso_o,TIME_PERIOD,export,import,net_imp,gdp_pct,trade_openness,schooling,internet,energy,pop_den,co2,country,continent,category,development,LTE_year,adoption_year,adopted
0,ARG,2000,48.636,44.666,-3.97,10631.65,22.62,15.68757,7.04,1643.32,13.6,3.68,Argentina,America,Emerging and Developing Economies,0.0,0.0,2014.0,1
1,ARG,2001,50.728,38.006,-12.722,10051.94,21.85,16.30578,9.78,1518.57,13.75,3.49,Argentina,America,Emerging and Developing Economies,0.0,0.0,2014.0,1
2,ARG,2002,87.26,17.889,-69.371,8861.56,41.75,16.392509,10.9,1484.26,13.9,3.26,Argentina,America,Emerging and Developing Economies,0.0,0.0,2014.0,1


In [187]:
set(edgelist_co2['iso_o'].unique()) - set(lte['iso_o'].unique())

{'TWN', 'WXD'}

In [188]:
c = list(set(lte['iso_o'].unique()))

In [189]:
len(c)

75

In [190]:
edgelist_co2 = edgelist_co2[edgelist_co2['iso_o'].isin(c)]
edgelist_co2 = edgelist_co2[edgelist_co2['iso_d'].isin(c)]

In [191]:
print(edgelist_co2['iso_o'].nunique())
print(edgelist_co2['iso_o'].nunique())

75
75


In [192]:
assert set(edgelist_co2['iso_o'].unique()) == set(lte['iso_o'].unique()) == set(edgelist_co2['iso_d'].unique())

In [193]:
lte.to_csv("../data/cleaned/pollution_trade.csv", encoding = 'utf-8', index = False)

In [194]:
edgelist_co2.to_csv("../data/cleaned/emission_trade_edgelist.csv", encoding='utf-8', index=False)

In [195]:
lte['LTE_year'].value_counts()

LTE_year
0.0    947
1.0    628
Name: count, dtype: int64

In [196]:
a2000 = lte[lte['TIME_PERIOD'] == 2000]
a2001 = lte[lte['TIME_PERIOD'] == 2001]
a2002 = lte[lte['TIME_PERIOD'] == 2002]
a2003 = lte[lte['TIME_PERIOD'] == 2003]
a2004 = lte[lte['TIME_PERIOD'] == 2004]
a2005 = lte[lte['TIME_PERIOD'] == 2005]
a2006 = lte[lte['TIME_PERIOD'] == 2006]
a2007 = lte[lte['TIME_PERIOD'] == 2007]
a2008 = lte[lte['TIME_PERIOD'] == 2008]
a2009 = lte[lte['TIME_PERIOD'] == 2009]
a2010 = lte[lte['TIME_PERIOD'] == 2010]
a2011 = lte[lte['TIME_PERIOD'] == 2011]
a2012 = lte[lte['TIME_PERIOD'] == 2012]
a2013 = lte[lte['TIME_PERIOD'] == 2013]
a2014 = lte[lte['TIME_PERIOD'] == 2014]
a2015 = lte[lte['TIME_PERIOD'] == 2015]
a2016 = lte[lte['TIME_PERIOD'] == 2016]
a2017 = lte[lte['TIME_PERIOD'] == 2017]
a2018 = lte[lte['TIME_PERIOD'] == 2018]
a2019 = lte[lte['TIME_PERIOD'] == 2019]
a2020 = lte[lte['TIME_PERIOD'] == 2020]

In [197]:
a2000.to_csv('../data/cleaned/attribute2000.csv', encoding='utf-8', index=False)
a2001.to_csv('../data/cleaned/attribute2001.csv', encoding='utf-8', index=False)
a2002.to_csv('../data/cleaned/attribute2002.csv', encoding='utf-8', index=False)
a2003.to_csv('../data/cleaned/attribute2003.csv', encoding='utf-8', index=False)
a2004.to_csv('../data/cleaned/attribute2004.csv', encoding='utf-8', index=False)
a2005.to_csv('../data/cleaned/attribute2005.csv', encoding='utf-8', index=False)
a2006.to_csv('../data/cleaned/attribute2006.csv', encoding='utf-8', index=False)
a2007.to_csv('../data/cleaned/attribute2007.csv', encoding='utf-8', index=False)
a2008.to_csv('../data/cleaned/attribute2008.csv', encoding='utf-8', index=False)
a2009.to_csv('../data/cleaned/attribute2009.csv', encoding='utf-8', index=False)
a2010.to_csv('../data/cleaned/attribute2010.csv', encoding='utf-8', index=False)
a2011.to_csv('../data/cleaned/attribute2011.csv', encoding='utf-8', index=False)
a2012.to_csv('../data/cleaned/attribute2012.csv', encoding='utf-8', index=False)
a2013.to_csv('../data/cleaned/attribute2013.csv', encoding='utf-8', index=False)
a2014.to_csv('../data/cleaned/attribute2014.csv', encoding='utf-8', index=False)
a2015.to_csv('../data/cleaned/attribute2015.csv', encoding='utf-8', index=False)
a2016.to_csv('../data/cleaned/attribute2016.csv', encoding='utf-8', index=False)
a2017.to_csv('../data/cleaned/attribute2017.csv', encoding='utf-8', index=False)
a2018.to_csv('../data/cleaned/attribute2018.csv', encoding='utf-8', index=False)
a2019.to_csv('../data/cleaned/attribute2019.csv', encoding='utf-8', index=False)
a2020.to_csv('../data/cleaned/attribute2020.csv', encoding='utf-8', index=False)