In [1]:
!pip install rise



In [2]:
from traitlets.config.manager import BaseJSONConfigManager
from pathlib import Path
path = Path.home() / ".jupyter" / "nbconfig"
cm = BaseJSONConfigManager(config_dir=str(path))
cm.update(
    "rise",
    {
        "theme": "sky",
        "transition": "zoom",
        "start_slideshow_at": "selected",
         "rise": {"width": "90%", "height": "90%"}
     }
)

{'theme': 'sky',
 'transition': 'zoom',
 'start_slideshow_at': 'selected',
 'rise': {'width': '90%', 'height': '90%'}}

# The Tata Econometrics Group

## The Gravity Model of Trade

## Motivation

- Our Indian Link & Economic History 

- Research Question: "What are the key insights we can obtain when comparing colonised and not colonised countries on their bilateral trade today?" 
    
    - Hypothesis: 	* *The duration of a colony determines the volume of bilateral trade such that a longer a nation       colonised, the less the bilateral trade. 
    
- (Working) Papers:
    - Mitchener & Weidenmier (2008)
        - Trade Flow factors, from railroads 
    - Nguyen & Wu (2019)
        - Preferential Trade Agreements 

- Our Regression equation:

### ln(Exportsij) = β0 + β1lnYi+ β2lnYj + β3lnDij + β4ELTi + β5ESTi + β4BLTi+ β5BSTi + β6ln(TOij) + β7comcol  + εij

## Approach

- Split into two pairs by timezones

- Our "Tools":
    - Linjun as our Guru
        - Whiteboard 
        - Theory
        - Technical (GroupBy Functions)
    
    - Microsoft Teams/Zoom
   
    - Google Colab
    
    - Github for the Final steps
    


In [5]:
#Tushar Mehra

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

gravity = pd.read_excel('https://www.dropbox.com/s/2uha8rwc8bngcsz/servicesdataset%202.xlsx?dl=1')

gravity.head()

gravity.shape

gravity.describe()

gravity['trade_restrictiveness'] = gravity['ent_cost_imp'] + gravity['ent_cost_exp']
#Adding cost oftrade to importer and cost of trade to exporter together to measure trade restrictiveness (0 means none, more than 2 means virtually impossible to trade) 

gravity = gravity.drop(['contig','comlang_off','comlang_ethno','colony','distcap','etcr_exp','etcr_imp','lat_exp','lat_imp'], axis=1)

colony= pd.read_excel('https://www.dropbox.com/s/216s24p3f8aj8ni/Copy%20of%20Colonial_transformation_data%20%281%29.xls?dl=1')

colony.head()

colony.describe()

colony.shape

gravity.columns

colony.columns

def clean_col(col):
  col = col.strip()
  col = col.replace('Country Name', 'Country Name - Importer')
  col = col.replace('Main colonial "motherland", source: Ziltener/KŸnzler', 'Colonial Motherland - Importer')
  col = col.replace('onset of colonialism, source: Ziltener/KŸnzler 2008', 'Year of Colonisation - Importer')
  col = col.replace('end of colonialism, source: Ziltener/KŸnzler 2008', 'Year of Independence - Importer')
  col = col.replace('COLYEARS', 'Years under Colonisation - Importer')
  return col

new_columns = []
for c in colony.columns:
    clean_c = clean_col(c)
    new_columns.append(clean_c)

colony.columns = new_columns

colony_series_dropper = colony.columns.to_series()['Violent Colonization, Wars of Defence etc; Source: own coding; cf. COW Dataset (':'colonial borders split ethnic groups, cut traditional space of close interaction']
colony = colony.drop(colony_series_dropper, axis=1)
#Dropping extra data points that are not going to be needed for our regression; had to convert columns to a series to be able to drop a range of columns 


combined_imp = gravity.merge(colony, left_on = 'imp', right_on = 'Country Code World Bank', how = 'left')
combined_imp.tail()

def clean_col_2(col):
  col = col.strip()
  col = col.replace('Country Name - Importer', 'Country Name - Exporter')
  col = col.replace('Colonial Motherland - Importer', 'Colonial Motherland - Exporter')
  col = col.replace('Year of Colonisation - Importer', 'Year of Colonisation - Exporter' )
  col = col.replace('Year of Independence - Importer', 'Year of Independence - Exporter')
  col = col.replace('Years under Colonisation - Importer', 'Years under Colonisation - Exporter')
  return col

new_columns = []
for c in colony.columns:
    clean_c = clean_col_2(c)
    new_columns.append(clean_c)

colony.columns = new_columns

# note here that to merge two things, you need to provide a reference column so the computer knows what you're merging based on, i.e. to associate A in dataframe 1 with A in dataframe 2.
# hence ask yourself if you're merging based on exp or imp in the gravity dataframe, and which associatetd column in the colonisation dataframe.

# after that, read the .merge reference here https://pandas.pydata.org/pandas-docs/version/0.23.4/generated/pandas.merge.html or I can provide more hints, if you would like.


combined = combined_imp.merge(colony, left_on = 'exp', right_on = 'Country Code World Bank', how = 'left')
combined.tail()

combined.head()

imp_short_term_colony_bool = combined.loc[:, 'Years under Colonisation - Importer'] <= 100
imp_long_term_colony_bool = combined['Years under Colonisation - Importer'] > 100
exp_short_term_colony_bool = combined['Years under Colonisation - Exporter'] <= 100
exp_long_term_colony_bool = combined['Years under Colonisation - Exporter'] > 100
#Time period of 100 years determined by seeing the mean and median of the data from colony.describe() above

combined.loc[imp_short_term_colony_bool,:]

combined.loc[imp_long_term_colony_bool, :]

combined.shape

GBR_test_bool = (combined['imp'] == 'GBR')
#Have this boolean for testing purposes to see what happens to non-colonised countries in our data set


combined.loc[GBR_test_bool, :]
#This is the test to see what happens when you examine the importing data for GBR, a country that was never colonised


combined['imp_short_term_colony'] = 0
combined['imp_long_term_colony'] = 0
combined['exp_short_term_colony'] = 0
combined['exp_long_term_colony'] = 0
#Do all the coding for short-term and long-term colonies in the same cells so that they are easier to keep track of

combined.loc[imp_short_term_colony_bool,'imp_short_term_colony'] = 1
combined.loc[imp_long_term_colony_bool,'imp_long_term_colony'] = 1
combined.loc[exp_short_term_colony_bool,'exp_short_term_colony'] = 1
combined.loc[exp_long_term_colony_bool,'exp_long_term_colony'] = 1

colony_duration_test_bool = (combined['imp'] == 'IND')
combined.loc[colony_duration_test_bool, :]
#This boolean is to test whether the dummy of short-term and long-term colony has worked or not; can change the three-letter imp code and see the short-term colony column

combined['Colonial Motherland - Importer'].unique()

combined['Colonial Motherland - Exporter'].unique()

combined.shape

def combine_partner_name(x):
  partner_1 = x['imp']
  partner_2 = x['exp']

  partner_list = [partner_1, partner_2]
  partner_list.sort()

  x['partner-list'] = partner_list[0] + "-" + partner_list[1]
  return x

combined = combined.apply(combine_partner_name, axis = 1)

combined.tail()

combined.shape

trade_data = combined.groupby(['partner-list'])['trade'].sum().reset_index()
trade_data.head()
#If you don't use the reset index method, then you would just have a series where the partner-list is the index. The reset index takes the partner list out and makes it a separate column

combined_specific = combined[['partner-list', 'comcol', 'curcol', 'col45', 'dist','distw', 'distwces', 'gdp_exp', 'gdp_imp', 'ent_cost_imp', 'ent_cost_exp','trade_restrictiveness', 'imp_short_term_colony',	'imp_long_term_colony','exp_short_term_colony','exp_long_term_colony']].drop_duplicates(subset = ['partner-list'])
combined_specific.rename(columns={'gdp_exp': 'gdp_1', 'gdp_imp':'gdp_2','imp_short_term_colony':'short_term_colony_1', 'exp_short_term_colony': 'short_term_colony_2', 'imp_long_term_colony':'long_term_colony_1', 'exp_long_term_colony': 'long_term_colony_2' })
#Dropping duplicates for colony data in the subset of partner list doesn't matter since the only case where there would be a duplicate to be dropped is if neither are colonies, both are short-term or both are long-term

combined_trade_data = pd.merge(trade_data, combined_specific, on ='partner-list', how = 'inner')
combined_trade_data[combined_trade_data['partner-list'] == 'GBR-IND']

combined_trade_data = combined_trade_data.rename(columns={'gdp_exp': 'gdp_1', 'gdp_imp':'gdp_2','imp_short_term_colony':'short_term_colony_1', 'exp_short_term_colony': 'short_term_colony_2', 'imp_long_term_colony':'long_term_colony_1', 'exp_long_term_colony': 'long_term_colony_2', 'ent_cost_imp':'ent_cost_1','ent_cost_exp':'ent_cost_2' })
combined_trade_data.head()

combined_trade_data.shape

combined_trade_data['either_short_term_colony'] = combined_trade_data['short_term_colony_1'] + combined_trade_data['short_term_colony_2']
combined_trade_data['either_long_term_colony'] = combined_trade_data['long_term_colony_1'] + combined_trade_data['long_term_colony_2']
combined_trade_data['either_short_term_colony'].value_counts()


combined_trade_data['either_long_term_colony'].value_counts()

combined_trade_data.loc[combined_trade_data['either_short_term_colony'] == 2, 'both_short_term_colony'] = 1
combined_trade_data.loc[combined_trade_data['either_short_term_colony'] == 2]

combined_trade_data.loc[combined_trade_data['either_long_term_colony'] == 2, 'both_long_term_colony'] = 1
combined_trade_data.loc[combined_trade_data['either_long_term_colony'] == 2]

combined_trade_data.shape

combined_trade_data['either_short_term_colony'] = combined_trade_data['either_short_term_colony'].replace(2,0)
combined_trade_data['either_long_term_colony'] = combined_trade_data['either_long_term_colony'].replace(2,0)

combined_trade_data['either_short_term_colony'].value_counts()

combined_trade_data['either_long_term_colony'].value_counts()

combined_trade_data['both_short_term_colony'] = combined_trade_data['both_short_term_colony'].replace(np.nan,0)
combined_trade_data['both_long_term_colony'] = combined_trade_data['both_long_term_colony'].replace(np.nan,0)

combined_trade_data['both_long_term_colony'].value_counts()

combined_trade_data['both_short_term_colony'].value_counts()

combined_trade_data.shape

combined_trade_data.tail()

combined_trade_data['ln_GDP_1'] = np.log(combined_trade_data['gdp_1'])
combined_trade_data['ln_GDP_2'] = np.log(combined_trade_data['gdp_2'])
combined_trade_data['ln_distance'] = np.log(combined_trade_data['dist'])
combined_trade_data = combined_trade_data[combined_trade_data['trade'] != 0]
combined_trade_data['ln_trade'] = np.log(combined_trade_data['trade'])
combined_trade_data['ln_trade_restrictiveness'] = np.log(combined_trade_data['trade_restrictiveness'])

combined_trade_data.to_csv('Clean_Data.csv', index = False)

X = combined_trade_data[['ln_GDP_1', 'ln_GDP_2', 'ln_distance', 'ln_trade_restrictiveness','comcol', 'either_short_term_colony', 'either_long_term_colony', 'both_short_term_colony','both_long_term_colony']]
Y = combined_trade_data['ln_trade']
X = sm.add_constant(X)

regression_1 = sm.OLS(endog=Y, exog=X, missing ='drop')
regression_1_results = regression_1.fit(cov_type = 'HC1')
regression_1_results.summary()



0,1,2,3
Dep. Variable:,ln_trade,R-squared:,0.644
Model:,OLS,Adj. R-squared:,0.643
Method:,Least Squares,F-statistic:,480.5
Date:,"Sat, 27 Feb 2021",Prob (F-statistic):,0.0
Time:,11:03:19,Log-Likelihood:,-3537.2
No. Observations:,1949,AIC:,7094.0
Df Residuals:,1939,BIC:,7150.0
Df Model:,9,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-27.3346,0.775,-35.277,0.000,-28.853,-25.816
ln_GDP_1,0.7440,0.017,43.710,0.000,0.711,0.777
ln_GDP_2,0.7704,0.020,39.409,0.000,0.732,0.809
ln_distance,-0.8759,0.036,-24.223,0.000,-0.947,-0.805
ln_trade_restrictiveness,-0.0883,0.035,-2.518,0.012,-0.157,-0.020
comcol,0.6626,0.323,2.053,0.040,0.030,1.295
either_short_term_colony,-0.0645,0.086,-0.753,0.452,-0.233,0.104
either_long_term_colony,0.1044,0.098,1.063,0.288,-0.088,0.297
both_short_term_colony,0.7735,0.321,2.411,0.016,0.145,1.402

0,1,2,3
Omnibus:,37.084,Durbin-Watson:,1.585
Prob(Omnibus):,0.0,Jarque-Bera (JB):,44.959
Skew:,-0.263,Prob(JB):,1.73e-10
Kurtosis:,3.526,Cond. No.,911.0


## Findings
- Being a long-term colony is insignificant to biilateral trade
- Short-term colonies seemingly ally together
- Trade restrictiveness is less of a trade cost than distance
- The traditional gravity model takes precedence over our assumptions/hypothesis

In [None]:
from IPython.display import Image
Image(filename = 'GDP-Trade.png', width = 400, height = 400)
Image(filename = 'Distance-Trade.png', width = 400, height = 400)


In [None]:
#Visualisations

# Obstacles
- how to specify regression equation?
- workflow management
- understanding why some data is missing and finding good data

# Lessons
- Drawing things out and having a structured approach before starting to code
- Good programming etiquette, eg writing functions
- Regularly check the data and make sure it is what you expect