<a href="https://colab.research.google.com/github/RyomaIchiba/Project1-2/blob/main/growth_comp_2023_ipynb_%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import pandas as pd
import numpy as np


# Load data
pwt1001 = pd.read_stata('https://dataverse.nl/api/access/datafile/354098')

# Filter and select relevant columns
data = pwt1001.loc[pwt1001['country'].isin(["France","Germany", "Canada", "Italy", "Japan", "United Kingdom", "United States"])][['year', 'countrycode', 'rgdpna', 'rkna', 'pop', 'emp', 'avh', 'labsh', 'rtfpna']]
data = data.loc[(data['year'] >= 1995) & (data['year'] <= 2019)].dropna()

# Calculate additional columns
data['y_pc'] = np.log(data['rgdpna'] / data['emp'])  # GDP per worker
data['k_pc'] = np.log(data['rkna'] / data['emp'])  # Capital per worker
data['a'] = 1 - data['labsh']  # Capital share
data['t'] = data['labsh'] # TFP share
data['y_n'] = data['rgdpna'] / data['emp']  # Y/N
data['hours'] = data['emp'] * data['avh']  # L
data['tfp_term'] = data['rtfpna'] ** (1 / (1 - data['a']))  # A^(1/(1-a))
data['cap_term'] = (data['rkna'] / data['rgdpna']) ** (data['a'] / (1 - data['a']))  # (K/Y)^(a/(1-a))
data['lab_term'] = data['hours'] / data['pop']  # L/N
data = data.sort_values('year').groupby('countrycode').apply(lambda x: x.assign(
    alpha=1 - x['labsh'],
    y_n_shifted=100 * x['y_n'] / x['y_n'].iloc[0],
    tfp_term_shifted=100 * x['tfp_term'] / x['tfp_term'].iloc[0],
    cap_term_shifted=100 * x['cap_term'] / x['cap_term'].iloc[0],
    lab_term_shifted=100 * x['lab_term'] / x['lab_term'].iloc[0]
)).reset_index(drop=True).dropna()

# Order by year
data = data.sort_values('year')

# Group by isocode
grouped_data = data.groupby('countrycode')

# Calculate growth rates and Solow residual
data['g'] = (grouped_data['y_pc'].diff() * 100)  # Growth rate of GDP per capita

# Remove missing values
data = data.dropna()

# Calculate summary statistics
summary = data.groupby('countrycode').agg({'g': 'mean',
                                           'tfp_term_shifted': "mean",
                                           'cap_term_shifted': "mean",
                                           't':"mean",
                                       'a': "mean"})

# Calculate additional summary statistics
summary['Growth Rate'] = summary['g']
summary['TFP Growth'] = summary['tfp_term_shifted']
summary['Capital Deeping'] = summary['cap_term_shifted']
summary['TFP Share'] = summary['t']
summary['Capital Share'] = summary['a']



# Print output
print(summary)

                    g  tfp_term_shifted  cap_term_shifted         t         a  \
countrycode                                                                     
CAN          0.887700        103.191681         45.881821  0.653810  0.346190   
DEU          0.661055        107.241402         38.176662  0.629270  0.370730   
FRA          0.829236        105.963249        101.082375  0.618806  0.381194   
GBR          1.015544        111.556435        893.372253  0.587007  0.412993   
ITA         -0.037181         83.797211         76.108536  0.515165  0.484835   
JPN          0.710956        100.996216         35.388798  0.569399  0.430601   
USA          1.536985        118.028290        128.616684  0.606740  0.393260   

             Growth Rate  TFP Growth  Capital Deeping  TFP Share  \
countrycode                                                        
CAN             0.887700  103.191681        45.881821   0.653810   
DEU             0.661055  107.241402        38.176662   0.629270  

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  data = data.sort_values('year').groupby('countrycode').apply(lambda x: x.assign(
