## 1. Import Modules

In [1]:
import numpy as np
import pandas as pd # import library for data analysis

from get_regression_coefs_general import get_regression_coefs # import our function to get GDP trend

# For plotting
import matplotlib.pyplot as plt
import seaborn as sns # for nicer plots
# Global seaborn look for every figure in this notebook: "talk" context, plain white style.
sns.set_theme(context='talk', style='white')


## 2. Data Collection & Cleaning

### 2.1 Load Data from Excel Table

In [2]:
# Read the "Data" sheet of the workbook; row 0 of the sheet supplies the column names.
# NOTE(review): the file name suggests a Penn World Table 11.0 download - confirm provenance.
data = pd.read_excel(
    io='Data/pwt110_download_2025_11_24.xlsx',
    sheet_name='Data',
    header=0,
)

### 2.2 Clean Data

In [3]:
# Filter the data by columns. We need country, year, real GDP, population,
# real capital, human capital and employment.
# A list (not a tuple) is the idiomatic way to select several columns, and .copy()
# makes the result an independent frame, so adding a column below never touches
# `data` and cannot raise a SettingWithCopyWarning.
data_filtered = data.loc[:, ["country", "year", "rgdpe", "pop", "rnna", "hc", "emp"]].copy()
# Calculate real GDP per person (both operands come from the same frame)
data_filtered["rgdpe_pc"] = data_filtered["rgdpe"] / data_filtered["pop"]

# Select Chile as a country; the mask is built from the frame that is being filtered
data_Chile = data_filtered.loc[data_filtered["country"] == "Chile"]
# Generate table of year and rgdpe_pc of Chile for the trend analysis
data_Chile_rgdpe_pc = data_Chile.loc[:, ["year", "rgdpe_pc"]]
# Reset the index so that both Chile tables are numbered from 0
data_Chile = data_Chile.reset_index(drop=True)
data_Chile_rgdpe_pc = data_Chile_rgdpe_pc.reset_index(drop=True)

In [4]:
# Subset the RGDP per capita series
ymax = 2006
ymin = 1951 # Start at 1951 because data for Chile in 1950 is unavailable.
# We use logical indexing to subset the data to rows of years ymin <= year <= ymax.
# The mask must come from the Chile table's own "year" column: pandas aligns a boolean
# Series with the indexed frame by index label, so a mask built from the full `data`
# frame would pick rows according to the years stored in rows 0..k of the whole panel
# (another country's rows), which is only right if those years happen to match Chile's.
chile_years = data_Chile_rgdpe_pc["year"]
Y = data_Chile_rgdpe_pc.loc[(chile_years >= ymin) & (chile_years <= ymax), "rgdpe_pc"]
y = np.log(Y)
data_Chile_rgdpe_pc = data_Chile_rgdpe_pc[chile_years >= ymin] # we keep only years from ymin onwards

# Compute separate sample sizes for the subsample used for estimation and the whole sample.
T = len(Y) # sample size used for regression
# Number of years from ymin up to the last year in Chile's series (inclusive).
# Taken from the Chile table rather than the full panel so it matches the series above.
T_all = data_Chile_rgdpe_pc["year"].max() - (ymin - 1)