In [78]:
import pandas as pd, numpy as np
import os

# Read in data

co2_df = pd.read_csv('data/owid-co2-data.csv')

print(co2_df.columns)

Index(['country', 'year', 'iso_code', 'population', 'gdp', 'cement_co2',
       'cement_co2_per_capita', 'co2', 'co2_growth_abs', 'co2_growth_prct',
       'co2_including_luc', 'co2_including_luc_growth_abs',
       'co2_including_luc_growth_prct', 'co2_including_luc_per_capita',
       'co2_including_luc_per_gdp', 'co2_including_luc_per_unit_energy',
       'co2_per_capita', 'co2_per_gdp', 'co2_per_unit_energy', 'coal_co2',
       'coal_co2_per_capita', 'consumption_co2', 'consumption_co2_per_capita',
       'consumption_co2_per_gdp', 'cumulative_cement_co2', 'cumulative_co2',
       'cumulative_co2_including_luc', 'cumulative_coal_co2',
       'cumulative_flaring_co2', 'cumulative_gas_co2', 'cumulative_luc_co2',
       'cumulative_oil_co2', 'cumulative_other_co2', 'energy_per_capita',
       'energy_per_gdp', 'flaring_co2', 'flaring_co2_per_capita', 'gas_co2',
       'gas_co2_per_capita', 'ghg_excluding_lucf_per_capita', 'ghg_per_capita',
       'land_use_change_co2', 'land_use_chang

In [73]:
# List of treatment groups for Europe (implemented in 2005)
europe_li = ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden']

# List of control groups for Europe
europe_controls_li = ['Albania', 'Bosnia and Herzegovina', 'Montenegro', 'North Macedonia', 'Serbia']

# Korea implemented ETS in 2015
korea_li = ['South Korea']

# List of control groups for China and South Korea
asia_controls_li = ['Cambodia', 'Indonesia', 'Laos', 'Malaysia', 'Mongolia', 'Myanmar', 'Philippines', 'Singapore', 'Thailand', 'Timor-Leste', 'Vietnam']

# Kazakhstan implemented ETS in 2013

kazakhstan_li = ['Kazakhstan']

# Kazakhstan control group in Central Asia
central_asia_controls_li = ['Kyrgyzstan', 'Tajikistan', 'Turkmenistan', 'Uzbekistan'] 

# Combining all lists for the total countries list

countries_li = europe_li + europe_controls_li + korea_li + asia_controls_li + kazakhstan_li + central_asia_controls_li

# Creating a new dataframe with only the countries in the countries list
co2_df = co2_df[co2_df['country'].isin(countries_li)]

# Now we filter for all the years past 1985

co2_df = co2_df[co2_df['year'] >= 1985]
co2_df = co2_df[co2_df['year'] < 2019]
co2_df = co2_df[['country', 'year', 'co2', 'co2_growth_prct', 'co2_per_capita', 'gdp', 'population']]

co2_df['gdp_per_capita'] = co2_df['gdp'] / co2_df['population']

In [76]:
# Filtering to important variables only

final_df = co2_df[['country', 'year', 'co2_per_capita', 'gdp_per_capita', 'population', 'gdp', 'co2']]

# Next task: create an indicator variable for whether a country is in the treatment group or not. We should probably separate the regions into different dataframes.

all_europe_region = europe_li + europe_controls_li
all_korea_region = korea_li + asia_controls_li
all_kazakhstan_region = kazakhstan_li + central_asia_controls_li


europe_df = final_df[final_df['country'].isin(all_europe_region)]
korea_df = final_df[final_df['country'].isin(all_korea_region)]
kazakhstan_df = final_df[final_df['country'].isin(all_kazakhstan_region)]

# Creating the indicator variable for each region

# EU: treatment starts in 2005 for only EU countries (ocuntries in euroep_li)
europe_df['treated'] = ((europe_df['country'].isin(europe_li)) & (europe_df['year'] >= 2005)).astype(int)
korea_df['treated'] = ((korea_df['country'].isin(korea_li)) & (korea_df['year'] >= 2015)).astype(int)
kazakhstan_df['treated'] = ((kazakhstan_df['country'].isin(kazakhstan_li)) & (kazakhstan_df['year'] >= 2013)).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  europe_df['treated'] = ((europe_df['country'].isin(europe_li)) & (europe_df['year'] >= 2005)).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  korea_df['treated'] = ((korea_df['country'].isin(korea_li)) & (korea_df['year'] >= 2015)).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [77]:
# Export file
europe_df.to_csv('output/europe_df.csv', index=False)
korea_df.to_csv('output/korea_df.csv', index=False)
kazakhstan_df.to_csv('output/kazakhstan_df.csv', index=False)