# importing data

In [1]:
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np
import datetime

from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.seasonal import seasonal_decompose
# Use seaborn's "darkgrid" theme for every plot drawn after this point.
sns.set(style="darkgrid")



from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import ParameterGrid

## reading data

In [2]:
# Load the CO2 concentration table and index it by date.
co2_df = pd.read_csv('data/co2_concentration_df.csv')
# Vectorised parse; the explicit format keeps the strict 'YYYY-MM-DD' contract.
co2_df['datetime'] = pd.to_datetime(co2_df['datetime'], format='%Y-%m-%d')
co2_df = co2_df.set_index('datetime')

# Row-over-row relative change of the global mean. The first row has no
# predecessor, so it is NaN and gets dropped.
co2_diff = co2_df['data_mean_global'].pct_change().dropna().to_frame()

In [3]:
# Load the global temperature table and index it by date.
global_temp_df = pd.read_csv('data/global_temp_df.csv')
temp_dates = pd.to_datetime(global_temp_df['dt'], format='%Y-%m-%d')
# Pin every timestamp to the 15th of its month (year and month unchanged).
# NOTE(review): presumably this aligns the dates with the CO2 timestamps that
# the later merge joins on; confirm against the CO2 file.
global_temp_df['dt'] = pd.to_datetime(
    pd.DataFrame({
        'year': temp_dates.dt.year,
        'month': temp_dates.dt.month,
        'day': 15,
    })
)
global_temp_df = global_temp_df.set_index('dt')

# Row-over-row relative change of the temperature; the leading NaN is dropped.
global_temp_diff = (
    global_temp_df['LandAndOceanAverageTemperature']
    .pct_change()
    .dropna()
    .to_frame()
)

# Rename the index so it matches the CO2 frame's index name. rename_axis
# returns a new frame, so global_temp_diff keeps 'dt' as its index name.
global_temp_df = global_temp_df.rename_axis('datetime')

## lagged data

In [4]:
# Container for the lagged CO2 features assembled in the next cell.
lagged_df = pd.DataFrame()

In [5]:
# Build the lag features in one shot: column 't-k' holds the CO2 value from
# k rows earlier, ordered from the oldest lag ('t-12') to the newest ('t-1').
max_lag = 12
lags = range(max_lag, 0, -1)
# Use the column explicitly instead of relying on co2_df having one column.
co2_series = co2_df['data_mean_global']
lagged_df = pd.concat(
    [co2_series.shift(lag) for lag in lags],
    axis=1,
    # Explicit keys fix the column names and their order.
    keys=['t-' + str(lag) for lag in lags],
)

# Rows without a complete lag history (the first `max_lag` rows, plus any row
# touching a missing CO2 value) contain NaN and are discarded.
lagged_df = lagged_df.dropna(axis=0)

In [6]:
# Attach the temperature data to the lag features by joining on 'datetime'.
# The inner join keeps only the dates present in both frames.
lagged_df = lagged_df.merge(global_temp_df, how='inner', on='datetime')

In [7]:
# Show the merged frame: twelve CO2 lag columns plus the temperature column.
lagged_df

Unnamed: 0_level_0,t-12,t-11,t-10,t-9,t-8,t-7,t-6,t-5,t-4,t-3,t-2,t-1,LandAndOceanAverageTemperature
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1850-01-15,284.834193,285.225604,285.575565,285.823897,285.775630,285.051013,283.418971,281.823622,281.790659,282.833831,283.844182,284.486830,12.833
1850-02-15,285.225604,285.575565,285.823897,285.775630,285.051013,283.418971,281.823622,281.790659,282.833831,283.844182,284.486830,284.944656,13.588
1850-03-15,285.575565,285.823897,285.775630,285.051013,283.418971,281.823622,281.790659,282.833831,283.844182,284.486830,284.944656,285.333792,14.043
1850-04-15,285.823897,285.775630,285.051013,283.418971,281.823622,281.790659,282.833831,283.844182,284.486830,284.944656,285.333792,285.682825,14.667
1850-05-15,285.775630,285.051013,283.418971,281.823622,281.790659,282.833831,283.844182,284.486830,284.944656,285.333792,285.682825,285.931179,15.507
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2014-08-15,393.129029,393.588520,395.189965,396.656354,397.589370,398.410233,398.790647,399.020053,399.094613,398.623928,397.337617,395.648835,17.607
2014-09-15,393.588520,395.189965,396.656354,397.589370,398.410233,398.790647,399.020053,399.094613,398.623928,397.337617,395.648835,394.573461,16.975
2014-10-15,395.189965,396.656354,397.589370,398.410233,398.790647,399.020053,399.094613,398.623928,397.337617,395.648835,394.573461,395.026810,16.029
2014-11-15,396.656354,397.589370,398.410233,398.790647,399.020053,399.094613,398.623928,397.337617,395.648835,394.573461,395.026810,396.668760,14.899
