In [21]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

sns.set(font_scale=1.5)
plt.style.use('fivethirtyeight')

from ipywidgets import *
from IPython.display import display

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import warnings
warnings.simplefilter('ignore')

In [22]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, RidgeCV, LassoCV
from sklearn.metrics import mean_absolute_error
import datetime
from sklearn.ensemble import RandomForestRegressor

In [23]:
dengue_train_sj = pd.read_pickle('./dengue_train_sj.pkl')
dengue_test_sj = pd.read_pickle('./dengue_test_sj.pkl')
dengue_train_iq = pd.read_pickle('./dengue_train_iq.pkl')
dengue_test_iq = pd.read_pickle('./dengue_test_iq.pkl')

In [24]:
dengue_train_sj['total_cases_shift1'] = dengue_train_sj['total_cases'].shift(1)
dengue_train_sj['min_air_shift4'] = dengue_train_sj['reanalysis_min_air_temp_k'].shift(4)
dengue_train_sj.fillna(method='bfill', inplace=True)
dengue_test_sj['total_cases_shift1'] = 0
dengue_test_sj['min_air_shift4'] = 0
dengue_train_iq['total_cases_shift1'] = dengue_train_iq['total_cases'].shift(1)
dengue_train_iq['min_air_shift4'] = dengue_train_iq['reanalysis_min_air_temp_k'].shift(4)
dengue_train_iq.fillna(method='bfill', inplace=True)
dengue_test_iq['total_cases_shift1'] = 0
dengue_test_iq['min_air_shift4'] = 0

In [25]:
dengue_train_merge = pd.concat([dengue_train_sj, dengue_train_iq])

In [26]:
X_me = dengue_train_merge[['ndvi_ne', 'ndvi_nw', 'ndvi_se', 'ndvi_sw', 
       'reanalysis_air_temp_k', 'reanalysis_avg_temp_k',
       'reanalysis_dew_point_temp_k', 'reanalysis_max_air_temp_k',
       'reanalysis_min_air_temp_k', 'reanalysis_precip_amt_kg_per_m2',
       'reanalysis_relative_humidity_percent', 'reanalysis_sat_precip_amt_mm',
       'reanalysis_specific_humidity_g_per_kg', 'reanalysis_tdtr_k',
       'station_avg_temp_c', 'station_diur_temp_rng_c', 'station_max_temp_c',
       'station_min_temp_c', 'station_precip_mm', 'total_cases_shift1', 'min_air_shift4']]
y_me = dengue_train_merge.total_cases

In [27]:
# standardize the data and fit
scaler_me = StandardScaler()
X_me = pd.DataFrame(scaler_me.fit_transform(X_me), columns=X_me.columns)

In [28]:
model_me = RandomForestRegressor(n_estimators=100)
model_me.fit(X_me, y_me)
print("Score:", model_me.score(X_me, y_me))

Score: 0.9864849199101768


In [29]:
train_pred_me = model_me.predict(X_me)

In [30]:
train_pred_me = [int(x) for x in train_pred_me]

In [31]:
mean_absolute_error(y_me, train_pred_me)

2.6502177068214805

In [32]:
dengue_test_sj.total_cases_shift1[0] = list(dengue_train_sj.total_cases)[-1]

In [33]:
dengue_test_sj.min_air_shift4[0] = list(dengue_train_sj.reanalysis_min_air_temp_k)[-4]
dengue_test_sj.min_air_shift4[1] = list(dengue_train_sj.reanalysis_min_air_temp_k)[-3]
dengue_test_sj.min_air_shift4[2] = list(dengue_train_sj.reanalysis_min_air_temp_k)[-2]
dengue_test_sj.min_air_shift4[3] = list(dengue_train_sj.reanalysis_min_air_temp_k)[-1]

In [34]:
dengue_test_iq.total_cases_shift1[0] = list(dengue_train_iq.total_cases)[-1]

In [35]:
dengue_test_iq.min_air_shift4[0] = list(dengue_train_iq.reanalysis_min_air_temp_k)[-4]
dengue_test_iq.min_air_shift4[1] = list(dengue_train_iq.reanalysis_min_air_temp_k)[-3]
dengue_test_iq.min_air_shift4[2] = list(dengue_train_iq.reanalysis_min_air_temp_k)[-2]
dengue_test_iq.min_air_shift4[3] = list(dengue_train_iq.reanalysis_min_air_temp_k)[-1]

In [36]:
cols_sj = ['ndvi_ne', 'ndvi_nw', 'ndvi_se', 'ndvi_sw', 
       'reanalysis_air_temp_k', 'reanalysis_avg_temp_k',
       'reanalysis_dew_point_temp_k', 'reanalysis_max_air_temp_k',
       'reanalysis_min_air_temp_k', 'reanalysis_precip_amt_kg_per_m2',
       'reanalysis_relative_humidity_percent', 'reanalysis_sat_precip_amt_mm',
       'reanalysis_specific_humidity_g_per_kg', 'reanalysis_tdtr_k',
       'station_avg_temp_c', 'station_diur_temp_rng_c', 'station_max_temp_c',
       'station_min_temp_c', 'station_precip_mm', 'total_cases_shift1', 'min_air_shift4']

In [37]:
cols_iq = ['ndvi_ne', 'ndvi_nw', 'ndvi_se', 'ndvi_sw', 
       'reanalysis_air_temp_k', 'reanalysis_avg_temp_k',
       'reanalysis_dew_point_temp_k', 'reanalysis_max_air_temp_k',
       'reanalysis_min_air_temp_k', 'reanalysis_precip_amt_kg_per_m2',
       'reanalysis_relative_humidity_percent', 'reanalysis_sat_precip_amt_mm',
       'reanalysis_specific_humidity_g_per_kg', 'reanalysis_tdtr_k',
       'station_avg_temp_c', 'station_diur_temp_rng_c', 'station_max_temp_c',
       'station_min_temp_c', 'station_precip_mm', 'total_cases_shift1', 'min_air_shift4']

In [38]:
for index in range(len(dengue_test_sj)):
    if model_me.predict(scaler_me.transform(dengue_test_sj.loc[index, cols_sj].values.reshape(1,-1)))[0]>461:
        dengue_test_sj.total_cases_shift1[index+1] = 461
        dengue_test_sj.min_air_shift4[index+4] = 461
    else:
        dengue_test_sj.total_cases_shift1[index+1] = \
            int(model_me.predict(scaler_me.transform(dengue_test_sj.loc[index, cols_sj].values.reshape(1,-1)))[0])
        dengue_test_sj.min_air_shift4[index+4] = \
            int(model_me.predict(scaler_me.transform(dengue_test_sj.loc[index, cols_sj].values.reshape(1,-1)))[0])

for index in range(len(dengue_test_iq)):
    if model_me.predict(scaler_me.transform(dengue_test_iq.loc[index, cols_iq].values.reshape(1,-1)))[0]>116:
        dengue_test_iq.total_cases_shift1[index+1] = 116
        dengue_test_iq.min_air_shift4[index+4] = 116
    else:
        dengue_test_iq.total_cases_shift1[index+1] = \
            int(model_me.predict(scaler_me.transform(dengue_test_iq.loc[index, cols_sj].values.reshape(1,-1)))[0])
        dengue_test_iq.min_air_shift4[index+4] = \
            int(model_me.predict(scaler_me.transform(dengue_test_iq.loc[index, cols_sj].values.reshape(1,-1)))[0])

In [40]:
dengue_test_merge = pd.concat([dengue_test_sj, dengue_test_iq])

In [41]:
dengue_test_merge.total_cases_shift1 = [0 if x < 0 else x for x in dengue_test_merge.total_cases_shift1]
dengue_test_merge.min_air_shift4 = [0 if x < 0 else x for x in dengue_test_merge.min_air_shift4]

In [42]:
me_predictions = [int(x) for x in model_me.predict(scaler_me.transform(dengue_test_merge[cols_sj]))]

In [43]:
dengue_test_merge['total_cases'] = me_predictions

In [44]:
submission_cols = ['city', 'year', 'weekofyear', 'total_cases']

In [45]:
submission = dengue_test_merge[submission_cols]

In [48]:
submission.head()

Unnamed: 0,city,year,weekofyear,total_cases
0,sj,2008,18,6
1,sj,2008,19,6
2,sj,2008,20,6
3,sj,2008,21,7
4,sj,2008,22,9


In [49]:
submission.to_csv('natasha_dengue_minairtempk4.csv', index=False)