In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pandas as pd
from IPython.display import display
pd.options.display.max_columns = None
from sklearn.preprocessing import Imputer

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
X_train = pd.read_csv('dengue_features_train.csv', index_col=None)
X_test = pd.read_csv('dengue_features_test.csv', index_col=None)

In [3]:
X_train['week_start_date'] = pd.to_datetime(X_train['week_start_date'])
X_test['week_start_date'] = pd.to_datetime(X_test['week_start_date'])

In [4]:
X_train['month'] = X_train['week_start_date'].dt.month
X_train['quarter'] = X_train['week_start_date'].dt.quarter
X_train = X_train.drop(['week_start_date'], axis=1)
X_test['month'] = X_test['week_start_date'].dt.month
X_test['quarter'] = X_test['week_start_date'].dt.quarter
X_test = X_test.drop(['week_start_date'], axis=1)

In [5]:
city_train = pd.get_dummies(X_train['city'])
X_train = pd.concat([X_train, city_train], axis=1)
X_train = X_train.drop(['city'], axis=1)

In [6]:
city_test = pd.get_dummies(X_test['city'])
X_test = pd.concat([X_test, city_test], axis=1)
X_test = X_test.drop(['city'], axis=1)

In [7]:
X_train.head()

Unnamed: 0,year,weekofyear,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,reanalysis_avg_temp_k,reanalysis_dew_point_temp_k,reanalysis_max_air_temp_k,reanalysis_min_air_temp_k,reanalysis_precip_amt_kg_per_m2,reanalysis_relative_humidity_percent,reanalysis_sat_precip_amt_mm,reanalysis_specific_humidity_g_per_kg,reanalysis_tdtr_k,station_avg_temp_c,station_diur_temp_rng_c,station_max_temp_c,station_min_temp_c,station_precip_mm,month,quarter,iq,sj
0,1990,18,0.1226,0.103725,0.198483,0.177617,12.42,297.572857,297.742857,292.414286,299.8,295.9,32.0,73.365714,12.42,14.012857,2.628571,25.442857,6.9,29.4,20.0,16.0,4,2,0,1
1,1990,19,0.1699,0.142175,0.162357,0.155486,22.82,298.211429,298.442857,293.951429,300.9,296.4,17.94,77.368571,22.82,15.372857,2.371429,26.714286,6.371429,31.7,22.2,8.6,5,2,0,1
2,1990,20,0.03225,0.172967,0.1572,0.170843,34.54,298.781429,298.878571,295.434286,300.5,297.3,26.1,82.052857,34.54,16.848571,2.3,26.714286,6.485714,32.2,22.8,41.4,5,2,0,1
3,1990,21,0.128633,0.245067,0.227557,0.235886,15.36,298.987143,299.228571,295.31,301.4,297.0,13.9,80.337143,15.36,16.672857,2.428571,27.471429,6.771429,33.3,23.3,4.0,5,2,0,1
4,1990,22,0.1962,0.2622,0.2512,0.24734,7.52,299.518571,299.664286,295.821429,301.9,297.5,12.2,80.46,7.52,17.21,3.014286,28.942857,9.371429,35.0,23.9,5.8,5,2,0,1


In [8]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1456 entries, 0 to 1455
Data columns (total 26 columns):
year                                     1456 non-null int64
weekofyear                               1456 non-null int64
ndvi_ne                                  1262 non-null float64
ndvi_nw                                  1404 non-null float64
ndvi_se                                  1434 non-null float64
ndvi_sw                                  1434 non-null float64
precipitation_amt_mm                     1443 non-null float64
reanalysis_air_temp_k                    1446 non-null float64
reanalysis_avg_temp_k                    1446 non-null float64
reanalysis_dew_point_temp_k              1446 non-null float64
reanalysis_max_air_temp_k                1446 non-null float64
reanalysis_min_air_temp_k                1446 non-null float64
reanalysis_precip_amt_kg_per_m2          1446 non-null float64
reanalysis_relative_humidity_percent     1446 non-null float64
reanalysis_sat_

In [9]:
X_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416 entries, 0 to 415
Data columns (total 26 columns):
year                                     416 non-null int64
weekofyear                               416 non-null int64
ndvi_ne                                  373 non-null float64
ndvi_nw                                  405 non-null float64
ndvi_se                                  415 non-null float64
ndvi_sw                                  415 non-null float64
precipitation_amt_mm                     414 non-null float64
reanalysis_air_temp_k                    414 non-null float64
reanalysis_avg_temp_k                    414 non-null float64
reanalysis_dew_point_temp_k              414 non-null float64
reanalysis_max_air_temp_k                414 non-null float64
reanalysis_min_air_temp_k                414 non-null float64
reanalysis_precip_amt_kg_per_m2          414 non-null float64
reanalysis_relative_humidity_percent     414 non-null float64
reanalysis_sat_precip_amt_mm   

In [10]:
y_train = pd.read_csv('dengue_labels_train.csv', index_col=None)
y_train = y_train['total_cases']

In [11]:
def larger_model():
	# create model
	model = Sequential()
	model.add(Dense(26, input_dim=26, kernel_initializer='normal', activation='relu'))
	model.add(Dense(13, kernel_initializer='normal', activation='relu'))
	model.add(Dense(5, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal'))
	# Compile model
	model.compile(loss='mean_squared_error', optimizer='adam')
	return model

In [12]:
seed = 7
np.random.seed(seed)

In [13]:
X_train = X_train.fillna(X_train.mean())
X_test = X_test.fillna(X_test.mean())

In [14]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [15]:
nn_model = KerasRegressor(build_fn=larger_model, nb_epoch=100, batch_size=5, verbose=0)

In [16]:
nn_model.fit(X_train,y_train)

<keras.callbacks.History at 0xce24829f98>

In [17]:
result = nn_model.predict(X_test)

In [18]:
result

array([26.93495  , 38.571285 , 33.776127 , 38.77647  , 38.197887 ,
       48.102993 , 40.872997 , 41.98698  , 50.97095  , 55.818462 ,
       50.0785   , 54.555645 , 41.392536 , 60.482594 , 51.253727 ,
       61.116985 , 45.39352  , 53.70654  , 57.330643 , 38.708652 ,
       49.022915 , 35.105404 , 42.6851   , 55.763348 , 44.681084 ,
       43.556927 , 60.423374 , 60.63898  , 51.134953 , 48.24715  ,
       49.0457   , 47.46519  , 42.600094 , 41.395405 , 40.0827   ,
        4.0333505, 16.957735 , 27.32358  , 22.032463 , 21.55979  ,
       24.49092  , 15.672383 , 21.52809  , 11.321181 , 21.043589 ,
       23.614618 , 23.15824  , 20.71576  , 24.681503 , 26.840845 ,
       29.358084 , 26.131334 , 28.707708 , 34.25617  , 36.038967 ,
       32.94237  , 31.630575 , 37.047626 , 37.966404 , 41.311905 ,
       39.294746 , 49.04935  , 46.44382  , 53.198536 , 51.87352  ,
       50.524117 , 58.112766 , 54.84675  , 48.58498  , 53.25871  ,
       52.267147 , 54.22424  , 52.71535  , 52.573887 , 61.3922

In [19]:
X_test = pd.read_csv('dengue_features_test.csv', index_col=None)
result_columns = X_test[['city','year','weekofyear']]
result_cases = pd.DataFrame(result)
result_cases.columns = ['total_cases']
result_df = pd.concat([result_columns, result_cases], axis=1)
result_df['total_cases'] = result_df['total_cases'].round().astype(int)

In [20]:
result_df['total_cases'] = np.where(result_df['total_cases']<0, 0, result_df['total_cases'])

In [21]:
result_df.head()

Unnamed: 0,city,year,weekofyear,total_cases
0,sj,2008,18,27
1,sj,2008,19,39
2,sj,2008,20,34
3,sj,2008,21,39
4,sj,2008,22,38


In [22]:
result_df.to_csv('keras_deep.csv', index=False)