In [1]:
import pandas as pd
import scipy as sc
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

feature_columns = ['city', 'weekofyear',
 'reanalysis_avg_temp_k',
 'reanalysis_sat_precip_amt_mm',
 'reanalysis_specific_humidity_g_per_kg',
 'reanalysis_tdtr_k',
 'station_avg_temp_c']

#ndvi = density of green
#precipitation = rain amount

df = pd.read_csv('dengue_features_train.csv', usecols=feature_columns)
label_columns = ['city', 'total_cases']
labels = pd.read_csv('dengue_labels_train.csv', usecols=label_columns)

data = pd.concat([df, labels['total_cases']], axis=1, join_axes=[df.index])

data = data.dropna()

data['reanalysis_sat_precip_amt_mm'] = data['reanalysis_sat_precip_amt_mm'].rolling(window=30, min_periods=(1)).mean()

data_sj = data[data['city']=='sj']
data_iq = data[data['city']=='iq']

data_sj = data_sj.drop('city', axis=1)
data_iq = data_iq.drop('city', axis=1)
data_sj = data_sj.drop('total_cases', axis=1)
data_iq = data_iq.drop('total_cases', axis=1)


labels_sj = data[data['city']=='sj']
labels_iq = data[data['city']=='iq']

sj_train_features, sj_test_features, sj_train_labels, sj_test_labels = train_test_split(
    data_sj, labels_sj['total_cases'], test_size=0.25, random_state=0, shuffle=False)

iq_train_features, iq_test_features, iq_train_labels, iq_test_labels = train_test_split(
    data_iq, labels_iq['total_cases'], test_size=0.25, random_state=0, shuffle=False)

In [20]:
test_features = pd.read_csv('dengue_features_test.csv', usecols=feature_columns)

test_features['reanalysis_sat_precip_amt_mm'] = test_features['reanalysis_sat_precip_amt_mm'].rolling(window=10, min_periods=(1)).mean()
test_features = test_features.fillna(test_features.mean())

test_features_sj = test_features[test_features['city']=='sj']
test_features_iq = test_features[test_features['city']=='iq']

test_features_sj = test_features_sj.drop('city', axis=1)
test_features_iq = test_features_iq.drop('city', axis=1)

In [2]:
import numpy as np
X = sj_train_features
y = sj_train_labels


X1 = iq_train_features
y1 = iq_train_labels

In [None]:
# --------------------------------------------- Random Forest ---------------------------------

In [13]:
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators=1000, max_features='auto',
                                 max_depth=10, min_samples_leaf=0.005,
                                 criterion='mae', min_weight_fraction_leaf=0.1
                                , warm_start=True)
model.fit(X,y)

predictions_sj = model.predict(sj_test_features).astype(int)
predictions_sj = predictions_sj.clip(min=0)

model1 = RandomForestRegressor(n_estimators=1000, max_features='auto',
                                 max_depth=10, min_samples_leaf=0.005,
                                 criterion='mae', min_weight_fraction_leaf=0.1
                                , warm_start=True)
model1.fit(X1, y1)
predictions_iq = model1.predict(iq_test_features).astype(int)
predictions_iq = predictions_iq.clip(min=0)

In [14]:
iq_error = mean_absolute_error(iq_test_labels, predictions_iq)
sj_error = mean_absolute_error(sj_test_labels, predictions_sj)

In [36]:
sj_error, iq_error

(17.346153846153847, 7.917355371900826)

In [7]:
sj_error, iq_error

(17.98901098901099, 8.132231404958677)

In [15]:
sj_error, iq_error

(17.895604395604394, 7.884297520661157)

In [15]:
from sklearn.neural_network import MLPRegressor
model = MLPRegressor(hidden_layer_sizes=(5,),
                                       activation='relu',
                                       solver='adam',
                                       learning_rate='constant',
                                       max_iter=1000,
                                       learning_rate_init=0.01,
                                       alpha=0.01)
model.fit(X,y)

predictions_sj = model.predict(sj_test_features).astype(int)
predictions_sj = predictions_sj.clip(min=0)

model1 = MLPRegressor(hidden_layer_sizes=(5,),
                                       activation='relu',
                                       solver='adam',
                                       learning_rate='constant',
                                       max_iter=1000,
                                       learning_rate_init=0.01,
                                       alpha=0.01)
model1.fit(X1, y1)
predictions_iq = model1.predict(iq_test_features).astype(int)
predictions_iq = predictions_iq.clip(min=0)

iq_error = mean_absolute_error(iq_test_labels, predictions_iq)
sj_error = mean_absolute_error(sj_test_labels, predictions_sj)

In [16]:
sj_error, iq_error

(25.43103448275862, 10.636363636363637)

In [24]:
sj_error, iq_error

(32.75431034482759, 13.892561983471074)

In [21]:
predictions_sj = model.predict(test_features_sj).astype(int)
predictions_sj = predictions_sj.clip(min=0)
predictions_iq = model1.predict(test_features_iq).astype(int)
predictions_iq = predictions_iq.clip(min=0)

total_preditions = np.concatenate((predictions_sj, predictions_iq))
np.savetxt("foo.csv", total_preditions, delimiter=",")