In [54]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [55]:
from tensorflow import keras 
from tensorflow.keras import layers, models, metrics
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Dense, Input, Dropout, GRU 
from sklearn.model_selection import train_test_split
from imblearn.combine import SMOTEENN
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score

In [56]:
data="task-2/data/"
train_data=pd.read_csv(data+"cleaned_data.csv")

In [57]:
y = train_data["WnvPresent"]
x = train_data.drop(['WnvPresent'], axis = 1)
y.value_counts()

0    19910
1     1102
Name: WnvPresent, dtype: int64

### Oversample and clean using SMOTEENN

In [58]:
smoteenn=SMOTEENN()
x_aug ,y_aug = smoteenn.fit_resample(x, y)
y_aug.value_counts()

0    18427
1    18135
Name: WnvPresent, dtype: int64

In [59]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)
x_train_aug, x_test_aug, y_train_aug, y_test_aug = train_test_split(x_aug, y_aug, test_size = 0.2, random_state = 42)

### Build and train GRU model

In [60]:
model=Sequential()

In [61]:
x_train.shape

(16809, 20)

In [62]:
# model.add(Input(20,))
model.add(GRU(256,activation="relu",input_shape=(20,1,)))
model.add(Dense(128,activation="relu"))
model.add(Dense(32,activation="relu"))
model.add(Dense(1,activation="sigmoid"))



In [63]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_3 (GRU)                 (None, 256)               198912    
                                                                 
 dense_12 (Dense)            (None, 128)               32896     
                                                                 
 dense_13 (Dense)            (None, 32)                4128      
                                                                 
 dense_14 (Dense)            (None, 1)                 33        
                                                                 
Total params: 235,969
Trainable params: 235,969
Non-trainable params: 0
_________________________________________________________________


In [64]:
model.compile(
    optimizer="Adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

In [65]:
callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            filepath="task-2/saved_models/ANN",
            monitor='val_loss', 
            mode='min', 
            save_best_only=True,
            verbose=1
        )
]

In [67]:
history=model.fit(x_train,y_train,validation_data=(x_test,y_test),epochs=20,batch_size=64,callbacks=callbacks)

Epoch 1/20
Epoch 1: val_loss improved from 0.17433 to 0.17298, saving model to task-2/saved_models/ANN
INFO:tensorflow:Assets written to: task-2/saved_models/ANN/assets
Epoch 2/20
Epoch 2: val_loss did not improve from 0.17298
Epoch 3/20
Epoch 3: val_loss did not improve from 0.17298
Epoch 4/20
Epoch 4: val_loss improved from 0.17298 to 0.16972, saving model to task-2/saved_models/ANN
INFO:tensorflow:Assets written to: task-2/saved_models/ANN/assets
Epoch 5/20
Epoch 5: val_loss did not improve from 0.16972
Epoch 6/20
Epoch 6: val_loss did not improve from 0.16972
Epoch 7/20
Epoch 7: val_loss did not improve from 0.16972
Epoch 8/20
Epoch 8: val_loss did not improve from 0.16972
Epoch 9/20
Epoch 9: val_loss did not improve from 0.16972
Epoch 10/20
Epoch 10: val_loss improved from 0.16972 to 0.16815, saving model to task-2/saved_models/ANN
INFO:tensorflow:Assets written to: task-2/saved_models/ANN/assets
Epoch 11/20
Epoch 11: val_loss improved from 0.16815 to 0.16812, saving model to task

In [68]:
y_pred=model.predict(x_test)
y_pred=np.round(y_pred)
score = classification_report(y_test, y_pred)



In [69]:
print(score)
print(roc_auc_score(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.95      0.99      0.97      3983
           1       0.38      0.07      0.12       220

    accuracy                           0.95      4203
   macro avg       0.67      0.53      0.55      4203
weighted avg       0.92      0.95      0.93      4203

0.5330997649099583


In [85]:
model_aug=Sequential()

# model.add(Input(20,))
model_aug.add(GRU(256,activation="relu",input_shape=(20,1,)))
model_aug.add(Dense(128,activation="relu"))
model_aug.add(Dense(32,activation="relu"))
model_aug.add(Dense(1,activation="sigmoid"))

model_aug.compile(
    optimizer="Adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

callbacks_aug = [
        tf.keras.callbacks.ModelCheckpoint(
            filepath="task-2/saved_models/ANN_aug",
            monitor='val_loss', 
            mode='min', 
            save_best_only=True,
            verbose=1
        )
]



In [86]:
history_aug=model_aug.fit(x_train_aug,y_train_aug,validation_data=(x_test_aug,y_test_aug),epochs=50,batch_size=64,callbacks=callbacks_aug)

Epoch 1/50
Epoch 1: val_loss improved from inf to 0.70694, saving model to task-2/saved_models/ANN_aug
INFO:tensorflow:Assets written to: task-2/saved_models/ANN_aug/assets
Epoch 2/50
Epoch 2: val_loss improved from 0.70694 to 0.51792, saving model to task-2/saved_models/ANN_aug
INFO:tensorflow:Assets written to: task-2/saved_models/ANN_aug/assets
Epoch 3/50
Epoch 3: val_loss did not improve from 0.51792
Epoch 4/50
Epoch 4: val_loss improved from 0.51792 to 0.43716, saving model to task-2/saved_models/ANN_aug
INFO:tensorflow:Assets written to: task-2/saved_models/ANN_aug/assets
Epoch 5/50
Epoch 5: val_loss did not improve from 0.43716
Epoch 6/50
Epoch 6: val_loss improved from 0.43716 to 0.35982, saving model to task-2/saved_models/ANN_aug
INFO:tensorflow:Assets written to: task-2/saved_models/ANN_aug/assets
Epoch 7/50
Epoch 7: val_loss did not improve from 0.35982
Epoch 8/50
Epoch 8: val_loss did not improve from 0.35982
Epoch 9/50
Epoch 9: val_loss did not improve from 0.35982
Epoch 

In [87]:
y_pred_aug=model_aug.predict(x_test_aug)
y_pred_aug=np.round(y_pred_aug)
score_aug = classification_report(y_test_aug, y_pred_aug)
print(score_aug)

              precision    recall  f1-score   support

           0       0.99      0.96      0.97      3648
           1       0.96      0.99      0.97      3665

    accuracy                           0.97      7313
   macro avg       0.97      0.97      0.97      7313
weighted avg       0.97      0.97      0.97      7313



In [88]:
roc_auc_score(y_test_aug,y_pred_aug)

0.9731601236207845