In [1]:
# Make Google Drive visible inside the Colab runtime under /content/drive.
# force_remount=True asks for a fresh mount even if one is already present.
from google.colab import drive

drive.mount("/content/drive", force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
import argparse
import yaml
import time
import datetime
import cv2
import numpy as np
import pandas as pd
import random

from skimage import io, transform
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
from scipy import ndimage, misc

import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms, utils
from torchvision.utils import save_image
from torch.utils.data import Dataset, DataLoader
import torchvision
import torch
from torch.autograd import Variable
import torch.optim as optim
from sklearn.metrics import mean_absolute_error

from google.colab import drive
import os
import warnings
from collections import defaultdict
from sklearn.model_selection import KFold
import time
from scipy import stats
import sys


In [3]:
# Columns that are label-encoded and passed through embedding layers.
categorical_columns = [
    'GENUS',
    'SPECIES',
    'SETTIMEOFDAY',
    'TRAPTYPE',
]

# Columns that are scaled and fed to the network as continuous inputs.
numerical_columns = [
    'DIFF_DAYS',
    'uvIndex13',
    'uvIndex5',
    'uvIndex12',
    'uvIndex6',
    'windBearing4',
    'precipIntensityMaxTime12',
    'precipIntensityMaxTime14',
    'apparentTemperatureMinTime13',
    'sunsetTime4',
    'sunsetTime6',
    'sunsetTime1',
    'sunriseTime12',
    'temperatureLowTime10',
    'windGustTime13',
    'sunsetTime3',
]

print(numerical_columns)

['DIFF_DAYS', 'uvIndex13', 'uvIndex5', 'uvIndex12', 'uvIndex6', 'windBearing4', 'precipIntensityMaxTime12', 'precipIntensityMaxTime14', 'apparentTemperatureMinTime13', 'sunsetTime4', 'sunsetTime6', 'sunsetTime1', 'sunriseTime12', 'temperatureLowTime10', 'windGustTime13', 'sunsetTime3']


In [4]:
# Load the cleaned trap / weather table from the mounted Drive.
sentinel_frame = pd.read_csv('/content/drive/My Drive/BerkeleyResults/Approach2/aedes_mosquito_weather_clean.csv')

# Parse the trap set / collect timestamps and derive the deployment length in days.
sentinel_frame["TRAPSET"] = pd.to_datetime(sentinel_frame["TRAPSET"])
sentinel_frame["TRAPCOLLECT"] = pd.to_datetime(sentinel_frame["TRAPCOLLECT"])
sentinel_frame["DIFF_DAYS"] = (sentinel_frame["TRAPCOLLECT"] - sentinel_frame["TRAPSET"]).dt.days

# Replace both timestamps by their ISO week number.
# `Series.dt.week` was deprecated in pandas 1.1 and removed in pandas 2.0;
# `dt.isocalendar().week` is the documented replacement (it yields a nullable
# UInt32 column rather than int64).
sentinel_frame["TRAPSET"] = sentinel_frame["TRAPSET"].dt.isocalendar().week
sentinel_frame["TRAPCOLLECT"] = sentinel_frame["TRAPCOLLECT"].dt.isocalendar().week

# Categorical dtype gives every level an integer code that the embeddings index.
for category in categorical_columns:
    sentinel_frame[category] = sentinel_frame[category].astype('category')

# One (number_of_levels, embedding_width) pair per categorical column;
# the width is half the level count (rounded up), capped at 50.
categorical_column_sizes = [len(sentinel_frame[column].cat.categories) for column in categorical_columns]
categorical_embedding_sizes = [(col_size, min(50, (col_size+1)//2)) for col_size in categorical_column_sizes]

# Mean-centre each numerical column and divide by its range.
# NOTE(review): the statistics are computed on the full frame, i.e. before any
# train/test split performed later, so held-out rows influence the scaling.
for col in numerical_columns:
    column_values = sentinel_frame[col]
    centered = column_values - column_values.mean()
    value_range = column_values.max() - column_values.min()
    # A constant column has zero range; dividing would turn it into NaN and
    # poison every downstream loss, so keep the (all-zero) centred values.
    sentinel_frame[col] = centered / value_range if value_range != 0 else centered
    
def get_categorical_tensor(in_frame, categories):
    """Return the integer category codes of the given columns as an int64 tensor.

    Each column named in ``categories`` must already have pandas ``category``
    dtype (its ``.cat.codes`` are read). The result has one row per frame row
    and one column per entry of ``categories``, in the order given.
    """
    code_columns = []
    for name in categories:
        code_columns.append(in_frame[name].cat.codes.values)

    stacked_codes = np.stack(code_columns, 1)
    return torch.tensor(stacked_codes, dtype=torch.int64)

def get_numerical_tensor(in_frame, numerical_columns):
    """Return the given columns of ``in_frame`` as a float32 tensor.

    The result has one row per frame row and one column per entry of
    ``numerical_columns``, in the order given.
    """
    column_arrays = [in_frame[name].values for name in numerical_columns]
    stacked_values = np.stack(column_arrays, 1)
    return torch.tensor(stacked_values, dtype=torch.float)

In [5]:
class Flatten(torch.nn.Module):
    """Layer that collapses every dimension after the first into one."""

    def forward(self, x):
        # Keep the leading dimension, let view() infer the flattened width.
        leading = x.size(0)
        return x.view(leading, -1)

class Swish(torch.autograd.Function):
    """Swish activation, x * sigmoid(x), with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, i):
        # Only the input is stored; the sigmoid is recomputed in backward().
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        (saved_input,) = ctx.saved_tensors
        gate = torch.sigmoid(saved_input)
        # d/dx [x * s(x)] = s(x) * (1 + x * (1 - s(x)))
        local_grad = gate * (1 + saved_input * (1 - gate))
        return grad_output * local_grad
    
class SwishM(nn.Module):
    """nn.Module wrapper that applies the custom ``Swish`` autograd function."""

    def forward(self, input_tensor):
        activated = Swish.apply(input_tensor)
        return activated
        
def create_neural_modules(embedding_size, num_numerical_cols, output_size, hidden_size, dropout, num_hidden_layers=2):
    """Build the fully connected stack that follows the embedding/numeric concat.

    Args:
        embedding_size: iterable of ``(num_levels, embedding_width)`` pairs; only
            the widths are used here, to size the first linear layer.
        num_numerical_cols: number of continuous input features.
        output_size: width of the final linear layer.
        hidden_size: width of every hidden linear layer.
        dropout: drop probability used after every hidden block.
        num_hidden_layers: how many ``Linear -> LeakyReLU -> BatchNorm1d ->
            Dropout`` blocks to stack. Defaults to 2, the depth this cell
            originally hard-coded.

    Returns:
        ``nn.Sequential`` of the hidden blocks followed by ``Linear`` and
        ``ReLU`` (the ReLU clamps the network output to be non-negative).

    Raises:
        ValueError: if ``num_hidden_layers`` is negative.
    """
    if num_hidden_layers < 0:
        raise ValueError("num_hidden_layers must be >= 0, got %r" % (num_hidden_layers,))

    embedded_width = sum(nf for _, nf in embedding_size)
    in_features = embedded_width + num_numerical_cols

    all_layers = []
    for _ in range(num_hidden_layers):
        all_layers.append(nn.Linear(in_features, hidden_size))
        all_layers.append(nn.LeakyReLU())
        all_layers.append(nn.BatchNorm1d(hidden_size))
        all_layers.append(nn.Dropout(dropout))
        # Every block after the first maps hidden_size -> hidden_size.
        in_features = hidden_size

    all_layers.append(nn.Linear(in_features, output_size))
    all_layers.append(nn.ReLU())

    return nn.Sequential(*all_layers)

class Sentinel_net(nn.Module):
    """Tabular regressor: per-column embeddings plus batch-normalised numerics.

    The categorical inputs are embedded column by column, concatenated with the
    batch-normalised numerical inputs, and passed through the stack returned by
    ``create_neural_modules``.
    """

    def __init__(self, embedding_size, num_numerical_cols, output_size, dropout, hidden_size):
        super().__init__()
        # Note the argument order: create_neural_modules takes hidden_size before dropout.
        self.layers = create_neural_modules(embedding_size, num_numerical_cols, output_size, hidden_size, dropout)
        # One embedding table per (num_levels, width) pair.
        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
        self.embedding_dropout = nn.Dropout(dropout)
        self.batch_norm_num = nn.BatchNorm1d(num_numerical_cols)

    def forward(self, x_categorical, x_numerical):
        """Run the network.

        ``x_categorical`` is indexed as ``[:, i]`` for the i-th embedding, so it
        needs one column of integer codes per embedding table.
        """
        embedded_columns = [
            table(x_categorical[:, position])
            for position, table in enumerate(self.all_embeddings)
        ]
        categorical_features = self.embedding_dropout(torch.cat(embedded_columns, 1))
        numerical_features = self.batch_norm_num(x_numerical)
        combined = torch.cat([categorical_features, numerical_features], 1)
        return self.layers(combined)

In [6]:
def train_shallow_net(train_c_t,test_c_t,train_n_t,test_n_t,outputs_train,outputs_test, dropout, hidden_size, lr, n_iterations=999):
    """Train a fresh ``Sentinel_net`` on one fold and report its best test MAE.

    Every iteration is one full-batch RMSprop step on the training tensors
    (L1 loss), followed by an evaluation on the test tensors.

    Args:
        train_c_t, test_c_t: categorical code tensors for the train / test rows.
        train_n_t, test_n_t: numerical feature tensors; ``train_n_t.shape[1]``
            sets the number of numerical inputs of the model.
        outputs_train, outputs_test: regression targets (flattened to 1-D here).
        dropout, hidden_size: forwarded to ``Sentinel_net``.
        lr: RMSprop learning rate.
        n_iterations: number of optimisation steps; the default of 999 matches
            the previously hard-coded ``range(1, 1000)``.

    Returns:
        ``(best, r_value, std_err)``: the lowest test MAE seen over all
        iterations, and the ``rvalue`` / ``stderr`` that ``stats.linregress``
        reports for (targets, predictions) at that iteration. ``r_value`` is the
        Pearson correlation coefficient, not its square.

    NOTE(review): the best iteration is picked using the same test tensors the
    score is reported on, so the returned MAE is an optimistic estimate.
    """
    # Follow the device of the inputs instead of hard-coding .cuda(), so the
    # function also works when the caller keeps the tensors on the CPU.
    device = train_n_t.device
    model = Sentinel_net(categorical_embedding_sizes, train_n_t.shape[1], 1, dropout, hidden_size)
    model.to(device).train()

    criterion = torch.nn.L1Loss()

    # Weight decay only on the `weight` tensors inside `model.layers`;
    # embeddings, biases and the input batch-norm are left undecayed.
    params = []
    for key, value in model.named_parameters():
        decay = 4e-3 if ('weight' in key and 'layers' in key) else 0.0
        params.append({'params': value, 'weight_decay': decay})

    optimizer = torch.optim.RMSprop(params, lr=lr, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0.9, centered=False)

    best = float('inf')
    r_value = 0
    std_err = 0

    # Loop-invariant: flatten the targets once. reshape(-1) keeps a 1-D shape
    # even for a single-row fold, where squeeze() would collapse to 0-d.
    targets_train = outputs_train.reshape(-1)
    targets_test = outputs_test.detach().cpu().numpy().reshape(-1)

    for _ in range(n_iterations):
        y_pred = model(train_c_t, train_n_t)
        loss = criterion(y_pred.reshape(-1), targets_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Evaluate with dropout disabled and batch-norm running statistics.
        model.eval()
        with torch.no_grad():
            predictions_test = model(test_c_t, test_n_t).detach().cpu().numpy().reshape(-1)
        model.train()

        current_score = mean_absolute_error(targets_test, predictions_test)
        if current_score < best:
            best = current_score
            _, _, r_value, _, std_err = stats.linregress(targets_test, predictions_test)

    return best, r_value, std_err

In [7]:
# Grid search over dropout, hidden width and learning rate, scored by the
# mean best-fold MAE of a 5-fold cross-validation.
# The architecture trained here is whichever `create_neural_modules` /
# `Sentinel_net` definition was executed most recently in the kernel.
start_time = time.time()
learning = [0.01, 0.001, 0.0001]
best_score_k_fold = float('inf')

print('2 Layers Architecture')

# The fold assignment depends only on the fixed random_state, so the split and
# the GPU tensors are identical for every hyper-parameter combination: build
# them once instead of once per combination.
cv = KFold(n_splits=5, random_state=42, shuffle=True)
fold_tensors = []
for train_index, test_index in cv.split(sentinel_frame, None):
    train, test = sentinel_frame.iloc[train_index], sentinel_frame.iloc[test_index]
    # Tuple order matches the positional parameters of train_shallow_net.
    fold_tensors.append((
        get_categorical_tensor(train, categorical_columns).cuda(),
        get_categorical_tensor(test, categorical_columns).cuda(),
        get_numerical_tensor(train, numerical_columns).cuda(),
        get_numerical_tensor(test, numerical_columns).cuda(),
        torch.tensor(train['TOTAL'].values).flatten().type(torch.FloatTensor).cuda(),
        torch.tensor(test['TOTAL'].values).flatten().type(torch.FloatTensor).cuda(),
    ))

for dropout in range(0,7):
    dropout = round(dropout*.1, 2)  # 0.0, 0.1, ..., 0.6
    for hidden_size in range(50, 150, 10):
        for lr in learning:

            scores, r_values, std_errors = [], [], []
            for fold in fold_tensors:
                best_fold_score, best_r_value, best_std_error = train_shallow_net(*fold, dropout, hidden_size, lr)
                scores.append(best_fold_score)
                r_values.append(best_r_value)
                std_errors.append(best_std_error)

            current_score = sum(scores) / len(scores)
            # NOTE: the value printed after 'r2 = ' is the fold-mean of the
            # r_value returned by train_shallow_net (linregress's Pearson r),
            # not a squared quantity.
            print('MAE = ', current_score ,'r2 = ', sum(r_values) / len(r_values) ,'SE = ', sum(std_errors) / len(std_errors),'lr = ', lr,'hz = ', hidden_size, 'dr = ', dropout )
            if current_score < best_score_k_fold:
                best_score_k_fold = current_score
                print('Best so far', best_score_k_fold)
                print('')


print("--- %s seconds ---" % (time.time() - start_time))

2 Layers Architecture
MAE =  4.675465488433838 r2 =  0.2202575963967508 SE =  0.01361041044830646 lr =  0.01 hz =  50 dr =  0.0
Best so far 4.675465488433838

MAE =  4.666958475112915 r2 =  0.2498635211910182 SE =  0.014166555147078494 lr =  0.001 hz =  50 dr =  0.0
Best so far 4.666958475112915

MAE =  5.447975730895996 r2 =  0.10860419674363744 SE =  0.024088568740422228 lr =  0.0001 hz =  50 dr =  0.0
MAE =  4.704497003555298 r2 =  0.21948000008707597 SE =  0.010894023854708813 lr =  0.01 hz =  60 dr =  0.0
MAE =  4.713172197341919 r2 =  0.16265628708372304 SE =  0.00872462145594578 lr =  0.001 hz =  60 dr =  0.0
MAE =  5.507284355163574 r2 =  0.1880259723749256 SE =  0.028449723621964306 lr =  0.0001 hz =  60 dr =  0.0
MAE =  4.6987017631530765 r2 =  0.20151692952704847 SE =  0.008377163540463532 lr =  0.01 hz =  70 dr =  0.0
MAE =  4.665342855453491 r2 =  0.26763628204591794 SE =  0.011068169423470981 lr =  0.001 hz =  70 dr =  0.0
Best so far 4.665342855453491

MAE =  5.387070274

In [8]:
# Same definition as the Flatten declared in an earlier cell; running this cell
# rebinds the name.
class Flatten(torch.nn.Module):
    """Reshape an input to (first_dim, everything_else)."""

    def forward(self, x):
        rows = x.size(0)
        flattened = x.view(rows, -1)
        return flattened

# Same definition as the Swish declared in an earlier cell; running this cell
# rebinds the name.
class Swish(torch.autograd.Function):
    """Custom autograd function computing x * sigmoid(x)."""

    @staticmethod
    def forward(ctx, i):
        # Save the raw input; backward() rebuilds the sigmoid from it.
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        (x,) = ctx.saved_tensors
        sig = torch.sigmoid(x)
        # Derivative of x * sig(x) with respect to x.
        derivative = sig * (1 + x * (1 - sig))
        return grad_output * derivative
    
# Same definition as the SwishM declared in an earlier cell; running this cell
# rebinds the name.
class SwishM(nn.Module):
    """Layer form of the custom ``Swish`` autograd function."""

    def forward(self, input_tensor):
        result = Swish.apply(input_tensor)
        return result
        
def create_neural_modules(embedding_size, num_numerical_cols, output_size, hidden_size, dropout, num_hidden_layers=3):
    """Build the fully connected stack that follows the embedding/numeric concat.

    Args:
        embedding_size: iterable of ``(num_levels, embedding_width)`` pairs; only
            the widths are used here, to size the first linear layer.
        num_numerical_cols: number of continuous input features.
        output_size: width of the final linear layer.
        hidden_size: width of every hidden linear layer.
        dropout: drop probability used after every hidden block.
        num_hidden_layers: how many ``Linear -> LeakyReLU -> BatchNorm1d ->
            Dropout`` blocks to stack. Defaults to 3, the depth this cell
            originally hard-coded.

    Returns:
        ``nn.Sequential`` of the hidden blocks followed by ``Linear`` and
        ``ReLU`` (the ReLU clamps the network output to be non-negative).

    Raises:
        ValueError: if ``num_hidden_layers`` is negative.
    """
    if num_hidden_layers < 0:
        raise ValueError("num_hidden_layers must be >= 0, got %r" % (num_hidden_layers,))

    embedded_width = sum(nf for _, nf in embedding_size)
    in_features = embedded_width + num_numerical_cols

    all_layers = []
    for _ in range(num_hidden_layers):
        all_layers.append(nn.Linear(in_features, hidden_size))
        all_layers.append(nn.LeakyReLU())
        all_layers.append(nn.BatchNorm1d(hidden_size))
        all_layers.append(nn.Dropout(dropout))
        # Every block after the first maps hidden_size -> hidden_size.
        in_features = hidden_size

    all_layers.append(nn.Linear(in_features, output_size))
    all_layers.append(nn.ReLU())

    return nn.Sequential(*all_layers)

class Sentinel_net(nn.Module):
    """Embedding-based tabular regressor.

    Same structure as the Sentinel_net declared in an earlier cell. The dense
    stack comes from whichever ``create_neural_modules`` is bound when the model
    is instantiated.
    """

    def __init__(self, embedding_size, num_numerical_cols, output_size, dropout, hidden_size):
        super().__init__()
        # create_neural_modules expects hidden_size before dropout.
        self.layers = create_neural_modules(embedding_size, num_numerical_cols, output_size, hidden_size, dropout)
        # One embedding table per (num_levels, width) pair.
        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
        self.embedding_dropout = nn.Dropout(dropout)
        self.batch_norm_num = nn.BatchNorm1d(num_numerical_cols)

    def forward(self, x_categorical, x_numerical):
        """Embed column i of ``x_categorical`` with table i, join with the
        batch-normalised ``x_numerical`` and apply the dense stack."""
        pieces = []
        for column_index, embedding in enumerate(self.all_embeddings):
            column_codes = x_categorical[:, column_index]
            pieces.append(embedding(column_codes))

        embedded = self.embedding_dropout(torch.cat(pieces, 1))
        normalised = self.batch_norm_num(x_numerical)
        return self.layers(torch.cat([embedded, normalised], 1))

In [9]:
# Grid search over dropout, hidden width and learning rate, scored by the
# mean best-fold MAE of a 5-fold cross-validation.
# The architecture trained here is whichever `create_neural_modules` /
# `Sentinel_net` definition was executed most recently in the kernel.
start_time = time.time()
learning = [0.01, 0.001, 0.0001]
best_score_k_fold = float('inf')

print('3 Layers Architecture')

# The fold assignment depends only on the fixed random_state, so the split and
# the GPU tensors are identical for every hyper-parameter combination: build
# them once instead of once per combination.
cv = KFold(n_splits=5, random_state=42, shuffle=True)
fold_tensors = []
for train_index, test_index in cv.split(sentinel_frame, None):
    train, test = sentinel_frame.iloc[train_index], sentinel_frame.iloc[test_index]
    # Tuple order matches the positional parameters of train_shallow_net.
    fold_tensors.append((
        get_categorical_tensor(train, categorical_columns).cuda(),
        get_categorical_tensor(test, categorical_columns).cuda(),
        get_numerical_tensor(train, numerical_columns).cuda(),
        get_numerical_tensor(test, numerical_columns).cuda(),
        torch.tensor(train['TOTAL'].values).flatten().type(torch.FloatTensor).cuda(),
        torch.tensor(test['TOTAL'].values).flatten().type(torch.FloatTensor).cuda(),
    ))

for dropout in range(0,7):
    dropout = round(dropout*.1, 2)  # 0.0, 0.1, ..., 0.6
    for hidden_size in range(50, 150, 10):
        for lr in learning:

            scores, r_values, std_errors = [], [], []
            for fold in fold_tensors:
                best_fold_score, best_r_value, best_std_error = train_shallow_net(*fold, dropout, hidden_size, lr)
                scores.append(best_fold_score)
                r_values.append(best_r_value)
                std_errors.append(best_std_error)

            current_score = sum(scores) / len(scores)
            # NOTE: the value printed after 'r2 = ' is the fold-mean of the
            # r_value returned by train_shallow_net (linregress's Pearson r),
            # not a squared quantity.
            print('MAE = ', current_score ,'r2 = ', sum(r_values) / len(r_values) ,'SE = ', sum(std_errors) / len(std_errors),'lr = ', lr,'hz = ', hidden_size, 'dr = ', dropout )
            if current_score < best_score_k_fold:
                best_score_k_fold = current_score
                print('Best so far', best_score_k_fold)
                print('')


print("--- %s seconds ---" % (time.time() - start_time))

3 Layers Architecture
MAE =  4.662385082244873 r2 =  0.26037107690659955 SE =  0.014311930927390817 lr =  0.01 hz =  50 dr =  0.0
Best so far 4.662385082244873

MAE =  4.704654216766357 r2 =  0.20834722836473435 SE =  0.018428990887445425 lr =  0.001 hz =  50 dr =  0.0
MAE =  5.3623473167419435 r2 =  0.10126690359220691 SE =  0.02048493944936674 lr =  0.0001 hz =  50 dr =  0.0
MAE =  4.666802978515625 r2 =  0.22599642009113205 SE =  0.013225819092678536 lr =  0.01 hz =  60 dr =  0.0
MAE =  4.705280017852783 r2 =  0.15822436898462172 SE =  0.010515759996952073 lr =  0.001 hz =  60 dr =  0.0
MAE =  5.652919483184815 r2 =  0.10928041724108169 SE =  0.02460415925149831 lr =  0.0001 hz =  60 dr =  0.0
MAE =  4.680678653717041 r2 =  0.2451288445662593 SE =  0.010891915946372177 lr =  0.01 hz =  70 dr =  0.0
MAE =  4.715058946609497 r2 =  0.18159476367676192 SE =  0.01462568242961918 lr =  0.001 hz =  70 dr =  0.0
MAE =  5.361666870117188 r2 =  0.2095991010658714 SE =  0.02387607287624465 lr 

In [10]:
# Same definition as the Flatten declared in the earlier model cells; running
# this cell rebinds the name.
class Flatten(torch.nn.Module):
    """Flatten all trailing dimensions while preserving the first one."""

    def forward(self, x):
        first_dim = x.size(0)
        return x.view(first_dim, -1)

# Same definition as the Swish declared in the earlier model cells; running
# this cell rebinds the name.
class Swish(torch.autograd.Function):
    """x * sigmoid(x) with an explicit gradient formula."""

    @staticmethod
    def forward(ctx, i):
        # The input is all that backward() needs.
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        (inp,) = ctx.saved_tensors
        sigmoid_inp = torch.sigmoid(inp)
        # s(x) * (1 + x * (1 - s(x))) is the derivative of x * s(x).
        slope = sigmoid_inp * (1 + inp * (1 - sigmoid_inp))
        return grad_output * slope
    
# Same definition as the SwishM declared in the earlier model cells; running
# this cell rebinds the name.
class SwishM(nn.Module):
    """Module that forwards its input through ``Swish.apply``."""

    def forward(self, input_tensor):
        swished = Swish.apply(input_tensor)
        return swished
        
def create_neural_modules(embedding_size, num_numerical_cols, output_size, hidden_size, dropout, num_hidden_layers=4):
    """Build the fully connected stack that follows the embedding/numeric concat.

    Args:
        embedding_size: iterable of ``(num_levels, embedding_width)`` pairs; only
            the widths are used here, to size the first linear layer.
        num_numerical_cols: number of continuous input features.
        output_size: width of the final linear layer.
        hidden_size: width of every hidden linear layer.
        dropout: drop probability used after every hidden block.
        num_hidden_layers: how many ``Linear -> LeakyReLU -> BatchNorm1d ->
            Dropout`` blocks to stack. Defaults to 4, the depth this cell
            originally hard-coded.

    Returns:
        ``nn.Sequential`` of the hidden blocks followed by ``Linear`` and
        ``ReLU`` (the ReLU clamps the network output to be non-negative).

    Raises:
        ValueError: if ``num_hidden_layers`` is negative.
    """
    if num_hidden_layers < 0:
        raise ValueError("num_hidden_layers must be >= 0, got %r" % (num_hidden_layers,))

    embedded_width = sum(nf for _, nf in embedding_size)
    in_features = embedded_width + num_numerical_cols

    all_layers = []
    for _ in range(num_hidden_layers):
        all_layers.append(nn.Linear(in_features, hidden_size))
        all_layers.append(nn.LeakyReLU())
        all_layers.append(nn.BatchNorm1d(hidden_size))
        all_layers.append(nn.Dropout(dropout))
        # Every block after the first maps hidden_size -> hidden_size.
        in_features = hidden_size

    all_layers.append(nn.Linear(in_features, output_size))
    all_layers.append(nn.ReLU())

    return nn.Sequential(*all_layers)

class Sentinel_net(nn.Module):
    """Embedding-based tabular regressor.

    Same structure as the Sentinel_net declared in the earlier model cells. The
    dense stack comes from whichever ``create_neural_modules`` is bound when the
    model is instantiated.
    """

    def __init__(self, embedding_size, num_numerical_cols, output_size, dropout, hidden_size):
        super().__init__()
        # create_neural_modules expects hidden_size before dropout.
        self.layers = create_neural_modules(embedding_size, num_numerical_cols, output_size, hidden_size, dropout)
        # One embedding table per (num_levels, width) pair.
        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
        self.embedding_dropout = nn.Dropout(dropout)
        self.batch_norm_num = nn.BatchNorm1d(num_numerical_cols)

    def forward(self, x_categorical, x_numerical):
        """Embed column i of ``x_categorical`` with table i, join with the
        batch-normalised ``x_numerical`` and apply the dense stack."""
        per_column = [
            lookup(x_categorical[:, col])
            for col, lookup in enumerate(self.all_embeddings)
        ]
        dropped = self.embedding_dropout(torch.cat(per_column, 1))
        scaled_numeric = self.batch_norm_num(x_numerical)
        joined = torch.cat([dropped, scaled_numeric], 1)
        return self.layers(joined)

In [11]:
# Grid search over dropout, hidden width and learning rate, scored by the
# mean best-fold MAE of a 5-fold cross-validation.
# The architecture trained here is whichever `create_neural_modules` /
# `Sentinel_net` definition was executed most recently in the kernel.
start_time = time.time()
learning = [0.01, 0.001, 0.0001]
best_score_k_fold = float('inf')

print('4 Layers Architecture')

# The fold assignment depends only on the fixed random_state, so the split and
# the GPU tensors are identical for every hyper-parameter combination: build
# them once instead of once per combination.
cv = KFold(n_splits=5, random_state=42, shuffle=True)
fold_tensors = []
for train_index, test_index in cv.split(sentinel_frame, None):
    train, test = sentinel_frame.iloc[train_index], sentinel_frame.iloc[test_index]
    # Tuple order matches the positional parameters of train_shallow_net.
    fold_tensors.append((
        get_categorical_tensor(train, categorical_columns).cuda(),
        get_categorical_tensor(test, categorical_columns).cuda(),
        get_numerical_tensor(train, numerical_columns).cuda(),
        get_numerical_tensor(test, numerical_columns).cuda(),
        torch.tensor(train['TOTAL'].values).flatten().type(torch.FloatTensor).cuda(),
        torch.tensor(test['TOTAL'].values).flatten().type(torch.FloatTensor).cuda(),
    ))

for dropout in range(0,7):
    dropout = round(dropout*.1, 2)  # 0.0, 0.1, ..., 0.6
    for hidden_size in range(50, 150, 10):
        for lr in learning:

            scores, r_values, std_errors = [], [], []
            for fold in fold_tensors:
                best_fold_score, best_r_value, best_std_error = train_shallow_net(*fold, dropout, hidden_size, lr)
                scores.append(best_fold_score)
                r_values.append(best_r_value)
                std_errors.append(best_std_error)

            current_score = sum(scores) / len(scores)
            # NOTE: the value printed after 'r2 = ' is the fold-mean of the
            # r_value returned by train_shallow_net (linregress's Pearson r),
            # not a squared quantity.
            print('MAE = ', current_score ,'r2 = ', sum(r_values) / len(r_values) ,'SE = ', sum(std_errors) / len(std_errors),'lr = ', lr,'hz = ', hidden_size, 'dr = ', dropout )
            if current_score < best_score_k_fold:
                best_score_k_fold = current_score
                print('Best so far', best_score_k_fold)
                print('')


print("--- %s seconds ---" % (time.time() - start_time))

4 Layers Architecture
MAE =  4.684787654876709 r2 =  0.20281117925114298 SE =  0.015242587960180912 lr =  0.01 hz =  50 dr =  0.0
Best so far 4.684787654876709

MAE =  4.7157501697540285 r2 =  0.25844242334457135 SE =  0.017926818129159153 lr =  0.001 hz =  50 dr =  0.0
MAE =  5.685319328308106 r2 =  0.1794386864898869 SE =  0.02064169395478934 lr =  0.0001 hz =  50 dr =  0.0
MAE =  4.663125658035279 r2 =  0.17682918530626685 SE =  0.009080748131348133 lr =  0.01 hz =  60 dr =  0.0
Best so far 4.663125658035279

MAE =  4.668560409545899 r2 =  0.22013300454559648 SE =  0.01896010859174938 lr =  0.001 hz =  60 dr =  0.0
MAE =  5.580199337005615 r2 =  0.06676590532127159 SE =  0.02208979367918332 lr =  0.0001 hz =  60 dr =  0.0
MAE =  4.666135978698731 r2 =  0.1951466136447661 SE =  0.013832278731686982 lr =  0.01 hz =  70 dr =  0.0
MAE =  4.631529140472412 r2 =  0.22566240488170078 SE =  0.012960241321310711 lr =  0.001 hz =  70 dr =  0.0
Best so far 4.631529140472412

MAE =  5.526603794