In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; the dataset CSV
# read later in this notebook lives under that mount point.
# force_remount=True is passed so that re-running this cell mounts again
# (behaviour per the Colab drive helper -- confirm against its documentation).
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import torch.utils.data
from torch import nn, optim
import torch
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
import time
from scipy import stats
import sys

In [None]:
# Columns converted to pandas category dtype and fed to the network through
# one embedding table each. The order here fixes the column order of the
# categorical tensor.
categorical_columns = [
    'TRAPTYPE',
    'ATTRACTANTSUSED',
    'SETTIMEOFDAY',
    'COLLECTTIMEOFDAY',
    'GENUS',
    'SPECIES',
    'TRAPID',
    'TRAPSITE',
]

# Columns fed to the network as continuous inputs, in this order.
numerical_columns = [
    'LATITUDE',
    'LONGITUDE',
    'TRAPSET',
    'YEAR',
    'TRAPCOLLECT',
    'DIFF_DAYS',
]

In [None]:
# Load the cleaned trap records from the mounted Google Drive.
sentinel_frame = pd.read_csv('/content/drive/My Drive/BerkeleyResults/Approach1/original_mosquito_clean_parameters.csv')

# Parse both trap dates and derive the trap duration in whole days before the
# dates themselves are reduced to week numbers.
sentinel_frame["TRAPSET"] = pd.to_datetime(sentinel_frame["TRAPSET"])
sentinel_frame["TRAPCOLLECT"] = pd.to_datetime(sentinel_frame["TRAPCOLLECT"])
sentinel_frame["DIFF_DAYS"] = (sentinel_frame["TRAPCOLLECT"] - sentinel_frame["TRAPSET"]).dt.days

# `Series.dt.week` was deprecated in pandas 1.1 and removed in pandas 2.0;
# `dt.isocalendar().week` gives the same ISO week number. It comes back as a
# nullable integer column, so it is cast to float64 (missing dates become NaN)
# to keep a plain NumPy dtype that `torch.tensor` can consume later.
sentinel_frame["TRAPSET"] = sentinel_frame["TRAPSET"].dt.isocalendar().week.astype('float64')
sentinel_frame["TRAPCOLLECT"] = sentinel_frame["TRAPCOLLECT"].dt.isocalendar().week.astype('float64')

# Category dtype gives every categorical column stable integer codes.
for category in categorical_columns:
    sentinel_frame[category] = sentinel_frame[category].astype('category')

# One (number of categories, embedding width) pair per categorical column; the
# width is half the number of categories (rounded up), capped at 50.
categorical_column_sizes = [len(sentinel_frame[column].cat.categories) for column in categorical_columns]
categorical_embedding_sizes = [(col_size, min(50, (col_size+1)//2)) for col_size in categorical_column_sizes]

# Mean-centre every numerical column and scale it by its range.
# NOTE(review): the statistics are computed on the full frame, i.e. before any
# train/test split is made.
for col in numerical_columns:
    col_values = sentinel_frame[col]
    col_range = col_values.max() - col_values.min()
    if col_range == 0:
        # A constant column would otherwise become 0/0 = NaN and poison
        # training; with a unit divisor it is simply centred to 0.
        col_range = 1
    sentinel_frame[col] = (col_values - col_values.mean()) / col_range

def get_categorical_tensor(in_frame, categories):
    """Return the category codes of the requested columns as an int64 tensor.

    Args:
        in_frame: DataFrame whose columns named in `categories` have pandas
            category dtype (their `.cat.codes` are read).
        categories: iterable of column names; their order becomes the column
            order of the result.

    Returns:
        A `torch.int64` tensor with one row per frame row and one column per
        requested categorical column.
    """
    code_columns = []
    for column_name in categories:
        code_columns.append(in_frame[column_name].cat.codes.values)

    stacked_codes = np.stack(code_columns, 1)
    return torch.tensor(stacked_codes, dtype=torch.int64)

def get_numerical_tensor(in_frame, numerical_columns):
    """Return the requested numerical columns as a float32 tensor.

    Args:
        in_frame: DataFrame holding the columns.
        numerical_columns: iterable of column names; their order becomes the
            column order of the result.

    Returns:
        A `torch.float` tensor with one row per frame row and one column per
        requested column.
    """
    value_columns = []
    for column_name in numerical_columns:
        value_columns.append(in_frame[column_name].values)

    return torch.tensor(np.stack(value_columns, 1), dtype=torch.float)

# Print the frame's column names as a quick check of what was loaded and derived above.
print(sentinel_frame.columns)

Index(['X', 'Y', 'OBJECTID', 'TRAPTYPE', 'ATTRACTANTSUSED', 'TRAPID',
       'LATITUDE', 'LONGITUDE', 'ADDRESS', 'TOWN', 'STATE', 'COUNTY',
       'TRAPSITE', 'TRAPSET', 'SETTIMEOFDAY', 'TRAPCOLLECT', 'YEAR',
       'COLLECTTIMEOFDAY', 'GENUS', 'SPECIES', 'TOTAL', 'DIFF_DAYS'],
      dtype='object')


In [None]:
class Flatten(torch.nn.Module):
    """Module that collapses every dimension after the first into one."""

    def forward(self, x):
        """Return `x` viewed as (x.size(0), -1)."""
        leading_dim = x.size(0)
        return x.view(leading_dim, -1)

class Swish(torch.autograd.Function):
    """Autograd function computing `i * sigmoid(i)` with a hand-written gradient."""

    @staticmethod
    def forward(ctx, i):
        # Only the input is stored; backward recomputes the sigmoid from it.
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        (saved_input,) = ctx.saved_tensors
        gate = torch.sigmoid(saved_input)
        # d/di [i * sigmoid(i)] = sigmoid(i) * (1 + i * (1 - sigmoid(i)))
        local_grad = gate * (1 + saved_input * (1 - gate))
        return grad_output * local_grad
    
class SwishM(nn.Module):
    """`nn.Module` wrapper that applies the `Swish` autograd function."""

    def forward(self, input_tensor):
        """Return `Swish.apply(input_tensor)`."""
        activated = Swish.apply(input_tensor)
        return activated
        
def create_neural_modules(embedding_size, num_numerical_cols, output_size, hidden_size, dropout):
    """Build the dense stack that sits on top of the embeddings.

    The stack is two blocks of Linear -> LeakyReLU -> BatchNorm1d -> Dropout,
    followed by a Linear output layer and a ReLU.

    Args:
        embedding_size: iterable of (num_categories, embedding_width) pairs;
            only the widths are used here, to size the first Linear layer.
        num_numerical_cols: number of numerical input features.
        output_size: width of the final Linear layer.
        hidden_size: width of both hidden blocks.
        dropout: dropout probability used in both hidden blocks.

    Returns:
        An `nn.Sequential` containing the layers in the order above.
    """
    num_categorical_cols = sum(width for _, width in embedding_size)
    print(num_categorical_cols)
    print(num_numerical_cols)

    stack = []
    in_features = num_categorical_cols + num_numerical_cols
    # Two identical hidden blocks; only the first one sees the raw input width.
    for _ in range(2):
        stack.extend([
            nn.Linear(in_features, hidden_size),
            nn.LeakyReLU(),
            nn.BatchNorm1d(hidden_size),
            nn.Dropout(dropout),
        ])
        in_features = hidden_size

    stack.extend([nn.Linear(hidden_size, output_size), nn.ReLU()])
    return nn.Sequential(*stack)

class Sentinel_net(nn.Module):
    """Network for mixed tabular input.

    Each categorical column goes through its own embedding table, the
    numerical columns go through a BatchNorm1d, and the concatenation of both
    is passed to the dense stack built by `create_neural_modules`.
    """

    def __init__(self, embedding_size, num_numerical_cols, output_size, dropout, hidden_size):
        """Create the sub-modules.

        Args:
            embedding_size: iterable of (num_categories, embedding_width)
                pairs, one per categorical column.
            num_numerical_cols: number of numerical input features.
            output_size: width of the network output.
            dropout: dropout probability, used for the embeddings and passed
                on to the dense stack.
            hidden_size: hidden width passed on to the dense stack.
        """
        super().__init__()
        # The dense stack is created before the embeddings, as in the original
        # definition, so parameter order and RNG consumption stay the same.
        self.layers = create_neural_modules(embedding_size, num_numerical_cols, output_size, hidden_size, dropout)
        embedding_tables = [
            nn.Embedding(num_categories, width)
            for num_categories, width in embedding_size
        ]
        self.all_embeddings = nn.ModuleList(embedding_tables)
        self.embedding_dropout = nn.Dropout(dropout)
        self.batch_norm_num = nn.BatchNorm1d(num_numerical_cols)

    def forward(self, x_categorical, x_numerical):
        """Run the network.

        Column `k` of `x_categorical` is looked up in the `k`-th embedding
        table; `x_numerical` is batch-normalized and appended to the
        (dropped-out) embeddings before the dense stack is applied.
        """
        embedded_columns = [
            table(x_categorical[:, column_index])
            for column_index, table in enumerate(self.all_embeddings)
        ]
        embedded = self.embedding_dropout(torch.cat(embedded_columns, 1))
        normalized_numerical = self.batch_norm_num(x_numerical)
        combined = torch.cat([embedded, normalized_numerical], 1)
        return self.layers(combined)

In [None]:
def train_shallow_net(train_c_t,test_c_t,train_n_t,test_n_t,outputs_train,outputs_test, dropout, hidden_size, lr, num_epochs=999):
    """Train a fresh `Sentinel_net` full-batch and report its best held-out MAE.

    Every epoch performs one optimizer step on the whole training set and then
    scores the model on the test tensors in eval mode.

    Args:
        train_c_t, test_c_t: categorical code tensors (train / test).
        train_n_t, test_n_t: numerical feature tensors (train / test); the
            number of columns of `train_n_t` sizes the network input.
        outputs_train, outputs_test: 1-D target tensors (train / test).
        dropout: dropout probability passed to the network.
        hidden_size: hidden width passed to the network.
        lr: learning rate for RMSprop.
        num_epochs: number of full-batch training steps. The default (999)
            matches the previously hard-coded `range(1, 1000)`.

    Returns:
        `(best, r_value, std_err)`: the lowest test-set mean absolute error
        seen over all epochs, and the Pearson correlation coefficient and
        standard error from `scipy.stats.linregress(targets, predictions)` at
        that epoch.

    NOTE(review): the best epoch is chosen on the same test tensors that are
    reported, so the returned MAE is an optimistic estimate.
    """
    # Put the model wherever the data already lives instead of hard-coding
    # CUDA, so the function also works on CPU-only runtimes.
    device = train_n_t.device
    model = Sentinel_net(categorical_embedding_sizes, train_n_t.shape[1], 1, dropout, hidden_size)
    model.to(device).train()

    criterion = torch.nn.MSELoss(reduction='mean')
    best = sys.maxsize
    r_value = 0
    std_err = 0

    # Weight decay is applied only to parameters of the dense stack whose name
    # contains "weight"; every other parameter is left undecayed.
    params = []
    for key, value in model.named_parameters():
        decay = 4e-3 if ('weight' in key and 'layers' in key) else 0.0
        params.append({'params': value, 'weight_decay': decay})

    optimizer = torch.optim.RMSprop(params, lr=lr, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0.9, centered=False)

    # The test targets never change, so convert them to NumPy once.
    c_out = outputs_test.cpu().detach().numpy()

    for _ in range(num_epochs):
        # The network emits shape (N, 1); squeeze only that axis so a
        # single-row batch keeps its batch dimension.
        y_pred = model(train_c_t, train_n_t)
        loss = criterion(y_pred.squeeze(1), outputs_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Score on the held-out tensors with dropout/batch-norm in eval mode.
        model.eval()
        with torch.no_grad():
            c_pred = model(test_c_t, test_n_t).squeeze(1).cpu().numpy()
        current_score = mean_absolute_error(c_out, c_pred)
        if current_score < best:
            best = current_score
            _, _, r_value, _, std_err = stats.linregress(c_out, c_pred)
        model.train()

    return best, r_value, std_err

In [None]:
# Grid search over dropout, hidden width and learning rate, scored by 5-fold
# cross-validated MAE from train_shallow_net.
start_time = time.time()
learning = [0.01, 0.001, 0.0001]
best_score_k_fold = sys.maxsize

# NOTE(review): this label is the only difference from the other grid-search
# cells in this notebook. The depth actually trained is whatever
# create_neural_modules builds when this cell runs -- confirm it matches.
print('4 Layers Architecture')

# Use the GPU when one is available instead of hard-coding .cuda().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The shuffled split is seeded, so every hyper-parameter combination sees the
# same folds: build the fold tensors once instead of once per combination.
cv = KFold(n_splits=5, random_state=42, shuffle=True)
fold_tensors = []
for train_index, test_index in cv.split(sentinel_frame, None):
    train, test = sentinel_frame.iloc[train_index], sentinel_frame.iloc[test_index]
    fold_tensors.append((
        get_categorical_tensor(train, categorical_columns).to(device),
        get_categorical_tensor(test, categorical_columns).to(device),
        get_numerical_tensor(train, numerical_columns).to(device),
        get_numerical_tensor(test, numerical_columns).to(device),
        torch.tensor(train['TOTAL'].values).flatten().type(torch.FloatTensor).to(device),
        torch.tensor(test['TOTAL'].values).flatten().type(torch.FloatTensor).to(device),
    ))

for dropout_step in range(0,7):
    dropout = round(dropout_step*.1, 2)  # 0.0, 0.1, ..., 0.6
    for hidden_size in range(10, 100, 10):
        for lr in learning:

            scores, r_values, std_errors = [], [], []
            for train_c_t, test_c_t, train_n_t, test_n_t, outputs_train, outputs_test in fold_tensors:
                best_fold_score, best_r_value, best_std_error = train_shallow_net(train_c_t,test_c_t,train_n_t,test_n_t,outputs_train,outputs_test, dropout, hidden_size, lr)
                scores.append(best_fold_score)
                r_values.append(best_r_value)
                std_errors.append(best_std_error)

            current_score = sum(scores) / len(scores)
            # The second value is the fold-averaged Pearson r returned by
            # scipy.stats.linregress, not r squared, so it is labelled "r".
            print('MAE = ', current_score ,'r = ', sum(r_values) / len(r_values) ,'SE = ', sum(std_errors) / len(std_errors),'lr = ', lr,'hz = ', hidden_size, 'dr = ', dropout )
            if current_score < best_score_k_fold:
                best_score_k_fold = current_score
                print('Best so far', best_score_k_fold)
                print('')


print("--- %s seconds ---" % (time.time() - start_time))

4 Layers Architecture
MAE =  10.090110206604004 r2 =  0.5355006473834596 SE =  0.01895487531484183 lr =  0.01 hz =  10 dr =  0.0
Best so far 10.090110206604004

MAE =  10.258098697662353 r2 =  0.563056071966874 SE =  0.02211415881408705 lr =  0.001 hz =  10 dr =  0.0
MAE =  10.63067569732666 r2 =  0.4724715813853452 SE =  0.023361536381824026 lr =  0.0001 hz =  10 dr =  0.0
MAE =  9.897272777557372 r2 =  0.5178666779094655 SE =  0.01900470836034101 lr =  0.01 hz =  20 dr =  0.0
Best so far 9.897272777557372

MAE =  10.396403694152832 r2 =  0.4928121059469154 SE =  0.026408359486912038 lr =  0.001 hz =  20 dr =  0.0
MAE =  10.925870323181153 r2 =  0.49370039750092387 SE =  0.027779412905069568 lr =  0.0001 hz =  20 dr =  0.0
MAE =  9.751920318603515 r2 =  0.5891073957216352 SE =  0.027389015093601053 lr =  0.01 hz =  30 dr =  0.0
Best so far 9.751920318603515

MAE =  10.283277702331542 r2 =  0.5410975514459084 SE =  0.025967564983611535 lr =  0.001 hz =  30 dr =  0.0
MAE =  11.068619537

In [None]:
# Grid search over dropout, hidden width and learning rate, scored by 5-fold
# cross-validated MAE from train_shallow_net.
start_time = time.time()
learning = [0.01, 0.001, 0.0001]
best_score_k_fold = sys.maxsize

# NOTE(review): this label is the only difference from the other grid-search
# cells in this notebook. The depth actually trained is whatever
# create_neural_modules builds when this cell runs -- confirm it matches.
print('3 Layers Architecture')

# Use the GPU when one is available instead of hard-coding .cuda().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The shuffled split is seeded, so every hyper-parameter combination sees the
# same folds: build the fold tensors once instead of once per combination.
cv = KFold(n_splits=5, random_state=42, shuffle=True)
fold_tensors = []
for train_index, test_index in cv.split(sentinel_frame, None):
    train, test = sentinel_frame.iloc[train_index], sentinel_frame.iloc[test_index]
    fold_tensors.append((
        get_categorical_tensor(train, categorical_columns).to(device),
        get_categorical_tensor(test, categorical_columns).to(device),
        get_numerical_tensor(train, numerical_columns).to(device),
        get_numerical_tensor(test, numerical_columns).to(device),
        torch.tensor(train['TOTAL'].values).flatten().type(torch.FloatTensor).to(device),
        torch.tensor(test['TOTAL'].values).flatten().type(torch.FloatTensor).to(device),
    ))

for dropout_step in range(0,7):
    dropout = round(dropout_step*.1, 2)  # 0.0, 0.1, ..., 0.6
    for hidden_size in range(10, 100, 10):
        for lr in learning:

            scores, r_values, std_errors = [], [], []
            for train_c_t, test_c_t, train_n_t, test_n_t, outputs_train, outputs_test in fold_tensors:
                best_fold_score, best_r_value, best_std_error = train_shallow_net(train_c_t,test_c_t,train_n_t,test_n_t,outputs_train,outputs_test, dropout, hidden_size, lr)
                scores.append(best_fold_score)
                r_values.append(best_r_value)
                std_errors.append(best_std_error)

            current_score = sum(scores) / len(scores)
            # The second value is the fold-averaged Pearson r returned by
            # scipy.stats.linregress, not r squared, so it is labelled "r".
            print('MAE = ', current_score ,'r = ', sum(r_values) / len(r_values) ,'SE = ', sum(std_errors) / len(std_errors),'lr = ', lr,'hz = ', hidden_size, 'dr = ', dropout )
            if current_score < best_score_k_fold:
                best_score_k_fold = current_score
                print('Best so far', best_score_k_fold)
                print('')


print("--- %s seconds ---" % (time.time() - start_time))

3 Layers Architecture
MAE =  10.322676658630371 r2 =  0.5092701764971895 SE =  0.028076377387916778 lr =  0.01 hz =  10 dr =  0.0
Best so far 10.322676658630371

MAE =  10.387911224365235 r2 =  0.5255053358765919 SE =  0.021793538536944344 lr =  0.001 hz =  10 dr =  0.0
MAE =  11.042734527587891 r2 =  0.45942951510301244 SE =  0.0203738847880801 lr =  0.0001 hz =  10 dr =  0.0
MAE =  10.72754135131836 r2 =  0.5747949399956335 SE =  0.03468675843528591 lr =  0.01 hz =  20 dr =  0.0
MAE =  10.484874725341797 r2 =  0.5299155562274198 SE =  0.022869531718739015 lr =  0.001 hz =  20 dr =  0.0
MAE =  10.951271247863769 r2 =  0.5085522055476033 SE =  0.026668962388208556 lr =  0.0001 hz =  20 dr =  0.0
MAE =  10.264885711669923 r2 =  0.5432391884447447 SE =  0.029815834832183218 lr =  0.01 hz =  30 dr =  0.0
Best so far 10.264885711669923

MAE =  10.371103477478027 r2 =  0.5163973572409184 SE =  0.026766716686468545 lr =  0.001 hz =  30 dr =  0.0
MAE =  11.292121315002442 r2 =  0.534485772507

In [None]:
# Grid search over dropout, hidden width and learning rate, scored by 5-fold
# cross-validated MAE from train_shallow_net.
start_time = time.time()
learning = [0.01, 0.001, 0.0001]
best_score_k_fold = sys.maxsize

# NOTE(review): this label is the only difference from the other grid-search
# cells in this notebook. The depth actually trained is whatever
# create_neural_modules builds when this cell runs -- confirm it matches.
print('2 Layers Architecture')

# Use the GPU when one is available instead of hard-coding .cuda().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The shuffled split is seeded, so every hyper-parameter combination sees the
# same folds: build the fold tensors once instead of once per combination.
cv = KFold(n_splits=5, random_state=42, shuffle=True)
fold_tensors = []
for train_index, test_index in cv.split(sentinel_frame, None):
    train, test = sentinel_frame.iloc[train_index], sentinel_frame.iloc[test_index]
    fold_tensors.append((
        get_categorical_tensor(train, categorical_columns).to(device),
        get_categorical_tensor(test, categorical_columns).to(device),
        get_numerical_tensor(train, numerical_columns).to(device),
        get_numerical_tensor(test, numerical_columns).to(device),
        torch.tensor(train['TOTAL'].values).flatten().type(torch.FloatTensor).to(device),
        torch.tensor(test['TOTAL'].values).flatten().type(torch.FloatTensor).to(device),
    ))

for dropout_step in range(0,7):
    dropout = round(dropout_step*.1, 2)  # 0.0, 0.1, ..., 0.6
    for hidden_size in range(10, 100, 10):
        for lr in learning:

            scores, r_values, std_errors = [], [], []
            for train_c_t, test_c_t, train_n_t, test_n_t, outputs_train, outputs_test in fold_tensors:
                best_fold_score, best_r_value, best_std_error = train_shallow_net(train_c_t,test_c_t,train_n_t,test_n_t,outputs_train,outputs_test, dropout, hidden_size, lr)
                scores.append(best_fold_score)
                r_values.append(best_r_value)
                std_errors.append(best_std_error)

            current_score = sum(scores) / len(scores)
            # The second value is the fold-averaged Pearson r returned by
            # scipy.stats.linregress, not r squared, so it is labelled "r".
            print('MAE = ', current_score ,'r = ', sum(r_values) / len(r_values) ,'SE = ', sum(std_errors) / len(std_errors),'lr = ', lr,'hz = ', hidden_size, 'dr = ', dropout )
            if current_score < best_score_k_fold:
                best_score_k_fold = current_score
                print('Best so far', best_score_k_fold)
                print('')


print("--- %s seconds ---" % (time.time() - start_time))

2 Layers Architecture
MAE =  10.63724308013916 r2 =  0.5133541121677232 SE =  0.025910139985497904 lr =  0.01 hz =  10 dr =  0.0
Best so far 10.63724308013916

MAE =  10.823487091064454 r2 =  0.4971609963838426 SE =  0.02183756187619381 lr =  0.001 hz =  10 dr =  0.0
MAE =  10.687663459777832 r2 =  0.484497416810081 SE =  0.020811302714191036 lr =  0.0001 hz =  10 dr =  0.0
MAE =  10.65988712310791 r2 =  0.5787449947384234 SE =  0.028304803130046567 lr =  0.01 hz =  20 dr =  0.0
MAE =  10.780644416809082 r2 =  0.5560185780566743 SE =  0.028313090847573087 lr =  0.001 hz =  20 dr =  0.0
MAE =  10.63439655303955 r2 =  0.538799595365371 SE =  0.022196101517119374 lr =  0.0001 hz =  20 dr =  0.0
Best so far 10.63439655303955

MAE =  10.728522109985352 r2 =  0.5103326154165904 SE =  0.027908196735550738 lr =  0.01 hz =  30 dr =  0.0
MAE =  10.803426361083984 r2 =  0.4975163295819775 SE =  0.02449744767018454 lr =  0.001 hz =  30 dr =  0.0
MAE =  11.125982666015625 r2 =  0.49383307304248686 