In [1]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
%load_ext autoreload
%autoreload 2

In [39]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import h5py
import torch
import torch.nn as nn

from collections import OrderedDict
from torch.utils.data import TensorDataset, DataLoader
from torch.autograd import Variable

sys.path.insert(0, '../train/')
from models import TimeSeriesFCN

## Define some functions that we will be using

In [21]:
def load_data_as_tensor_datasets(file_path, shuffle_data=False, random_seed=42,
                                 n_samples=10):
    """
    Load inputs and labels from an HDF file into a single TensorDataset.

    Args:
        file_path: Path to an HDF file containing the datasets 'x'
            (model inputs) and 'y_true' (labels).
        shuffle_data: If True, the samples are shuffled before the dataset
            is built. Inputs and labels use the same permutation.
        random_seed: Seed for numpy's global random number generator. It is
            set on every call, which makes the shuffling reproducible.
        n_samples: Number of leading samples to read from the file. The
            default of 10 is the value that used to be hard-coded; pass
            None to read everything.

    Returns:
        A TensorDataset of (x, y) pairs, both converted to float tensors.
    """

    # Set the seed for the random number generator
    np.random.seed(random_seed)

    # Read in the data from the HDF file. Slicing the h5py datasets directly
    # reads only the requested samples from disk instead of loading the
    # whole file and throwing most of it away.
    with h5py.File(file_path, 'r') as file:
        x = np.array(file['x'][:n_samples])
        y = np.array(file['y_true'][:n_samples])

    # Shuffle inputs and labels with one shared permutation so that the
    # (x, y) pairs stay aligned
    if shuffle_data:
        permutation = np.random.permutation(len(x))
        x, y = x[permutation], y[permutation]

    # Convert to torch Tensors
    x = torch.from_numpy(x).float()
    y = torch.from_numpy(y).float()

    # Bundle inputs and labels into one TensorDataset
    tensor_dataset = TensorDataset(x, y)

    return tensor_dataset



def apply_model(model, data_loader, as_numpy=False, verbose=True):
    """
    Run a model over all mini-batches of a data loader and collect the
    predictions.

    Args:
        model: The network to evaluate (a callable; usually an nn.Module).
        data_loader: Iterable yielding (inputs, labels) mini-batches. The
            labels are ignored.
        as_numpy: If True, return the predictions as a numpy array instead
            of a torch tensor.
        verbose: If True (default), print the index of every mini-batch
            as it is processed.

    Returns:
        The concatenated float predictions for all mini-batches, reshaped
        to (number of samples, size of the last output dimension).
    """

    # Run on the device the model lives on. Only if the model exposes no
    # parameters do we fall back to "use CUDA whenever it is available".
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Inference must not use training-time behavior (dropout, batch norm
    # statistics), so switch to eval mode and restore the old mode afterwards
    was_training = getattr(model, 'training', False)
    if was_training:
        model.eval()

    # Initialize an empty list for our predictions
    y_pred = []

    try:
        # torch.no_grad() replaces the removed `volatile=True` flag: no
        # autograd graph is built, which keeps the memory footprint small
        with torch.no_grad():

            # Loop over the data (in mini-batches) to get the predictions
            for mb_idx, mb_data in enumerate(data_loader):

                if verbose:
                    print(mb_idx)

                # Only the inputs are needed to make predictions
                inputs, _ = mb_data
                inputs = inputs.to(device)

                # Make predictions for the given mini-batch and drop the
                # intermediate dimensions
                outputs = model(inputs)
                outputs = outputs.reshape(outputs.size(0), outputs.size(-1))

                # Stack that onto the previous predictions
                y_pred.append(outputs)
    finally:
        if was_training:
            model.train()

    # Concatenate the list of tensors once at the end (this is faster than
    # concatenating all intermediate results) and make sure results are float
    y_pred = torch.cat(y_pred, dim=0).float()

    # If necessary, convert model outputs to numpy array
    if as_numpy:
        y_pred = y_pred.cpu().numpy()

    return y_pred

## Load data into DataLoaders

In [47]:
# One predictions file per range (the range is encoded in the file name)
file_path_0100_0300 = '../data/predictions/timeseries/baseline/predictions_GW170104_0100_0300_8k.h5'
file_path_0250_0500 = '../data/predictions/timeseries/baseline/predictions_GW170104_0250_0500_8k.h5'
file_path_0400_0800 = '../data/predictions/timeseries/baseline/predictions_GW170104_0400_0800_8k.h5'
file_path_0700_1200 = '../data/predictions/timeseries/baseline/predictions_GW170104_0700_1200_8k.h5'

# Read every file into a TensorDataset of (inputs, labels)
datatensor_0100_0300 = load_data_as_tensor_datasets(file_path_0100_0300)
datatensor_0250_0500 = load_data_as_tensor_datasets(file_path_0250_0500)
datatensor_0400_0800 = load_data_as_tensor_datasets(file_path_0400_0800)
datatensor_0700_1200 = load_data_as_tensor_datasets(file_path_0700_1200)

# Wrap the datasets in (unshuffled) DataLoaders so that the predictions
# come out in the same order as the labels below
dataloader_0100_0300 = DataLoader(datatensor_0100_0300, batch_size=32)
dataloader_0250_0500 = DataLoader(datatensor_0250_0500, batch_size=32)
dataloader_0400_0800 = DataLoader(datatensor_0400_0800, batch_size=32)
dataloader_0700_1200 = DataLoader(datatensor_0700_1200, batch_size=32)

# Keep the true labels around for the metrics. `TensorDataset.tensors` holds
# the tensors in constructor order, i.e. (x, y); the old `target_tensor`
# attribute no longer exists. Plain tensors suffice here -- they do not
# require gradients, so no Variable / volatile wrapper is needed.
true_labels_0100_0300 = datatensor_0100_0300.tensors[1]
true_labels_0250_0500 = datatensor_0250_0500.tensors[1]
true_labels_0400_0800 = datatensor_0400_0800.tensors[1]
true_labels_0700_1200 = datatensor_0700_1200.tensors[1]

## Load the model and set up the net

In [35]:
model = TimeSeriesFCN()

# Load the model weights onto the CPU: A little cumbersome, because we don't
# have CUDA and GPU parallelization like during training time
weights_file = '../train/weights/timeseries_weights_GW170104_0100_1200_16k.net'
state_dict = torch.load(weights_file, map_location=lambda storage, loc: storage)

# The stored keys carry a `module.` prefix, which the bare model does not
# know. Strip it, but only where it is actually present, so that keys
# without the prefix are not mangled.
prefix = 'module.'
new_state_dict = OrderedDict(
    (key[len(prefix):] if key.startswith(prefix) else key, value)
    for key, value in state_dict.items()
)

# We only run inference in this notebook, so switch off training-time
# behavior such as dropout
model.eval()

model.load_state_dict(new_state_dict)

## Run the model on the test data

In [36]:
# Apply the model to every data loader, in the same order as before, and
# bind each result to its own name
(predictions_0100_0300,
 predictions_0250_0500,
 predictions_0400_0800,
 predictions_0700_1200) = [
    apply_model(model, loader)
    for loader in (dataloader_0100_0300,
                   dataloader_0250_0500,
                   dataloader_0400_0800,
                   dataloader_0700_1200)
]

0
0
0
0


## Calculate Metrics

In [55]:
def get_weights(labels, threshold):
    """
    Compute per-element weights for the labels.

    An element gets weight 0 if its label lies strictly between 0 and
    `threshold`, and weight 1 otherwise.

    Args:
        labels: Tensor of label values.
        threshold: Exclusive upper bound of the zero-weight interval.

    Returns:
        A float tensor with the same shape as `labels`, holding 0s and 1s.
    """
    in_open_interval = (labels > 0) & (labels < threshold)
    return 1.0 - in_open_interval.float()

def loss_function(y_pred, y_true, weights):
    """
    Compute the weighted binary cross-entropy between predictions and labels.

    Args:
        y_pred: Predicted probabilities (values in [0, 1]).
        y_true: Target values, same shape as `y_pred`.
        weights: Per-element rescaling weights for the loss.

    Returns:
        The loss as returned by nn.BCELoss (averaged over all elements).
    """

    # Put targets and weights on the device of the predictions. Moving only
    # the loss to CUDA whenever a GPU happens to be available breaks as soon
    # as the tensors themselves live on a different device.
    y_true = y_true.to(y_pred.device)
    weights = weights.to(y_pred.device)

    # Set up the Binary Cross-Entropy term of the loss
    bce_loss = nn.BCELoss(weight=weights)

    return bce_loss(y_pred, y_true)

def accuracy(y_true, y_pred):
    """
    Compute the element-wise accuracy of the (rounded) predictions.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predictions; rounded to the nearest integer
            before the comparison.

    Returns:
        One minus the mean absolute difference between `y_true` and the
        rounded `y_pred`, as a Python float. The mean is taken over
        dimension 1 first and over dimension 0 afterwards.
    """

    # Turn the predictions into hard 0/1 decisions
    hard_predictions = y_pred.round()

    # Average the errors per sample, then across all samples
    error_per_sample = (y_true - hard_predictions).abs().mean(dim=1)
    mean_error = error_per_sample.mean(dim=0)

    return 1 - float(mean_error.data.cpu().numpy())

In [57]:
# Per-element weights: 0 where a label lies strictly between 0 and the
# threshold, 1 everywhere else
weights = get_weights(true_labels_0100_0300, threshold=1.4141823e-22)

# Weighted BCE loss against the ceil()-ed labels, converted to a Python float
binary_labels = torch.ceil(true_labels_0100_0300)
loss_0100_0300 = float(
    loss_function(y_pred=predictions_0100_0300,
                  y_true=binary_labels,
                  weights=weights).data.cpu().numpy()
)

# Accuracy on masked predictions and masked labels.
# NOTE(review): elements with weight 0 are zero in both tensors and hence
# count as correct predictions -- confirm that this is intended.
masked_predictions = predictions_0100_0300 * weights
masked_labels = torch.ceil(true_labels_0100_0300 * weights)
accuracy_0100_0300 = accuracy(y_true=masked_labels, y_pred=masked_predictions)

In [58]:
# Report the loss first, then the accuracy (one value per line)
for metric_value in (loss_0100_0300, accuracy_0100_0300):
    print(metric_value)

0.15090875327587128
0.9489949531853199
