In [1]:
#from google.colab import drive
#drive.mount('/content/drive'

In [2]:
#import sys
#sys.path.append("/content/drive/My Drive/IC/mosquito-networking/")



- - - 

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

# Trying out a full pytorch experiment, with tensorboard, // processing, etc

In [3]:
# OPTIONAL: Load the "autoreload" extension so that code can change
%load_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2

import numpy as np
import pandas as pd
from src.data import make_dataset
from src.data import read_dataset
from src.data import util
from src.data.colab_dataset import MosquitoDatasetColab

from torchsummary import summary

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# Experiment params

In [4]:
# Parameters
params = {'batch_size': 32,
          'shuffle': True,
          'num_workers': 0}
max_epochs = 1

version = !python3 --version
version = version[0].split(".")[1]

if int(version) < 7 and params["num_workers"]:
    print("WARNING\n"*10)
    print("Parallel execution only works for python3.7 or above!")
    print("Running in parallel with other versions is not guaranted to work")
    print("See https://discuss.pytorch.org/t/valueerror-signal-number-32-out-of-range-when-loading-data-with-num-worker-0/39615/2")

In [5]:
## Load gpu or cpu
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device {device}")

Using device cpu


# load data

In [6]:
#df = pd.read_csv("data/interim/file_names.csv")
df = pd.read_csv("../data/interim/file_names.csv")

x = df["original_name"]
y = df["label"]

train_idx = df["training"] == 1

In [7]:
# Generators
training_set = MosquitoDatasetColab(x[train_idx].values, y[train_idx].values,
                                   device=device)
training_generator = torch.utils.data.DataLoader(training_set, **params)

test_set = MosquitoDatasetColab(x[~train_idx].values, y[~train_idx].values,
                                   device=device)
test_generator = torch.utils.data.DataLoader(test_set, **params)

# Generate some example data
temp_generator = torch.utils.data.DataLoader(training_set, **params)

for (local_batch, local_labels) in temp_generator:
    example_x = local_batch
    example_y = local_labels
    break


In [8]:
example_y.shape

torch.Size([32])

## Basic 1D convolutional network

[1D conv in Pytorch](https://pytorch.org/docs/stable/nn.html#torch.nn.Conv1d) 

In the simplest case, the output value of the layer with input size

$$ (N, C_{\text{in}}, L) $$ and output $$ (N, C_{\text{out}}, L_{\text{out}}) $$ can be

$$ (N, C_{\text{in}}, L) $$ and output $$ (N, C_{\text{out}}, L_{\text{out}}) $$ can be
    precisely described as:

$$
        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{in} - 1} \text{weight}(C_{\text{out}_j}, k)
        \star \text{input}(N_i, k)
$$ 

where $$ \star $$  is the valid "cross-correlation"  operator,
    N is a batch size, C denotes a number of channels,
    L is a length of signal sequence.

# Create Model

In [9]:
class BasicMosquitoNet(nn.Module):
    """A basic 1D conv net.
    We use 1D convolution, followed by max pool, 1D convolution, max pool, FC, FC.
    """
    
    def __init__(self, conv1_out=100, kernel_1=6, stride_1=3, 
                 conv2_out=10, kernel_2=4, stride_2=2):
        """
        conv1: (22050 - 6)/3  + 1 = 7349
        max_pool_1 = floor((Lin + −dilation×(kernel_size−1)−1)/stride_2) + 1
                   = floor(7349-2 /2) + 1 = 3673 + 1 = 3674
        conv2 = (3674 - 4)/2 + 1 = 1836
        max_pool_2 = floor(1836-2 /2) + 1 = 918
        
        """
        super(BasicMosquitoNet, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=conv1_out, 
                               kernel_size=kernel_1, stride=stride_1)
        self.conv2 = nn.Conv1d(in_channels=conv1_out, out_channels=conv2_out, 
                               kernel_size=kernel_2, stride=stride_2)
        self.fc1 = nn.Linear(918*conv2_out, 1)  
        #self.fc1 = nn.Linear(918*conv2_out, 120)
        #self.fc2 = nn.Linear(120, 84)
        #self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. 
        """
        # Max pooling over a (2, 2) window
        x = F.max_pool1d(F.relu(self.conv1(x)), 2)
        # If the size is a square you can only specify a single number
        x = F.max_pool1d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = self.fc1(x)
        
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        """
        # We use BCEWithLogitsLoss instead of applying sigmoid here
        # It is better computationally
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [10]:
# create your optimizer
net = BasicMosquitoNet()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

In [11]:
summary(net, input_size=example_x.shape[1:])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1            [-1, 100, 7349]             700
            Conv1d-2             [-1, 10, 1836]           4,010
            Linear-3                    [-1, 1]           9,181
Total params: 13,891
Trainable params: 13,891
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.08
Forward/backward pass size (MB): 5.75
Params size (MB): 0.05
Estimated Total Size (MB): 5.88
----------------------------------------------------------------


# Start tensorboard

Run in our terminal: 

`cd notebooks`

`tensorboard --logdir runs`

In [12]:
from torch.utils.tensorboard import SummaryWriter
    
# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# train function

In [14]:
# Simple train function

def train(net, optimizer, max_epochs, testing=False):
    # Loop over epochs
    last_test_loss = 0
    for epoch in range(max_epochs):
        # Training
        for idx, (local_batch, local_labels) in enumerate(training_generator):
            local_batch, local_labels = util.convert_cuda(local_batch, 
                                                         local_labels,
                                                         device)

            optimizer.zero_grad()   # zero the gradient buffers
            output = net(local_batch)
            loss = criterion(output, local_labels)
            loss.backward()
            optimizer.step()    # Does the update
            
            if testing:
                break
            else:
                writer.add_scalar("Train Loss Batch", loss.data.item(), idx)

        # Validation
        with torch.set_grad_enabled(False):
            cumulative_test_loss = 0
            
            for idx, (local_batch, local_labels) in enumerate(training_generator):
                local_batch, local_labels = util.convert_cuda(local_batch, 
                                                 local_labels,
                                                 device)
                output = net(local_batch)
                loss = criterion(output, local_labels)
                if testing:
                    break
                else:
                    writer.add_scalar("Test Loss Batch", loss.data.item(), idx)
                
                cumulative_test_loss += loss.data.item()
            cumulative_test_loss /= (idx+1)
            last_test_loss = cumulative_test_loss
            
            if not testing:
                writer.add_scalar("Test Loss Epoch", loss.data.item(), idx)
    
    return last_test_loss
    writer.close()

In [17]:
%%time
train(net, optimizer, 1, testing=False)

CPU times: user 16min 25s, sys: 3min 11s, total: 19min 37s
Wall time: 5min 4s


0.6931620290690991

In [16]:
# Save the model
#torch.save(net.state_dict(), "../models/0.6-BrunoGomesCoelho-test-experiment.pt")