<a href="https://colab.research.google.com/github/BrunoGomesCoelho/mosquito-networking/blob/master/notebooks/0.11-BrunoGomesCoelho_Colab1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
COLAB_IDX = 1

In [0]:
TESTING = False
COLAB = True

if COLAB:
    BASE_DIR = "/content/drive/My Drive/IC/mosquito-networking/"
else:
    BASE_DIR = "../"

In [0]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import sys
sys.path.append("/content/drive/My Drive/IC/mosquito-networking/")

In [0]:
!python3 -m pip install -qr "/content/drive/My Drive/IC/mosquito-networking/drive_requirements.txt"



- - - 

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

# Trying out a full pytorch experiment, with tensorboard, // processing, etc

In [0]:
# OPTIONAL: Load the "autoreload" extension so that code can change
#%load_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
#%autoreload 2

import numpy as np
import pandas as pd
from src.data import make_dataset
from src.data import read_dataset
from src.data import util
from src.data.colab_dataset import MosquitoDatasetColab
import joblib

from torchsummary import summary

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# Experiment params

In [0]:
# Parameters
params = {'batch_size': 32,
          'shuffle': True,
          'num_workers': 0}
max_epochs = 1

if TESTING:
    params["num_workers"] = 0

version = !python3 --version
version = version[0].split(".")[1]

if int(version) < 7 and params["num_workers"]:
    print("WARNING\n"*10)
    print("Parallel execution only works for python3.7 or above!")
    print("Running in parallel with other versions is not guaranted to work")
    print("See https://discuss.pytorch.org/t/valueerror-signal-number-32-out-of-range-when-loading-data-with-num-worker-0/39615/2")

In [0]:
## Load gpu or cpu
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device {device}")

Using device cuda:0


# load data

In [0]:
# Load scaler
#scaler = joblib.load("../data/interim/scaler.pkl")
scaler = joblib.load(BASE_DIR + "data/interim/scaler.pkl")

In [0]:
data = np.load(BASE_DIR + "data/interim/all_wavs.npy", allow_pickle=True)
data = data[data[:, -1].argsort()]

In [0]:
df = pd.read_csv(BASE_DIR + "data/interim/file_names.csv")
df.sort_values("original_name", inplace=True)

In [0]:
errors = (df["original_name"].values != data[:, -1]).sum()
if errors:
    print(f"We have {errors} errors!")
    raise ValueError("Error in WAV/CSV")

In [0]:
x = data[:, 0]
y = df["label"]

train_idx = df["training"] == 1

In [0]:
# Generators
training_set = MosquitoDatasetColab(x[train_idx], y[train_idx].values,
                                    device=device, scaler=scaler)
training_generator = torch.utils.data.DataLoader(training_set, **params, 
                                                 pin_memory=True)

test_set = MosquitoDatasetColab(x[~train_idx], y[~train_idx].values,
                                device=device, scaler=scaler)

                                   
test_generator = torch.utils.data.DataLoader(test_set, **params, 
                                             pin_memory=True)

#sc Generate some example data
temp_generator = torch.utils.data.DataLoader(training_set, **params)

for (local_batch, local_labels) in temp_generator:
    example_x = local_batch
    example_y = local_labels
    break


# Load model

In [0]:
from src.models.BasicMosquitoNet import BasicMosquitoNet

In [0]:
# create your optimizer
net = BasicMosquitoNet()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

if device.type == "cuda":
    net.cuda()

In [0]:
summary(net, input_size=example_x.shape[1:])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1            [-1, 100, 7309]          12,900
            Conv1d-2            [-1, 100, 3527]       1,280,100
            Linear-3                    [-1, 1]         176,301
Total params: 1,469,301
Trainable params: 1,469,301
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.08
Forward/backward pass size (MB): 8.27
Params size (MB): 5.60
Estimated Total Size (MB): 13.96
----------------------------------------------------------------


# Start tensorboard

Run in our terminal: 

`cd notebooks`

`tensorboard --logdir runs`

In [0]:
from torch.utils.tensorboard import SummaryWriter
save_path = BASE_DIR + f"runs/colab/{COLAB_IDX}/"
    
# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter(save_path)

# train function

In [0]:
# Simple train function

def train(net, optimizer, max_epochs, testing=False, testing_idx=0, 
          save_idx=1, save_path=""):
    # Loop over epochs
    last_test_loss = 0
    for epoch in range(max_epochs):
        # Training
        cumulative_train_loss = 0
        for idx, (local_batch, local_labels) in enumerate(training_generator):
            local_batch, local_labels = util.convert_cuda(local_batch, 
                                                         local_labels,
                                                         device)

            optimizer.zero_grad()   # zero the gradient buffers
            output = net(local_batch)
            loss = criterion(output, local_labels)
            cumulative_train_loss += loss.data.item()
            loss.backward()
            optimizer.step()    # Does the update
            
            if testing and idx == testing_idx:
                    break
        
        cumulative_train_loss /= (idx+1)
        writer.add_scalar("Train Loss", cumulative_train_loss, idx)

        # Validation
        with torch.set_grad_enabled(False):
            cumulative_test_loss = 0
            
            for idx, (local_batch, local_labels) in enumerate(training_generator):
                local_batch, local_labels = util.convert_cuda(local_batch, 
                                                 local_labels,
                                                 device)
                output = net(local_batch)
                loss = criterion(output, local_labels)
                cumulative_test_loss += loss.data.item()
                if testing:
                    break
                
            cumulative_test_loss /= (idx+1)
            writer.add_scalar("Test Loss", cumulative_test_loss, idx)

        torch.save(net.state_dict(), save_path + f"model_epoch_{epoch}.pt")
            
    writer.close()
    return cumulative_test_loss

In [0]:
%%time
train(net, optimizer, 100, testing=TESTING, save_path=save_path)

CPU times: user 4h 3min 53s, sys: 2h 48min 43s, total: 6h 52min 37s
Wall time: 6h 54min 29s


0.6932619217293788