# Preparación del entorno
El primer paso es importar las librerías necesarias. 

* En el caso de que la librería no esté disponible en el entorno virtual, podemos instalarla agregando la línea:

```bash
!pip3 install hiddenlayer
```

* Cualquier línea que comience con un signo de exclamación será ejecutada por el sistema.

In [2]:
import os
import torch as tr
import torch.nn as nn
import numpy as np
import pandas as pd
import zipfile as zf

from sklearn.metrics import balanced_accuracy_score
from torch.utils.data import Dataset, DataLoader, random_split
from scipy.io import wavfile as wv


# Lectura de los archivos wav
Creamos una clase que nos permitirá manejar el dataset de forma más cómoda en las próximas etapas.

In [4]:
class WaveDataset(Dataset):
    """Dataset of wav clips listed in a CSV annotation file.

    The CSV must contain a ``filename`` column; each value is formatted with
    ``"%04d.wav"`` to obtain the clip's file name inside ``data_folder``.
    An optional ``label`` column supplies the targets; when it is missing,
    every sample receives the placeholder label ``-1``.

    Decoded clips are kept in ``self.cache`` so that each file is read from
    disk at most once per dataset instance.
    """

    def __init__(self, data_folder, annotation_file):
        """Read the annotation CSV and start with an empty sample cache."""
        self.data_folder = data_folder
        annotations = pd.read_csv(annotation_file)
        self.filenames = annotations['filename'].tolist()
        if 'label' not in annotations.columns:
            # Unlabelled data (e.g. a test split): use -1 as a dummy target.
            self.labels = [-1] * len(self.filenames)
        else:
            self.labels = annotations['label'].tolist()
        self.cache = {}

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``(waveform, label)`` tensors for the sample at ``index``.

        The waveform is a ``tr.Tensor`` built from the samples returned by
        ``scipy.io.wavfile.read``; the label is a one-element
        ``tr.LongTensor``.
        """
        entry = self.cache.get(index)
        if entry is None:
            # Cache miss: decode the wav file once and remember the result.
            wav_name = "%04d.wav" % self.filenames[index]
            _, samples = wv.read(os.path.join(self.data_folder, wav_name))
            entry = (samples, self.labels[index])
            self.cache[index] = entry
        samples, target = entry
        return tr.Tensor(samples), tr.LongTensor([target])
    
# Labelled training set: wav clips and their annotation CSV both live in
# data/raw/.
dataset = WaveDataset(
    data_folder="data/raw/",
    annotation_file="data/raw/train_labels.csv",
)

# Definición del modelo neuronal
Definimos una red convolucional 1D con Pytorch.

In [5]:
class CowCNN(nn.Module):
    """1D convolutional classifier for fixed-length, single-channel signals.

    Four ``Conv1d`` stages (1 -> 4 -> 8 -> 16 -> 32 channels, kernel size 3)
    with ReLU, batch normalisation and average pooling are followed by a
    global max pool and a linear layer that outputs one score per class.

    Args:
        input_length: Number of samples in each signal. ``forward`` reshapes
            its input to ``(-1, 1, input_length)``. Defaults to 17640.
        n_classes: Number of output scores per signal. Defaults to 3.
    """

    def __init__(self, input_length=17640, n_classes=3):
        super().__init__()
        self.input_length = input_length

        # NOTE: the position of each module inside the Sequential containers
        # determines the state_dict keys, so the order must stay stable for
        # previously saved checkpoints to keep loading.
        self.conv_layers = nn.Sequential(
            nn.BatchNorm1d(1),
            nn.Conv1d(1, 4, kernel_size=3),
            nn.ReLU(),
            nn.BatchNorm1d(4),
            nn.AvgPool1d(2),
            nn.Conv1d(4, 8, kernel_size=3),
            nn.ReLU(),
            nn.BatchNorm1d(8),
            nn.AvgPool1d(2),
            nn.Conv1d(8, 16, kernel_size=3),
            nn.ReLU(),
            nn.BatchNorm1d(16),
            nn.AvgPool1d(2),
            nn.Conv1d(16, 32, kernel_size=3),
            nn.ReLU(),
            # Collapse the time axis to one value per channel.
            nn.AdaptiveMaxPool1d(1))

        self.linear_layers = nn.Sequential(
            nn.BatchNorm1d(32),
            nn.Dropout(0.2),
            nn.Linear(32, n_classes))

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, n_classes)``.

        ``x`` may be any tensor whose elements can be viewed as
        ``(batch, 1, input_length)``.
        """
        x = x.view(-1, 1, self.input_length)
        x = self.conv_layers(x)
        # (batch, 32, 1) -> (batch, 32)
        x = x.flatten(1)
        return self.linear_layers(x)


# Entrenamiento del modelo
El siguiente paso es entrenar el modelo.

* Para utilizar GPU es importante activar el uso de esta en (*Edit -> Notebook Settings -> Hardware Accelerator*)
* La primera época de entrenamiento puede tardar bastante dado que se deben cargar todos los datos en memoria. Las siguientes épocas deberían tardar un par de segundos.

In [8]:
# Hold out 10% of the labelled data for validation.
train_samples = int(0.9 * len(dataset))
valid_samples = len(dataset) - train_samples
train, valid = random_split(dataset, [train_samples, valid_samples])
train_loader = DataLoader(train, batch_size=32, shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid, batch_size=32, shuffle=False, pin_memory=True)

model = CowCNN().cuda()
lossfunc = tr.nn.CrossEntropyLoss()
optimizer = tr.optim.Adam(model.parameters())

# Early stopping: train until the validation balanced accuracy has not
# improved for 20 consecutive epochs, checkpointing every new best model.
best_valid_acc = 0
epochs_without_improvement = 0
while epochs_without_improvement < 20:

    # --- Training pass -----------------------------------------------------
    train_loss = 0
    model.train()
    for seq, lbl in train_loader:
        seq, lbl = seq.cuda(), lbl.cuda()
        optimizer.zero_grad()
        # Labels are batched as (batch, 1). Squeeze only dim 1 so that a
        # single-sample batch keeps its batch dimension.
        loss = lossfunc(model(seq), lbl.squeeze(1))
        loss.backward()
        optimizer.step()
        # Accumulate the mean of the per-batch losses.
        train_loss += loss.item() / len(train_loader)

    # --- Validation pass ---------------------------------------------------
    model.eval()
    batch_predictions, batch_targets = [], []
    # No gradients are needed for evaluation; skipping autograd saves memory.
    with tr.no_grad():
        for seq, lbl in valid_loader:
            scores = model(seq.cuda())
            batch_predictions.append(tr.argmax(scores, 1).cpu())
            batch_targets.append(lbl.squeeze(1))
    prediction = tr.cat(batch_predictions)
    ground_truth = tr.cat(batch_targets)
    valid_acc = balanced_accuracy_score(ground_truth.numpy(),
                                        prediction.numpy())

    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        epochs_without_improvement = 0
        tr.save(model.state_dict(), "best_model.pmt")
    else:
        epochs_without_improvement += 1

    print("Train loss: %f\t Valid acc: %f" % (train_loss, valid_acc))

Train loss: 0.763771	 Valid acc: 0.481953
Train loss: 0.704779	 Valid acc: 0.600480
Train loss: 0.671493	 Valid acc: 0.602321
Train loss: 0.690069	 Valid acc: 0.607283
Train loss: 0.671901	 Valid acc: 0.633814
Train loss: 0.650565	 Valid acc: 0.572589
Train loss: 0.659615	 Valid acc: 0.638896
Train loss: 0.635902	 Valid acc: 0.605742
Train loss: 0.650699	 Valid acc: 0.596539
Train loss: 0.666217	 Valid acc: 0.642137
Train loss: 0.625074	 Valid acc: 0.626331
Train loss: 0.661522	 Valid acc: 0.613405
Train loss: 0.608383	 Valid acc: 0.635834
Train loss: 0.618037	 Valid acc: 0.604382
Train loss: 0.600333	 Valid acc: 0.619708
Train loss: 0.656815	 Valid acc: 0.623249
Train loss: 0.621264	 Valid acc: 0.678351
Train loss: 0.621647	 Valid acc: 0.690916
Train loss: 0.587013	 Valid acc: 0.627191
Train loss: 0.615848	 Valid acc: 0.651821
Train loss: 0.588750	 Valid acc: 0.646919
Train loss: 0.605194	 Valid acc: 0.642997
Train loss: 0.636377	 Valid acc: 0.661505
Train loss: 0.606175	 Valid acc: 0

# Generar el archivo de predicciones

Primero cargamos los parámetros del mejor modelo y lo ponemos en modo evaluación.

In [9]:
# Restore the parameters of the best checkpoint written during training and
# switch the network to evaluation mode (eval() returns the module itself).
model.load_state_dict(tr.load("best_model.pmt"))
model.eval()

In [10]:
# Re-compute the validation balanced accuracy with the restored checkpoint.
model.eval()
batch_predictions, batch_targets = [], []
# Evaluation needs no gradients; disabling autograd avoids building graphs.
with tr.no_grad():
    for seq, lbl in valid_loader:
        scores = model(seq.cuda())
        batch_predictions.append(tr.argmax(scores, 1).cpu())
        # Labels are batched as (batch, 1). Squeeze only dim 1 so that a
        # single-sample batch stays one-dimensional and can be concatenated.
        batch_targets.append(lbl.squeeze(1))
prediction = tr.cat(batch_predictions)
ground_truth = tr.cat(batch_targets)
valid_acc = balanced_accuracy_score(ground_truth.numpy(),
                                    prediction.numpy())

Cargamos los datos de test y realizamos predicciones sobre estos.

In [0]:
# Rebind `dataset` to the unlabelled test split; its `filenames` are paired
# with the predictions when the CSV is written.
dataset = WaveDataset("./data/", "test_files.csv")
test_loader = DataLoader(dataset, batch_size=32, shuffle=False, pin_memory=True)

pred = []
# Inference only: disable autograd so no computation graph is built.
with tr.no_grad():
    for seq, _ in test_loader:
        seq = seq.cuda()
        batch_pred = tr.argmax(model(seq), dim=1)
        pred.append(batch_pred.cpu())
# One predicted class index per test file, in dataset order.
pred = tr.cat(pred).numpy()

Escribimos las predicciones junto a los ids en un archivo csv.

In [0]:
# Pair every test file id with its predicted class and write the rows to the
# predictions CSV, omitting both the header row and the index column.
ds = pd.DataFrame({'file': dataset.filenames, 'prediction': pred})
ds.to_csv("predicciones.csv", header=False, index=False)

La siguiente celda debería descargar el archivo de predicciones. En caso de que falle, vuelva a ejecutar el comando o descargue el archivo "a mano" desde la solapa de archivos a la izquierda.

In [0]:
# Ask the notebook environment to download the predictions file.
# NOTE(review): `files` is not defined anywhere in the visible code.
# Presumably it is the Colab helper (`from google.colab import files`);
# confirm and add the import, otherwise this line raises NameError.
files.download('predicciones.csv')