In [22]:
import torch; import torch.nn as nn; from torch.optim import lr_scheduler
import torch.optim as optim; from torch.utils.data.dataset import random_split as split 
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from torchaudio import transforms

from tqdm import trange
import time
from tempfile import TemporaryDirectory
import matplotlib.pyplot as plt 
import numpy as np
import librosa 
from sklearn.model_selection import train_test_split
import pandas as pd
import math
import os

MIN_SAMPLES = True
PATH_ANNOTATIONS = 'Wingbeats/min_annotations.txt' if MIN_SAMPLES else 'Wingbeats/annotations.txt'
n_classes = 6
lr = 1e-4
n_epochs = 50
batch_size = 64
SAMPLE_RATE = 8_000
nyquist_frequency = SAMPLE_RATE / 2
classes = ['An. gambiae', 'C. pipiens', 'C. quinquefasciatus', 'Ae. albopictus', 'An. arabiensis', 'Ae. aegypti']
n_fft = 256
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available else 'cpu'

In [23]:
def dividirDatos(archivoAnnotaciones, test_size=0.2, random_state=42):
    df = pd.read_csv(archivoAnnotaciones)
    train_df, test_df = train_test_split(
        df, 
        test_size=test_size,
        random_state=random_state,
        stratify=df['label']
    )
    print('Total = ', len(df))
    print('Total train = ', len(train_df))
    print('Total test = ', len(test_df))
    return train_df, test_df

In [24]:
class AudioDatasetFromTxt(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, index):
        audio_path = self.dataframe.iloc[index]['file_path']
        label = self.dataframe.iloc[index]['label']
        
        waveform, _ = librosa.load(audio_path, sr=SAMPLE_RATE)
        
        waveform = torch.from_numpy(waveform).float().unsqueeze(0)
        
        if self.transform:
            spectrogram = self.transform(waveform)
            return spectrogram, label
        else:
            return waveform, label

In [25]:
spectrogram = transforms.Spectrogram(n_fft=n_fft, hop_length=n_fft//6)

In [26]:
train_df, test_df = dividirDatos(PATH_ANNOTATIONS)

Total =  115782
Total train =  92625
Total test =  23157


In [27]:
train_dataset = AudioDatasetFromTxt(train_df, transform=spectrogram)
test_dataset = AudioDatasetFromTxt(test_df, transform=spectrogram)

subgroups = ['train', 'val', 'test']

n_train = int(len(train_dataset) * 0.9)
n_val = math.ceil(len(train_dataset) * 0.1)
train_dataset, val_dataset = split(train_dataset, [n_train, n_val], 
                                   generator=torch.manual_seed(42))
datasets_Audio = [train_dataset, val_dataset, test_dataset]
audioFolder = {
    x : datasets_Audio[i]
    for i, x in enumerate(subgroups)
}
# Crear dataloaders
dataloaders = { x : torch.utils.data.DataLoader(audioFolder[x], shuffle=True, 
                                                batch_size=batch_size, num_workers=4)
               for x in subgroups}
test_loader = DataLoader(audioFolder['test'], batch_size=batch_size, shuffle=False)
dataset_size = {x : len(audioFolder[x])
                for x in subgroups}
print(dataset_size)
print(n_train, n_val)

{'train': 83362, 'val': 9263, 'test': 23157}
83362 9263


In [28]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    df = pd.DataFrame(
        np.empty([num_epochs, 5]), 
        index=np.arange(num_epochs),
        columns=['loss_train', 'acc_train', 'loss_val', 'acc_val', 'lr']
    )
    since = time.time()

    # Create a temporary directory to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')
        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0
        rangeEpoch = trange(num_epochs)
        for epoch in rangeEpoch:
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)
            # Each epoch has a training and validation phase 
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()
                
                running_loss = 0.0
                running_correct = 0
                # Iterate over data
                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()
                    # forward 
                    # track history only if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)
                        # backward + optimizer only if training phase 
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()
                        
                    #@ stadistics
                    running_loss =+ loss.item() * inputs.size(0)
                    running_correct += torch.sum(preds == labels.data)

                if phase == 'train':
                    scheduler.step()
                    df.iloc[epoch, 4] = optimizer.param_groups[0]['lr']
                    
                epoch_loss = running_loss / dataset_size[phase]
                epoch_acc = running_correct.double() / dataset_size[phase]
                
                if phase == 'train':
                    df.iloc[epoch, 0], df.iloc[epoch, 1] = epoch_loss, epoch_acc.item()
                else:
                    df.iloc[epoch, 2], df.iloc[epoch, 3] = epoch_loss, epoch_acc.item()
                
                rangeEpoch.set_description("train_loss=%.5f" % df.iloc[epoch, 0])
                rangeEpoch.set_postfix(
                            {'train_acc':df.iloc[epoch,1], 'test_acc':df.iloc[epoch, 3]}
                )
                
                if phase == 'val' and epoch_acc > best_acc:
                    torch.save(model.state_dict(), best_model_params_path)

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60: .0f}m {time_elapsed % 60: .0f}s')
        print(f'Best val Acc {best_acc:4f}')
        model.load_state_dict(torch.load(best_model_params_path, weights_only=True))
    return model, df

In [29]:
class AudioModelResNet(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.resnet = models.resnet101(weights=models.ResNet101_Weights.DEFAULT)
        original_first_conv = self.resnet.conv1
        self.resnet.conv1 = nn.Conv2d(
            1, 64, kernel_size=7, stride=2, padding=3, bias=False
        )
        with torch.no_grad():
            new_weights = original_first_conv.weight.mean(dim=1, keepdim=True)
            self.resnet.conv1.weight.copy_(new_weights)
        
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_features, num_classes)

    def forward(self, x):
        return self.resnet(x)

In [30]:
#model_ft = models.resnet18(weights='IMAGENET1K_V1')
model_ft = AudioModelResNet(6)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [31]:
modelo = 'resnet'
version = '101'
PATH_RESNET = f'./resultados/{modelo}{version}AudioBalanceado.pt' if MIN_SAMPLES else f'./resultados/{modelo}{version}AudioNoBalanceado.pt'
PATH_CSV_RESULTADOS = f'./resultados/{modelo}{version}ResultadoBalanceado.csv' if MIN_SAMPLES else f'./resultados/{modelo}{version}ResultadoNoBalanceado.csv'
if not os.path.exists(PATH_RESNET):
    model_ft, df = train_model(model_ft, criterion, optimizer, exp_lr_scheduler, num_epochs=n_epochs)
    df.to_csv(PATH_CSV_RESULTADOS,index=False)
    torch.save(model_ft, PATH_RESNET)

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch 0/49
----------


train_loss=0.00035:   2%|▏         | 1/50 [03:53<3:10:57, 233.82s/it, train_acc=0.517, test_acc=0.611]

Epoch 1/49
----------


train_loss=0.00020:   4%|▍         | 2/50 [07:50<3:08:23, 235.48s/it, train_acc=0.753, test_acc=0.597]   

Epoch 2/49
----------


train_loss=0.00024:   6%|▌         | 3/50 [11:50<3:06:07, 237.60s/it, train_acc=0.811, test_acc=0.602]    

Epoch 3/49
----------


train_loss=0.00017:   8%|▊         | 4/50 [15:40<2:59:44, 234.44s/it, train_acc=0.844, test_acc=0.735]    

Epoch 4/49
----------


train_loss=0.00013:  10%|█         | 5/50 [19:13<2:50:08, 226.86s/it, train_acc=0.864, test_acc=0.809]

Epoch 5/49
----------


train_loss=0.00011:  12%|█▏        | 6/50 [22:44<2:42:17, 221.32s/it, train_acc=0.885, test_acc=0.559]    

Epoch 6/49
----------


train_loss=0.00007:  14%|█▍        | 7/50 [26:11<2:35:27, 216.91s/it, train_acc=0.9, test_acc=0.621]    

Epoch 7/49
----------


train_loss=0.00009:  16%|█▌        | 8/50 [29:53<2:32:52, 218.39s/it, train_acc=0.933, test_acc=0.592]

Epoch 8/49
----------


train_loss=0.00006:  18%|█▊        | 9/50 [33:43<2:31:44, 222.06s/it, train_acc=0.942, test_acc=0.589]

Epoch 9/49
----------


train_loss=0.00008:  20%|██        | 10/50 [37:29<2:28:43, 223.10s/it, train_acc=0.947, test_acc=0.557]  

Epoch 10/49
----------


train_loss=0.00010:  22%|██▏       | 11/50 [41:04<2:23:32, 220.83s/it, train_acc=0.951, test_acc=0.58] 

Epoch 11/49
----------


train_loss=0.00005:  24%|██▍       | 12/50 [44:48<2:20:19, 221.56s/it, train_acc=0.954, test_acc=0.552]

Epoch 12/49
----------


train_loss=0.00003:  26%|██▌       | 13/50 [48:28<2:16:30, 221.37s/it, train_acc=0.958, test_acc=0.584]

Epoch 13/49
----------


train_loss=0.00006:  28%|██▊       | 14/50 [52:12<2:13:16, 222.12s/it, train_acc=0.96, test_acc=0.591] 

Epoch 14/49
----------


train_loss=0.00007:  30%|███       | 15/50 [55:52<2:09:12, 221.50s/it, train_acc=0.964, test_acc=0.589]   

Epoch 15/49
----------


train_loss=0.00002:  32%|███▏      | 16/50 [59:35<2:05:41, 221.80s/it, train_acc=0.966, test_acc=0.574]

Epoch 16/49
----------


train_loss=0.00001:  34%|███▍      | 17/50 [1:03:24<2:03:14, 224.08s/it, train_acc=0.965, test_acc=0.576]    

Epoch 17/49
----------


train_loss=0.00006:  36%|███▌      | 18/50 [1:07:13<2:00:14, 225.46s/it, train_acc=0.966, test_acc=0.588]    

Epoch 18/49
----------


train_loss=0.00006:  38%|███▊      | 19/50 [1:11:01<1:56:53, 226.23s/it, train_acc=0.966, test_acc=0.598]

Epoch 19/49
----------


train_loss=0.00006:  40%|████      | 20/50 [1:14:52<1:53:46, 227.56s/it, train_acc=0.967, test_acc=0.584]   

Epoch 20/49
----------


train_loss=0.00003:  42%|████▏     | 21/50 [1:18:40<1:50:08, 227.87s/it, train_acc=0.967, test_acc=0.572]    

Epoch 21/49
----------


train_loss=0.00004:  44%|████▍     | 22/50 [1:22:29<1:46:24, 228.01s/it, train_acc=0.968, test_acc=0.592]

Epoch 22/49
----------


train_loss=0.00005:  46%|████▌     | 23/50 [1:26:11<1:41:53, 226.42s/it, train_acc=0.968, test_acc=0.585]

Epoch 23/49
----------


train_loss=0.00005:  48%|████▊     | 24/50 [1:30:00<1:38:24, 227.09s/it, train_acc=0.967, test_acc=0.594]

Epoch 24/49
----------


train_loss=0.00002:  50%|█████     | 25/50 [1:33:42<1:33:59, 225.60s/it, train_acc=0.968, test_acc=0.592]   

Epoch 25/49
----------


train_loss=0.00007:  52%|█████▏    | 26/50 [1:37:23<1:29:44, 224.34s/it, train_acc=0.968, test_acc=0.597]    

Epoch 26/49
----------


train_loss=0.00007:  54%|█████▍    | 27/50 [1:41:09<1:26:08, 224.71s/it, train_acc=0.967, test_acc=0.58]  

Epoch 27/49
----------


train_loss=0.00003:  56%|█████▌    | 28/50 [1:44:59<1:22:57, 226.24s/it, train_acc=0.967, test_acc=0.59]

Epoch 28/49
----------


train_loss=0.00008:  58%|█████▊    | 29/50 [1:48:46<1:19:18, 226.61s/it, train_acc=0.968, test_acc=0.567]   

Epoch 29/49
----------


train_loss=0.00001:  60%|██████    | 30/50 [1:52:33<1:15:34, 226.70s/it, train_acc=0.969, test_acc=0.567]   

Epoch 30/49
----------


train_loss=0.00002:  62%|██████▏   | 31/50 [1:56:19<1:11:41, 226.38s/it, train_acc=0.968, test_acc=0.58]     

Epoch 31/49
----------


train_loss=0.00005:  64%|██████▍   | 32/50 [2:00:09<1:08:13, 227.44s/it, train_acc=0.968, test_acc=0.591]    

Epoch 32/49
----------


train_loss=0.00006:  66%|██████▌   | 33/50 [2:03:56<1:04:25, 227.40s/it, train_acc=0.968, test_acc=0.604]

Epoch 33/49
----------


train_loss=0.00003:  68%|██████▊   | 34/50 [2:07:47<1:00:56, 228.54s/it, train_acc=0.968, test_acc=0.559]   

Epoch 34/49
----------


train_loss=0.00005:  70%|███████   | 35/50 [2:11:35<57:03, 228.22s/it, train_acc=0.968, test_acc=0.564]  

Epoch 35/49
----------


train_loss=0.00004:  72%|███████▏  | 36/50 [2:15:11<52:25, 224.68s/it, train_acc=0.968, test_acc=0.583] 

Epoch 36/49
----------


train_loss=0.00013:  74%|███████▍  | 37/50 [2:18:50<48:17, 222.88s/it, train_acc=0.968, test_acc=0.589]    

Epoch 37/49
----------


train_loss=0.00004:  76%|███████▌  | 38/50 [2:22:28<44:18, 221.55s/it, train_acc=0.968, test_acc=0.584]    

Epoch 38/49
----------


train_loss=0.00006:  78%|███████▊  | 39/50 [2:26:09<40:33, 221.20s/it, train_acc=0.968, test_acc=0.569]   

Epoch 39/49
----------


train_loss=0.00004:  80%|████████  | 40/50 [2:29:48<36:45, 220.54s/it, train_acc=0.968, test_acc=0.599]

Epoch 40/49
----------


train_loss=0.00002:  82%|████████▏ | 41/50 [2:33:25<32:56, 219.65s/it, train_acc=0.967, test_acc=0.585]    

Epoch 41/49
----------


train_loss=0.00012:  84%|████████▍ | 42/50 [2:37:03<29:12, 219.10s/it, train_acc=0.967, test_acc=0.58] 

Epoch 42/49
----------


train_loss=0.00008:  86%|████████▌ | 43/50 [2:40:41<25:31, 218.73s/it, train_acc=0.968, test_acc=0.573]

Epoch 43/49
----------


train_loss=0.00003:  88%|████████▊ | 44/50 [2:44:36<22:21, 223.57s/it, train_acc=0.968, test_acc=0.574]   

Epoch 44/49
----------


train_loss=0.00003:  90%|█████████ | 45/50 [2:48:34<18:59, 227.95s/it, train_acc=0.968, test_acc=0.574]

Epoch 45/49
----------


train_loss=0.00005:  92%|█████████▏| 46/50 [2:52:32<15:24, 231.10s/it, train_acc=0.968, test_acc=0.594]

Epoch 46/49
----------


train_loss=0.00004:  94%|█████████▍| 47/50 [2:56:30<11:39, 233.20s/it, train_acc=0.969, test_acc=0.581]

Epoch 47/49
----------


train_loss=0.00007:  96%|█████████▌| 48/50 [3:00:30<07:50, 235.05s/it, train_acc=0.968, test_acc=0.54]    

Epoch 48/49
----------


train_loss=0.00003:  98%|█████████▊| 49/50 [3:04:29<03:56, 236.24s/it, train_acc=0.968, test_acc=0.581] 

Epoch 49/49
----------


train_loss=0.00010: 100%|██████████| 50/50 [3:08:27<00:00, 226.15s/it, train_acc=0.967, test_acc=0.593]


Training complete in  188m  28s
Best val Acc 0.000000
