Model

In [1]:
import numpy as np
import math
import pandas as pd
import os
import matplotlib.pyplot as plt
import librosa
import librosa.display
import IPython
from IPython.display import Audio
from IPython.display import Image
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedGroupKFold
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import KFold
import seaborn as sn
import sklearn
from sklearn.manifold import TSNE
from torch.autograd import Function
from imblearn.metrics import sensitivity_specificity_support
from math import floor

In [8]:
class CNN_model(nn.Module):
    """CNN made of two convolutional blocks followed by two dense blocks.

    Each convolutional block is Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d(2)
    -> Dropout. Each dense block is Linear -> ReLU -> Dropout.

    Args:
        img_dim: ``(height, width)`` of the single-channel input images,
            e.g. ``(65, 41)``.
        kernel_size_1: Square kernel size of the first convolution.
        kernel_size_2: Square kernel size of the second convolution.
        depth_CL: Number of output channels of both convolutions.
        neurons_MLP: Width of the dense layers.
        drop_out: Dropout probability used after every block.

    Raises:
        ValueError: If ``img_dim`` is too small for the chosen kernel sizes,
            i.e. any intermediate feature map would have a side < 1.
    """

    @staticmethod
    def _conv_out_size(size, kernel_size, padding, dilation, stride):
        """Output length of one Conv2d axis (formula from the PyTorch Conv2d docs)."""
        return (size + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1

    @staticmethod
    def _pool_out_size(size, pool_size):
        """Output length of one MaxPool2d axis (stride == kernel, no padding)."""
        return size // pool_size

    def __init__(self, img_dim, kernel_size_1=6, kernel_size_2=9, depth_CL=64, neurons_MLP=32, drop_out=0.2):

        super().__init__()

        # e.g. img_dim = (65, 41)
        height, width = img_dim

        # Hyper-parameters shared by both convolutions. They are used both for
        # the size arithmetic below and for building the layers, so the two
        # cannot get out of sync.
        padding = 1
        dilation = 1
        stride = 1
        pool_size = 2

        # Output size of the first convolutional layer.
        #   The paper's code uses:
        #     math.ceil((math.ceil((65-kernel_size_1)/2+1)-kernel_size_2)/2+1)
        #     math.ceil((math.ceil((41-kernel_size_1)/2+1)-kernel_size_2)/2+1)
        #   Here the formula from the PyTorch Conv2d documentation is used instead:
        #     out = floor((in + 2*padding - dilation*(kernel_size-1) - 1) / stride + 1)
        Hout1 = self._conv_out_size(height, kernel_size_1, padding, dilation, stride)
        Wout1 = self._conv_out_size(width, kernel_size_1, padding, dilation, stride)

        # First pooling.
        Hout1p = self._pool_out_size(Hout1, pool_size)
        Wout1p = self._pool_out_size(Wout1, pool_size)

        # Output size of the second convolutional layer.
        Hout2 = self._conv_out_size(Hout1p, kernel_size_2, padding, dilation, stride)
        Wout2 = self._conv_out_size(Wout1p, kernel_size_2, padding, dilation, stride)

        # Second pooling.
        Hout2p = self._pool_out_size(Hout2, pool_size)
        Wout2p = self._pool_out_size(Wout2, pool_size)

        # Fail early with a readable message instead of building a Linear layer
        # with a zero/negative number of input features.
        if min(Hout1, Wout1, Hout1p, Wout1p, Hout2, Wout2, Hout2p, Wout2p) < 1:
            raise ValueError(
                f"img_dim={tuple(img_dim)} is too small for kernel_size_1={kernel_size_1} "
                f"and kernel_size_2={kernel_size_2}: the feature map after the second "
                f"pooling would be {Hout2p}x{Wout2p}"
            )

        # Number of features once the last feature map is flattened.
        input_size = depth_CL * Hout2p * Wout2p

        # 1. Convolutional Layer 1
        self.cl1 = nn.Sequential(
            # bias=False because BatchNorm2d is applied right after the convolution
            nn.Conv2d(in_channels=1, out_channels=depth_CL, kernel_size=kernel_size_1,
                      stride=stride, padding=padding, dilation=dilation, bias=False),
            nn.BatchNorm2d(depth_CL),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=pool_size),
            nn.Dropout(p=drop_out),
        )

        # 2. Convolutional Layer 2
        self.cl2 = nn.Sequential(
            # bias=False because BatchNorm2d is applied right after the convolution
            nn.Conv2d(in_channels=depth_CL, out_channels=depth_CL, kernel_size=kernel_size_2,
                      stride=stride, padding=padding, dilation=dilation, bias=False),
            nn.BatchNorm2d(depth_CL),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=pool_size),
            nn.Dropout(p=drop_out),
        )

        # 3. Fully Connected Layer 1
        self.fcl1 = nn.Sequential(
            nn.Linear(in_features=input_size, out_features=neurons_MLP),
            nn.ReLU(),
            nn.Dropout(p=drop_out),
        )

        # 4. Fully Connected Layer 2
        self.fcl2 = nn.Sequential(
            # nn.Linear requires out_features; the original call omitted it.
            # NOTE(review): the same width as the previous layer is assumed
            # here - confirm against the intended architecture.
            nn.Linear(in_features=neurons_MLP, out_features=neurons_MLP),
            nn.ReLU(),
            nn.Dropout(p=drop_out),
        )

    def forward(self, x):
        """Run the network and return every intermediate activation.

        Args:
            x: Input batch; the first convolution expects a single input channel.

        Returns:
            Tuple ``(y_cl1, y_cl2, y_cl2_flat, y_fcl1, y_fcl2)``: the outputs of
            the two convolutional blocks, the flattened second block output,
            and the outputs of the two dense blocks.
        """
        y_cl1 = self.cl1(x)
        y_cl2 = self.cl2(y_cl1)
        # Keep the batch dimension, flatten channels and spatial dimensions.
        y_cl2_flat = torch.flatten(y_cl2, start_dim=1)
        y_fcl1 = self.fcl1(y_cl2_flat)
        y_fcl2 = self.fcl2(y_fcl1)
        return y_cl1, y_cl2, y_cl2_flat, y_fcl1, y_fcl2
        

def reset_weights(m):
    """Call ``reset_parameters()`` on each direct child of ``m`` that defines it.

    Only the objects yielded by ``m.children()`` are inspected; children
    without a ``reset_parameters`` attribute are left untouched.
    """
    for submodule in m.children():
        if not hasattr(submodule, 'reset_parameters'):
            continue
        submodule.reset_parameters()

# CNN_model requires the input image size as its first argument; calling it
# with no arguments raises TypeError. (65, 41) is the (height, width) noted in
# CNN_model.__init__ - TODO confirm it matches the actual input data.
IMG_DIM = (65, 41)

model = CNN_model(img_dim=IMG_DIM)
# Re-initialise every layer that exposes reset_parameters().
model.apply(reset_weights)

# Switch to training mode and clear the gradients.
model.train()
model.zero_grad()