In [1]:
import numpy as np
import librosa
import glob
import os
from random import randint
import torch
import torch.nn as nn
from torch.utils import data
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import matplotlib.pyplot as plt
import torch.nn.functional as F
%matplotlib inline

import import_ipynb

In [2]:
class SEN_classify(nn.Module):
    """Siamese network that classifies a pair of inputs into two classes.

    Both inputs are encoded by the same ``siamese_cnn`` (shared weights). A
    cosine-similarity matrix between the two encodings is then processed by
    ``late_cnn``, summarised per channel with global max / mean / variance
    pooling and finally classified by ``fcWithDropout``.

    Inputs are laid out as (batch, channel, height, width). The first
    convolution uses a kernel and stride of 128 along the width, so a width of
    128 collapses to a single column, which ``cal_similarity`` relies on.
    """

    def __init__(self):
        super().__init__()
        # Shared encoder applied to each of the two inputs.
        self.siamese_cnn = nn.Sequential(
            nn.Conv2d(1, 128, kernel_size=(4, 128), stride=(1, 128)),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            nn.Conv2d(128, 256, kernel_size=(4, 1), stride=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(),

            nn.Conv2d(256, 256, kernel_size=(4, 1), stride=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(),
        )
        # Original author's note: for the max-pool layers, take care to
        # ensure padding="same" (here approximated with explicit padding).
        self.late_cnn = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1)),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(3, 3), padding=1),

            nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1)),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(3, 3), padding=(1, 0)),

            nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(),
        )
        # 768 input features = 3 pooled statistics x 256 late_cnn channels.
        # NOTE(review): the head ends with Softmax, so the model returns
        # probabilities. If the training loop uses nn.CrossEntropyLoss (which
        # applies log-softmax itself) this would be applied twice - confirm
        # against the training code.
        self.fcWithDropout = nn.Sequential(
            nn.Linear(768, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),

            nn.Linear(1024, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),

            nn.Linear(1024, 2),
            nn.Softmax(dim=1),
        )

    def cal_similarity(self, matrix1, matrix2, eps=1e-8):
        """Return the pairwise cosine-similarity matrix of two encodings.

        Args:
            matrix1: tensor of shape (batch, channels, steps1, 1).
            matrix2: tensor of shape (batch, channels, steps2, 1).
            eps: lower bound applied to the product of the two norms. Feature
                vectors that are entirely zero (possible after ReLU) would
                otherwise produce 0/0 = NaN; with the bound they yield a
                similarity of 0. Denominators already >= eps are unaffected.

        Returns:
            Tensor of shape (batch, 1, steps1, steps2) where entry [b, 0, i, j]
            is the cosine similarity between step i of ``matrix1`` and step j
            of ``matrix2``.
        """
        # Drop the singleton width axis -> (batch, channels, steps).
        feats1 = torch.squeeze(matrix1, dim=3)
        feats2 = torch.squeeze(matrix2, dim=3)
        # Dot products between every pair of steps -> (batch, steps1, steps2).
        num = torch.bmm(torch.transpose(feats1, 1, 2), feats2)
        # Per-step L2 norms over the channel axis -> (batch, 1, steps).
        norm1 = torch.sqrt(torch.sum(feats1 * feats1, dim=1, keepdim=True))
        norm2 = torch.sqrt(torch.sum(feats2 * feats2, dim=1, keepdim=True))
        # Outer product of the norms -> (batch, steps1, steps2).
        denom = torch.bmm(torch.transpose(norm1, 1, 2), norm2)
        # Norms are non-negative, so a lower clamp is enough to avoid 0/0.
        denom = torch.clamp(denom, min=eps)
        # Add a channel axis so the result can be fed to Conv2d.
        return torch.unsqueeze(num / denom, dim=1)

    def _reduce_var(self, inputs):
        """Population variance over the two spatial axes (dims 2 and 3).

        Takes a (batch, channels, height, width) tensor and returns a
        (batch, channels) tensor; the reduced axes are not kept.
        """
        mean = torch.mean(inputs, dim=(2, 3), keepdim=True)
        return torch.mean((inputs - mean) ** 2, dim=(2, 3))

    def cal_global_pool(self, matrix):
        """Concatenate global max, mean and variance of every channel.

        Takes a (batch, channels, height, width) tensor and returns a
        (batch, 3 * channels) tensor ordered as [max | mean | variance].
        """
        # Merge the spatial axes so each statistic is a single reduction.
        flat = matrix.flatten(start_dim=2)
        g_max, _ = torch.max(flat, dim=2)
        g_mean = torch.mean(flat, dim=2)
        g_var = self._reduce_var(matrix)
        return torch.cat([g_max, g_mean, g_var], 1)

    def forward(self, x1, x2):
        """Return class probabilities of shape (batch, 2) for the pair (x1, x2)."""
        out1 = self.siamese_cnn(x1)
        out2 = self.siamese_cnn(x2)
        similarity = self.cal_similarity(out1, out2)
        late_cnn_out = self.late_cnn(similarity)
        global_pool_out = self.cal_global_pool(late_cnn_out)
        return self.fcWithDropout(global_pool_out)

In [4]:
def test_SEN_classify():
    """Smoke-test ``SEN_classify`` on random data and return its predictions.

    Two random batches of shape (16, 1, 345, 128), laid out as
    (batch size, channel, height, width), are passed through a freshly
    constructed model; the model output is returned unchanged.
    """
    # Layout: batch size, channel, height, width.
    input_shape = (16, 1, 345, 128)
    first_batch = torch.rand(*input_shape)
    second_batch = torch.rand(*input_shape)
    network = SEN_classify()
    return network(first_batch, second_batch)

# test_SEN_classify()

tensor([[0.4712, 0.5288],
        [0.4263, 0.5737],
        [0.5564, 0.4436],
        [0.5494, 0.4506],
        [0.3498, 0.6502],
        [0.5229, 0.4771],
        [0.3029, 0.6971],
        [0.4495, 0.5505],
        [0.2766, 0.7234],
        [0.3330, 0.6670],
        [0.4452, 0.5548],
        [0.4032, 0.5968],
        [0.4481, 0.5519],
        [0.2997, 0.7003],
        [0.4722, 0.5278],
        [0.4708, 0.5292]], grad_fn=<SoftmaxBackward>)