### 第二階段剪枝
- Magnitude or Tylor Pruning
- Used in alarm, moaning, help, misc-sounds classification
- pruning ration: 0.85.

In [1]:
project_PATH = '/home/sail/sound_project/sound_ai_v2'
step_now = 'STEP/step_3'

use_model_in_step2 = "/home/sail/sound_project/sound_ai_v2/STEP/step_2/save_pt_model_s2/20240829_14/pruning_4C_time_2024082914_prunratio90.0/uec_4Cmodel_first_stage_prun_haacc_86.0_valacc86.0_tracc75.9765625_epoch_55_20240829150409.pt"

In [2]:
import os;
import sys;
sys.path.append(os.path.abspath(f'{project_PATH}'))

import glob;
import math;
import numpy as np;
import random;
import time;
import torch
import torch.optim as optim;
import torch.nn as nn;

In [3]:
import common.utils as U;
import common.opts as opt;
import th.resources.models as models;
import th.resources.calculator as calc;
from th.resources.pruning_tools import filter_pruning, filter_pruner;

In [4]:
import argparse
import common.tlopts as tlopts
from SharedLibs.datestring import genDataTimeStr, getDateStr
from datetime import datetime;

In [5]:
date_time = datetime.now().strftime("%Y%m%d_%H")  # %M
save_pt_model_path = f'{project_PATH}/{step_now}/save_pt_model_s3/{date_time}'
try:
    os.mkdir(save_pt_model_path)
    print(f"Folder '{save_pt_model_path}' created successfully.")
except FileExistsError:
    print(f"Folder '{save_pt_model_path}' already exists.")

Folder '/home/sail/sound_project/sound_ai_v2/STEP/step_3/save_pt_model_s3/20240829_18' already exists.


In [6]:
inp_len = 20150
sr = 20150
choose_class=[0,1,2,3,4,5,6]
PATH = f'{project_PATH}/STEP/data_v2.npz'

ch_n_class = 7
fcn_no_of_inputs = 7

In [7]:
data = np.load(PATH, allow_pickle=True) 
audio_max_value =  float((data['sounds_train']).max())*2 # the max value of the sound samples
audio_min_value =  float((data['sounds_train']).min())*2 # the max value of the sound samples
audio_max_value, audio_min_value

(3.0242209434509277, -3.1673121452331543)

In [8]:
order = [0,1,2,3,4,5,6]

def one_hot_encode(data, order):
    index_map = {number: index for index, number in enumerate(order)}
    one_hot_list = []

    for num in data:
        one_hot = [0] * len(order)
        if num in index_map:
            one_hot[index_map[num]] = 1
        one_hot_list.append(one_hot)

    return one_hot_list

In [9]:
#log file object
logObj = None;
def ChkAndCreateSingleDir(dir_path):
    if not pathlib.Path(dir_path).is_dir():
        os.mkdir(dir_path);
        print(f"'{dir_path}' folder is created.");

In [10]:
def genDataTimeStr():
    return datetime.today().strftime('%Y-%m-%d %H:%M:%S').replace('-',"").replace(' ',"").replace(':',"");

def getDateStr():
    return datetime.today().strftime('%Y-%m-%d %H').replace('-',"").replace(' ',"")#.replace(':',"");

In [11]:
class TLGenerator():
    #Generates data for Keras
    def __init__(self, samples, labels, options):
        random.seed(seed);
        #Initialization
        print(f"length of samples:{len(samples)}")
        self.data = [(samples[i], labels[i]) for i in range (0, len(samples))];
        self.opt = options;
        self.batch_size = options.batchSize;
        self.preprocess_funcs = self.preprocess_setup();
        self.mapdict = {0:1, 1:2, 2:3, 3:4, 4:5, 5:6, 6:7,}
        # dict([('52',1),('56',2),('99',3)])

    def __len__(self):
        #Denotes the number of batches per epoch
        return int(np.floor(len(self.data) / self.batch_size));
        #return len(self.samples);

    def __getitem__(self, batchIndex):
        #Generate one batch of data
        batchX, batchY = self.generate_batch(batchIndex);
        batchX = np.expand_dims(batchX, axis=1);
        batchX = np.expand_dims(batchX, axis=3);
        return batchX, batchY

    def generate_batch(self, batchIndex):
        #Generates data containing batch_size samples
        sounds = [];
        labels = [];
        indexes = None;
        for i in range(self.batch_size):
            # Training phase of BC learning
            # Select two training examples
            while True:
                sound1, label1 = self.data[random.randint(0, len(self.data) - 1)]
                sound2, label2 = self.data[random.randint(0, len(self.data) - 1)]
                if label1 != label2:
                    break
            sound1 = self.preprocess(sound1)
            sound2 = self.preprocess(sound2)

            # Mix two examples
            r = np.array(random.random())
            sound = U.mix(sound1, sound2, r, self.opt.sr).astype(np.float32)
            # print(f"sound length after U.mix is {len(sound)}")
            eye = np.eye(self.opt.nClasses)
            idx1 = label1
            idx2 = label2
            label = (eye[idx1] * r + eye[idx2] * (1 - r)).astype(np.float32)
            # label = (eye[label1] * r + eye[label2] * (1 - r)).astype(np.float32)

            #For stronger augmentation
            sound = U.random_gain(6)(sound).astype(np.float32)
            # print(f"sound length after U.random_gain is {len(sound)}")
            sounds.append(sound);
            labels.append(label);

        sounds = np.asarray(sounds);
        labels = np.asarray(labels);
        
        return sounds, labels;

    
    def preprocess_setup(self):
        funcs = []
        if self.opt.strongAugment:
            funcs += [U.random_scale(1.25)]

        funcs += [U.padding(self.opt.inputLength // 2),
                  U.random_crop(self.opt.inputLength),
                  U.regularization(audio_max_value, audio_min_value)]
        return funcs
    

    def preprocess(self, sound):
        for f in self.preprocess_funcs:
            sound = f(sound)

        return sound;

In [12]:
def getTrainGen(opt=None, split=None):
    
    dataset = np.load(opt.Data_npz_path, allow_pickle=True);
    train_sounds = []
    train_labels = []

    train_sounds = dataset['sounds_train']
    train_labels = dataset['labels_train']

    trainGen = TLGenerator(train_sounds, train_labels, opt);
    trainGen.preprocess_setup();
    return trainGen

In [13]:
def getOpts():
    parser = argparse.ArgumentParser(description='Transfer Learning for ACDNet');
    parser.add_argument('--netType', default='TLACDNet',  required=False);
    parser.add_argument('--data', default='./datasets/forOneClassModel_alarm/train_test_npz/',  required=False);
    parser.add_argument('--dataset', required=False, default='uec_iot', choices=['10']);
    parser.add_argument('--BC', default=True, action='store_true', help='BC learning');
    parser.add_argument('--strongAugment', default=True,  action='store_true', help='Add scale and gain augmentation');
    #在ipynb中，不能使用parser.parse，要改用parser.parse_known_args()
    opt, unknown = parser.parse_known_args()
    return opt

In [14]:
class Customed_ACDNetV2(nn.Module):
    def __init__(self, input_length, n_class, sr, ch_conf=None):
        super(Customed_ACDNetV2, self).__init__()
        self.input_length = input_length
        self.ch_config = ch_conf

        stride1 = 2
        stride2 = 2
        channels = 8
        k_size = (3, 3)
        n_frames = (sr / 1000) * 10  # No of frames per 10ms

        sfeb_pool_size = int(n_frames / (stride1 * stride2))
        if self.ch_config is None:
            self.ch_config = [channels,
                              channels * 8,
                              channels * 4,
                              channels * 8, channels * 8,
                              channels * 16, channels * 16,
                              channels * 32, channels * 32,
                              channels * 64, channels * 64, n_class]

        ch_confing_10 = 8 * 32  # 8 * 64
        ch_n_class = n_class
        conv1, bn1 = self.make_layers(1, self.ch_config[0], (1, 9), (1, stride1))
        conv2, bn2 = self.make_layers(self.ch_config[0], self.ch_config[1], (1, 5), (1, stride2))
        conv3, bn3 = self.make_layers(1, self.ch_config[2], k_size, padding=1)
        conv4, bn4 = self.make_layers(self.ch_config[2], self.ch_config[3], k_size, padding=1)
        conv5, bn5 = self.make_layers(self.ch_config[3], self.ch_config[4], k_size, padding=1)
        conv6, bn6 = self.make_layers(self.ch_config[4], self.ch_config[5], k_size, padding=1)
        conv7, bn7 = self.make_layers(self.ch_config[5], self.ch_config[6], k_size, padding=1)
        conv8, bn8 = self.make_layers(self.ch_config[6], self.ch_config[7], k_size, padding=1)
        conv9, bn9 = self.make_layers(ch_confing_10, ch_n_class, (1, 1))
        # fcn = nn.Linear(fcn_no_of_inputs, ch_n_class)
        fcn = nn.Linear(fcn_no_of_inputs, ch_n_class)
        nn.init.kaiming_normal_(fcn.weight, nonlinearity='sigmoid')  # Kaiming with sigmoid is equivalent to LeCun normal in Keras

        self.sfeb = nn.Sequential(
            # Start: Filter bank
            conv1, bn1, nn.ReLU(),
            conv2, bn2, nn.ReLU(),
            nn.MaxPool2d(kernel_size=(1, sfeb_pool_size))
        )

        tfeb_modules = []
        self.tfeb_width = int(((self.input_length / sr) * 1000) / 10)  # 10ms frames of audio length in seconds
        tfeb_pool_sizes = self.get_tfeb_pool_sizes(self.ch_config[1], self.tfeb_width)
        p_index = 0
        for i in [3, 4, 6, 8]:  # ,10
            tfeb_modules.extend([eval('conv{}'.format(i)), eval('bn{}'.format(i)), nn.ReLU()])
            # print('conv{}'.format(i),'bn{}'.format(i))
            if i != 3:
                tfeb_modules.extend([eval('conv{}'.format(i + 1)), eval('bn{}'.format(i + 1)), nn.ReLU()])
            # print('conv{}'.format(i+ 1),'bn{}'.format(i+ 1))

            h, w = tfeb_pool_sizes[p_index]
            # print('maxpooling hw',h, w)
            if h > 1 or w > 1:
                if i == 8:
                    break
                else:
                    tfeb_modules.append(nn.MaxPool2d(kernel_size=(h, w)))
                    # print('MaxPool2d',i)

            p_index += 1

        tfeb_modules.append(nn.Dropout(0.2))

        # tfeb_modules.extend([conv9, bn9, nn.ReLU()])


        h, w = tfeb_pool_sizes[-1]
        if h > 1 or w > 1:
            tfeb_modules.append(nn.AvgPool2d(kernel_size=(5,7))) # h, w 5,7
            # tfeb_modules.append(nn.Flatten())
        tfeb_modules.extend([nn.Flatten(), fcn])

        self.tfeb = nn.Sequential(*tfeb_modules)
        self.output = nn.Sequential(
            
            nn.Softmax(dim=1)
        )


    def forward(self, x):
        # print(f"sfeb:\n{list(self.sfeb.children())}");
        # print(f"input x shape:{x.size()}");
        """
        input dim should be input x shape:torch.Size([32, 1, 1, 30225])
        if you got input x shape:[32, 30225, 1, 1], that is wrong.
        """
        x = self.sfeb(x);
        #swapaxes
        x = x.permute((0, 2, 1, 3));
        x = self.tfeb(x);
        y = self.output[0](x);
        return y;

    def make_layers(self, in_channels, out_channels, kernel_size, stride=(1, 1), padding=0, bias=False):
        conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
                          stride=stride, padding=padding, bias=bias)
        nn.init.kaiming_normal_(conv.weight, nonlinearity='relu')  # Kaiming with ReLU
        bn = nn.BatchNorm2d(out_channels)
        return conv, bn

    def get_tfeb_pool_sizes(self, con2_ch, width):
        h = self.get_tfeb_pool_size_component(con2_ch)
        w = self.get_tfeb_pool_size_component(width)
        pool_size = []
        for (h1, w1) in zip(h, w):
            pool_size.append((h1, w1))
        # print("pool_size", pool_size)
        return pool_size

    def get_tfeb_pool_size_component(self, length):
        c = []
        index = 1
        while index <= 6:
            if length >= 2:
                if index == 6:
                    c.append(length)
                else:
                    c.append(2)
                    length = length // 2
            else:
                c.append(1)
            index += 1
        # print("c",c)
        return c

def GetCustomedACDNetModel(input_len=inp_len, nclass=ch_n_class, sr=sr, channel_config=None):
    net = Customed_ACDNetV2(input_len, nclass, sr, ch_conf=channel_config);
    return net;

In [15]:
class PruningTrainer:
    def __init__(self, opt):
        self.opt = opt;
        self.opt.channels_to_prune_per_iteration = 1;
        self.opt.finetune_epoch_per_iteration = 2;
        self.opt.lr=0.05;
        self.opt.schedule = [0.1,0.3,0.5,0.9];
        self.opt.prune_type = 1 #determine the prunning algo, 1: Magnitude Pruning ;2: tylor-pruning
        # torch.device("cuda:0" if torch.cuda.is_available() else "cpu"); #in office use 
        self.opt.device = 'cuda:0'#office
        # self.opt.device = 'mps';#home
        self.pruner = None;
        self.iterations = 0;
        self.cur_acc = 0.0;
        self.cur_iter = 1;
        self.cur_lr = self.opt.lr;
        self.net = None;
        self.criterion = torch.nn.KLDivLoss(reduction='batchmean');
        self.trainGen = getTrainGen(opt)#train_generator.setup(self.opt, self.opt.split);
        self.testX = None;
        self.testY = None;
        self.load_test_data();

    def PruneAndTrain(self):
        self.net = GetCustomedACDNetModel();
        trained_model = use_model_in_step2
        # self.net.load_state_dict(torch.load(trained_model, map_location="cuda:0")['weight']);#office
        self.net.load_state_dict(torch.load(trained_model, map_location=self.opt.device)['weight'] ,strict=False);
        self.net = self.net.to('cuda:0');#at home use apple m2
        # self.net = self.net.to(self.opt.device);
        self.pruner = filter_pruning.Magnitude(self.net, self.opt) if self.opt.prune_type == 1 else filter_pruning.Taylor(self.net, self.opt);
        print(f"pruning algorithm is {self.pruner}");
        self.validate();
        calc.summary(self.net, (1, 1, self.opt.inputLength), brief=False); # shape of one sample for inferenceing
        # exit();
        #Make sure all the layers are trainable
        for param in self.net.parameters():
            param.requires_grad = True
        self.iterations = self.estimate_pruning_iterations();
        # exit();
        for i in range(1, self.iterations):
            self.cur_iter = i;
            iter_start = time.time();
            print("\nIteration {} of {} starts..".format(i, self.iterations-1), flush=True);
            print("Ranking channels.. ", flush=True);
            prune_targets = self.get_candidates_to_prune(self.opt.channels_to_prune_per_iteration);
            # prune_targets = [(40,3)];
            print("Pruning channels: {}".format(prune_targets), flush=True);
            
            self.net = filter_pruner.prune_layers(self.net, prune_targets, self.iterations, self.opt.prune_all, self.opt.device);
            # print(self.net)
            
            calc.summary(self.net, (1, 1, self.opt.inputLength), brief=True);
            self.validate();
            print("Fine tuning {} epochs to recover from prunning iteration.".format(self.opt.finetune_epoch_per_iteration), flush=True);

            if self.cur_iter in list(map(int, np.array(self.iterations)*self.opt.schedule)):
                self.cur_lr *= 0.1;
            optimizer = optim.SGD(self.net.parameters(), lr=self.cur_lr, momentum=0.9);
            self.train(optimizer, epoches = self.opt.finetune_epoch_per_iteration);
            print("Iteration {}/{} finished in {}".format(self.cur_iter, self.iterations+1, U.to_hms(time.time()-iter_start)), flush=True);
            print("Total channels prunned so far: {}".format(i*self.opt.channels_to_prune_per_iteration), flush=True);
            self.__save_model(self.net);

        calc.summary(self.net, (1, 1, self.opt.inputLength)); # shape of one sample for inferenceing
        self.__save_model(self.net);

    def get_candidates_to_prune(self, num_filters_to_prune):
        self.pruner.reset();
        if self.opt.prune_type == 1:
            self.pruner.compute_filter_magnitude();
        else:
            self.train_epoch(rank_filters = True);
            self.pruner.normalize_ranks_per_layer();

        return self.pruner.get_prunning_plan(num_filters_to_prune);

    def estimate_pruning_iterations(self):
        # get total number of variables from all conv2d featuremaps
        prunable_count = sum(self.get_channel_list(self.opt.prune_all));
        total_count= sum(self.get_channel_list());
        #iterations_reqired = int((prunable_count * self.opt.prune_ratio) / self.opt.channels_to_prune_per_iteration);
        #prune_ratio works with the total number of channels, not only with the prunable channels. i.e. 80% or total will be pruned from total or from only features
        iterations_reqired = int((total_count * self.opt.prune_ratio) / self.opt.channels_to_prune_per_iteration);
        print('Total Channels: {}, Prunable: {}, Non-Prunable: {}'.format(total_count, prunable_count, total_count - prunable_count), flush=True);
        print('No. of Channels to prune per iteration: {}'.format(self.opt.channels_to_prune_per_iteration), flush=True);
        print('Total Channels to prune ({}%): {}'.format(int(self.opt.prune_ratio*100), int(total_count * self.opt.prune_ratio)-1), flush=True);
        print('Total iterations required: {}'.format(iterations_reqired-1), flush=True);
        return iterations_reqired;

    def get_channel_list(self, prune_all=True):
        ch_conf = [];
        if prune_all:
            for name, module in enumerate(self.net.sfeb):
                if issubclass(type(module), torch.nn.Conv2d):
                    ch_conf.append(module.out_channels);

        for name, module in enumerate(self.net.tfeb):
            if issubclass(type(module), torch.nn.Conv2d):
                ch_conf.append(module.out_channels);

        return ch_conf;

    def load_test_data(self):
        data = np.load(self.opt.Data_npz_path, allow_pickle=True);
        print(f"device is :{self.opt.device}")
        print(f"len of Y:{len(data['labels_val'])}")
        dataX = data['sounds_val'].reshape(data['sounds_val'].shape[0],1,1,data['sounds_val'].shape[1]).astype(np.float32);
        self.testX = torch.tensor(dataX).to(self.opt.device);
        self.testY = torch.tensor(one_hot_encode(data['labels_val'], order)).type(torch.float32).to(self.opt.device);

    #Calculating average prediction (10 crops) and final accuracy
    def compute_accuracy(self, y_pred, y_target):
        with torch.no_grad():

            #Reshape to shape theme like each sample comtains 10 samples, calculate mean and find the indices that has highest average value for each sample
            y_pred = (y_pred.reshape(y_pred.shape[0]//self.opt.nCrops, self.opt.nCrops, y_pred.shape[1])).mean(dim=1).argmax(dim=1);
            y_target = (y_target.reshape(y_target.shape[0]//self.opt.nCrops, self.opt.nCrops, y_target.shape[1])).mean(dim=1).argmax(dim=1);
            # if self.opt.device == "mps":
            #     y_target = y_target.cpu() #use apple m2, in office use cuda
            acc = (((y_pred==y_target)*1).float().mean()*100).item();
            # valLossFunc = torch.nn.KLDivLoss();
            loss = self.criterion(y_pred.float().log(), y_target.float()).item();
            # loss = 0.0;
        return acc, loss;

    def train(self, optimizer = None, epoches=10):
        for i in range(epoches):
            # print("Epoch: ", i);
            self.train_epoch(optimizer);
            self.validate();
        print("Finished fine tuning.", flush=True);

    def train_batch(self, optimizer, batch, label, rank_filters):
        self.net.zero_grad()
        if rank_filters:
            output = self.pruner.forward(batch);
            if self.opt.device == "cuda":
                label = label.cpu() #use apple m2, in office use cuda
                output = output.cpu() #use apple m2, in office use cuda
            self.criterion(output.log(), label).backward();
        else:
            self.criterion(self.net(batch), label).backward();
            optimizer.step();

    def train_epoch(self, optimizer = None, rank_filters = False):
        if rank_filters is False and optimizer is None:
            print('Please provide optimizer to train_epoch', flush=True);
            exit();
        n_batches = math.ceil(len(self.trainGen.data)/self.opt.batchSize);
        for b_idx in range(n_batches):
            x,y = self.trainGen.__getitem__(b_idx)
            # dataX = x.reshape(x.shape[0],1,1,x.shape[1]).astype(np.float32);
            # x = torch.tensor(dataX).to(self.opt.device);
            x = torch.tensor(np.moveaxis(x, 3, 1)).to(self.opt.device);
            y = torch.tensor(y).to(self.opt.device);
            self.train_batch(optimizer, x, y, rank_filters);

    def validate(self):
        self.net.eval();
        with torch.no_grad():
            y_pred = None;
            batch_size = (self.opt.batchSize//self.opt.nCrops)*self.opt.nCrops;
            for idx in range(math.ceil(len(self.testX)/batch_size)):
                x = self.testX[idx*batch_size : (idx+1)*batch_size];

                x = x.type(torch.cuda.FloatTensor);
                scores = self.net(x);
                y_pred = scores.data if y_pred is None else torch.cat((y_pred, scores.data));

            acc, loss = self.compute_accuracy(y_pred, self.testY);
        print('Current Testing Performance - Val: Loss {:.3f}  Acc(top1) {:.3f}%'.format(loss, acc), flush=True);
        self.cur_acc = acc;
        self.net.train();
        return acc, loss;

    def __save_model(self, net):
        net.ch_config = self.get_channel_list();
        dir = os.getcwd();
        fname = self.opt.model_name;
        if os.path.isfile(fname):
            os.remove(fname);
        torch.save({'weight':net.state_dict(), 'config':net.ch_config}, fname);
        print(f" --- save model at {self.opt.model_name} --- ")


In [16]:
seed = 1123;
random.seed(seed);
np.random.seed(seed);
torch.manual_seed(seed);
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed);
torch.backends.cudnn.deterministic = True;
torch.backends.cudnn.benchmark = False;
###########################################

In [17]:
def main():
    opt = getOpts()
    opt.Data_npz_path = PATH
    #Learning settings
    opt.batchSize = 32;
    #Basic Net Settings
    opt.prune_ratio = 0.9
    opt.prune_all = True;
    opt.nClasses = ch_n_class
    opt.nFolds = 1;
    opt.split = [i for i in range(1, opt.nFolds + 1)];
    opt.inputLength = inp_len;
    #Test data
    opt.nCrops = 2;
    opt.sr = sr;
    opt.trainer = None
    opt.device="cuda:0"
    # if torch.backends.mps.is_available():
    #     opt.device="mps"; #for apple m2 gpu
    # elif torch.cuda.is_available():
    #     opt.device="cuda:0"; #for nVidia gpu
    # else:
    #     opt.device="cpu"

    save_dir = f"{save_pt_model_path}/uec_secondPrun_time_{getDateStr()}_prunratio{opt.prune_ratio*100}/"
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
        
    model_name = "model_second_stage_prun_ratio0.9_{}.pt".format(genDataTimeStr());
    opt.model_name = save_dir + model_name;
    
    # valid_path = False;
    print("Initializing PruneAndTrain Object.....")
    trainer = PruningTrainer(opt=opt)
    print("Start to pruning.....")
    trainer.PruneAndTrain();

In [18]:
main()

Initializing PruneAndTrain Object.....
length of samples:1018
device is :cuda:0
len of Y:100
Start to pruning.....
pruning algorithm is <th.resources.pruning_tools.filter_pruning.Magnitude object at 0x7f0092cf59c0>


  self.net.load_state_dict(torch.load(trained_model, map_location=self.opt.device)['weight'] ,strict=False);


Current Testing Performance - Val: Loss nan  Acc(top1) 86.000%
+----------------------------------------------------------------------------+
+                           Pytorch Model Summary                            +
------------------------------------------------------------------------------
   Layer (type)       Input Shape      Output Shape    Param #      FLOPS #
       Conv2d-1     (1, 1, 20150)     (8, 1, 10071)         72      725,112
  BatchNorm2d-2     (8, 1, 10071)     (8, 1, 10071)         16            0
         ReLu-3     (8, 1, 10071)     (8, 1, 10071)          0       80,568
       Conv2d-4     (8, 1, 10071)     (64, 1, 5034)      2,560   12,887,040
  BatchNorm2d-5     (64, 1, 5034)     (64, 1, 5034)        128            0
         ReLu-6     (64, 1, 5034)     (64, 1, 5034)          0      322,176
    MaxPool2d-7     (64, 1, 5034)      (64, 1, 100)          0      320,000
      Permute-8      (64, 1, 100)      (1, 64, 100)          0            0
       Conv2d-9 

RuntimeError: Given input size: (7x4x12). Calculated output size: (7x0x1). Output size is too small