In [7]:
# Download 下载数据

!pip install --upgrade gdown

# Main link
!gdown --id '1o6Ag-G3qItSmYhTheX6DYiuyNzWyHyTc' --output libriphone.zip

# Backup link 1
# !gdown --id '1R1uQYi4QpX0tBfUWt2mbZcncdBsJkxeW' --output libriphone.zip

# Bqckup link 2
# !wget -O libriphone.zip "https://www.dropbox.com/s/wqww8c5dbrl2ka9/libriphone.zip?dl=1"

!unzip -q libriphone.zip
!ls libriphone

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gdown
  Downloading gdown-4.7.1-py3-none-any.whl (15 kB)
Installing collected packages: gdown
  Attempting uninstall: gdown
    Found existing installation: gdown 4.6.4
    Uninstalling gdown-4.6.4:
      Successfully uninstalled gdown-4.6.4
Successfully installed gdown-4.7.1
Downloading...
From (uriginal): https://drive.google.com/uc?id=1o6Ag-G3qItSmYhTheX6DYiuyNzWyHyTc
From (redirected): https://drive.google.com/uc?id=1o6Ag-G3qItSmYhTheX6DYiuyNzWyHyTc&confirm=t&uuid=ee249a4c-3e7c-4277-ba28-3c7778f559fa
To: /content/libriphone.zip
100% 479M/479M [00:06<00:00, 75.3MB/s]
feat  test_split.txt  train_labels.txt	train_split.txt


In [67]:
# Import packages import一堆python库

import os
import random
import pandas as pd
import torch
from tqdm import tqdm

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F

import gc

import numpy as np



In [68]:
# Def functions 定义一堆函数

#fix seed 固定随机数种子和算法
def same_seeds(seed):
    """Seed every random number generator this notebook uses.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and, when available,
    all CUDA devices), and switches cuDNN to deterministic mode.

    Args:
        seed: integer seed applied to all generators.
    """
    # Python's own RNG was previously left unseeded.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# 和HW1中的same_seeds几乎一模一样


# load data 载入数据
def load_feat(path):
    """Return the object stored at ``path``, deserialised with ``torch.load``."""
    return torch.load(path)

#作用：移位
def shift(x, n):
    """Shift the rows of a 2-D tensor by ``n`` positions, padding with the edge row.

    Args:
        x: tensor indexed as (row, feature).
        n: shift amount. ``n > 0`` drops the first ``n`` rows and appends ``n``
            copies of the last row; ``n < 0`` drops the last ``-n`` rows and
            prepends ``-n`` copies of the first row; ``n == 0`` returns ``x`` itself.

    Returns:
        The shifted tensor (or ``x`` unchanged when ``n == 0``).
    """
    if n == 0:
        return x

    if n > 0:
        # tensor.repeat(n, 1) tiles the row n times along dim 0 and once along dim 1.
        padding = x[-1].repeat(n, 1)
        kept = x[n:]
        pieces = (kept, padding)
    else:
        padding = x[0].repeat(-n, 1)
        kept = x[:n]
        pieces = (padding, kept)

    # torch.cat joins the pieces along dim 0 (rows).
    return torch.cat(pieces, dim=0)

    #torch.cat 沿着某一维度（0代表纵轴/列，1代表横轴/行）合成张量


# NOTE(review): leftover scratch tensor from experimenting with tensor.repeat();
# nothing below reads it (concat_feat's parameter shadows the name).
x = torch.tensor([1, 2, 3])
def concat_feat(x, concat_n):
    """Concatenate each frame with its neighbouring frames.

    Args:
        x: tensor indexed as (seq_len, feature_dim).
        concat_n: total number of frames joined per position; must be odd.

    Returns:
        ``x`` itself when ``concat_n`` is 1, otherwise a tensor viewed as
        (seq_len, concat_n * feature_dim). Positions past either end of the
        sequence reuse the edge frame (see ``shift``).
    """
    # An odd count is required so there is a single centre frame.
    assert concat_n % 2 == 1 # n must be odd

    # concat_n == 1: nothing to concatenate.
    if concat_n < 2:
        return x

    n_frames, n_feats = x.size(0), x.size(1)
    # Tile the features concat_n times, then expose the copies as the leading axis.
    stacked = x.repeat(1, concat_n)
    stacked = stacked.view(n_frames, concat_n, n_feats).permute(1, 0, 2) # concat_n, seq_len, feature_dim
    centre = concat_n // 2
    for offset in range(1, centre + 1):
        # Copy centre+offset holds the frames `offset` steps ahead,
        # copy centre-offset holds the frames `offset` steps behind.
        stacked[centre + offset, :] = shift(stacked[centre + offset], offset)
        stacked[centre - offset, :] = shift(stacked[centre - offset], -offset)

    return stacked.permute(1, 0, 2).view(n_frames, concat_n * n_feats)


#预处理数据
def preprocess_data(split, feat_dir, phone_path, concat_nframes, train_ratio=0.8, train_val_seed=1337):
    """Load one data split into a frame-level feature matrix (plus labels).

    Args:
        split: 'train', 'val' or 'test'.
        feat_dir: directory holding the 'train' and 'test' feature sub-folders
            with one ``<utterance>.pt`` file per utterance.
        phone_path: directory holding ``train_labels.txt``, ``train_split.txt``
            and ``test_split.txt``.
        concat_nframes: odd number of frames concatenated per position.
        train_ratio: fraction of the shuffled training utterances assigned to
            'train'; the remainder becomes 'val'.
        train_val_seed: seed for the train/val shuffle so that both splits are
            derived from the same permutation.

    Returns:
        ``(X, y)`` for 'train' and 'val', ``X`` alone for 'test'. ``X`` has
        ``39 * concat_nframes`` columns and one row per frame; ``y`` is a
        ``torch.long`` vector with one label per frame.

    Raises:
        ValueError: if ``split`` is not one of the three supported values.
    """
    if split not in ('train', 'val', 'test'):
        raise ValueError('Invalid \'split\' argument for dataset: PhoneDataset!')

    class_num = 41 # NOTE: pre-computed, should not need change
    has_labels = split != 'test'
    # 'val' is carved out of the training files, so it reads from the 'train' folder.
    mode = 'train' if has_labels else 'test'

    label_dict = {}
    if has_labels:
        # Each line: "<utterance-id> <label> <label> ...".
        with open(os.path.join(phone_path, f'{mode}_labels.txt')) as label_file:
            for line in label_file:
                line = line.strip('\n').split(' ')
                label_dict[line[0]] = [int(p) for p in line[1:]]

    with open(os.path.join(phone_path, f'{mode}_split.txt')) as split_file:
        usage_list = split_file.readlines()

    if has_labels:
        # Split training and validation data: same seed -> same permutation for
        # both calls, so the two splits never overlap.
        random.seed(train_val_seed)
        random.shuffle(usage_list)
        percent = int(len(usage_list) * train_ratio)
        usage_list = usage_list[:percent] if split == 'train' else usage_list[percent:]

    usage_list = [line.strip('\n') for line in usage_list]
    print('[Dataset] - # phone classes: ' + str(class_num) + ', number of utterances for ' + split + ': ' + str(len(usage_list)))

    # Pre-allocate an upper bound on the number of frames; trimmed to `idx` below.
    max_len = 3000000
    X = torch.empty(max_len, 39 * concat_nframes)
    if has_labels:
        y = torch.empty(max_len, dtype=torch.long)

    idx = 0
    for fname in tqdm(usage_list):
        feat = load_feat(os.path.join(feat_dir, mode, f'{fname}.pt'))
        cur_len = len(feat)
        feat = concat_feat(feat, concat_nframes)

        # Features must be stored for EVERY split. Previously this assignment was
        # nested under the labelled-only branch, so the test split returned
        # uninitialised memory from torch.empty().
        X[idx: idx + cur_len, :] = feat
        if has_labels:
            y[idx: idx + cur_len] = torch.LongTensor(label_dict[fname])

        idx += cur_len

    X = X[:idx, :]
    if has_labels:
        y = y[:idx]

    print(f'[INFO] {split} set')
    print(X.shape)
    if has_labels:
        print(y.shape)
        return X, y
    return X


"""
output>>

[Dataset] - # phone classes: 41, number of utterances for train: 3428
3428it [00:01, 2432.89it/s]
[INFO] train set
torch.Size([2116368, 39]) X.shape
torch.Size([2116368])   Y.shape


[Dataset] - # phone classes: 41, number of utterances for val: 858
858it [00:00, 2414.58it/s]
[INFO] val set
torch.Size([527790, 39])
torch.Size([527790])
"""



'\noutput>>\n\n[Dataset] - # phone classes: 41, number of utterances for train: 3428\n3428it [00:01, 2432.89it/s]\n[INFO] train set\ntorch.Size([2116368, 39]) X.shape\ntorch.Size([2116368])   Y.shape\n\n\n[Dataset] - # phone classes: 41, number of utterances for val: 858\n858it [00:00, 2414.58it/s]\n[INFO] val set\ntorch.Size([527790, 39])\ntorch.Size([527790])\n'

In [69]:
# Dataset 

class LibriDataset(Dataset):
    """Frame-level dataset that optionally carries labels.

    With labels, indexing yields ``(features, label)``; without labels
    (``y=None``) it yields the features alone.
    """

    def __init__(self, X, y=None):
        """Store the feature tensor and, when given, the labels as a LongTensor."""
        self.data = X
        self.label = None if y is None else torch.LongTensor(y)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` (paired with its label when available)."""
        sample = self.data[idx]
        if self.label is None:
            return sample
        return sample, self.label[idx]

    def __len__(self):
        """Number of samples, i.e. the length of the feature tensor."""
        return len(self.data)

# https://zhuanlan.zhihu.com/p/87786297
# __init__  初始化
# __getitem__ 允许用idx调用项 xx[123]
# __len__   调用len()

In [70]:
# nnModule 

class BasicBlock(nn.Module):
    """Three Linear+ReLU stages: input_dim -> 2*input_dim -> input_dim-5 -> output_dim."""

    def __init__(self, input_dim, output_dim):
        """Build the stack; ``input_dim`` must exceed 5 for the middle width to be positive."""
        super().__init__()

        wide_dim = 2 * input_dim
        narrow_dim = input_dim - 5
        # Layer order (and therefore the state_dict keys block.0/2/4) is fixed.
        layers = [
            nn.Linear(input_dim, wide_dim),
            nn.ReLU(),
            nn.Linear(wide_dim, narrow_dim),
            nn.ReLU(),
            nn.Linear(narrow_dim, output_dim),
            nn.ReLU(),
        ]
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stacked layers to ``x``."""
        return self.block(x)


class Classifier(nn.Module):
    """Stack of BasicBlocks followed by a linear output layer.

    The network is: one BasicBlock mapping ``input_dim`` to ``hidden_dim``,
    then ``hidden_layers`` BasicBlocks of width ``hidden_dim``, then a
    Linear layer producing ``output_dim`` logits.
    """

    def __init__(self, input_dim, output_dim=41, hidden_layers=1, hidden_dim=256):
        """Assemble the blocks in order and wrap them in ``self.fc``."""
        super().__init__()

        stages = [BasicBlock(input_dim, hidden_dim)]
        # Explicit loop instead of a list comprehension; construction order is unchanged.
        for _ in range(hidden_layers):
            stages.append(BasicBlock(hidden_dim, hidden_dim))
        stages.append(nn.Linear(hidden_dim, output_dim))

        self.fc = nn.Sequential(*stages)

    def forward(self, x):
        """Return the logits for ``x``."""
        return self.fc(x)

In [71]:
# Configurations: every tunable value used by the cells below

# device: use the first CUDA GPU when one is available, otherwise the CPU
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(f'DEVICE: {device}')
# output >> DEVICE: cuda:0


# data parameters
concat_nframes = 7              # the number of frames to concat with, n must be odd (total 2k+1 = n frames)
train_ratio = 0.8               # the ratio of data used for training, the rest will be used for validation

# training parameters
seed = 114514                        # random seed
batch_size = 256                # batch size
num_epoch = 5                   # the number of training epoch
learning_rate = 0.001          # learning rate
model_path = './model.ckpt'     # the path where the checkpoint will be saved

# .ckpt is just a naming convention for the saved file; any other suffix works too


# model parameters
input_dim = 39 * concat_nframes # the input dim of the model, you should not change the value
hidden_layers = 3               # the number of hidden layers
hidden_dim = 256                # the hidden dim
     



     

DEVICE: cuda:0


In [72]:
# Dataloader: build the train/validation datasets and their loaders

# preprocess data (both calls share the default shuffle seed, so the splits are disjoint)
train_X, train_y = preprocess_data(
    split='train',
    feat_dir='./libriphone/feat',
    phone_path='./libriphone',
    concat_nframes=concat_nframes,
    train_ratio=train_ratio,
)
val_X, val_y = preprocess_data(
    split='val',
    feat_dir='./libriphone/feat',
    phone_path='./libriphone',
    concat_nframes=concat_nframes,
    train_ratio=train_ratio,
)

# get dataset
train_set = LibriDataset(train_X, train_y)
val_set = LibriDataset(val_X, val_y)

# Drop the notebook-level names for the raw tensors (the datasets keep their
# own references) and run the garbage collector.
del train_X, train_y, val_X, val_y
gc.collect()

# get dataloader: shuffle only the training data
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

[Dataset] - # phone classes: 41, number of utterances for train: 3428


3428it [00:04, 766.77it/s]


[INFO] train set
torch.Size([2116368, 273])
torch.Size([2116368])
[Dataset] - # phone classes: 41, number of utterances for val: 858


858it [00:00, 870.28it/s]


[INFO] val set
torch.Size([527790, 273])
torch.Size([527790])


In [73]:
# Seed all RNGs before the model is created so weight initialisation is repeatable
same_seeds(seed)

# create model, define a loss function, and optimizer
model = Classifier(
    input_dim=input_dim,
    hidden_layers=hidden_layers,
    hidden_dim=hidden_dim,
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0.01,
)
# Alternative that was tried:
# optimizer = torch.optim.Rprop(model.parameters(), lr=learning_rate , etas=(0.5, 1.2), step_sizes=(1e-06, 50))

In [74]:
# Training loop: train for num_epoch epochs, validating after each one

best_acc = 0.0
for epoch in range(num_epoch):
    # Per-epoch running totals: correct-prediction counts and summed batch losses.
    train_acc, train_loss = 0.0, 0.0
    val_acc, val_loss = 0.0, 0.0

    # training
    model.train() # set the model to training mode
    for features, labels in tqdm(train_loader):
        features = features.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(features)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # index of the class with the highest score for each frame
        train_pred = outputs.max(dim=1).indices
        train_acc += (train_pred.detach() == labels.detach()).sum().item()
        train_loss += loss.item()

    # Without validation data only the training metrics can be reported.
    if len(val_set) == 0:
        print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}'.format(
            epoch + 1, num_epoch, train_acc/len(train_set), train_loss/len(train_loader)
        ))
        continue

    # validation
    model.eval() # set the model to evaluation mode
    with torch.no_grad():
        for features, labels in tqdm(val_loader):
            features = features.to(device)
            labels = labels.to(device)
            outputs = model(features)
            loss = criterion(outputs, labels)

            # index of the class with the highest score for each frame
            val_pred = outputs.max(dim=1).indices
            val_acc += (val_pred.cpu() == labels.cpu()).sum().item()
            val_loss += loss.item()

    print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} loss: {:3.6f}'.format(
        epoch + 1, num_epoch, train_acc/len(train_set), train_loss/len(train_loader), val_acc/len(val_set), val_loss/len(val_loader)
    ))

    # if the model improves, save a checkpoint at this epoch
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), model_path)
        print('saving model with acc {:.3f}'.format(best_acc/len(val_set)))

# if not validating, save the last epoch
if len(val_set) == 0:
    torch.save(model.state_dict(), model_path)
    print('saving model at last epoch')

100%|██████████| 8268/8268 [01:03<00:00, 130.08it/s]
100%|██████████| 2062/2062 [00:05<00:00, 392.03it/s]


[001/005] Train Acc: 0.491608 Loss: 1.766122 | Val Acc: 0.554162 loss: 1.526807
saving model with acc 0.554


100%|██████████| 8268/8268 [01:05<00:00, 126.93it/s]
100%|██████████| 2062/2062 [00:05<00:00, 392.42it/s]


[002/005] Train Acc: 0.581947 Loss: 1.408136 | Val Acc: 0.584755 loss: 1.392214
saving model with acc 0.585


100%|██████████| 8268/8268 [01:03<00:00, 130.56it/s]
100%|██████████| 2062/2062 [00:06<00:00, 342.10it/s]


[003/005] Train Acc: 0.606754 Loss: 1.307156 | Val Acc: 0.607141 loss: 1.306707
saving model with acc 0.607


100%|██████████| 8268/8268 [01:03<00:00, 129.95it/s]
100%|██████████| 2062/2062 [00:05<00:00, 350.46it/s]


[004/005] Train Acc: 0.619047 Loss: 1.255003 | Val Acc: 0.612105 loss: 1.287203
saving model with acc 0.612


100%|██████████| 8268/8268 [01:04<00:00, 128.05it/s]
100%|██████████| 2062/2062 [00:05<00:00, 391.90it/s]

[005/005] Train Acc: 0.627658 Loss: 1.220300 | Val Acc: 0.616095 loss: 1.270588
saving model with acc 0.616





In [75]:
del train_loader, val_loader

# The two DataLoader objects are no longer needed once training has finished.

gc.collect()

# gc is Python's garbage-collector module; collect() with no argument runs a full collection.
'''
gc.collect(generation=2)
若被调用时不包含参数，则启动完全的垃圾回收。可选的参数 generation 可以是一个整数，指明需要回收哪一代（从 0 到 2 ）的垃圾。
当参数 generation 无效时，会引发 ValueError 异常。返回发现的不可达对象的数目。

每当运行完整收集或最高代 (2) 收集时，为多个内置类型所维护的空闲列表会被清空。 由于特定类型特别是 float 的实现，
在某些空闲列表中并非所有项都会被释放。
'''

'\ngc.collect(generation=2)\n若被调用时不包含参数，则启动完全的垃圾回收。可选的参数 generation 可以是一个整数，指明需要回收哪一代（从 0 到 2 ）的垃圾。\n当参数 generation 无效时，会引发 ValueError 异常。返回发现的不可达对象的数目。\n\n每当运行完整收集或最高代 (2) 收集时，为多个内置类型所维护的空闲列表会被清空。 由于特定类型特别是 float 的实现，\n在某些空闲列表中并非所有项都会被释放。\n'

In [76]:
# Test stage: predict a class for every frame of the test split

# load data
test_X = preprocess_data(split='test', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes)
test_set = LibriDataset(test_X, None)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

# Restore the checkpoint written during training. The in-memory model holds the
# weights of the LAST epoch, which is not necessarily the best-validation one
# that was saved to model_path.
model.load_state_dict(torch.load(model_path, map_location=device))

# Collect per-batch predictions and join them once at the end; concatenating
# inside the loop re-copies the whole array on every batch.
batch_preds = []

model.eval()
with torch.no_grad():
    for features in tqdm(test_loader):
        features = features.to(device)

        outputs = model(features)

        _, test_pred = torch.max(outputs, 1) # get the index of the class with the highest probability
        batch_preds.append(test_pred.cpu().numpy())

# The leading empty array keeps `pred` defined (and empty) even when the loader
# yields no batches, matching the previous behaviour.
pred = np.concatenate([np.array([], dtype=np.int32)] + batch_preds, axis=0)


[Dataset] - # phone classes: 41, number of utterances for test: 1078


1078it [00:02, 392.03it/s]


[INFO] test set
torch.Size([646268, 273])


100%|██████████| 2525/2525 [00:05<00:00, 496.96it/s]


In [77]:
# Write one "Id,Class" row per predicted frame, in prediction order.
with open('prediction.csv', 'w') as f:
    f.write('Id,Class\n')
    f.writelines('{},{}\n'.format(i, y) for i, y in enumerate(pred))