In [1]:
import torch
%matplotlib inline

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MultiLabelBinarizer
import os
import sys
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
from kddirkit.networks.models import BaselineModel, CNNModel

from ast import literal_eval
from torch import nn
import numpy as np
from torch.nn import functional as F
""
sns.set_context("notebook", font_scale=1.5)
plt.rcParams['figure.figsize'] = (17, 5)

import platform
if 'Windows' in platform.platform():
    ROOT_PATH = "D:/PycharmProjects/HMAN"
else:
    ROOT_PATH = "/home/xkliu/PycharmProjects/HMAN"
RAW_DATA_PATH = ROOT_PATH  + "/raw_data"
DATA_PATH = ROOT_PATH + "/data"
os.chdir(ROOT_PATH)
sys.path.append("./")

from kddirkit.utils import utils


In [2]:
import logging
import torch.nn as nn
import numpy as np

from kddirkit.networks.encoders import SentenceEncoder
from kddirkit.config import *
# from kddirkit.dataloaders import LoadNYT, LoadHierData
from kddirkit.frameworks import Trainer
from kddirkit.losses.FocalLoss import FocalLoss

project_name = 'HMAN'

logger = logging.getLogger(project_name)

## 1. Load Data

In [3]:
track_dtype = {'track_id': int, 'album_id': int, 'album_type': str, 'artist_id': int, 'set_split': str,
               'set_subset': str, 'track_genre_top': str, 'track_genres': str, 'track_genres_all': str,
               'track_title': str}
genres_converters = {'track_genres': literal_eval, 'track_genres_all': literal_eval}
medium_data = pd.read_csv(RAW_DATA_PATH + '/medium_data.csv', converters=genres_converters)

In [5]:
medium_data_train = pd.read_csv(RAW_DATA_PATH + '/medium_data_train.csv', converters=genres_converters)
medium_data_test = pd.read_csv(RAW_DATA_PATH + '/medium_data_test.csv', converters=genres_converters)
medium_data_val = pd.read_csv(RAW_DATA_PATH + '/medium_data_val.csv', converters=genres_converters)

In [6]:
medium_instance_triple = np.load(DATA_PATH + '/' + 'medium_instance_triple.npy')
medium_instance_scope = np.load(DATA_PATH + '/' + 'medium_instance_scope.npy')
medium_label = np.load(DATA_PATH + '/' + 'medium_label.npy')
medium_instance_triple

array([['1', '1', '21'],
       ['6', '6', '10'],
       ['61', '54', '17'],
       ...,
       ['22936', '22050', '17'],
       ['22937', '7820', '38'],
       ['22940', '24357', '12']], dtype='<U11')

In [7]:
medium_train_instance_triple = np.load(DATA_PATH + '/' + 'medium_train_instance_triple.npy')
medium_train_instance_scope = np.load(DATA_PATH + '/' + 'medium_train_instance_scope.npy')
medium_train_label = np.load(DATA_PATH + '/' + 'medium_train_label.npy')
medium_train_label

array([21, 21, 21, ..., 17, 38, 12])

In [8]:
medium_val_instance_triple = np.load(DATA_PATH + '/' + 'medium_val_instance_triple.npy')
medium_val_instance_scope = np.load(DATA_PATH + '/' + 'medium_val_instance_scope.npy')
medium_val_label = np.load(DATA_PATH + '/' + 'medium_val_label.npy')

In [9]:
medium_test_entity_pair = np.load(DATA_PATH + '/' + 'medium_test_entity_pair.npy')
medium_test_entity_scope = np.load(DATA_PATH + '/' + 'medium_test_entity_scope.npy')
medium_test_label = np.load(DATA_PATH + '/' + 'medium_test_label.npy')

In [10]:
medium_label_transform = np.load(DATA_PATH + '/' + 'medium_label_transform.npy')
medium_train_label_transform = np.load(DATA_PATH + '/' + 'medium_train_label_transform.npy')
medium_val_label_transform = np.load(DATA_PATH + '/' + 'medium_val_label_transform.npy')
medium_test_label_transform = np.load(DATA_PATH + '/' + 'medium_test_label_transform.npy')
medium_label_bottom_transform = np.load(DATA_PATH + '/' + 'medium_label_bottom_transform.npy')
medium_train_label_bottom_transform = np.load(DATA_PATH + '/' + 'medium_train_label_bottom_transform.npy')
medium_val_label_bottom_transform = np.load(DATA_PATH + '/' + 'medium_val_label_bottom_transform.npy')
medium_test_label_bottom_transform = np.load(DATA_PATH + '/' + 'medium_test_label_bottom_transform.npy')

In [11]:
col_name = ['track_id', 'album_id', 'album_type', 'artist_id', 'set_split', 'set_subset', 'track_genres_top', 'track_genre', 'track_genres_all']
medium_data_train_sort = pd.read_csv(RAW_DATA_PATH + '/' + 'medium_data_train_sort.txt', sep ='-----',  skiprows =1, names  = col_name)
medium_data_val_sort = pd.read_csv(RAW_DATA_PATH + '/' + 'medium_data_VAL_sort.txt', sep = '-----',  skiprows =1, names  = col_name)
medium_data_test_sort = pd.read_csv(RAW_DATA_PATH + '/' + 'medium_data_test_sort.txt', sep = '-----', skiprows =1, names  = col_name)

  medium_data_train_sort = pd.read_csv(RAW_DATA_PATH + '/' + 'medium_data_train_sort.txt', sep ='-----',  skiprows =1, names  = col_name)
  medium_data_val_sort = pd.read_csv(RAW_DATA_PATH + '/' + 'medium_data_VAL_sort.txt', sep = '-----',  skiprows =1, names  = col_name)
  medium_data_test_sort = pd.read_csv(RAW_DATA_PATH + '/' + 'medium_data_test_sort.txt', sep = '-----', skiprows =1, names  = col_name)


In [12]:
medium_data

Unnamed: 0,track_id,album_id,album_type,artist_id,set_split,set_subset,track_genre_top,track_genres,track_genres_all,track_title
0,2,1,Album,1,training,small,Hip-Hop,[21],[21],Food
1,5,1,Album,1,training,small,Hip-Hop,[21],[21],This World
2,10,6,Album,6,training,small,Pop,[10],[10],Freeway
3,140,61,Album,54,training,small,Folk,[17],[17],Queen Of The Wires
4,141,60,Album,54,training,small,Folk,[17],[17],Ohio
...,...,...,...,...,...,...,...,...,...,...
24995,155297,22935,Album,24354,training,medium,Instrumental,"[18, 107, 1235]","[107, 18, 1235]",Nebula Reborn
24996,155298,22936,Album,22050,training,medium,Folk,"[17, 103]","[17, 103]",An Idiot Abroad
24997,155306,22936,Album,22050,training,medium,Folk,"[17, 103]","[17, 103]",Tiny Man
24998,155307,22937,Live Performance,7820,training,medium,Experimental,[1],"[1, 38]",Kolka


In [13]:
medium_data_train_sort

Unnamed: 0,track_id,album_id,album_type,artist_id,set_split,set_subset,track_genres_top,track_genre,track_genres_all
0,2,1,Album,1,training,small,Hip-Hop,[21],[21]
1,5,1,Album,1,training,small,Hip-Hop,[21],[21]
2,3,1,Album,1,training,medium,Hip-Hop,[21],[21]
3,134,1,Album,1,training,medium,Hip-Hop,[21],[21]
4,10666,1,Album,1,training,medium,Hip-Hop,[21],[21]
...,...,...,...,...,...,...,...,...,...
19917,155297,22935,Album,24354,training,medium,Instrumental,"[18, 107, 1235]","[107, 18, 1235]"
19918,155298,22936,Album,22050,training,medium,Folk,"[17, 103]","[17, 103]"
19919,155306,22936,Album,22050,training,medium,Folk,"[17, 103]","[17, 103]"
19920,155307,22937,Live Performance,7820,training,medium,Experimental,[1],"[1, 38]"


In [14]:
# Load metadata and features.
tracks = utils.load(RAW_DATA_PATH + '/fma_metadata/tracks.csv')
genres = utils.load(RAW_DATA_PATH + '/fma_metadata/genres.csv')
features = utils.load(RAW_DATA_PATH + '/fma_metadata/features.csv')
echonest = utils.load(RAW_DATA_PATH + '/fma_metadata/echonest.csv')

In [15]:
medium_data_train_sort.track_id

0             2
1             5
2             3
3           134
4         10666
          ...  
19917    155297
19918    155298
19919    155306
19920    155307
19921    155314
Name: track_id, Length: 19922, dtype: int64

In [16]:
small = tracks['set', 'subset'] <= 'medium'

train = tracks['set', 'split'] == 'training'
small = tracks['set', 'subset'] <= 'medium'

train = tracks['set', 'split'] == 'training'
val = tracks['set', 'split'] == 'validation'
test = tracks['set', 'split'] == 'test'

y_train = tracks.loc[medium_data_train_sort.track_id, ('track', 'genre_top')]
y_val = tracks.loc[medium_data_val_sort.track_id, ('track', 'genre_top')]
y_test = tracks.loc[medium_data_test_sort.track_id, ('track', 'genre_top')]
X_train = features.loc[medium_data_train_sort.track_id, 'mfcc']
X_val= features.loc[medium_data_val_sort.track_id, 'mfcc']
X_test = features.loc[medium_data_test_sort.track_id, 'mfcc']

print('{} training examples, {} testing examples'.format(y_train.size, y_test.size))
print('{} features, {} classes'.format(X_train.shape[1], np.unique(y_train).size))

19922 training examples, 2573 testing examples
140 features, 16 classes


In [17]:
from sklearn.neural_network import MLPClassifier

# Be sure training samples are shuffled.
X_train, y_train = skl.utils.shuffle(X_train, y_train, random_state=42)


X_train_np = np.array(X_train).astype('float32')
X_test_np = np.array(X_test).astype('float32')
X_val_np = np.array(X_val).astype('float32')

y_train_np = np.argmax(pd.get_dummies(y_train).to_numpy(), axis=1)
y_test_np = np.argmax(pd.get_dummies(y_test).to_numpy(), axis = 1)
y_val_np = np.argmax(pd.get_dummies(y_val).to_numpy(), axis = 1)

# Standardize features by removing the mean and scaling to unit variance.
scaler = skl.preprocessing.StandardScaler(copy=False)
scaler.fit_transform(X_train)
scaler.transform(X_test)

# Support vector classification.




array([[ 2.68300051,  0.75385087,  1.75008045, ..., -0.39059965,
        -0.66829634, -0.62860366],
       [-0.0364069 , -0.41132161, -0.34856613, ...,  0.37020135,
         0.28991045,  0.91080142],
       [-0.09969836, -0.48945548,  0.04735517, ...,  0.17109902,
         0.14077713,  0.01436047],
       ...,
       [-0.21710572, -0.61654383, -0.19618588, ...,  0.55899703,
         1.18222817,  0.72873298],
       [-0.40553948, -0.38936326, -0.36861813, ..., -0.29028054,
        -0.16343454, -0.19068153],
       [-0.27282894, -0.56695434, -0.14396324, ..., -1.12420629,
        -0.09533861, -0.44360725]])

In [18]:
# clf = MLPClassifier()
# clf.fit(X_train_np, y_train_np)
# score = clf.score(X_test_np, y_test_np)
# print('Accuracy: {:.2%}'.format(score))

# Train

In [19]:
from kddirkit.networks.encoders import SentenceEncoder
from kddirkit.networks.models import BaselineModel
from kddirkit.config import *
from kddirkit.dataloaders import LoadFMA, LoadHierData
from kddirkit.frameworks import Trainer
from kddirkit.losses.FocalLoss import FocalLoss

parser = Parser(ROOT_PATH + "/data/config", "HMAN")
oneParser = parser.oneParser
args, _ = oneParser.parse_known_args(args=[])
args = vars(args)

In [20]:
use_cuda = not args['no_cuda'] and torch.cuda.is_available()

torch.manual_seed(args['seed'])

device = torch.device("cuda" if use_cuda else "cpu")

HierDataLoader = LoadHierData.HierDataLoader(workdir=os.getcwd(), pattern='default', device=device)
genre_levels_Tensor = HierDataLoader.genre_levels_Tensor.to(device)
genre_level_layer = HierDataLoader.genre_level_layer

trainDataLoader = LoadFMA.FMATrainDataLoader(device=device)
testDataLoader = LoadFMA.FMATestDataLoader(mode="pr", device=device)

In [21]:
from torch.utils.data import Dataset,DataLoader,TensorDataset
from torch import optim
import torch.nn.functional as F
class argparse():
    pass
args = argparse()
args.epochs, args.learning_rate, args.patience = [10000, 0.001, 4]
# args.hidden_size, args.input_size= [40, 30]
args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

X_train_tensor = torch.Tensor(X_train_np)
X_test_tensor = torch.Tensor(X_test_np)
X_val_tensor = torch.Tensor(X_val_np)

# y_train_tensor = torch.LongTensor(pd.get_dummies(y_train).to_numpy()).to(args.device)
# y_test_tensor = torch.LongTensor(pd.get_dummies(y_test).to_numpy()).to(args.device)
# y_val_tensor = torch.LongTensor(pd.get_dummies(y_val).to_numpy()).to(args.device)

y_train_tensor = np.zeros((len(y_train_np) , 16))  # 相当于 做了一个onehot_dict
y_train_tensor[np.arange(len(y_train_np) ), y_train_np] = 1  # 为onehot_dict 赋值
y_train_tensor = torch.LongTensor(y_train_tensor)
y_test_tensor = np.zeros((len(y_test_np) , 16))  # 相当于 做了一个onehot_dict
y_test_tensor[np.arange(len(y_test_np) ), y_test_np] = 1  # 为onehot_dict 赋值
y_test_tensor = torch.LongTensor(y_test_tensor)

y_val_tensor = np.zeros((len(y_val_np) , 16))  # 相当于 做了一个onehot_dict
y_val_tensor[np.arange(len(y_val_np) ), y_val_np] = 1  # 为onehot_dict 赋值
y_val_tensor = torch.LongTensor(y_val_tensor)


train_dataset=TensorDataset(X_train_tensor,y_train_tensor)
test_dataset=TensorDataset(X_test_tensor,y_test_tensor)
val_dataset=TensorDataset(X_val_tensor,y_val_tensor)

train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset,batch_size=1000 , shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset,batch_size=1000, shuffle=True)
valid_dataloader = torch.utils.data.DataLoader(dataset=val_dataset,batch_size=1000, shuffle=True)

In [22]:
train_dataloader

<torch.utils.data.dataloader.DataLoader at 0x1f7ae8b3a30>

In [23]:
class BaseAttentionNetwork(nn.Module):
    def __init__(self, sentence_encoder, genre_levels, genre_level_layer, keep_prob,
                 train_batch_size=None, test_batch_size=262,  num_classes=16, device = "cuda:0"):
        '''
        Pay Attention!
        relation_matrix:
        id = 0: virtual node
        id = 1: NA node
        '''
        super(BaseAttentionNetwork, self).__init__()
        self.keep_prob = keep_prob
        self.genre_levels = genre_levels
        self.hidden_size = sentence_encoder.hidden_size
        self.sentence_encoder = sentence_encoder

        self.train_batch_size = train_batch_size
        self.test_batch_size = test_batch_size

        self.hier = genre_levels.shape[1]
        self.layer = genre_level_layer
        self.num_classes = num_classes
        self.genre_matrixs = []
        self.device = device

        self.discrimitive_matrix = nn.Parameter(torch.Tensor(num_classes, self.hidden_size))
        self.bias = torch.nn.Parameter(torch.Tensor(num_classes))
        self.drop = nn.Dropout(1 - self.keep_prob)
        self.long_tail = []
        self.normal_body = []
        self.short_head = []

    def reset_parameters(self):
        NotImplemented
    def forward(self):
        NotImplemented
    def forward_infer(self):
        NotImplemented
    def query_func(self, attention_weight, label):
        NotImplemented

In [33]:
class AttentionNetwork16(BaseAttentionNetwork):
    def __init__(self, sentence_encoder, genre_levels, genre_level_layer, keep_prob,
                 train_batch_size=None, test_batch_size=262, num_classes=16, device = "cuda:0"):
        '''
        Pay Attention!
        relation_matrix:
        id = 0: virtual node
        id = 1: NA node
        '''
        super(AttentionNetwork16, self).__init__(sentence_encoder = sentence_encoder,
                                                genre_levels= genre_levels,
                                                genre_level_layer= genre_level_layer,
                                                keep_prob = keep_prob,
                                                train_batch_size=train_batch_size,
                                                test_batch_size=test_batch_size,
                                                num_classes=num_classes,
                                                device = device)
        self.discrimitive_matrix = nn.Parameter(torch.Tensor(num_classes, self.hidden_size ))
        self.reset_parameters()

    def reset_parameters(self):
        for i in range(self.hier):
            self.genre_matrixs.append(nn.Embedding(self.layer[i],  self.hidden_size, _weight=nn.init.xavier_uniform_(
                torch.Tensor(self.layer[i],self.hidden_size ))).to(self.device))
            # self.genre_matrixs.append(torch.nn.Parameter(nn.init.xavier_uniform_(
            #     torch.Tensor(self.layer[i],self.hidden_size, self.hidden_size))).to(self.device))
        nn.init.xavier_uniform_(self.discrimitive_matrix)
        nn.init.zeros_(self.bias)

    def forward(self, data):
        # data 包含word, pos1, pos2, mask, label, scope
        x = sentence_encoder(data['mel'])

        label_layer = self.genre_levels[data['label_index']]
        attention_logits = []

        if self.num_classes == '16':
            current_relation = self.genre_matrixs[0](label_layer[:, 0])  # batch * 230
        else:
            current_relation = self.genre_matrixs[1](label_layer[:, 1])  # batch * 230

        # print(label_layer[:, 0])
        # print(current_relation.shape, x.shape)
        attention_logits.append(torch.sum(current_relation * x, 1))  # batch*230 x batch*230
        #             print("torch.sum(current_relation * x, 1):" ,torch.sum(current_relation * x, 1).shape)

        attention_logits_stack = torch.stack(attention_logits)  # 将一个batch的结果堆叠起来   2 * batch_size
        #         print("attention_logits stack shape:", attention_logits_stack.shape)

        attention_score_hidden = torch.cat([
            F.softmax(attention_logits_stack[:, data['scope'][i]:data['scope'][i + 1]], dim = -1) for i in
            range(self.train_batch_size)], 1)  ###这段出了问题

        tower_repre = []
        for i in range(self.train_batch_size):
            sen_matrix = x[data['scope'][i]:data['scope'][i + 1]]
            #             print("sen_matrix shape: ",sen_matrix.shape)
            layer_score = attention_score_hidden[:,
                          data['scope'][i]:data['scope'][i + 1]]  # 查找Layer_score #(2 ,bag_size)
            #             print("layer_score shape: ",layer_score.shape)
            layer_repre = torch.reshape(torch.tanh(layer_score @ sen_matrix), [-1])  # 获得层次化表示表示  # 2 * batch_size @ batch_size *230
            #             print("layer_score @ sen_matrix shape: ", (layer_score @ sen_matrix).shape) #(2, 230)
            tower_repre.append(layer_repre)  # 获得每个句子的表示  append (-1, 460)
        #             print("layer_repre shape: ", (layer_repre).shape)

        stack_repre = self.drop(torch.stack(tower_repre))  # 获得新的表示
        logits = stack_repre @ self.discrimitive_matrix.t() + self.bias  # sen_num * 230 matmul 230 * 53 + 53
        return logits

    def forward_infer(self, data):
        x = sentence_encoder(data['mel'])

        test_attention_scores = []
        if self.num_classes == 16:
            current_relation = self.genre_matrixs[0](self.genre_levels[:16, 0])  # batch * 230
        else:
            current_relation = self.genre_matrixs[1](self.genre_levels[:, 1])  # batch * 230

        # current_relation = self.genre_matrixs[0](self.genre_levels[:, 0])  # 53 * 230
        current_logit = current_relation @ x.t()  # 53 * batch_size
        current_score = torch.cat([F.softmax(current_logit[:, data['scope'][j]:data['scope'][j + 1]], dim = -1) for j in
                                   range(self.test_batch_size)], 1)  ##得到每一个袋子的attention_score
        test_attention_scores.append(
            current_score)  # curr_relation_num * batch_size   堆叠起来，形成两个高度不同的attention_score: 53 * batch_size


        test_attention_scores_stack = torch.stack(test_attention_scores, 1)  # 将一个batch的结果堆叠起来: 53* 2* batch_size
        #         print("attention_logits stack shape:", attention_logits_stack.shape)

        test_tower_output = []
        for i in range(self.test_batch_size):
            test_sen_matrix = (torch.unsqueeze(x[data['scope'][i]:data['scope'][i + 1]], 0)).repeat(self.num_classes, 1,
                                                                                                    1)  # 先将 x[data['scope']] 扩充维度形成  53 * bag_size * 230， 然后在第一维重复53次
            #             print("sen_matrix shape: ",sen_matrix.shape)
            test_layer_score = test_attention_scores_stack[:, :,
                               data['scope'][i]:data['scope'][i + 1]]  # 查找Layer_score #(53, 2 ,bag_size)
            #             print("layer_score shape: ",layer_score.shape)
            test_layer_repre = torch.reshape(torch.tanh(test_layer_score @ test_sen_matrix), [self.num_classes,
                                                                                  -1])  # 获得层次化表示表示  # 53 * 2 * bag_size @ 53* bag_size *230, 53* 2 *230 ,reshape 53 *460
            #             print("layer_score @ sen_matrix shape: ", (layer_score @ sen_matrix).shape) #(2, 230) (r_(h, t)^1), (r_(h, t)^2)
            #             print("layer_repre shape: ", (layer_repre).shape)
            test_logits = test_layer_repre @ self.discrimitive_matrix.t() + self.bias  # 53 * 460 @ 460 * 53 +   (53, )
            test_output = torch.diagonal(F.softmax(test_logits, dim = -1))
            test_tower_output.append(test_output)

        test_stack_output = torch.reshape(torch.stack(test_tower_output), [self.test_batch_size, self.num_classes])
        return test_stack_output

In [34]:
class MLP(nn.Module):
    def __init__(self, num_units, out_units, dropout=0.1):
        super(MLP, self).__init__()
        self.num_units = num_units
        self.linear_1 = nn.Linear(140, 100)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(100, out_units)
        self.hidden_size =out_units

    def forward(self, x):

        x = self.linear_1(x)
        x = F.relu(x)
        x = self.linear_2(x)
        x = F.softmax(x, dim=-1)

        return x

In [35]:
"""@nni.variable(nni.choice(50, 100, 160), name=training_batch_size)"""
training_batch_size = 1000

"""@nni.variable(nni.choice(0.2, 0.3, 0.1), name=learning_rate)"""
learning_rate = 0.2

weight_decay = 0.0001

keep_prob = 0.5

hidden_size = 140

pattern = "default"

num_classes = 16

HierDataLoader = LoadHierData.HierDataLoader(workdir=os.getcwd(), pattern=pattern, device=device)
genre_levels_Tensor = HierDataLoader.genre_levels_Tensor.to(device)
genre_level_layer = HierDataLoader.genre_level_layer

feature_mode = 'mel_128_128'
if num_classes == 16:
    trainDataLoader = LoadFMA.FMATrainDataLoader(use_label = 'top', feature_mode = feature_mode, device=device)
    testDataLoader = LoadFMA.FMATestDataLoader(mode="pr", use_label = 'top' , feature_mode = feature_mode, device=device)
else:
    trainDataLoader = LoadFMA.FMATrainDataLoader(use_label = 'bottom', feature_mode = feature_mode, device=device)
    testDataLoader = LoadFMA.FMATestDataLoader(mode="pr", use_label = 'bottom' , feature_mode = feature_mode, device=device)

print('Data Loaded')

"""@nni.variable(nni.choice("cnn", "crnn"), name=encoderName)"""
encoderName = "MLP"

if encoderName == "cnn":
    sentence_encoder = SentenceEncoder.MusicCnnEncoder(36, 1360, hidden_size)
elif encoderName == "crnn":
    sentence_encoder = SentenceEncoder.MusicCrnnModel(36, 1360, hidden_size)
else:
    sentence_encoder = MLP(140, 50)

modelName = "AttentionNetwork16"

if modelName == 'AttentionNetwork16':
    model = AttentionNetwork16(sentence_encoder = sentence_encoder,
                                            genre_levels=genre_levels_Tensor,
                                            genre_level_layer=genre_level_layer,
                                            keep_prob=keep_prob,
                                            train_batch_size=training_batch_size,
                                            test_batch_size=263,
                                            num_classes=16,
                                            device=device).to(device)
elif modelName == 'CNN':
    model = CNNModel.CnnModel()

criterionName = "cross_entropy"

if criterionName == "cross_entropy":
    criterion = nn.CrossEntropyLoss().to(device)

parameters_to_optimize = filter(lambda x: x.requires_grad, model.parameters())
optimizer = optim.SGD(parameters_to_optimize,
                      learning_rate,
                      weight_decay=weight_decay)


Data Loaded


In [36]:
genre_levels_Tensor

tensor([[ 0,  0],
        [ 1,  1],
        [ 2,  2],
        [ 3,  3],
        [ 4,  4],
        [ 5,  5],
        [ 6,  6],
        [ 7,  7],
        [ 8,  8],
        [ 9,  9],
        [10, 10],
        [11, 11],
        [12, 12],
        [13, 13],
        [14, 14],
        [15, 15],
        [ 3, 16],
        [14, 17],
        [ 3, 18],
        [ 0, 19],
        [ 0, 20],
        [ 0, 21],
        [ 3, 22],
        [ 0, 23],
        [ 2, 24],
        [ 0, 25],
        [11, 26],
        [ 3, 27],
        [14, 28],
        [ 3, 29],
        [ 0, 30],
        [ 0, 31],
        [12, 32],
        [ 0, 33],
        [ 0, 34],
        [ 0, 35],
        [ 5, 36],
        [ 1, 37],
        [ 0, 38],
        [ 0, 39],
        [ 2, 40],
        [ 0, 41],
        [ 2, 42],
        [ 0, 43],
        [ 0, 44],
        [ 1, 45],
        [12, 46],
        [14, 47],
        [ 0, 48],
        [12, 49],
        [ 7, 50],
        [ 7, 51],
        [ 7, 52],
        [ 7, 53],
        [ 3, 54],
        [1

In [None]:
accuracy_list = []
for e in range(args.epochs):
    model.train()

    train_instance_scope = medium_train_instance_scope
    train_order = list(range(len(train_instance_scope)))


    # if(self.epoch == 0):
    #     torch.save(self.model, self.args['model_dir'] + self.args['model'] + '-iter-' + str(0) + '.pkl')  # save entire net
    #     torch.save(self.model.state_dict(), self.args['model_dir'] +  self.args['model'] + '-iter-' + str(0) + '.pkl')  # save o
    np.random.shuffle(train_order)  # 打乱训练集
    s1 = 0.0
    s2 = 0.0
    tot1 = 0.0
    tot2 = 0.0
    loss_sum = 0.0
    accuracy_sum = 0.0
    step_sum = 0.0
    for i in range(int(len(train_order) / float(training_batch_size ))):
        input_scope = np.take(train_instance_scope, train_order[i * training_batch_size :(i + 1) * training_batch_size ],  axis=0)
        index = []
        scope = [0]
        label = []
        for num in input_scope:
            index = index + list(range(num[0], num[1] + 1))  # 放入每一个袋子的起点和重点的每个序号
            label.append(medium_train_label_transform[num[0]])  # 这个是每个袋子的第一个数的序号的标签
            scope.append(scope[len(scope) - 1] + num[1] - num[
                0] + 1)  # 这个存的是，当前的scope 类似于[0,1,2, , 19, 27, 35]， 代表着每一个袋子包含的实例的个数
        #         print("index length:", len(index), "label length", len(label), "scope length", len(scope), "label_", len(label_))
        label_ = np.zeros((training_batch_size , 16))  # 相当于 做了一个onehot_dict
        label_[np.arange(training_batch_size ), label] = 1  # 为onehot_dict 赋值
        #         output, losses, correct_predictions = train_step(train_word[index,:], train_pos1[index,:], train_pos2[index,:],
        #             train_mask[index,:], train_len[index],train_label[index], label_, np.array(scope))

        feed_dict = {
            'mel': X_train_tensor[index, :].to(args.device),
            'label_index': torch.argmax(y_train_tensor[index], dim =1).to(args.device),
            'label_': torch.LongTensor(label_).to(args.device),  # 可以不用
            'scope': scope
        }


        logits = model(feed_dict)
        output = F.softmax(logits, dim = -1)
        optimizer.zero_grad()
        loss = criterion(logits, torch.LongTensor(label).to(args.device))  # 计算损失值
        # loss = self.criterion(logits, torch.LongTensor(label_).to(self._device))  # 计算损失值
        loss.backward()  # 反向传播计算参数的梯度
        optimizer.step()  # 使用优化方法进行梯度更新
        predictions = torch.argmax(logits, 1)
        # print(logits)
        correct_predictions = torch.eq(predictions, torch.argmax(torch.LongTensor(label_).to(args.device), 1))
        accuracy = torch.mean(correct_predictions.float())

        loss_sum += loss.item()  # 更新 losses
        step_sum += 1.0  # 更新一个batch 中的step_sum， 疑似等于 i， 这个可能用在外循环上
        accuracy_sum += accuracy
        step_sum += 0
        time_str = datetime.datetime.now().isoformat().replace('T', ' ')
        temp_str = ' time {0:26} | losses : {1:1.8f} | accuracy: {2:1.6f} \r'.format(
            time_str, loss.item(),
            accuracy)
        sys.stdout.write(temp_str)
        sys.stdout.flush()
        # print(temp_str)


    accuracy_sum = accuracy_sum /step_sum
    losses = loss_sum / step_sum

    # temp_str = 'epoch {0:0>3}/{1:0>3} step {2:0>4} time {3:26} | losses : {4:1.8f} | NA accuracy: {5:1.6f} | not NA accuracy: {6:1.6f}\r'.format(
    #     self.args['restore_epoch'] + self.epoch + 1, self.args['restore_epoch'] + self.args['max_epoch'], i,
    #     time_str, losses,
    #     na_acc, not_na_acc)
    time_str = datetime.datetime.now().isoformat().replace('T', ' ')
    temp_str = ' time {0:26} | losses : {1:1.8f} | accuracy: {2:1.6f} \r'.format(
        time_str, loss.item(),
        accuracy_sum)
    print(temp_str)
    accuracy_list.append(accuracy_sum)


    #     current_step = tf.train.global_step(sess, global_step) #tensorflow 版本的模型保存
    # if (self._epoch + 1) % self.args['save_epoch'] == 0:  # 如果到达一个保存周期
    #     # 2 ways to save the net
    #     torch.save(self.model, self.args['model_dir'] + self.args['model'] + '-epoch-' + str(self.epoch+1) + '.pkl')  # save entire net
    #     torch.save(self.model.state_dict(), self.args['model_dir'] + self.args['model'] + '-epoch-' + str(self.epoch+1) + '.pkl')  # save o

 time 2022-04-13 09:51:41.720598 | losses : 2.58683658 | accuracy: 0.199667 
 time 2022-04-13 09:51:43.546506 | losses : 2.49637198 | accuracy: 0.257167 
 time 2022-04-13 09:51:45.376314 | losses : 2.39335203 | accuracy: 0.255500 
 time 2022-04-13 09:51:47.211450 | losses : 2.36433458 | accuracy: 0.257333 
 time 2022-04-13 09:51:49.017743 | losses : 2.33180594 | accuracy: 0.255333 
 time 2022-04-13 09:51:50.817542 | losses : 2.29049158 | accuracy: 0.256333 
 time 2022-04-13 09:51:52.629129 | losses : 2.30289412 | accuracy: 0.255000 
 time 2022-04-13 09:51:54.406642 | losses : 2.24069977 | accuracy: 0.257167 
 time 2022-04-13 09:51:56.252067 | losses : 2.25721526 | accuracy: 0.256333 
 time 2022-04-13 09:51:58.039536 | losses : 2.21505713 | accuracy: 0.253000 
 time 2022-04-13 09:51:59.841221 | losses : 2.26303077 | accuracy: 0.256500 
 time 2022-04-13 09:52:01.643736 | losses : 2.23578072 | accuracy: 0.255833 
 time 2022-04-13 09:52:03.406576 | losses : 2.19911170 | accurac

In [None]:
model.eval()
with torch.no_grad():
    stack_output = []  # stack_out 干什么的
    accuracy_sum = 0.0
    step_sum = 0.0

    iteration = len(self.test_instance_scope) // self.args['testing_batch_size']
    for i in range(iteration):  # 循环迭代次数
        input_scope = self.test_instance_scope[i * self.args['testing_batch_size']:(i + 1) * self.args['testing_batch_size']]
        index = []
        scope = [0]
        label = []
        for num in input_scope:
            index = index + list(range(num[0], num[1] + 1))
            label.append(self.FMATestDataLoader.label[num[0]])
            scope.append(scope[len(scope) - 1] + num[1] - num[0] + 1)
        label_ = np.zeros((self.args['testing_batch_size'], self.args['num_classes']))
        label_[np.arange(self.args['testing_batch_size']), label] = 1
        feed_dict = {
            # 'word': self.FMATestDataLoader.word_Tensor[index, :],
            # 'pos1': self.FMATestDataLoader.pos1_Tensor[index, :],
            # 'pos2': self.FMATestDataLoader.pos2_Tensor[index, :],
            # 'mask': self.FMATestDataLoader.mask_Tensor[index, :],
            'mel': torch.FloatTensor(self.FMATrainDataLoader.feature[index, :]).to(self._device),
            'label_index': self.FMATrainDataLoader.label_Tensor[index],
            # 'label_': torch.LongTensor(label_).to(self._device),  #
            # 'len': self.FMATestDataLoader.len_Tensor[index],
            'label_': label_,  # 可以不用
            'scope': scope
        }
        output_Tensor = self.model.forward_infer(feed_dict)
        output = output_Tensor.cpu().numpy()
        stack_output.append(output)  # 将输出，拼接到输出stack里边
        predictions = torch.argmax(output_Tensor, 1)
        # print(logits)
        correct_predictions = torch.eq(predictions,
                                       torch.argmax(torch.LongTensor(label_).to(self._device), 1))
        accuracy = torch.mean(correct_predictions.float())
        accuracy_sum += accuracy
        step_sum += 1.0  # 更新一个batch 中的step_sum， 疑似等于 i， 这个可能用在外循环上

    accuracy_ave = accuracy_sum / step_sum

    stack_output = np.concatenate(stack_output, axis=0)  # 拼接输出
    exclude_na_output = stack_output[:, 0:]  # 拼接从排除NA列的输出
    exclude_na_flatten_output = np.reshape(stack_output[:, 0:], (-1))  # 重置stack_output的维度

    auc = average_precision_score(self.exclude_na_flatten_label, exclude_na_flatten_output)
    mi_ma_100 = self.evalMetric.mi_ma_100(exclude_na_output)
    mi_ma_200 = self.evalMetric.mi_ma_200(exclude_na_output)
    pr = self.evalMetric.pr(exclude_na_output, exclude_na_flatten_output)
    return accuracy_ave.cpu().numpy(), auc, mi_ma_100, mi_ma_200, pr, exclude_na_flatten_output

In [None]:
model.eval()
predictions = model(X_test_tensor.to(args.device))
correct_predictions = torch.eq(torch.argmax(predictions, axis= 1), torch.tensor(y_test_np).to(args.device))
accuracy = torch.mean(correct_predictions.float())
accuracy_list.append(accuracy.cpu().numpy())
print("epoch={}/{},{}/{} of train, loss={}, training accuracy = {}, testing accuracy = {}".format(
epoch, args.epochs, idx, len(train_dataloader),loss.item(), accuracy_sum/step, accuracy))