# 准备数据集

In [1]:
from torch.utils.data import Dataset
import torch
import numpy as np
class MUStartDataset(Dataset):
    def __init__(self,mode = 'train',feature_path = './featuresIndepResnet152.pkl'):
        with open(feature_path,'rb') as f:
            import pickle
            data = pickle.load(f)
        self.feature_dict = data[mode]
        # print(self.feature_dict['labels'][:10])
        # [-1,1] -> [0,1]
        self.feature_dict['labels'] = ((self.feature_dict['labels'] + 1)/2).astype(np.int64)
        # print(self.feature_dict['labels'][:10])
        # self.feature_dict['labels'] = np.expand_dims(self.feature_dict['labels'], axis=-1)
        # print('init ', mode, 'dataset ', self.feature_dict.keys())
    def __getitem__(self,index):
        feature ={}
        feature['audio_feature'] = self.feature_dict['audio_feature'][index]
        feature['video_features_p'] = self.feature_dict['video_features_p'][index]
        feature['bert_indices'] = self.feature_dict['bert_indices'][index]
        feature['box_pad_indices'] = self.feature_dict['box_pad_indices'][index]
        feature['big_graphs'] = self.feature_dict['big_graphs'][index]
        feature['labels'] = self.feature_dict['labels'][index]
        
        return feature
    def __len__(self):
        labels = self.feature_dict['labels']
        length = labels.shape[0]
        return length
    
    def get_sample_shape(self,index):
        shape_dict = {}
        shape_dict['audio_feature'] = self.feature_dict['audio_feature'][index].shape
        shape_dict['video_features_p'] = self.feature_dict['video_features_p'][index].shape
        shape_dict['bert_indices'] = self.feature_dict['bert_indices'][index].shape
        shape_dict['box_pad_indices'] = self.feature_dict['box_pad_indices'][index].shape
        shape_dict['big_graphs'] = self.feature_dict['big_graphs'][index].shape
        # shape_dict['labels'] = self.feature_dict['labels'][index].shape
        shape_dict['labels'] = type(self.feature_dict['labels'][index])
        return shape_dict
        
d = MUStartDataset('valid')
d.get_sample_shape(0)

{'audio_feature': (33, 33),
 'video_features_p': (10, 768),
 'bert_indices': (24,),
 'box_pad_indices': (10, 3),
 'big_graphs': (34, 34),
 'labels': numpy.int64}

In [2]:
from torch.utils.data import DataLoader
dl = DataLoader(d, batch_size=2, num_workers=0, shuffle=False)
for batch in dl:
    print(batch.keys())
    for key in batch.keys():
        print(key, batch[key].shape)

dict_keys(['audio_feature', 'video_features_p', 'bert_indices', 'box_pad_indices', 'big_graphs', 'labels'])
audio_feature torch.Size([2, 33, 33])
video_features_p torch.Size([2, 10, 768])
bert_indices torch.Size([2, 24])
box_pad_indices torch.Size([2, 10, 3])
big_graphs torch.Size([2, 34, 34])
labels torch.Size([2])


In [3]:
import torch
use_cuda = torch.cuda.is_available()
use_cuda
device = torch.device('cuda:0') if use_cuda else torch.device('cpu')
device

device(type='cpu')

In [4]:
import torch.nn as nn
from transformers import BertModel
from layers.dynamic_rnn import DynamicLSTM
import torch.nn.functional as F

class GraphConvolution(nn.Module):
    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.weight = nn.Parameter(torch.FloatTensor(in_features, out_features))
        if bias :
            self.bias = nn.Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias',None)
        
    def forward(self, text, adj):
        hidden = torch.matmul(text,self.weight)
        
        denom = torch.sum(adj,dim=2,keepdim=True) + 1
        output = torch.matmul(adj, hidden.float())/denom
        if self.bias is not None:
            output = output + self.bias

        return output

class CMGCN(nn.Module):
    def __init__(self):
        super(CMGCN, self).__init__()
        print('create CMGCN model')
        self.bert = BertModel.from_pretrained('./bert-base-uncased/')
        self.text_lstm = DynamicLSTM(768,4,num_layers=1,batch_first=True,bidirectional=True)
        self.vit_fc = nn.Linear(768,2*4)
        self.gc1 = GraphConvolution(2*4, 2*4)
        self.gc2 = GraphConvolution(2*4, 2*4)
        self.fc = nn.Linear(2*4,2)
        
    def forward(self, inputs):
        bert_indices = inputs['bert_indices']
        graph = inputs['big_graphs']
        box_vit = inputs['video_features_p']
        bert_text_len = torch.sum(bert_indices != 0, dim = -1)
        outputs = self.bert(bert_indices)
        encoder_layer = outputs.last_hidden_state
        pooled_output = outputs.pooler_output
        
        text_out, (_, _) = self.text_lstm(encoder_layer, bert_text_len)
        # 与原始代码不同，这里因为进行了全局的特征填充，导致text_out可能无法达到填充长度，补充为0
        if text_out.shape[1] < encoder_layer.shape[1]:
            pad = torch.zeros((text_out.shape[0],encoder_layer.shape[1]-text_out.shape[1],text_out.shape[2]))
            text_out = torch.cat((text_out,pad),dim=1)

        box_vit = box_vit.float()
        box_vit = self.vit_fc(box_vit)
        features = torch.cat([text_out, box_vit], dim=1)

        graph = graph.float()
        x = F.relu(self.gc1(features, graph))
        x = F.relu(self.gc2(x,graph))
        
        alpha_mat = torch.matmul(features,x.transpose(1,2))
        alpha_mat = alpha_mat.sum(1, keepdim=True)
        alpha = F.softmax(alpha_mat, dim = 2)
        x = torch.matmul(alpha, x).squeeze(1)
        
        output = self.fc(x)
        return output
cmgcn_model = CMGCN().to(device)
len(list(cmgcn_model.named_parameters()))

create CMGCN model


Some weights of the model checkpoint at ./bert-base-uncased/ were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


215

In [5]:
type(cmgcn_model.bert.parameters())

generator

In [6]:
import torch
optimizer = torch.optim.Adam([
    {'params':cmgcn_model.bert.parameters(),'lr':2e-5},
    {'params':cmgcn_model.text_lstm.parameters(),},
    {'params':cmgcn_model.vit_fc.parameters(),},
    {'params':cmgcn_model.gc1.parameters(),},
    {'params':cmgcn_model.gc2.parameters(),},
    {'params':cmgcn_model.fc.parameters(),},
],lr=0.001,weight_decay=1e-5)
# optimizer = torch.optim.Adam(cmgcn_model.parameters(),lr=1e-3,weight_decay=1e-5)
# optimizer

# 初始化训练参数

In [7]:
def init_params():
    for child in cmgcn_model.children():
        # print(type(child) != BertModel)
        if type(child) != BertModel:
            for p in child.parameters():
                # print(type(child))
                # print(p.shape, p.requires_grad)
                if p.requires_grad :
                    # print(len(p.shape))
                    if len(p.shape) > 1:
                        torch.nn.init.xavier_uniform_(p)
                        # print(p[0][:2])
                    else:
                        import math
                        stdv = 1.0 / math.sqrt(p.shape[0])
                        torch.nn.init.uniform_(p, a=-stdv, b=stdv)
                        # print('else', p[:2])
    print('init_params()')
                    
init_params()    

init_params()


# 训练

In [36]:
num_epoch = 1
cmgcn_model_path = 'cmgcn_model.pth'

# def train():
print('start train:' + '-'*10)
train_dataset = MUStartDataset(mode='train')
valid_dataset = MUStartDataset(mode='valid')
test_dataset = MUStartDataset(mode='test')
train_dataloader = DataLoader(train_dataset,batch_size=2,num_workers=0,shuffle=False)
valid_dataloader = DataLoader(valid_dataset,batch_size=2,num_workers=0,shuffle=False)
test_dataloader = DataLoader(test_dataset,batch_size=2,num_workers=0,shuffle=False)

def evaluate_acc_f1(data_loader):
    n_correct, n_total = 0, 0
    targets_all, outputs_all = None, None
    cmgcn_model.eval()
    with torch.no_grad():
        for i_batch,batch in enumerate(data_loader):
            inputs ={}
            for key in batch.keys():
                inputs[key] = batch[key].to(device)
            outputs = cmgcn_model(inputs)
            targets = batch['labels'].to(device)
            
            n_correct += (torch.argmax(outputs, -1) == targets).sum().item()
            n_total += len(outputs)
            
            if targets_all is None:
                targets_all = targets
                outputs_all = outputs
            else:
                targets_all = torch.cat((targets_all,targets), dim=0)
                outputs_all = torch.cat((outputs_all,outputs), dim=0)
    
    # if macro :
    from sklearn import metrics
    acc = n_correct / n_total
    f1 = metrics.f1_score(targets_all.cpu(), torch.argmax(outputs_all,-1).cpu(), labels=[0,1], average='macro', zero_division=0)
    precision = metrics.precision_score(targets_all.cpu(), torch.argmax(outputs_all,-1).cpu(), labels=[0,1], average='macro', zero_division=0)
    recall = metrics.recall_score(targets_all.cpu(), torch.argmax(outputs_all,-1).cpu(), labels=[0,1], average='macro', zero_division=0)

    return acc,f1,precision,recall

max_val_acc , max_val_f1, max_val_epoch, global_step = 0, 0, 0, 0
for i_epoch in range(num_epoch):
    print('i_epoch:', i_epoch)
    n_correct, n_total, loss_total = 0, 0, 0
    for i_batch,batch in enumerate(train_dataloader):
        global_step += 1
        cmgcn_model.train()
        optimizer.zero_grad()
        inputs ={}
        for key in batch.keys():
            inputs[key] = batch[key].to(device)
        # print(inputs.keys()) 
        # dict_keys(['audio_feature', 'video_features_p', 'bert_indices', 'box_pad_indices', 'big_graphs', 'labels'])
        outputs = cmgcn_model(inputs)
        targets = batch['labels'].to(device)

        criterion = nn.CrossEntropyLoss()
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        
        n_correct += (torch.argmax(outputs, -1) == targets).sum().item()
        n_total += len(outputs)
        loss_total += loss.item() * len(outputs)
        
        train_acc = n_correct / n_total
        train_loss = loss_total / n_total
        
        if global_step % 1 == 0:
            val_acc, val_f1, val_precision, val_recall = evaluate_acc_f1(valid_dataloader)
            if val_acc >= max_val_acc:
                max_val_f1 = val_f1
                max_val_acc = val_acc
                max_val_epoch = i_epoch
                torch.save(cmgcn_model.state_dict(),cmgcn_model_path)
                print('here save the model cmgcn_model.pth')
        
    if i_epoch - max_val_epoch >= 0:
        print('early stop')
        break
        
    break
cmgcn_model.load_state_dict(torch.load(cmgcn_model_path))
test_acc, test_f1,test_precision,test_recall = evaluate_acc_f1(test_dataloader)
# test_acc, test_f1,test_precision,test_recall = evaluate_acc_f1(test_data_loader)
print('test_acc:', test_acc)
print('test_f1:', test_f1)
print('test_precision', test_precision)
print('test_recall', test_recall)

# return 0
# train()

start train:----------
i_epoch: 0
here save the model cmgcn_model.pth
early stop
test_acc: 0.5
test_f1: 0.3333333333333333
test_precision 0.25
test_recall 0.5


In [38]:
!ls -lh {cmgcn_model_path}

18716.53s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


-rw-r--r-- 1 mac staff 418M 10 24 18:10 cmgcn_model.pth


# 调试形状