# CHR2+1D Model train

In [1]:
#基本的引入
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
#from tensorboardX import SummaryWriter
import matplotlib.pyplot as plt
import math
import numpy as np
import os
import cv2
import random
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from torchvision import transforms as T
import matplotlib.pyplot as plt # plt 用于显示图片

from tqdm import tqdm
import time

# 设置随机数种子

In [2]:
def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
# 设置随机数种子
setup_seed(20)

# Transform设定：

In [4]:
transform_train = T.Compose([   
        T.Resize([224, 224]),
        #T.RandomRotation(degrees=5, expand=True),
       # T.CenterCrop([32,32]),
        #T.RandomHorizontalFlip(p=0.5),
       # T.RandomVerticalFlip(p=0.5),
        T.ToTensor(),
        T.Normalize(mean= [0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        

transform_test = T.Compose([
        T.Resize([224, 224]),
        T.ToTensor(),
        T.Normalize(mean= [0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])  

# 定义MyDataset

# 计算提取视频哪几帧

In [7]:
class MyDataset(Dataset):
    
    
    def __init__(self, file_path=None,data="train",transform=None):   
        """
        初始化自定义Dataset类的参数
        Attributes
            file_path: 字符串，数据集的存储路径，例如‘./UCF101/train’ 或 './UCF101/eval'等
            all_people:P0001-P0250演员
            classes  : 列表，每个元素为一个字符串，代表一个子类别，例如['dog', 'airplane', ...]等
            transform: 传入一个从torchvision.transforms定义的数据预处理
        """
        self.data=data
        self.sample_length= 8
        self.all_people=os.listdir(file_path)
        self.all_people.remove(".ipynb_checkpoints")#删除多于的文件夹
        self.classes = os.listdir(os.path.join(file_path,self.all_people[0]))
        self.cur_classes=["A","G","F","D","B"]
        #class这边要重新写，我只看一些classes
        self.transform = transform
        # 初始化给定文件夹下的所有数据
        self.init_all_data(file_path) 

        return None
        

    def init_all_data(self, file_path):
        """
        初始化该数据集内所有的图像及其对应的标签，保存在self.videos和self.labels两个列表内
        Attributes
            file_path: 字符串，数据集文件夹的存储路径
        """
        # 初始化两个列表，记录该数据集内每一张图片的完整路径及其对应的标签
        self.videos = []
        self.labels = []
        # 遍历所有的子类别，并得到每个子类别对应的文件夹路径
        total_num=0
        for people in self.all_people:
            total_num+=1
            if self.data=="train":
                if total_num<=2:
                    #当前人的所有数据
                    for idx, cls in enumerate(self.cur_classes):
                         #现在只训练5个classes
                        for level in range(8):
                            cls_path = os.path.join(file_path,people,cls,str(level))
                            cams="cam_1"#现在我只取cam_1的数据
                            cur_video = os.path.join(cls_path, cams,"color.avi")
                            if self.is_valid_video(cur_video ):
                                self.videos.append(cur_video )
                                self.labels.append(idx)
                else:
                    break
            elif self.data=="test":
                if total_num>248:
                    #当前人的所有数据
                    for idx, cls in enumerate(self.cur_classes):
                          #现在只训练5个classes
                        for level in range(8):
                            cls_path = os.path.join(file_path,people,cls,str(level))
                            cams="cam_1"#现在我只取cam_1的数据
                            cur_video = os.path.join(cls_path, cams,"color.avi")
                            if self.is_valid_video(cur_video ):
                                self.videos.append(cur_video )
                                self.labels.append(idx)
        return None

        
    def is_valid_video(self, video_path):
        """
        判断图片是否为可以打开的有效文件
        Attributes
            img_path: 字符串，待检测图片的存储路径
        Returns
            valid: 布尔变量，True/False分别表示该图片是否可以正常打开
        """
        try:
            # 若读取成功，设valid为True
            vc=cv2.VideoCapture(video_path)
            valid=vc.isOpened()   
        except:
            # 若读取失败，设valid为False
            valid = False
            
        return valid
        

    
   
    def __getitem__(self, idx):
        """
        按给定索引，获取对应的视频及其标签
        Attributes
            idx: int类型数字，表示目标图像的索引
        Returns
            frames: 一个打开的PIL.Image对象，是PIL库存储图像的一种数据格式（类似于OpenCV利用numpy张量存储图像）
            label: int类型，表示对应的类别，例如假设self.classes=['cat', 'dog', 'airplane']，则label=1代表‘dog'类别；
                   对于pytorch的分类，不需要特意将其变成onehot向量，因为crossentropy函数内置了这部分功能。
        """
        # 利用PIL.Image.open打开图片，并将其强制转化为RGB格式（防止数据集中混杂灰度图，导致读取出单通道图片，送入网络因矩阵维度不一致而报错）
        
        start = time.time()
        frames=[]#储存所有的frames
        cur_video=self.videos[idx]
        vc = cv2.VideoCapture(cur_video) #读入视频文件
        video_len = int(vc.get(cv2.CAP_PROP_FRAME_COUNT))  # 视频总帧数
        gap=video_len//16#计算gap
        rval=vc.isOpened()      #判断视频是否打开  返回True或Flase
        c = 1
        while rval and len(frames)<16:  # 读取视频帧当帧数满16就停止
            rval, frame = vc.read()  # videoCapture.read() 函数，第一个返回值为是否成功获取视频帧，第二个返回值为返回的视频帧：
            if rval:
#                 start = time.time()
                if (c%gap==0): # 如果c在我要读取的帧的集合里，就把它储存起来
                    frame=Image.fromarray(cv2.cvtColor(frame,cv2.COLOR_BGR2RGB))#转成PIL
                    frames.append(frame)
#                     end1=time.time()
#                     print("the time to get one frame is ")
#                     print(end1-start)
                c = c + 1
            else:
                break
        vc.release()  
        
        
        # 进行预处理的变换
        #tensor 化
        ts=time.time()
        frames=torch.stack(
                    [self.transform(c) for c in frames]
                )
        # [S, T, H, W, C] -> [S, C, T, H, W]
        frames=frames.permute(1,0,2,3)#相位变化
        # 获取对应的标签
        label = self.labels[idx]
        one_hot=np.zeros(5)
        one_hot[label]=1
        end = time.time()
#         print("我花在get item上的时间 ")
#         print (end-start)
        return frames, one_hot
   
    def __len__(self):
        """
        获取数据集中图像的总数，该方法的作用是用于DataLoader去调用，从而获取在给定Batch Size的情况下，一个Epoch的总长，
        从而可以在一个Epoch结束时实现shuffle数据集的功能
        """
        return len(self.videos)

# 核心model

### 借用他人训练好的参数

In [8]:
from models import r2plus1d_34_32_ig65m,r2plus1d_34_32_kinetics

In [9]:
import torch.nn as nn
import torch.optim as optim
MODELS = {
    # Model name followed by the number of output classes.
    "r2plus1d_34_32_ig65m": 359,
    "r2plus1d_34_32_kinetics": 400,
    "r2plus1d_34_8_ig65m": 487,
    "r2plus1d_34_8_kinetics": 400,
}
def init_model(
    sample_length: int, base_model: str, num_classes: int = None
) -> torchvision.models.video.resnet.VideoResNet:
    """
    Initializes the model by loading it using torch's `hub.load`
    functionality. Uses the model from TORCH_R2PLUS1D.

    Args:
        sample_length: Number of consecutive frames to sample from a video (i.e. clip length).
        base_model: the R2plus1D model is based on either ig65m or kinetics.
        num_classes: the number of classes/actions

    Returns:
        Load a model from a github repo, with pretrained weights
    """
    if base_model not in ("ig65m", "kinetics"):
        raise ValueError(
            f"Not supported model {base_model}. Should be 'ig65m' or 'kinetics'"
        )

    # Decide if to use pre-trained weights for DNN trained using 8 or for 32 frames
    model_name = f"r2plus1d_34_{sample_length}_{base_model}"

    print(f"Loading {model_name} model")
    ##这里之后要稍微加一些数据
    if model_name=="r2plus1d_34_32_ig65m":
        model= r2plus1d_34_32_ig65m(num_classes=MODELS[model_name], pretrained=True)
    else:
        model=r2plus1d_34_32_kinetics(num_classes=MODELS[model_name], pretrained=True)

    # Replace head
    if num_classes is not None:
        model.fc = nn.Linear(model.fc.in_features, num_classes)

    return model, model_name

## Train Model

In [20]:
#只用valid_loss
def train_model(model,device, n_epochs):
    
    # to track the training loss as the model trains
    train_losses = []
    train_loss_min=100
    train_correct_labels=[0,0,0,0,0]#后面可以改的
    train_actual_labels=[0,0,0,0,0]
    # to track the average training loss per epoch as the model trains
    avg_train_losses = []

    for epoch in range(1, n_epochs + 1):
 
        ###################
        # train the model #
        ###################
        model.train() # prep model for training
        correct_labels=[0,0,0,0,0]
        actual_labels=[0,0,0,0,0]
        predict_labels=[0,0,0,0,0]
        for step, (X, y) in enumerate(tqdm(train_loader)):
            X, y = X.to(device), y.type(torch.FloatTensor).to(device)  #,dtype=torch.int64
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(X)
            # calculate the loss
            loss = loss_func(output, y)
            #计算acc
            final_out=output.cpu().detach().numpy()
            labels=y.cpu()
            for i in range(len(labels)):
                actual_labels+=labels[i].numpy()
                predict_label=np.argmax(final_out[i])
                predict_labels[predict_label]+=1
#                 print(labels[i])
#                 print(predict_label)
#                 print(labels[i][predict_label])
                if labels[i][predict_label]==1:
#                     print("Correct!")
                    correct_labels[predict_label]+=1
                    train_correct_labels[predict_label]+=1
            train_actual_labels+=actual_labels
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # record training loss
            train_losses.append(loss.item())
        # print training/validation statistics 
        # calculate average loss over an epoch
        train_loss = np.average(train_losses)
        avg_train_losses.append(train_loss)
        cur_train_acc=np.average(np.array(correct_labels)/np.array(actual_labels))
       
        
        epoch_len = len(str(n_epochs))
        
        print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}] ' +
                     f'train_loss: {train_loss:.5f} '
                   + f'train_acc: {cur_train_acc:.5f}'
                    )
        
        print(print_msg)
        print("the outcome of the predicted lable for this epoch")
        print(predict_labels)
        print("the outcome of the correct lable for this epoch")
        print(correct_labels)
        
        if train_loss<train_loss_min:
            torch.save(model.state_dict(), 'checkpoint.pt')	# 这里会存储迄今最优模型的参数
            train_loss_min = train_loss
        # clear lists to track next epoch
        train_losses = []
#     # load the last checkpoint with the best model
    print("the final correct prediction of each class")
    print(train_correct_labels)
    print("Done!")
    train_acc=np.array(train_correct_labels)/np.array(train_actual_labels)
    return  model, avg_train_losses,train_acc

# 开始训练

## 设立model和device

In [11]:
# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# create a new model with these weights
model, model_name= init_model( num_classes=5,sample_length=32,base_model="ig65m")
#model.load_state_dict(torch.load('checkpoint.pt'))
model=model.to(device)

Loading r2plus1d_34_32_ig65m model


In [12]:
lr = 0.001
optimizer=torch.optim.SGD(model.parameters(), lr=0.001, momentum=0, dampening=0, weight_decay=0, nesterov=False)
# optimizer=torch.optim.Adam(model.parameters(),lr=lr,weight_decay=1e-5)
loss_func = nn.BCELoss().to(device)

## 设立dataset和dataloader

In [13]:
train_data = MyDataset("KONE1-250",
                      data="train",
                       transform=transform_train)
# test_data = MyDataset("KONE1-250",
#                       data="test",
#                        transform=transform_test)

In [14]:
Num_workers=0
Batch_size=2

In [15]:
train_loader=DataLoader(dataset=train_data,batch_size=Batch_size,
                             shuffle=False, num_workers=Num_workers)
# test_loader=DataLoader(dataset=test_data,batch_size=Batch_size,
#                              shuffle=False, num_workers=Num_workers)

In [16]:
# model.load_state_dict(torch.load('checkpoint.pt'))
# for X,y in test_loader:
#     X, y = X.to(device), y.type(torch.FloatTensor).to(device) 
#     labels=y.cpu().tolist()
#     for i in labels[0]:
#         print(type(i))
#     output = model(X)
#     print(output.cpu().detach().numpy())
# #     print("sepereate of loss")
# #     for i in range(len(y[0])):
# #         print(loss_func(output[0][i], y[0][i]))
# #     print("total loss")
#     loss = loss_func(output, y)
#     print(loss)

## 训练

In [21]:
n_epochs=50
#optimizer=torch.optim.Adam(model.parameters(),lr=lr,weight_decay=1e-4)
model, train_loss,train_acc= train_model(model ,device, n_epochs)

100%|██████████| 40/40 [01:56<00:00,  2.90s/it]


[ 1/50] train_loss: 0.65117 train_acc: 0.20000
the outcome of the predicted lable for this epoch
[80, 0, 0, 0, 0]
the outcome of the correct lable for this epoch
[16, 0, 0, 0, 0]


100%|██████████| 40/40 [01:56<00:00,  2.92s/it]


[ 2/50] train_loss: 0.59068 train_acc: 0.20000
the outcome of the predicted lable for this epoch
[80, 0, 0, 0, 0]
the outcome of the correct lable for this epoch
[16, 0, 0, 0, 0]


100%|██████████| 40/40 [01:55<00:00,  2.90s/it]


[ 3/50] train_loss: 0.55288 train_acc: 0.20000
the outcome of the predicted lable for this epoch
[80, 0, 0, 0, 0]
the outcome of the correct lable for this epoch
[16, 0, 0, 0, 0]


100%|██████████| 40/40 [01:56<00:00,  2.92s/it]


[ 4/50] train_loss: 0.52984 train_acc: 0.20000
the outcome of the predicted lable for this epoch
[80, 0, 0, 0, 0]
the outcome of the correct lable for this epoch
[16, 0, 0, 0, 0]


100%|██████████| 40/40 [01:56<00:00,  2.91s/it]


[ 5/50] train_loss: 0.51582 train_acc: 0.18750
the outcome of the predicted lable for this epoch
[79, 0, 0, 1, 0]
the outcome of the correct lable for this epoch
[15, 0, 0, 0, 0]


100%|██████████| 40/40 [01:56<00:00,  2.92s/it]


[ 6/50] train_loss: 0.50706 train_acc: 0.20000
the outcome of the predicted lable for this epoch
[71, 0, 0, 9, 0]
the outcome of the correct lable for this epoch
[14, 0, 0, 2, 0]


 10%|█         | 4/40 [00:15<02:15,  3.76s/it]


KeyboardInterrupt: 

In [None]:
# visualize the loss as the network trained
fig = plt.figure(figsize=(10,8))
plt.plot(range(1,len(train_loss)+1),train_loss, label='Training Loss')
plt.plot(range(1,len(valid_loss)+1),valid_loss,label='Validation Loss')
plt.xlabel('epochs',fontsize=30)
plt.ylabel('loss',fontsize=30)

# find position of lowest validation loss
minposs = valid_loss.index(min(valid_loss))+1 
plt.axvline(minposs, linestyle='--', color='r',label='Early Stopping Checkpoint')

#plt.ylim(0, 0.5) # consistent scale
plt.xlim(1, len(train_loss)+1) # consistent scale
plt.yticks(fontsize=30)
plt.xticks(fontsize=30)
plt.grid(True)
plt.legend(loc = 'best',fontsize=30)
plt.tight_layout()
fig.savefig('Unet_100_loss.png', bbox_inches='tight',dpi=300)
plt.show()
