# 1. データの読み込みと前処理

*必要なライブラリのインポート*

In [None]:
import av
import os
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision

from torchvision import transforms
from torchvision.datasets import UCF101
from torchmetrics import Accuracy

import torch.nn as nn
import torch.nn.functional as F

ダウンロード先のディレクトリを設定

In [None]:
# Locations of the downloaded UCF101 data on the local machine.
ucf101_base = r'C:\Users\Username\Documents\Python Scripts\forUCF101\UCF101_data\UCF101'
# Passed as `root=` to the UCF101 dataset.
root = ucf101_base + r'\UCF-101'
# Passed as `annotation_path=` to the UCF101 dataset.
root_label = ucf101_base + r'\UCF101TrainTestSplits-RecognitionTask\ucfTrainTestlist'

変数の設定

In [None]:
# --- Clip sampling ---
frames_per_clip = 75      # number of frames in one clip
step_between_clips = 100  # frame stride between clips (determines how many clips are used)
batch_size = 1            # mini-batch size

# --- Frame geometry and labels ---
height = width = 255      # frame size used by the transforms and the model
label_num = 101           # number of labels

# --- First convolution layer ---
num_channel = 32          # number of channels of the first convolution layer
filter_size_conv1 = 11    # filter size of the first convolution layer

# --- Training ---
epoch_num = 5             # number of training epochs

トランスフォーマーオブジェクトを生成
<br>Define the typical preprocessing transforms to be used for training a neural network

In [None]:
# Transform for clips with fewer than `frames_per_clip` frames: the missing frames are
# filled with copies of the clip's first frame.
class PadFrames:
    """Pad a clip along dim 0 up to ``frames_per_clip`` frames using its first frame.

    Clips that already have at least ``frames_per_clip`` frames are returned unchanged.
    The padding call repeats with four repeat factors, i.e. it is written for 4-D clips.
    """

    def __init__(self, frames_per_clip):
        self.frames_per_clip = frames_per_clip

    def __call__(self, video):
        missing = self.frames_per_clip - video.shape[0]
        if missing <= 0:
            return video
        # Keep a leading frame axis so the first frame can be tiled and concatenated.
        first_frame = video[0].unsqueeze(0)
        padding = first_frame.repeat(missing, 1, 1, 1)
        return torch.cat([video, padding], dim=0)


# Preprocessing / augmentation pipeline applied to the training clips.
train_steps = [
    # Pad short clips with PadFrames (defined above).
    PadFrames(frames_per_clip=frames_per_clip),
    # Divide by 255 (assumes pixel values in 0-255, so the result lies in [0, 1]).
    transforms.Lambda(lambda clip: clip / 255.0),
    # (T, H, W, C) -> (T, C, H, W): channels-first frames for the image transforms.
    transforms.Lambda(lambda clip: clip.permute(0, 3, 1, 2)),
    transforms.RandomGrayscale(p=0.3),
    # (T, C, H, W) -> (C, T, H, W): channel-first clip layout used by the model.
    transforms.Lambda(lambda clip: clip.permute(1, 0, 2, 3)),
    # Cut out the centre of each frame.
    # (A random crop, a random rotation or an interpolation-based resize were considered
    # as alternatives and are not used.)
    transforms.CenterCrop(height),
    # Random horizontal flip.
    transforms.RandomHorizontalFlip(0.5),
]
transform_train = transforms.Compose(train_steps)

# Preprocessing pipeline applied to the test clips (no random augmentation).
transform_test = transforms.Compose([
                 PadFrames(frames_per_clip=frames_per_clip),
                 # TODO: this should be done by a video-level transfrom when PyTorch provides transforms.ToTensor() for video transforms.ToTensor(),
                 # scale in [0, 1] of type float
                 transforms.Lambda(lambda x: x / 255.0),
                 # reshape (T, H, W, C) -> (C, T, H, W), the layout transform_train ends with
                 transforms.Lambda(lambda x: x.permute(3, 0, 1, 2)),
                 # Use the same spatial preprocessing as transform_train (centre crop).
                 # The previous interpolation-based resize squashed the frames to a square,
                 # so the model was evaluated on a geometry it never saw during training.
                 transforms.CenterCrop(height),
])

# Custom collate function: videos can come with different numbers of audio channels
# (none, mono, stereo, etc.), so the audio entry is dropped before batching.
def custom_collate(batch):
    """Collate ``(video, audio, label)`` samples into a ``(videos, labels)`` batch.

    The middle element of every sample is discarded; the remaining pairs are handed to
    PyTorch's default collate function.
    """
    video_label_pairs = [(sample_video, sample_label) for sample_video, _, sample_label in batch]
    return torch.utils.data.dataloader.default_collate(video_label_pairs)

データセットの作成

In [None]:
# Arguments shared by the training and the test split.
ucf101_common_args = dict(
    root=root,                              # directory that holds the data
    annotation_path=root_label,             # directory that holds the label / split files
    frames_per_clip=frames_per_clip,        # frames per clip
    step_between_clips=step_between_clips,  # stride between clips
    num_workers=2,
)

# Training split
train_dataset = torchvision.datasets.UCF101(train=True, transform=transform_train, **ucf101_common_args)

# Test split
test_dataset = torchvision.datasets.UCF101(train=False, transform=transform_test, **ucf101_common_args)

データローダーの作成

In [None]:
# Page-locked host memory only helps when batches are copied to a GPU, so it is enabled
# only when CUDA is available (avoids a warning on CPU-only machines).
use_pinned_memory = torch.cuda.is_available()

# Data loader for training: mini-batches are drawn in random order.
train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               collate_fn=custom_collate,
                                               #num_workers=2,
                                               pin_memory=use_pinned_memory
                                              )
# Data loader for evaluation: kept in dataset order. The reported metrics are averages
# over all batches, so shuffling brings no benefit and only makes runs harder to compare.
test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              collate_fn=custom_collate,
                                              #num_workers=2,
                                              pin_memory=use_pinned_memory
                                             )

使用するデータサイズの確認

In [None]:
# Report how many samples and mini-batches each split provides.
for what, count in (
    ("train samples", len(train_dataset)),
    ("test samples", len(test_dataset)),
    ("(train) batches", len(train_dataloader)),
    ("(test) batches", len(test_dataloader)),
):
    print(f"Total number of {what}: {count}")

使用する動画のサンプルを表示

In [None]:
# Fetch one training clip (index 0 of a sample is the video) and print its shape.
example_img = train_dataset[3000][0].to('cpu').detach().numpy().copy()
print(example_img.shape)

# Move axis 0 to the end so that every frame can be handed to imshow:
# with transform_train the clip is (C, T, H, W), giving (T, H, W, C) here.
im_array = example_img.transpose(1,2,3,0).copy()
print(im_array[0,:,:,:].shape)
plt.imshow(im_array[0,:,:,:])

# Lay out every frame of the clip on a grid. The frame count is taken from the array
# itself (previously a hard-coded 75), so changing `frames_per_clip` neither indexes
# past the end of the clip nor leaves frames out.
num_frames = im_array.shape[0]
col = 8                               # number of grid columns
row = int(np.ceil(num_frames / col))  # number of grid rows
fig = plt.figure(figsize=(10,10))

for frame_index in range(num_frames):
    ax = fig.add_subplot(row, col, frame_index+1)
    ax.imshow(im_array[frame_index,:,:,:])
    ax.axis("off")

plt.subplots_adjust(wspace=0.1,hspace=0.1,bottom=0.1)
plt.show()

# 2. モデルの定義・生成

Parallel-3DCNNの定義

In [None]:
# Basic building block of the network: 3D convolution -> batch normalisation -> ReLU.
class BasicConv3d(nn.Module):
    """Bias-free ``Conv3d`` followed by ``BatchNorm3d`` and an in-place ``ReLU``.

    Parameters:
        input_size:  number of input channels
        output_size: number of output channels
        kernel_size, stride, padding: forwarded unchanged to ``nn.Conv3d``
    """

    def __init__(self, input_size, output_size, kernel_size, stride, padding=0):
        super().__init__()
        # The convolution has no bias term; BatchNorm3d is created with affine=True.
        self.conv = nn.Conv3d(
            input_size,
            output_size,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm3d(output_size, eps=1e-3, momentum=0.001, affine=True)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))

# Main model
class Parallel_CNN_3D(nn.Module):
    """Two-stream ("parallel") 3D CNN classifier.

    The same input clip is fed to two identically structured convolutional streams
    (``features0`` and ``features1``). Their flattened outputs are concatenated and
    classified by three fully connected layers.

    Layout of each stream (c = base channel count):
        conv1  (11x11x11) -> c
        maxpool1
        conv2  (3x7x7)    -> 2c
        maxpool2
        conv3a (5x5x5)    -> 2c
        conv3b (3x3x3)    -> 4c
        maxpool3
        conv4a (5x5x5)    -> 6c
        conv4b (3x3x3)    -> 8c
        maxpool4
        conv5a (5x5x5)    -> 8c
        conv5b (3x3x3)    -> 8c
        maxpool5
        dropout3d
    Head: concat -> fc1 (4096) -> ReLU -> dropout -> fc2 (1024) -> ReLU -> fc3 (num_classes)

    The fully connected head is sized for streams that end in a (8c, 2, 3, 3) feature
    map, which is what a (3, 75, 255, 255) clip produces.

    Parameters:
        num_classes:       number of output classes
        dropout_drop_prob: drop probability of the Dropout3d layer closing each stream
        input_channel:     number of channels of the input clip
        spatial_squeeze:   unused; kept so existing callers keep working
        base_channels:     channel count c of the first convolution; when None the
                           notebook-level ``num_channel`` setting is used
    """

    def __init__(self, num_classes=101, dropout_drop_prob = 0.5, input_channel = 3, spatial_squeeze=True,
                 base_channels=None):
        super().__init__()
        # Fall back to the notebook-level setting so that `Parallel_CNN_3D()` behaves as before.
        width = num_channel if base_channels is None else base_channels
        # Flattened size of one stream's output: (8c, 2, 3, 3).
        self.stream_features = width * 8 * 2 * 3 * 3

        # The two streams share the architecture but have independent weights.
        self.features0 = self._make_stream(input_channel, width, dropout_drop_prob)
        self.features1 = self._make_stream(input_channel, width, dropout_drop_prob)

        ## after connection
        # The input size is derived from the same channel count the streams use
        # (previously a hard-coded 256 that only matched num_channel == 32).
        self.fc1 = nn.Linear(self.stream_features * 2, 4096)
        self.dropout2 = nn.Dropout()
        # (4096) -> (1024)
        self.fc2 = nn.Linear(4096, 1024)
        # (1024) -> (num_classes)
        self.fc3 = nn.Linear(1024, num_classes)

        # Weight initialisation
        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm3d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    @staticmethod
    def _make_stream(input_channel, width, dropout_drop_prob):
        """Build one convolutional stream; shape comments assume a (3, 75, 255, 255) clip."""
        return nn.Sequential(
            # (3,75,255,255) -> (c,67,247,247)
            BasicConv3d(input_channel, width, kernel_size=(11,11,11), stride=(1,1,1), padding=(1,1,1)),

            # (c,67,247,247) -> (c,23,81,81)
            nn.MaxPool3d(kernel_size=(5,9,9), stride=(3,3,3), padding=(2,1,1)),

            # (c,23,81,81) -> (2c,23,77,77)
            BasicConv3d(width, width*2, kernel_size=(3,7,7), stride=(1,1,1), padding=(1,1,1)),

            # (2c,23,77,77) -> (2c,12,25,25)
            nn.MaxPool3d(kernel_size=(3,7,7), stride=(2,3,3), padding=(1,1,1)),

            # (2c,12,25,25) -> (2c,12,25,25)
            BasicConv3d(width*2, width*2, kernel_size=(5,5,5), stride=(1,1,1), padding=(2,2,2)),

            # (2c,12,25,25) -> (4c,12,25,25)
            BasicConv3d(width*2, width*4, kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1)),

            # (4c,12,25,25) -> (4c,6,9,9)
            nn.MaxPool3d(kernel_size=(2,3,3), stride=(2,3,3), padding=(0,1,1)),

            # (4c,6,9,9) -> (6c,6,9,9)
            BasicConv3d(width*4, width*6, kernel_size=(5,5,5), stride=(1,1,1), padding=(2,2,2)),

            # (6c,6,9,9) -> (8c,6,9,9)
            BasicConv3d(width*6, width*8, kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1)),

            # (8c,6,9,9) -> (8c,3,5,5)
            nn.MaxPool3d(kernel_size=(2,3,3), stride=(2,2,2), padding=(0,1,1)),

            # (8c,3,5,5) -> (8c,3,5,5)
            BasicConv3d(width*8, width*8, kernel_size=(5,5,5), stride=(1,1,1), padding=(2,2,2)),

            # (8c,3,5,5) -> (8c,3,5,5)
            BasicConv3d(width*8, width*8, kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1)),

            # (8c,3,5,5) -> (8c,2,3,3)
            nn.MaxPool3d(kernel_size=(3,3,3), stride=(2,2,2), padding=(1,1,1)),

            nn.Dropout3d(dropout_drop_prob),
        )

    def forward(self, x):
        x0 = self.features0(x)
        x1 = self.features1(x)
        # Flatten everything but the batch axis. Unlike view(-1, n), this can never fold a
        # mismatching feature size into the batch dimension: an unexpected input size
        # surfaces as a shape error in fc1 instead.
        x0 = torch.flatten(x0, 1)
        x1 = torch.flatten(x1, 1)
        xc = torch.cat((x0, x1), dim=1)
        xc = torch.relu(self.fc1(xc))
        xc = self.dropout2(xc)
        xc = torch.relu(self.fc2(xc))
        out = self.fc3(xc)

        return out

モデルを適用し、生成・確認する

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("device:",device)
# Create the model and move it to the selected device.
model = Parallel_CNN_3D()
model = model.to(device)
# To start from previously saved initial weights instead, uncomment:
#model.load_state_dict(torch.load('Parallel_3DCNN_00_weight_before.pth'))

# To inspect the model structure, evaluate `model` in a cell:
#model

適当な値を入力させて、エラーが発生しないことを確認

モデルのパラメータを保存

In [None]:
# Save the model parameters as they are at this point of the notebook.
initial_weights_path = 'Parallel_3DCNN_00_weight_before.pth'
torch.save(model.state_dict(), initial_weights_path)

# 3. モデルの可視化

畳み込み層第1層の可視化を行う

In [None]:
def _show_filter_plane(plane, axes_order, title, nx):
    """Draw one 2-D plane of every filter as a grid of RGB tiles.

    Parameters:
        plane:      array of shape (N, C, A, B): one plane taken from each of the N filters
        axes_order: permutation given to ``transpose`` that yields (N, rows, cols, C)
        title:      figure title
        nx:         number of grid columns
    """
    print(plane.shape)
    tiles = plane.transpose(axes_order)
    num_filters = tiles.shape[0]
    ny = int(np.ceil(num_filters / nx))

    # Normalise once with the min/max over all filters so the tiles stay comparable.
    if tiles.size:
        lowest = tiles.min()
        value_range = tiles.max() - lowest
        # Constant weights would otherwise divide by zero and produce NaN tiles.
        tiles = (tiles - lowest) / (value_range if value_range > 0 else 1.0)

    fig = plt.figure(figsize=(10,5))
    plt.title(title,fontsize=10)
    plt.axis("off")
    fig.subplots_adjust(left=0, right=0.9, bottom=0, top=1, hspace=0.05, wspace=0.05)

    for i in range(num_filters):
        ax = fig.add_subplot(ny, nx, i+1, xticks=[], yticks=[])
        ax.imshow(tiles[i], interpolation='nearest')

    plt.show()


# Visualisation of the T-W plane (H=0)
def filter_show_TW(filters, nx=8, margin=3, scale=10):
    """Show the H=0 plane of every filter in ``filters`` (N, C, T, H, W).

    ``nx`` is the number of grid columns. ``margin`` and ``scale`` are unused and kept
    only for interface compatibility.
    """
    # (N,C,T,W) -> (N,T,W,C): imshow draws axis 0 of a tile vertically and axis 1
    # horizontally, so time runs along y and width along x. The title states exactly that
    # (it used to name the two axes the other way round).
    _show_filter_plane(filters[:,:,:,0,:], (0,2,3,1), "x:width,y:time", nx)


# Visualisation of the T-H plane (W=0)
def filter_show_TH(filters, nx=8, margin=3, scale=10):
    """Show the W=0 plane of every filter in ``filters`` (N, C, T, H, W).

    ``nx`` is the number of grid columns. ``margin`` and ``scale`` are unused and kept
    only for interface compatibility.
    """
    # (N,C,T,H) -> (N,T,H,C): time runs along y and height along x.
    _show_filter_plane(filters[:,:,:,:,0], (0,2,3,1), "x:height,y:time", nx)


# Visualisation of the W-H plane (T=0)
def filter_show_WH(filters, nx=8, margin=3, scale=10):
    """Show the T=0 plane of every filter in ``filters`` (N, C, T, H, W).

    ``nx`` is the number of grid columns. ``margin`` and ``scale`` are unused and kept
    only for interface compatibility.
    """
    # (N,C,H,W) -> (N,W,H,C): width runs along y and height along x.
    _show_filter_plane(filters[:,:,0,:,:], (0,3,2,1), "x:height,y:width", nx)

経路0の畳み込み層第1層のフィルタの各平面表示

In [None]:
# First-layer filters of stream 0, shown in each of the three planes.
weights_branch0 = model.features0[0].conv.weight.detach().cpu().numpy()
for plane_label, show_plane in (
    ("Branch0_WH", filter_show_WH),
    ("Branch0_TH", filter_show_TH),
    ("Branch0_TW", filter_show_TW),
):
    print(plane_label)
    show_plane(weights_branch0)

経路1の畳み込み層第1層のフィルタの各平面表示

In [None]:
# First-layer filters of stream 1, shown in each of the three planes.
weights_branch1 = model.features1[0].conv.weight.detach().cpu().numpy()
for plane_label, show_plane in (
    ("Branch1_WH", filter_show_WH),
    ("Branch1_TH", filter_show_TH),
    ("Branch1_TW", filter_show_TW),
):
    print(plane_label)
    show_plane(weights_branch1)

# 4. 損失関数やパラメーターの設定

損失関数・オプティマイザーの設定

In [None]:
import torch.optim

# Loss function: cross-entropy
criterion = nn.CrossEntropyLoss()
# Optimizer: gradient descent (SGD with momentum) over all model parameters
sgd_settings = {'lr': 0.001, 'momentum': 0.9}
optimizer = torch.optim.SGD(model.parameters(), **sgd_settings)

モデル訓練(学習)時の設定

In [None]:
def train_step(x, t):
    '''Update the model parameters with one step of backpropagation.

    Uses the notebook-level `model`, `criterion` and `optimizer`.

    Parameters: x: training data (mini-batch)
                t: ground-truth labels
    Returns:
      (loss, preds): the loss between the model output and the labels, and the
      model output itself
    '''
    model.train()               # switch the model to training mode
    optimizer.zero_grad()       # clear old gradients (they would otherwise accumulate)
    preds = model(x)            # forward pass
    loss = criterion(preds, t)  # loss between the output and the labels
    loss.backward()             # backward pass (gradients via autograd)
    optimizer.step()            # apply the update rule to the weights and biases
    return loss, preds

モデルテスト時の設定

In [None]:
def test_step(x, t):
    '''テストデータを入力して損失と予測値を返す
    
    Parameters: x: テストデータ
                t: 正解ラベル
    Returns:
      MLPの出力と正解ラベルのクロスエントロピー誤差
    '''
    model.eval()     # モデルを評価モードにする
    preds = model(x) # モデルの出力を取得
    loss = criterion(preds, t) # 出力と正解ラベルの誤差から損失を取得

    return loss, preds 

訓練時に文字数制限などで発生する警告(UserWarning)を非表示にする

In [None]:
import warnings
warnings.simplefilter('ignore', UserWarning)

経過時間・進行度を表示

In [None]:
from tqdm import tqdm
import datetime
# Print the current time (printed again after every epoch of the training loop).
dt_now = datetime.datetime.now()
print(str(dt_now))

# 5. モデルの学習

モデルの訓練(学習)・テスト

In [None]:
%%time
from sklearn.metrics import accuracy_score

# Number of epochs
epochs = epoch_num
# Per-epoch history of the losses and accuracies
history = {'loss':[],'accuracy':[], 'test_loss':[], 'test_accuracy':[], 'top5_accuracy':[]}

# Scheduler that decays the learning rate when the monitored value stops improving
# NOTE(review): `verbose` is reported as deprecated by recent PyTorch releases - confirm
# against the installed version.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,     # optimizer to control
    mode='max',    # the monitored value should increase
    factor=0.5,    # factor the learning rate is multiplied by
    patience=5,    # epochs without improvement before decaying
    min_lr=0.0001, # lower bound of the learning rate
    verbose=True   # report when the learning rate is decayed
    )

# Top-5 accuracy metric
top5_acc = Accuracy(top_k=5, task='multiclass', num_classes=label_num).to(device)

# Training
for epoch in tqdm(range(epochs)):
    train_loss = 0.      # summed training loss of this epoch
    train_acc = 0.       # summed training accuracy of this epoch
    test_loss = 0.       # summed test loss of this epoch
    test_acc = 0.        # summed test accuracy of this epoch
    test_top5_acc = 0.   # summed test top-5 accuracy of this epoch

    # Train on every mini-batch of the training set
    for (x, t) in tqdm(train_dataloader, mininterval=3600):
        # Move the tensors to the selected device
        x, t = x.to(device), t.to(device)
        loss, preds = train_step(x, t) # loss and predictions
        train_loss += loss.item()      # accumulate the loss
        train_acc += accuracy_score(
            t.tolist(),
            preds.argmax(dim=-1).tolist()
        )   # accumulate the accuracy

    # Evaluate on every mini-batch of the test set. No gradients are needed here, so the
    # whole pass runs under no_grad to avoid building autograd graphs.
    with torch.no_grad():
        for (x, t) in test_dataloader:
            # Move the tensors to the selected device. (The former extra `.cuda()` copies
            # were never used and raised an error on machines without a GPU.)
            x, t = x.to(device), t.to(device)
            loss, preds = test_step(x, t) # loss and predictions
            test_loss += loss.item()      # accumulate the loss
            test_acc += accuracy_score(
                t.tolist(),
                preds.argmax(dim=-1).tolist()
            )                             # accumulate the accuracy
            test_top5_acc += top5_acc(preds, t).item()

    # Averages over the mini-batches
    avg_train_loss = train_loss / len(train_dataloader)
    avg_train_acc = train_acc / len(train_dataloader)
    avg_test_loss = test_loss / len(test_dataloader)
    avg_test_acc = test_acc / len(test_dataloader)
    avg_top5_acc = test_top5_acc / len(test_dataloader)

    # Store the training history
    history['loss'].append(avg_train_loss)
    history['accuracy'].append(avg_train_acc)
    # Store the test history
    history['test_loss'].append(avg_test_loss)
    history['test_accuracy'].append(avg_test_acc)
    history['top5_accuracy'].append(avg_top5_acc)

    # Report the results of this epoch
    print(
        'epoch({}) train_loss: {:.6} train_acc: {:.6} val_loss: {:.6} val_acc: {:.6} val_top5: {:.6}'.format(
            epoch+1,
            avg_train_loss, # training loss
            avg_train_acc,  # training accuracy
            avg_test_loss,  # test loss
            avg_test_acc,   # test accuracy
            avg_top5_acc    # test top-5 accuracy
    ))
    # The scheduler monitors the test accuracy
    scheduler.step(avg_test_acc)
    dt_now = datetime.datetime.now()
    print(dt_now)

    # Save the model parameters after every epoch
    filename = "Parallel_3DCNN_" + str(epoch+1).zfill(2) + "_weight_after.pth"
    torch.save(model.state_dict(), filename)

最後のパラメータを保存する場合

In [None]:
# モデルデータの保存は↓
#torch.save(model.state_dict(), 'Parallel_3DCNN_00_weight_after.pth')

# 6. 損失と精度

結果をグラフに表示する

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Curves of each figure as (history key, legend label) pairs.
loss_curves = (
    ('loss', 'loss (Training)'),
    ('test_loss', 'loss (Test)'),
)
accuracy_curves = (
    ('accuracy', 'accuracy (Training)'),
    ('test_accuracy', 'accuracy (Test)'),
    ('top5_accuracy', 'accuracy(Test_top5)'),
)

# One figure for the losses, one for the accuracies.
for y_label, curves in (('loss', loss_curves), ('accuracy', accuracy_curves)):
    for history_key, legend_label in curves:
        plt.plot(history[history_key],
                 marker='.',
                 label=legend_label)
    plt.legend(loc='best')
    plt.grid()
    plt.xlabel('epoch')
    plt.ylabel(y_label)
    plt.show()

エポックごとの数値を表示する

In [None]:
# Print the per-epoch values of every recorded series.
for history_key in ('loss', 'test_loss', 'accuracy', 'test_accuracy', 'top5_accuracy'):
    print("history['" + history_key + "']")
    print(history[history_key])

# 7. 結果

訓練(学習)後のフィルタの様子

In [None]:
# Stream 0: first-layer filters, shown in each of the three planes.
trained_weights_branch0 = model.features0[0].conv.weight.detach().cpu().numpy()
for plane_label, show_plane in (
    ("Branch0_WH", filter_show_WH),
    ("Branch0_TH", filter_show_TH),
    ("Branch0_TW", filter_show_TW),
):
    print(plane_label)
    show_plane(trained_weights_branch0)

In [None]:
# Stream 1: first-layer filters, shown in each of the three planes.
trained_weights_branch1 = model.features1[0].conv.weight.detach().cpu().numpy()
for plane_label, show_plane in (
    ("Branch1_WH", filter_show_WH),
    ("Branch1_TH", filter_show_TH),
    ("Branch1_TW", filter_show_TW),
):
    print(plane_label)
    show_plane(trained_weights_branch1)

フィルタを各平面から見たときの軸変化

In [None]:
def _show_filter_sweep(filnums, filters, nx, sweep_axis, tile_axes):
    """Draw, for each selected filter, its 2-D slices along one kernel axis.

    The figure has one row per entry of ``filnums`` and one column per position along
    ``sweep_axis``. Every column is normalised with the min/max of that slice over ALL
    filters, so the tiles within a column are comparable.

    Parameters:
        filnums:    1-based filter numbers to show (one row each)
        filters:    weights of shape (N, C, T, H, W)
        nx:         number of positions along ``sweep_axis`` to show; limited to the
                    kernel extent along that axis
        sweep_axis: axis of ``filters`` that is stepped through (2=T, 3=H, 4=W)
        tile_axes:  permutation turning a (C, A, B) slice into a (rows, cols, C) tile
    """
    num_steps = min(nx, filters.shape[sweep_axis])
    ny = len(filnums)

    fig = plt.figure(figsize=(11,30))
    plt.axis("off")
    fig.subplots_adjust(left=0, right=0.9, bottom=0, top=1, hspace=0.05, wspace=0.05)

    for i in range(num_steps):
        # Slice shared by the whole column; its value range is computed once per column.
        plane = np.take(filters, i, axis=sweep_axis)  # (N, C, A, B)
        lowest = plane.min()
        value_range = plane.max() - lowest
        if not value_range > 0:
            value_range = 1.0  # constant slice: avoid dividing by zero

        for j in range(ny):
            # The grid position follows the real column count (previously a hard-coded 11).
            ax = fig.add_subplot(ny, num_steps, j*num_steps+i+1, xticks=[], yticks=[])
            tile = plane[filnums[j]-1].transpose(tile_axes)
            ax.imshow((tile - lowest) / value_range, interpolation='nearest')

    plt.show()


# Change along T of the selected filters (W-H planes), multiple filters
def filter_singlemoves_WH(filnums, filters, nx=11, margin=0.1, scale=10):
    """Show the (W, H, C) slice of each selected filter at every time position.

    ``filnums`` holds 1-based filter numbers and ``filters`` is (N, C, T, H, W).
    ``margin`` and ``scale`` are unused and kept only for interface compatibility.
    """
    # (C,H,W) -> (W,H,C)
    _show_filter_sweep(filnums, filters, nx, 2, (2,1,0))


# Change along W of the selected filters (T-H planes), multiple filters
def filter_singlemoves_TH(filnums, filters, nx=11, margin=0.1, scale=10):
    """Show the (T, H, C) slice of each selected filter at every width position.

    ``filnums`` holds 1-based filter numbers and ``filters`` is (N, C, T, H, W).
    ``margin`` and ``scale`` are unused and kept only for interface compatibility.
    """
    # (C,T,H) -> (T,H,C)
    _show_filter_sweep(filnums, filters, nx, 4, (1,2,0))


# Change along H of the selected filters (T-W planes), multiple filters
def filter_singlemoves_TW(filnums, filters, nx=11, margin=0.1, scale=10):
    """Show the (T, W, C) slice of each selected filter at every height position.

    ``filnums`` holds 1-based filter numbers and ``filters`` is (N, C, T, H, W).
    ``margin`` and ``scale`` are unused and kept only for interface compatibility.
    """
    # (C,T,W) -> (T,W,C)
    _show_filter_sweep(filnums, filters, nx, 3, (1,2,0))

指定したフィルタのT変化(WH平面)、経路0

In [None]:
# T sweep (W-H planes) of every first-layer filter of stream 0. The 1-based filter
# numbers are derived from the weights instead of a hard-coded range of 32 filters.
sweep_wh_weights0 = model.features0[0].conv.weight.detach().cpu().numpy()
filter_singlemoves_WH(np.arange(1, sweep_wh_weights0.shape[0] + 1), sweep_wh_weights0)

指定したフィルタのT変化(WH平面)、経路1

In [None]:
# T sweep (W-H planes) of every first-layer filter of stream 1. The 1-based filter
# numbers are derived from the weights instead of a hard-coded range of 32 filters.
sweep_wh_weights1 = model.features1[0].conv.weight.detach().cpu().numpy()
filter_singlemoves_WH(np.arange(1, sweep_wh_weights1.shape[0] + 1), sweep_wh_weights1)

指定したフィルタのW変化(TH平面)、経路0

In [None]:
# W sweep (T-H planes) of every first-layer filter of stream 0. The 1-based filter
# numbers are derived from the weights instead of a hard-coded range of 32 filters.
sweep_th_weights0 = model.features0[0].conv.weight.detach().cpu().numpy()
filter_singlemoves_TH(np.arange(1, sweep_th_weights0.shape[0] + 1), sweep_th_weights0)

指定したフィルタのW変化(TH平面)、経路1

In [None]:
# W sweep (T-H planes) of every first-layer filter of stream 1. The 1-based filter
# numbers are derived from the weights instead of a hard-coded range of 32 filters.
sweep_th_weights1 = model.features1[0].conv.weight.detach().cpu().numpy()
filter_singlemoves_TH(np.arange(1, sweep_th_weights1.shape[0] + 1), sweep_th_weights1)

指定したフィルタのH変化(TW平面)、経路0

In [None]:
# H sweep (T-W planes) of every first-layer filter of stream 0. The 1-based filter
# numbers are derived from the weights instead of a hard-coded range of 32 filters.
sweep_tw_weights0 = model.features0[0].conv.weight.detach().cpu().numpy()
filter_singlemoves_TW(np.arange(1, sweep_tw_weights0.shape[0] + 1), sweep_tw_weights0)

指定したフィルタのH変化(TW平面)、経路1

In [None]:
# H sweep (T-W planes) of every first-layer filter of stream 1. The 1-based filter
# numbers are derived from the weights instead of a hard-coded range of 32 filters.
sweep_tw_weights1 = model.features1[0].conv.weight.detach().cpu().numpy()
filter_singlemoves_TW(np.arange(1, sweep_tw_weights1.shape[0] + 1), sweep_tw_weights1)

指定したフィルタの各平面の拡大表示

In [None]:
def filter_single(filters, filter_number, nx=3, margin=0.1, scale=10):
    """Show one filter enlarged in three planes, side by side: W-H, T-H and T-W.

    Parameters:
        filters:       weights of shape (N, C, T, H, W)
        filter_number: 1-based number of the filter to show
        nx:            number of grid columns; only the first three panels are drawn
        margin, scale: unused

    Each panel takes index 0 along the remaining kernel axis and is normalised with the
    min/max of that plane over all filters. The shape of the T-H tile is printed.
    """
    # Unpacking also rejects weights that are not 5-D.
    num_filters, channels, depth, rows, cols = filters.shape

    # (axis of `filters` fixed at index 0, permutation (C, A, B) -> tile, print the shape?)
    panel_specs = (
        (2, (2, 1, 0), False),  # T fixed: (C,H,W) -> (W,H,C)
        (4, (1, 2, 0), True),   # W fixed: (C,T,H) -> (T,H,C)
        (3, (1, 2, 0), False),  # H fixed: (C,T,W) -> (T,W,C)
    )

    fig = plt.figure(figsize=(10,10))
    plt.axis("off")
    fig.subplots_adjust(left=0, right=0.9, bottom=0, top=1, hspace=0.05, wspace=0.05)

    for i in range(nx):
        chosen = filters[filter_number-1,:,:,:,:].copy()  # (C,T,H,W)
        if i >= len(panel_specs):
            continue
        fixed_axis, tile_axes, report_shape = panel_specs[i]

        # The same plane of every filter provides the normalisation range.
        reference_plane = np.take(filters, 0, axis=fixed_axis)
        tile = np.take(chosen, 0, axis=fixed_axis-1).transpose(tile_axes)

        ax = fig.add_subplot(1, nx, i+1, xticks=[], yticks=[])
        if report_shape:
            print(tile.shape)
        tile = (tile - reference_plane.min()) / (reference_plane.max() - reference_plane.min())
        ax.imshow(tile, interpolation='nearest')

    plt.show()

In [None]:
# Enlarged view of filter number 1 of stream 1; panels from left to right: WH, TH, TW.
single_view_weights = model.features1[0].conv.weight.detach().cpu().numpy()
print("WH,TH,TW")
filter_single(single_view_weights, 1)

モノクロ(グレースケール)化して表示 ※2値化ではなく、RGB 3チャンネルの平均を濃淡で表示する

In [None]:
# TW面(H=0)の可視化
def filter_show_TWmono(filters, nx=8, margin=3, scale=10):

    FN, C, FT, FH, FW = filters.shape
    ny = int(np.ceil(FN / nx))
    
    # H=0
    filter_choice = filters[:,:,:,0,:].copy()
    print(filter_choice.shape)
    # (N,C,T,W)→→(N,T,W,C)
    filter_choice = filter_choice.transpose(0,2,3,1)

    fig = plt.figure(figsize=(10,5))
    plt.title("x:time,y:width",fontsize=10)
    plt.axis("off")
    fig.subplots_adjust(left=0, right=0.9, bottom=0, top=1, hspace=0.05, wspace=0.05)
    
    for i in range(FN):
        if i < nx * ny:
            ax = fig.add_subplot(ny, nx, i+1, xticks=[], yticks=[])
            filter_img = filter_choice[i,:,:,:] 
            filter_img = (filter_img - filter_choice.min()) / (filter_choice.max() - filter_choice.min())  # 正規化
            filter_img = (filter_img[:,:,0]+filter_img[:,:,1]+filter_img[:,:,2])/765
            img = ax.imshow(filter_img, cmap='gray', interpolation='nearest')
            
    plt.show()
    
    
# TH面(W=0)の可視化
def filter_show_THmono(filters, nx=8, margin=3, scale=10):

    FN, C, FT, FH, FW = filters.shape
    ny = int(np.ceil(FN / nx))
    
    # W=0
    filter_choice = filters[:,:,:,:,0].copy()
    print(filter_choice.shape)
    # (N,C,T,H)→→(N,T,H,C)
    filter_choice = filter_choice.transpose(0,2,3,1)

    fig = plt.figure(figsize=(10,5))
    plt.title("x:time,y:height",fontsize=10)
    plt.axis("off")
    fig.subplots_adjust(left=0, right=0.9, bottom=0, top=1, hspace=0.05, wspace=0.05)
    
    for i in range(FN):
        if i < nx * ny:
            ax = fig.add_subplot(ny, nx, i+1, xticks=[], yticks=[])
            filter_img = filter_choice[i,:,:,:] 
            filter_img = (filter_img - filter_choice.min()) / (filter_choice.max() - filter_choice.min())  # 正規化
            filter_img = (filter_img[:,:,0]+filter_img[:,:,1]+filter_img[:,:,2])/765
            img = ax.imshow(filter_img, cmap='gray', interpolation='nearest')
            
    plt.show()
    

# WH面(T=0)の可視化
def filter_show_WHmono(filters, nx=8, margin=3, scale=10):
    """Draw the T=0 slice of every filter as a grayscale width/height image.

    Args:
        filters: NumPy array whose shape unpacks as (FN, C, FT, FH, FW).
        nx: Number of columns in the subplot grid; the row count is
            ceil(FN / nx).
        margin: Unused; kept so existing calls keep working.
        scale: Unused; kept so existing calls keep working.

    Returns:
        None. Prints the shape of the T=0 slice and shows one figure with one
        panel per filter.

    All panels share one gray scale: weights are min-max normalised over the
    whole T=0 slice, averaged over the channel axis, and drawn with fixed
    limits 0..1 so brightness is comparable between filters.
    """
    FN, C, FT, FH, FW = filters.shape
    ny = int(np.ceil(FN / nx))

    # T=0 slice, shape (N, C, H, W)
    plane_stack = filters[:, :, 0, :, :]
    print(plane_stack.shape)

    # Normalise once over the whole slice (loop-invariant). A constant slice
    # would give a zero range; use 1 so the result is all zeros, not NaN.
    lo = plane_stack.min()
    span = plane_stack.max() - lo
    if span == 0:
        span = 1.0
    # Channel mean keeps the result in 0..1 and works for any channel count.
    # The result is (N, H, W): imshow draws axis 0 as rows (y) and axis 1 as
    # columns (x), which already matches the "x:width,y:height" title.
    mono = ((plane_stack - lo) / span).mean(axis=1)

    fig = plt.figure(figsize=(10, 5))
    plt.title("x:width,y:height",fontsize=10)
    plt.axis("off")
    fig.subplots_adjust(left=0, right=0.9, bottom=0, top=1, hspace=0.05, wspace=0.05)

    for i in range(FN):
        ax = fig.add_subplot(ny, nx, i+1, xticks=[], yticks=[])
        # Fixed vmin/vmax stop imshow from re-stretching every panel separately.
        ax.imshow(mono[i], cmap='gray', interpolation='nearest', vmin=0.0, vmax=1.0)

    plt.show()

経路0

In [None]:
# Kernels of model.features0[0].conv as a NumPy array, drawn in the WH, TH and
# TW overviews in that order.
features0_kernels = model.features0[0].conv.weight.detach().cpu().numpy()
for show_plane in (filter_show_WHmono, filter_show_THmono, filter_show_TWmono):
    show_plane(features0_kernels)

経路1

In [None]:
# Kernels of model.features1[0].conv as a NumPy array, drawn in the WH, TH and
# TW overviews in that order.
features1_kernels = model.features1[0].conv.weight.detach().cpu().numpy()
for show_plane in (filter_show_WHmono, filter_show_THmono, filter_show_TWmono):
    show_plane(features1_kernels)

フィルタを各平面から見たときの軸変化、モノクロ版

In [None]:
# 指定したフィルタの時間変化(WH平面)　複数ver.mono
def filter_singlemoves_WHmono(filnums, filters, nx=filter_size_conv1, margin=0.1, scale=10):
    """Draw the width/height plane of selected filters at successive T indices.

    One grid row per entry of ``filnums`` and one column per T index.

    Args:
        filnums: Sequence of 1-based filter numbers; entry ``k`` selects
            ``filters[k - 1]``.
        filters: NumPy array whose shape unpacks as (FN, C, FT, FH, FW).
        nx: Number of grid columns. At most ``min(nx, FT)`` T indices are
            drawn, so a kernel shorter than ``nx`` along T leaves the remaining
            columns empty instead of raising IndexError.
        margin: Unused; kept so existing calls keep working.
        scale: Unused; kept so existing calls keep working.

    Returns:
        None. Shows one figure.

    Every column shares one gray scale: the plane is min-max normalised with
    the range of all filters at that T index, averaged over the channel axis,
    and drawn with fixed limits 0..1. Panels have height on the y axis and
    width on the x axis.
    """
    FN, C, FT, FH, FW = filters.shape
    ny = len(filnums)
    n_steps = min(nx, FT)

    # Range of all filters at each T index, computed once instead of once per
    # panel. A flat slice would give a zero range; use 1 to avoid NaN.
    lo = filters.min(axis=(0, 1, 3, 4))
    hi = filters.max(axis=(0, 1, 3, 4))
    span = np.where(hi > lo, hi - lo, 1.0)

    fig = plt.figure(figsize=(11,30))
    #plt.title("x:width,y:height",fontsize=10)
    plt.axis("off")
    fig.subplots_adjust(left=0, right=0.9, bottom=0, top=1, hspace=0.05, wspace=0.05)

    for j, filnum in enumerate(filnums):
        kernel = filters[filnum - 1]  # (C, T, H, W)
        for i in range(n_steps):
            ax = fig.add_subplot(ny, nx, j*nx+i+1, xticks=[], yticks=[])
            plane = (kernel[:, i, :, :] - lo[i]) / span[i]  # (C, H, W)
            # Channel mean -> (H, W); imshow draws axis 0 as rows (y).
            # Fixed vmin/vmax stop imshow from re-stretching every panel.
            ax.imshow(plane.mean(axis=0), cmap='gray', interpolation='nearest',
                      vmin=0.0, vmax=1.0)

    plt.show()
    
    
    
# 指定したフィルタの時間変化(TH平面)　複数ver.mono
def filter_singlemoves_THmono(filnums, filters, nx=filter_size_conv1, margin=0.1, scale=10):
    """Draw the time/height plane of selected filters at successive W indices.

    One grid row per entry of ``filnums`` and one column per W index.

    Args:
        filnums: Sequence of 1-based filter numbers; entry ``k`` selects
            ``filters[k - 1]``.
        filters: NumPy array whose shape unpacks as (FN, C, FT, FH, FW).
        nx: Number of grid columns. At most ``min(nx, FW)`` W indices are
            drawn, so a kernel narrower than ``nx`` leaves the remaining
            columns empty instead of raising IndexError.
        margin: Unused; kept so existing calls keep working.
        scale: Unused; kept so existing calls keep working.

    Returns:
        None. Shows one figure.

    Every column shares one gray scale: the plane is min-max normalised with
    the range of all filters at that W index, averaged over the channel axis,
    and drawn with fixed limits 0..1. Panels have height on the y axis and
    time on the x axis.
    """
    FN, C, FT, FH, FW = filters.shape
    ny = len(filnums)
    n_steps = min(nx, FW)

    # Range of all filters at each W index, computed once instead of once per
    # panel. A flat slice would give a zero range; use 1 to avoid NaN.
    lo = filters.min(axis=(0, 1, 2, 3))
    hi = filters.max(axis=(0, 1, 2, 3))
    span = np.where(hi > lo, hi - lo, 1.0)

    fig = plt.figure(figsize=(11,30))
    #plt.title("x:width,y:height",fontsize=10)
    plt.axis("off")
    fig.subplots_adjust(left=0, right=0.9, bottom=0, top=1, hspace=0.05, wspace=0.05)

    for j, filnum in enumerate(filnums):
        kernel = filters[filnum - 1]  # (C, T, H, W)
        for i in range(n_steps):
            ax = fig.add_subplot(ny, nx, j*nx+i+1, xticks=[], yticks=[])
            plane = (kernel[:, :, :, i] - lo[i]) / span[i]  # (C, T, H)
            # Channel mean -> (T, H); transpose to (H, T) because imshow draws
            # axis 0 as rows (y) and time belongs on the x axis.
            # Fixed vmin/vmax stop imshow from re-stretching every panel.
            ax.imshow(plane.mean(axis=0).T, cmap='gray', interpolation='nearest',
                      vmin=0.0, vmax=1.0)

    plt.show()

# 指定したフィルタの時間変化(TW平面)　複数ver.mono
def filter_singlemoves_TWmono(filnums, filters, nx=filter_size_conv1, margin=0.1, scale=10):
    """Draw the time/width plane of selected filters at successive H indices.

    One grid row per entry of ``filnums`` and one column per H index.

    Args:
        filnums: Sequence of 1-based filter numbers; entry ``k`` selects
            ``filters[k - 1]``.
        filters: NumPy array whose shape unpacks as (FN, C, FT, FH, FW).
        nx: Number of grid columns. At most ``min(nx, FH)`` H indices are
            drawn, so a kernel shorter than ``nx`` along H leaves the remaining
            columns empty instead of raising IndexError.
        margin: Unused; kept so existing calls keep working.
        scale: Unused; kept so existing calls keep working.

    Returns:
        None. Shows one figure.

    Every column shares one gray scale: the plane is min-max normalised with
    the range of all filters at that H index, averaged over the channel axis,
    and drawn with fixed limits 0..1. Panels have width on the y axis and
    time on the x axis.
    """
    FN, C, FT, FH, FW = filters.shape
    ny = len(filnums)
    n_steps = min(nx, FH)

    # Range of all filters at each H index, computed once instead of once per
    # panel. A flat slice would give a zero range; use 1 to avoid NaN.
    lo = filters.min(axis=(0, 1, 2, 4))
    hi = filters.max(axis=(0, 1, 2, 4))
    span = np.where(hi > lo, hi - lo, 1.0)

    fig = plt.figure(figsize=(11,30))
    #plt.title("x:width,y:height",fontsize=10)
    plt.axis("off")
    fig.subplots_adjust(left=0, right=0.9, bottom=0, top=1, hspace=0.05, wspace=0.05)

    for j, filnum in enumerate(filnums):
        kernel = filters[filnum - 1]  # (C, T, H, W)
        for i in range(n_steps):
            ax = fig.add_subplot(ny, nx, j*nx+i+1, xticks=[], yticks=[])
            plane = (kernel[:, :, i, :] - lo[i]) / span[i]  # (C, T, W)
            # Channel mean -> (T, W); transpose to (W, T) because imshow draws
            # axis 0 as rows (y) and time belongs on the x axis.
            # Fixed vmin/vmax stop imshow from re-stretching every panel.
            ax.imshow(plane.mean(axis=0).T, cmap='gray', interpolation='nearest',
                      vmin=0.0, vmax=1.0)

    plt.show()

指定したフィルタのT変化(WH平面)、経路0、モノクロ版

In [None]:
# WH plane per T index for filter numbers 1..num_channel of model.features0[0].conv.
filter_singlemoves_WHmono(
    filnums=np.arange(1, num_channel + 1),
    filters=model.features0[0].conv.weight.detach().cpu().numpy(),
)

指定したフィルタのT変化(WH平面)、経路1、モノクロ版

In [None]:
# WH plane per T index for filter numbers 1..num_channel of model.features1[0].conv.
filter_singlemoves_WHmono(
    filnums=np.arange(1, num_channel + 1),
    filters=model.features1[0].conv.weight.detach().cpu().numpy(),
)

指定したフィルタのW変化(TH平面)、経路0、モノクロ版

In [None]:
# TH plane per W index for filter numbers 1..num_channel of model.features0[0].conv.
filter_singlemoves_THmono(
    filnums=np.arange(1, num_channel + 1),
    filters=model.features0[0].conv.weight.detach().cpu().numpy(),
)

指定したフィルタのW変化(TH平面)、経路1、モノクロ版

In [None]:
# TH plane per W index for filter numbers 1..num_channel of model.features1[0].conv.
filter_singlemoves_THmono(
    filnums=np.arange(1, num_channel + 1),
    filters=model.features1[0].conv.weight.detach().cpu().numpy(),
)

指定したフィルタのH変化(TW平面)、経路0、モノクロ版

In [None]:
# TW plane per H index for filter numbers 1..num_channel of model.features0[0].conv.
filter_singlemoves_TWmono(
    filnums=np.arange(1, num_channel + 1),
    filters=model.features0[0].conv.weight.detach().cpu().numpy(),
)

指定したフィルタのH変化(TW平面)、経路1、モノクロ版

In [None]:
# TW plane per H index for filter numbers 1..num_channel of model.features1[0].conv.
filter_singlemoves_TWmono(
    filnums=np.arange(1, num_channel + 1),
    filters=model.features1[0].conv.weight.detach().cpu().numpy(),
)