# ネットワークモデルの作成

# 3.3 学習目標

1.	PSPNetのネットワーク構造をモジュール単位で理解する
2.	PSPNetを構成する各モジュールの役割を理解する
3.	PSPNetのネットワーククラスの実装を理解する

# 3.4 学習目標

1.	Featureモジュールのサブネットワーク構成を理解する
2.	サブネットワークFeatureMap_convolution を実装できるようになる
3.	Residual Blockを理解する
4.	Dilated Convolutionを理解する
5.	サブネットワークbottleNeckPSPとbottleNeckIdentifyPSPを実装できるようになる
6.	Featureモジュールを実装できるようになる

# 3.5 学習目標

1.	Pyramid Poolingモジュールのサブネットワーク構成を理解する
2.	Pyramid Poolingモジュールのマルチスケール処理の実現方法を理解する
3.	Pyramid Poolingモジュールを実装できるようになる

# 3.6 学習目標

1.	Decoderモジュールのサブネットワーク構成を理解する
2.	Decoder モジュールを実装できるようになる
3.	AuxLossモジュールのサブネットワーク構成を理解する
4.	AuxLossモジュールを実装できるようになる

## Library

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

## PSPNetのネットワーク構造
`__init__`と`forward`を実装

In [224]:
class PSPNet(nn.Module):
    """PSPNet segmentation network assembled from four modules.

    The modules are Feature (an initial convolution stage followed by four
    residual stages, the last two dilated), Pyramid Pooling, Decoder and an
    auxiliary (AuxLoss) head that branches off inside the Feature module.

    Args:
        n_classes: number of output channels produced by both the decoder
            and the auxiliary head.
    """

    def __init__(self, n_classes):
        super().__init__()

        # Number of bottleneck blocks in each residual stage (ResNet-50 layout).
        stage_depths = [3, 4, 6, 3]
        input_size = 475
        # Size handed to PyramidPooling for upsampling its pooled branches;
        # approximately input_size / 8.
        feature_size = 60

        # All sub-networks are built here so that forward() only has to run
        # the forward pass.
        self.feature_conv = FeatureMap_convolution()
        self.feature_res_1 = ResidualBlockPSP(
            n_blocks=stage_depths[0], in_channels=128, mid_channels=64,
            out_channels=256, stride=1, dilation=1)
        self.feature_res_2 = ResidualBlockPSP(
            n_blocks=stage_depths[1], in_channels=256, mid_channels=128,
            out_channels=512, stride=2, dilation=1)
        self.feature_dilated_res_1 = ResidualBlockPSP(
            n_blocks=stage_depths[2], in_channels=512, mid_channels=256,
            out_channels=1024, stride=1, dilation=2)
        self.feature_dilated_res_2 = ResidualBlockPSP(
            n_blocks=stage_depths[3], in_channels=1024, mid_channels=512,
            out_channels=2048, stride=1, dilation=4)

        self.pyramid_pooling = PyramidPooling(
            in_channels=2048, pool_sizes=[6, 3, 2, 1],
            height=feature_size, width=feature_size)

        self.decode_feature = DecodePSPFeature(
            height=input_size, width=input_size, n_classes=n_classes)

        self.aux = AuxiliaryPSPlayers(
            in_channels=1024, height=input_size, width=input_size,
            n_classes=n_classes)

    def forward(self, x):
        """Run the network and return the tuple ``(output, output_aux)``."""
        features = self.feature_conv(x)
        features = self.feature_res_1(features)
        features = self.feature_res_2(features)
        features = self.feature_dilated_res_1(features)

        # Tap the Feature module part-way through and feed it to the Aux head.
        # That head performs its own per-pixel classification; its loss is
        # meant to help train the first four sub-networks (presumably because
        # the layers close to the input would otherwise learn inefficiently).
        aux_logits = self.aux(features)

        features = self.feature_dilated_res_2(features)

        pooled = self.pyramid_pooling(features)
        logits = self.decode_feature(pooled)

        return (logits, aux_logits)

## Featureモジュール

In [225]:
class conv2DBatchNormReLU(nn.Module):
    """2-D convolution followed by batch normalisation and ReLU.

    Args:
        in_channels: number of input channels of the convolution.
        out_channels: number of output channels (also the BatchNorm width).
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: convolution padding.
        dilation: convolution dilation.
        bias: whether the convolution has a bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, bias):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=bias,
        )
        self.batchnorm = nn.BatchNorm2d(out_channels)
        # inplace=True overwrites the input tensor to save memory.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x`` and return the result."""
        return self.relu(self.batchnorm(self.conv(x)))
        

In [226]:
# 最初1回使われる
class FeatureMap_convolution(nn.Module):
    """Opening stage of the Feature module.

    Consists of three conv2DBatchNormReLU layers followed by one max-pooling
    layer; maps a 3-channel input to 128 channels.
    """

    def __init__(self):
        super().__init__()

        # Convolution 1: 3 -> 64 channels, 3x3 kernel, stride 2.
        self.cbnr_1 = conv2DBatchNormReLU(
            in_channels=3, out_channels=64, kernel_size=3, stride=2,
            padding=1, dilation=1, bias=False)

        # Convolution 2: 64 -> 64 channels, 3x3 kernel, stride 1.
        self.cbnr_2 = conv2DBatchNormReLU(
            in_channels=64, out_channels=64, kernel_size=3, stride=1,
            padding=1, dilation=1, bias=False)

        # Convolution 3: 64 -> 128 channels, 3x3 kernel, stride 1.
        self.cbnr_3 = conv2DBatchNormReLU(
            in_channels=64, out_channels=128, kernel_size=3, stride=1,
            padding=1, dilation=1, bias=False)

        # Max pooling: 3x3 window, stride 2.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Run the three convolutions and the max-pool in sequence."""
        return self.maxpool(self.cbnr_3(self.cbnr_2(self.cbnr_1(x))))

In [227]:
# Used four times after the feature-map convolution.
# Uses the same "residual block" structure as ResNet.
# Each block is one bottleNeckPSP followed by several repeated bottleNeckIdentifyPSP.

class ResidualBlockPSP(nn.Sequential):
    """Sequential residual stage: one bottleNeckPSP, then identity bottlenecks.

    Args:
        n_blocks: total number of bottleneck blocks in the stage; the first
            is a bottleNeckPSP and the remaining ``n_blocks - 1`` are
            bottleNeckIdentifyPSP.
        in_channels: channels entering the stage.
        mid_channels: channels used inside each bottleneck.
        out_channels: channels leaving every block of the stage.
        stride: stride forwarded to the blocks.
        dilation: dilation forwarded to the blocks.
    """

    def __init__(self, n_blocks, in_channels, mid_channels, out_channels, stride, dilation):
        super().__init__()

        # First block changes the channel count from in_channels to out_channels.
        first_block = bottleNeckPSP(in_channels, mid_channels, out_channels, stride, dilation)
        self.add_module('block1', first_block)

        # Remaining blocks keep out_channels; they are named block2, block3, ...
        for block_no in range(2, n_blocks + 1):
            identity_block = bottleNeckIdentifyPSP(out_channels, mid_channels, stride, dilation)
            self.add_module('block' + str(block_no), identity_block)
        

In [228]:
class conv2DBatchNorm(nn.Module):
    """2-D convolution followed by batch normalisation (no activation).

    Args:
        in_channels: number of input channels of the convolution.
        out_channels: number of output channels (also the BatchNorm width).
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: convolution padding.
        dilation: convolution dilation.
        bias: whether the convolution has a bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, bias):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=bias,
        )
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Apply conv -> batch norm to ``x`` and return the result."""
        return self.batchnorm(self.conv(x))

In [229]:
class bottleNeckPSP(nn.Module):
    """Bottleneck block whose skip connection is a 1x1 convolution.

    Main path: 1x1 conv-BN-ReLU -> 3x3 conv-BN-ReLU (carries ``stride`` and
    ``dilation``) -> 1x1 conv-BN. Skip path: 1x1 conv-BN with the same
    ``stride``. The two paths are summed and passed through ReLU.

    Args:
        in_channels: channels of the input tensor.
        mid_channels: channels inside the bottleneck.
        out_channels: channels of the output tensor.
        stride: stride of the 3x3 convolution and of the skip convolution.
        dilation: dilation (and padding) of the 3x3 convolution.
    """

    def __init__(self, in_channels, mid_channels, out_channels, stride, dilation):
        super().__init__()
        self.cbr_1 = conv2DBatchNormReLU(
            in_channels, mid_channels, kernel_size=1, stride=1,
            padding=0, dilation=1, bias=False)
        self.cbr_2 = conv2DBatchNormReLU(
            mid_channels, mid_channels, kernel_size=3, stride=stride,
            padding=dilation, dilation=dilation, bias=False)
        self.cb_3 = conv2DBatchNorm(
            mid_channels, out_channels, kernel_size=1, stride=1,
            padding=0, dilation=1, bias=False)

        # Skip connection: projects the input to out_channels.
        self.cb_residual = conv2DBatchNorm(
            in_channels, out_channels, kernel_size=1, stride=stride,
            padding=0, dilation=1, bias=False)
        # inplace=True to save memory.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ReLU(main_path(x) + skip_path(x))."""
        main_path = self.cbr_1(x)
        main_path = self.cbr_2(main_path)
        main_path = self.cb_3(main_path)

        shortcut = self.cb_residual(x)

        # Sum the fully convolved path and the skip path, then activate.
        return self.relu(main_path + shortcut)
    

In [230]:
class bottleNeckIdentifyPSP(nn.Module):
    """Bottleneck block whose skip connection is the unchanged input.

    Main path: 1x1 conv-BN-ReLU -> 3x3 conv-BN-ReLU -> 1x1 conv-BN, ending
    with ``in_channels`` channels again so it can be added to the input.

    Args:
        in_channels: channels of the input and of the output.
        mid_channels: channels inside the bottleneck.
        stride: accepted but not used; the 3x3 convolution always runs with
            stride 1.
        dilation: dilation (and padding) of the 3x3 convolution.
    """

    def __init__(self, in_channels, mid_channels, stride, dilation):
        super().__init__()
        self.cbr_1 = conv2DBatchNormReLU(
            in_channels, mid_channels, kernel_size=1, stride=1,
            padding=0, dilation=1, bias=False)
        self.cbr_2 = conv2DBatchNormReLU(
            mid_channels, mid_channels, kernel_size=3, stride=1,
            padding=dilation, dilation=dilation, bias=False)
        # No ReLU on the last layer; activation happens after the addition.
        self.cb_3 = conv2DBatchNorm(
            mid_channels, in_channels, kernel_size=1, stride=1,
            padding=0, dilation=1, bias=False)
        # inplace=True to save memory.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ReLU(main_path(x) + x)."""
        main_path = self.cbr_1(x)
        main_path = self.cbr_2(main_path)
        main_path = self.cb_3(main_path)

        # Add the untouched input to the convolved path, then activate.
        return self.relu(main_path + x)

## Pyramid Poolingモジュール

In [231]:
# 結構お手本と違う形にしたがうまくいくか
# うまくいかなかったらお手本に戻す
class PyramidPooling(nn.Module):
    """Pyramid Pooling module: multi-scale average pooling branches.

    For every entry of ``pool_sizes`` the input is adaptively average-pooled
    to that size, passed through a 1x1 conv-BN-ReLU that reduces the channel
    count, and bilinearly upsampled to ``(height, width)``. The input and all
    branch outputs are concatenated along the channel dimension.

    The branch layers are created and registered once in ``__init__`` (as
    ``avpool_1``, ``cbr_1``, ``avpool_2``, ...). Creating them inside
    ``forward`` would leave their weights out of ``parameters()`` and
    ``state_dict()``, so they would never be trained, saved or moved to
    another device, and would be re-randomised on every call.

    Args:
        in_channels: channels of the input tensor.
        pool_sizes: output sizes of the adaptive average pooling, one per
            branch.
        height: height the branch outputs are upsampled to.
        width: width the branch outputs are upsampled to.
    """

    def __init__(self, in_channels, pool_sizes, height, width):
        super(PyramidPooling, self).__init__()

        # Target size and configuration used by forward().
        self.height = height
        self.width = width
        self.pool_sizes = pool_sizes
        self.in_channels = in_channels

        # Output channels of each branch convolution, e.g. 2048 // 4 = 512.
        self.out_channels = in_channels // len(pool_sizes)

        # Register one pooling layer and one 1x1 conv-BN-ReLU per pool size.
        for branch_no, pool_size in enumerate(pool_sizes, start=1):
            self.add_module(
                'avpool_' + str(branch_no),
                nn.AdaptiveAvgPool2d(output_size=pool_size))
            self.add_module(
                'cbr_' + str(branch_no),
                conv2DBatchNormReLU(
                    self.in_channels, self.out_channels, kernel_size=1,
                    stride=1, padding=0, dilation=1, bias=False))

    def forward(self, x):
        """Return ``x`` concatenated with every upsampled branch output.

        The result has ``in_channels + len(pool_sizes) * out_channels``
        channels. ``x`` must already have spatial size ``(height, width)``
        for the concatenation to succeed.
        """
        outputs = [x]

        for branch_no in range(1, len(self.pool_sizes) + 1):
            avpool = getattr(self, 'avpool_' + str(branch_no))
            cbr = getattr(self, 'cbr_' + str(branch_no))

            branch = cbr(avpool(x))
            branch = F.interpolate(
                branch, size=(self.height, self.width),
                mode='bilinear', align_corners=True)
            outputs.append(branch)

        # Concatenate the multi-scale features along the channel dimension.
        return torch.cat(outputs, dim=1)

In [173]:
# Is hard-coding each attribute name the only way to keep the layers as self.<name> attributes?

In [89]:
# add_moduleつかったらどうだろう
class PyramidPooling(nn.Module):
    """Pyramid Pooling module built with ``add_module``.

    For every entry of ``pool_sizes`` the input is adaptively average-pooled,
    passed through a 1x1 conv-BN-ReLU and bilinearly upsampled to
    ``(height, width)``; the input and all branch outputs are concatenated
    along the channel dimension.

    The layers are registered once in ``__init__`` under the names
    ``avpool_<i>`` and ``cbr_<i>`` (``i`` starting at 1) and fetched by name
    in ``forward``. Registering them inside ``forward`` would replace the
    layers, and therefore their weights, on every call.

    Args:
        in_channels: channels of the input tensor.
        pool_sizes: output sizes of the adaptive average pooling, one per
            branch.
        height: height the branch outputs are upsampled to.
        width: width the branch outputs are upsampled to.
    """

    def __init__(self, in_channels, pool_sizes, height, width):
        super(PyramidPooling, self).__init__()

        # Target size and configuration used by forward().
        self.height = height
        self.width = width
        self.pool_sizes = pool_sizes
        self.in_channels = in_channels

        # Output channels of each branch convolution, e.g. 2048 // 4 = 512.
        self.out_channels = in_channels // len(pool_sizes)

        # Create the layers here so they are registered exactly once.
        for i in range(len(self.pool_sizes)):
            self.add_module(
                'avpool_' + str(i + 1),
                nn.AdaptiveAvgPool2d(output_size=self.pool_sizes[i]))
            self.add_module(
                'cbr_' + str(i + 1),
                conv2DBatchNormReLU(
                    self.in_channels, self.out_channels, kernel_size=1,
                    stride=1, padding=0, dilation=1, bias=False))

    def forward(self, x):
        """Return ``x`` concatenated with every upsampled branch output.

        ``x`` must already have spatial size ``(height, width)`` for the
        concatenation to succeed.
        """
        outputs = [x]

        for i in range(len(self.pool_sizes)):
            # Look the registered layers up by the names given in __init__.
            x_ = getattr(self, 'avpool_' + str(i + 1))(x)
            x_ = getattr(self, 'cbr_' + str(i + 1))(x_)
            x_ = F.interpolate(
                x_, size=(self.height, self.width),
                mode='bilinear', align_corners=True)
            outputs.append(x_)

        # Concatenate the multi-scale features along the channel dimension.
        output = torch.cat(outputs, dim=1)

        return output

## Decoderモジュール

In [232]:
class DecodePSPFeature(nn.Module):
    """Decoder head: turns the 4096-channel feature map into class scores.

    Applies a 3x3 conv-BN-ReLU (4096 -> 512 channels), 2-D dropout and a 1x1
    classification convolution, then bilinearly resizes the result to
    ``(height, width)``.

    Args:
        height: output height.
        width: output width.
        n_classes: number of output channels.
    """

    def __init__(self, height, width, n_classes):
        super().__init__()

        # Size the class scores are resized to in forward().
        self.height = height
        self.width = width

        # cbr = conv + batch norm + ReLU
        self.cbr = conv2DBatchNormReLU(
            in_channels=4096, out_channels=512, kernel_size=3, stride=1,
            padding=1, dilation=1, bias=False)
        self.dropout = nn.Dropout2d(p=0.1)
        self.classification = nn.Conv2d(
            in_channels=512, out_channels=n_classes, kernel_size=1,
            stride=1, padding=0)

    def forward(self, x):
        """Return class scores for ``x`` resized to ``(height, width)``."""
        scores = self.classification(self.dropout(self.cbr(x)))
        target_size = (self.height, self.width)
        return F.interpolate(scores, size=target_size, mode='bilinear', align_corners=True)

## AuxLossモジュール

In [233]:
class AuxiliaryPSPlayers(nn.Module):
    """Auxiliary (AuxLoss) head producing class scores from an intermediate feature map.

    Applies a 3x3 conv-BN-ReLU (``in_channels`` -> 256 channels), 2-D dropout
    and a 1x1 classification convolution, then bilinearly resizes the result
    to ``(height, width)``.

    Args:
        in_channels: channels of the incoming feature map.
        height: output height.
        width: output width.
        n_classes: number of output channels.
    """

    def __init__(self, in_channels, height, width, n_classes):
        super().__init__()

        # Size the class scores are resized to in forward().
        self.height = height
        self.width = width

        # cbr = conv + batch norm + ReLU
        self.cbr = conv2DBatchNormReLU(
            in_channels=in_channels, out_channels=256, kernel_size=3,
            stride=1, padding=1, dilation=1, bias=False)
        self.dropout = nn.Dropout2d(p=0.1)
        self.classification = nn.Conv2d(
            in_channels=256, out_channels=n_classes, kernel_size=1,
            stride=1, padding=0)

    def forward(self, x):
        """Return class scores for ``x`` resized to ``(height, width)``."""
        scores = self.classification(self.dropout(self.cbr(x)))
        target_size = (self.height, self.width)
        return F.interpolate(scores, size=target_size, mode='bilinear', align_corners=True)

In [234]:
# Sanity check: build the network with 21 output classes and display its module tree
net = PSPNet(n_classes=21)
net

PSPNet(
  (feature_conv): FeatureMap_convolution(
    (cbnr_1): conv2DBatchNormReLU(
      (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (cbnr_2): conv2DBatchNormReLU(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (cbnr_3): conv2DBatchNormReLU(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (feature_res_1): ResidualBlockPSP(
    (block1): bottleNec

In [235]:
# pyramidpoolingができていなかった
# というかself.~~で保持していないだけのような気がする
# bottleNeckPSPのcbrが全て１になっていた
# pyramidpoolingをselfで保持しながらfor文で効率的に回すのってどうやるんだろう？

In [236]:
batch_size = 2
dummy_img = torch.rand(batch_size, 3, 475, 475)

# Forward pass on the random batch; PSPNet.forward returns an (output, output_aux) tuple
outputs = net(dummy_img)
print(outputs)


(tensor([[[[-0.0998, -0.0743, -0.0488,  ...,  0.2021,  0.1882,  0.1743],
          [-0.1113, -0.0924, -0.0735,  ...,  0.1828,  0.1665,  0.1502],
          [-0.1228, -0.1105, -0.0982,  ...,  0.1634,  0.1448,  0.1261],
          ...,
          [ 0.1167,  0.1208,  0.1249,  ..., -0.1208, -0.1501, -0.1794],
          [ 0.0936,  0.1007,  0.1078,  ..., -0.1066, -0.1262, -0.1457],
          [ 0.0704,  0.0806,  0.0907,  ..., -0.0924, -0.1022, -0.1120]],

         [[-0.2531, -0.2540, -0.2548,  ...,  0.0965,  0.0247, -0.0471],
          [-0.2291, -0.2282, -0.2273,  ...,  0.1204,  0.0537, -0.0131],
          [-0.2051, -0.2025, -0.1998,  ...,  0.1444,  0.0827,  0.0210],
          ...,
          [-0.1074, -0.1194, -0.1314,  ...,  0.1365,  0.1456,  0.1548],
          [-0.1203, -0.1450, -0.1697,  ...,  0.1627,  0.1771,  0.1916],
          [-0.1332, -0.1706, -0.2080,  ...,  0.1888,  0.2086,  0.2284]],

         [[-0.0372, -0.0161,  0.0049,  ..., -0.5846, -0.5849, -0.5852],
          [-0.0632, -0.0369, 

In [None]:
# サイズは合っているはずなのになんでエラーになるのか？
# 見ている箇所が間違っていた