# ShuffleNet2 구현하기

![image.png](attachment:image.png)

In [5]:
# step1) import
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.utils.data import DataLoader


In [2]:
# step2) device
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.manual_seed(777)
if device == "cuda":
    torch.cuda.manual_seed_all(777)
print(device)

cpu


In [3]:
# step3) hyper-parameter
batch_size = 100
training_epochs = 10
learning_rate = 0.001

In [4]:
# step4) Dataset & Dataloader
trans = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize((0.1307), (0.2890))
])

mnist_train = dsets.MNIST(root = "../data/MNIST_data/", train = True, transform= trans,download = True)
mnist_test = dsets.MNIST(root = "../data/MNIST_data/", train = False, transform= trans,download = True)

train_loader = DataLoader(dataset= mnist_train, batch_size= batch_size, shuffle= True, drop_last= True)
test_loader = DataLoader(dataset= mnist_test, batch_size= batch_size, shuffle= True, drop_last= True)


In [None]:
# 평균-분산 구하기
mu = 0
mu_square = 0
var = 0

for X,Y in train_loader:
    mu += torch.mean(X, dim = [0,2,3])
    mu_square += torch.mean(X**2, dim = [0,2,3])
    
mu /= len(train_loader)
mu_square /= len(train_loader)
var = (mu_square - mu**2)**0.5

print(mu, var)

In [None]:
def get_mean_std(loader):
    mu = 0
    mu_square = 0
    std = 0
    for sample in loader:
        X,Y = sample

        mu += torch.mean(X, dim = [0,2,3])
        mu_square += torch.mean(X**2, dim = [0,2,3])
        std += torch.std(X, dim = [0,2,3])

    mu /= len(loader)
    mu_square /= len(loader)
    std = (mu_square - mu**2)**0.5 # E[X**2] - {E[X]}**2
    return mu, std
get_mean_std(train_loader)

# step5) 모델링

In [62]:
# stride, Repeat, output_channels
cfg = {"ShffleNetV2": [(2,1,116), # stage2
                       (1,3,116),
                       
#                        (2,1,232), # stage3
#                        (1,7,232),
                       
#                        (2,1,464), # stage4
#                        (1,3,464)
                      ]}

# make_layer
def make_layers(cfg):
    in_channel = 24 # 첫번째를 통과하게 되면
    layers = []
    for v in cfg:
        stride, repeat, out_channel = v
        inner_channel = 0
        
        for i in range(repeat):
            left, right = [], []
            if stride == 1:
                inner_channel = in_channel//2 # channel spilt
                left.append(None)

            elif stride == 2:
                inner_channel = in_channel
                left = [nn.Conv2d(inner_channel, inner_channel,kernel_size=3, stride = 2, padding=1),
                        nn.BatchNorm2d(inner_channel),
                        
                        nn.Conv2d(inner_channel, inner_channel, kernel_size=1),
                        nn.BatchNorm2d(inner_channel),
                        nn.ReLU()]

            right = [nn.Conv2d(inner_channel, inner_channel,kernel_size=1),
                     nn.BatchNorm2d(inner_channel),

                     nn.Conv2d(inner_channel, inner_channel, kernel_size=3, groups= inner_channel, stride = stride, padding = 1 if stride == 2 else 0),
                     nn.BatchNorm2d(inner_channel),

                     nn.Conv2d(inner_channel,inner_channel,kernel_size=1),
                     nn.BatchNorm2d(inner_channel),
                     nn.ReLU()]
            layers.append((left,right))
            
            # 중요함
            in_channel = out_channel
            
    return layers
        
            
t = make_layers(cfg["ShffleNetV2"])            
            


In [78]:
for i in t:
    print(i[0])
    

[Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)), BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1)), BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU()]
[None]
[None]
[None]


In [75]:
nn.Sequential(*t[0][1])

Sequential(
  (0): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
  (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24)
  (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
  (5): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
)

In [15]:
#step5) 모델링
class ShuffleNet2(nn.Module):
    def __init__(self, features, strides):
        super(ShuffleNet2, self).__init__()
        
        # 1*224*224 -> 24*112*112 -> 24*56*56
        self.conv1 = nn.Sequential(
            nn.Conv2d(1,24, kernel_size= 3, stride=2, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2, padding= 1),
        )
        
        self.left = []
        self.right = []
        
        for layer in features:
            self.left.append(nn.Sequential(*layer[0]))
            self.right.append(nn.Sequential(*layer[1]))
            break
                
    def forward(self,x):
        out = self.conv1(x)
        print(out.shape)
        
        
        

        
        return out
    
# test
model = ShuffleNet2().to(device)

test_data = torch.Tensor(2,1,224,224).to(device)
model(test_data)



torch.Size([2, 24, 56, 56])


tensor([[[[-0.3244, -0.3244, -0.3244,  ..., -0.3244, -0.3244, -0.3244],
          [-0.3244, -0.3244, -0.3244,  ..., -0.3244, -0.3244, -0.3244],
          [-0.3244, -0.3244, -0.3244,  ..., -0.3244, -0.3244, -0.3244],
          ...,
          [-0.3244, -0.3244, -0.3244,  ..., -0.3244, -0.3244, -0.3244],
          [-0.3244, -0.3244, -0.3244,  ..., -0.3244, -0.3244, -0.3244],
          [-0.3244, -0.3244, -0.3244,  ..., -0.3244, -0.3244, -0.3244]],

         [[-0.2204, -0.2204, -0.2204,  ..., -0.2204, -0.2204, -0.2204],
          [-0.2204, -0.2204, -0.2204,  ..., -0.2204, -0.2204, -0.2204],
          [-0.2204, -0.2204, -0.2204,  ..., -0.2204, -0.2204, -0.2204],
          ...,
          [-0.2204, -0.2204, -0.2204,  ..., -0.2204, -0.2204, -0.2204],
          [-0.2204, -0.2204, -0.2204,  ..., -0.2204, -0.2204, -0.2204],
          [-0.2204, -0.2204, -0.2204,  ..., -0.2204, -0.2204, -0.2204]],

         [[-0.0473, -0.0473, -0.0473,  ..., -0.0473, -0.0473, -0.0473],
          [-0.0473, -0.0473, -

In [20]:
x = torch.rand(2,3)

torch.cat((x,x), axis = 1)

tensor([[0.3409, 0.0289, 0.8728, 0.3409, 0.0289, 0.8728],
        [0.0638, 0.2520, 0.1085, 0.0638, 0.2520, 0.1085]])