<a href="https://colab.research.google.com/github/JiNYouNG2222/pattern-recognition/blob/main/EMNIST_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transfroms
import pandas as pd
from collections import OrderedDict
from IPython.display import clear_output

In [3]:
learning_rate = 0.001
batch_size = 100
num_classes = 10
epochs = 5

In [4]:
train_set = torchvision.datasets.EMNIST(
    root = './data/EMNIST',
    split = 'letters',
    train = True,
    download = True,
    transform = transfroms.Compose([
        transfroms.ToTensor() # 데이터를 0에서 255까지 있는 값을 0에서 1사이 값으로 변환
    ])
)
test_set = torchvision.datasets.EMNIST(
    root = './data/EMNIST',
    split = 'letters',
    train = False,
    download = True,
    transform = transfroms.Compose([
        transfroms.ToTensor() # 데이터를 0에서 255까지 있는 값을 0에서 1사이 값으로 변환
    ])
)

print(train_set, test_set)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)

Downloading https://biometrics.nist.gov/cs_links/EMNIST/gzip.zip to ./data/EMNIST/EMNIST/raw/gzip.zip


100%|██████████| 562M/562M [01:01<00:00, 9.10MB/s]


Extracting ./data/EMNIST/EMNIST/raw/gzip.zip to ./data/EMNIST/EMNIST/raw
Dataset EMNIST
    Number of datapoints: 124800
    Root location: ./data/EMNIST
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           ) Dataset EMNIST
    Number of datapoints: 20800
    Root location: ./data/EMNIST
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
           )


In [None]:
import torch
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device('cpu')

# layer 2개.
즉 origin CNN모델



In [None]:
class NeuralNet(nn.Module):
    def __init__(self):
        super().__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=10, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=100, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.dropout = nn.Dropout()
        self.fc1 = nn.Linear(in_features=100*7*7, out_features=1000)
        self.fc2 = nn.Linear(in_features=1000, out_features=26)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.reshape(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

# layer 2개 더 추가.
즉 4층 레이어 CNN모델
* layer3: Conv2d에서 100개의 input channel과 200개의 output channel을 사용.
* layer4: Conv2d에서 200개의 input channel과 400개의 output channel을 사용.


Feature Map 크기 계산:

eMNIST 입력 크기가 (1, 28, 28)일 경우, 각 layer의 MaxPooling으로 인해 feature map 크기가 28 -> 14 -> 7 -> 3 -> 1로 감소합니다.
따라서 최종적으로 400 * 1 * 1을 fc1의 입력 크기로 설정.


유지된 구조:

Dropout과 Fully Connected Layer (fc1, fc2)는 기존과 동일하지만, in_features를 최종 feature map 크기에 맞게 조정

In [6]:
import torch
import torch.nn as nn

class NeuralNet(nn.Module):
    def __init__(self):
        super().__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=10, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=100, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # 추가된 layer3
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=100, out_channels=200, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # 추가된 layer4
        self.layer4 = nn.Sequential(
            nn.Conv2d(in_channels=200, out_channels=400, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.dropout = nn.Dropout()
        # layer4 이후의 크기에 맞춰 in_features를 조정
        self.fc1 = nn.Linear(in_features=400*1*1, out_features=1000)
        self.fc2 = nn.Linear(in_features=1000, out_features=26)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.reshape(x.size(0), -1)  # Flatten
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

In [7]:
learning_rate = 0.001
batch_size = 128
num_classes = 26
epochs = 10

In [8]:
net = NeuralNet()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [9]:
pd_results = []

for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        out = net(images)
        loss = criterion(out, labels-1)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total = labels.size(0)
        preds = torch.max(out.data, 1)[1]
        correct = (preds==labels-1).sum().item()

        if (i+1)%200==0:
            results = OrderedDict()
            results['epoch'] = epoch+1
            results['idx'] = i+1
            results['loss'] = loss.item()
            results['accuracy'] = 100.*correct/total
            pd_results.append(results)
            df = pd.DataFrame.from_dict(pd_results, orient='columns')

            clear_output(wait=True)
            display(df)

Unnamed: 0,epoch,idx,loss,accuracy
0,1,200,0.748262,80.0
1,1,400,0.665744,76.0
2,1,600,0.454343,87.0
3,1,800,0.381153,90.0
4,1,1000,0.617954,82.0
5,1,1200,0.262312,88.0
6,2,200,0.204612,92.0
7,2,400,0.262162,89.0
8,2,600,0.190049,92.0
9,2,800,0.370978,88.0


In [13]:
# 모델 초기화
model = NeuralNet()

# 옵티마이저 초기화
optimizer = optim.SGD(model.parameters(), lr=0.001)

# 모델의 state_dict 출력
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

# 옵티마이저의 state_dict 출력
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

Model's state_dict:
layer1.0.weight 	 torch.Size([10, 1, 3, 3])
layer1.0.bias 	 torch.Size([10])
layer2.0.weight 	 torch.Size([100, 10, 3, 3])
layer2.0.bias 	 torch.Size([100])
layer3.0.weight 	 torch.Size([200, 100, 3, 3])
layer3.0.bias 	 torch.Size([200])
layer4.0.weight 	 torch.Size([400, 200, 3, 3])
layer4.0.bias 	 torch.Size([400])
fc1.weight 	 torch.Size([1000, 400])
fc1.bias 	 torch.Size([1000])
fc2.weight 	 torch.Size([26, 1000])
fc2.bias 	 torch.Size([26])
Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.001, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'fused': None, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]}]


In [15]:
torch.save(model.state_dict(), 'emnist_4layer_ver2.pt')

In [1]:
net.eval()

correct, total = 0, 0
with torch.no_grad():
    for i, (images, labels) in enumerate(test_loader):
        out = net(images)
        preds = torch.max(out.data, 1)[1]
        correct += (preds==labels-1).sum().item()
        total += len(labels)

    print("Test accuracy: ", 100.*correct/total)

NameError: name 'net' is not defined

origin (2 layer)

learning_rate = 0.001
batch_size = 100
num_classes = 26
epochs = 3

**Test accuracy: 91.55**


---


develop1 (4 layer ver1)  
learning_rate = 0.001
batch_size = 100
num_classes = 26
epochs = 3

**Test accuracy: 92.92**


---


develop2 (4 layer ver2)  
learning_rate = 0.001
batch_size = 128
num_classes = 26
epochs = 10

**Test accuracy: 94.05**