In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import torch.optim as optim
from tqdm import tqdm
import os
from PIL import Image
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
path = '/Users/ppangppang/Documents/dev/deeplearning_with_pytorch/data/'

## Dataset custom

### RGB 평균 추출

- 모든 폴더마다 파일 읽어오기
- 모든 파일들 rgb mean, std 구하기

In [2]:
# class_ = os.listdir(path+'/train/')
# try:
#     class_.remove('.DS_Store')
# except:
#     pass

# files=[]
# imgs=[]
# for cls in tqdm(class_):
#     file = os.listdir(path+f'/train/{cls}')
#     files.extend(file)

#     for f in tqdm(files):
#         img = cv2.imread(path+f'/train/{cls}/{f}')
#         img_r = (img[:,:,0]/255.).mean()
#         img_g = (img[:,:,1]/255.).mean()
#         img_b = (img[:,:,2]/255.).mean()
#         imgs.append(img)
#         files=[]

# files_cnt = len(files)

In [3]:
## rgb평균 확인해서 normalize하는 기능도 해보기

transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    # transforms.RandomHorizontalFlip(), # 데이터 증진(augmentation)
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # 정규화(normalization)
])

transforms_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_datasets = datasets.ImageFolder(os.path.join(path, 'train'), transforms_train)
test_datasets = datasets.ImageFolder(os.path.join(path, 'test'), transforms_test)

train_dataloader = DataLoader(train_datasets, batch_size=4, shuffle=True, num_workers=4, drop_last=True)
test_dataloader = DataLoader(test_datasets, batch_size=4, shuffle=False, num_workers=4)

print('학습 데이터셋 크기:', len(train_datasets))
print('테스트 데이터셋 크기:', len(test_datasets))

class_names = train_datasets.classes
print('클래스 수:', len(class_names))
print('클래스:', class_names)

학습 데이터셋 크기: 600
테스트 데이터셋 크기: 150
클래스 수: 3
클래스: ['galaxy', 'glass', 'iphone']


## CNN

In [25]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1) ## 32 -> 28
        self.pool1 = nn.MaxPool2d(2,2) ## 28->14
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5, stride=1) ## 14-> 10
        self.pool2 = nn.MaxPool2d(2,2) ## 10->5
        self.fc1 = nn.Linear(12*5*5, 50)
        self.fc2 = nn.Linear(50,3)

    def forward(self,x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))

        x = x.view(-1, 12*5*5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x)) 

        return x

print(CNN())

CNN(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=300, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=3, bias=True)
)


In [26]:
cnn = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn.parameters(), lr=1e-3)

In [27]:
loss_=[]
n = len(train_dataloader)

for epoch in tqdm(range(5)):

    train_loss = 0.0

    for i, data in enumerate(tqdm(train_dataloader, 0)):

        inputs, labels = data[0], data[1]

        optimizer.zero_grad()

        outputs = cnn(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    loss_.append(train_loss / n)
    print(f'epoch{epoch+1} : loss {train_loss/n}')

100%|██████████| 150/150 [00:21<00:00,  6.89it/s] 
 20%|██        | 1/5 [00:21<01:27, 21.77s/it]

epoch1 : loss 1.007309015194575


100%|██████████| 150/150 [00:21<00:00,  6.87it/s] 
 40%|████      | 2/5 [00:43<01:05, 21.81s/it]

epoch2 : loss 0.8364158415794373


100%|██████████| 150/150 [00:21<00:00,  6.86it/s] 
 60%|██████    | 3/5 [01:05<00:43, 21.84s/it]

epoch3 : loss 0.7463068726658821


100%|██████████| 150/150 [00:21<00:00,  6.84it/s] 
 80%|████████  | 4/5 [01:27<00:21, 21.87s/it]

epoch4 : loss 0.6837194767594338


100%|██████████| 150/150 [00:21<00:00,  6.90it/s] 
100%|██████████| 5/5 [01:49<00:00, 21.83s/it]

epoch5 : loss 0.6128644086917241





In [29]:
corret = 0
total = 0
with torch.no_grad():
    for data in tqdm(test_dataloader):
        images, labels = data[0], data[1]
        outputs = cnn(images)
        _, pred = torch.max(outputs.data, 1)
        total += labels.size(0)
        corret += (pred==labels).sum().item()

print(f'test_image : {len(test_dataloader)}, acc : {100*corret/total}')

100%|██████████| 38/38 [00:21<00:00,  1.77it/s]

test_image : 38, acc : 65.33333333333333





## VGG

In [4]:
class VGG19(nn.Module):
    def __init__(self):
        super(VGG19, self).__init__()
        self.conv = nn.Sequential(
            #3 224 128
            nn.Conv2d(3, 64, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(64, 64, 3, padding=1), nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),

            #64 112 64
            nn.Conv2d(64, 128, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(128, 128, 3, padding=1), nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),

            #128 56 32
            nn.Conv2d(128, 256, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(256, 256, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(256, 256, 3, padding=1), nn.LeakyReLU(0.2),            
            nn.MaxPool2d(2, 2),

            #256 28 16
            nn.Conv2d(256, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),

            #512 14 8
            nn.Conv2d(512, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1), nn.LeakyReLU(0.2),            
            nn.MaxPool2d(2, 2)
        )
        #512 7 4

        self.avg_pool = nn.AvgPool2d(7)
        #512 1 1
        self.classifier = nn.Linear(512, 3)

    def forward(self, x):
        features = self.conv(x)
        x = self.avg_pool(features)
        x = x.view(features.size(0), -1)
        x = self.classifier(x)
        return x, features


In [5]:
vgg19 = VGG19()
param = list(vgg19.parameters())
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg19.parameters(),lr=1e5)

In [6]:
for epoch in tqdm(range(3)):
    running_loss = 0.0

    if(epoch>0):
        vgg19 = VGG19()
        vgg19.load_state_dict(torch.load(save_path))

    for i, data in tqdm(enumerate(train_dataloader, 0)):
        inputs, labels = data
        optimizer.zero_grad()
        outputs,f = vgg19(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        if(loss.item() > 1000):
            print(f'loss : {loss.item()}')
            for param in vgg19.parameters():
                print(param.data)
        
        running_loss += loss.item()
        if i%50 == 49:
            print(f'epoch{epoch+1}, loss{running_loss/50}')
            running_loss = 0.0
    save_path = '/Users/ppangppang/Documents/dev/deeplearning_with_pytorch/'
    torch.save(vgg19.state_dict(), save_path)

print('Done')

  0%|          | 0/3 [00:00<?, ?it/s]

epoch1, lossnan


81it [14:13, 10.54s/it]


In [None]:
corret = 0
total = 0
with torch.no_grad():
    for data in tqdm(test_dataloader):
        images, labels = data[0], data[1]
        outputs = vgg19(images)
        _, pred = torch.max(outputs.data, 1)
        total += labels.size(0)
        corret += (pred==labels).sum().item()

print(f'test_image : {len(test_dataloader)}, acc : {100*corret/total}')