In [144]:
import torch # 파이토치 기본 라이브러리

# torchvision : 데이터셋, 모델 아키텍처, 컴퓨터 비전의 이미지 변환 기능 제공
from torchvision import datasets # torchvision에서 제공하는 데이터셋
from torchvision import transforms # 이미지 변환기능을 제공하는 패키지

# torch.utils.data : 파이토치 데이터 로딩 유틸리티
from torch.utils.data import DataLoader # 모델 훈련에 사용할 수 있는 미니 배치 구성하고
                                        # 매 epoch마다 데이터를 샘플링, 병렬처리 등의 일을 해주는 함수

from torch.utils.data import random_split

import numpy as np
import matplotlib.pyplot as plt

from torch.utils.tensorboard import SummaryWriter
from copy import deepcopy

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [145]:
!nvidia-smi

Tue Apr 18 07:48:16 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    33W / 250W |   1473MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [146]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [14]:
from google.colab import drive
drive.mount('/content/drive')
!cp '/content/drive/MyDrive/pj3/open.zip' './'
!unzip -q open.zip -d open/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
/kaggle/input/minipj3/train

## transform

In [163]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [151]:
transform = transforms.Compose([A.Resize([224, 224]),A.HorizontalFlip(p=0.3),A.VerticalFlip(p=0.3), transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [164]:
transform = A.Compose([A.Resize(400, 400), A.Normalize(),
                    A.HorizontalFlip(p=0.3), A.VerticalFlip(p=0.3), ToTensorV2()])

In [11]:
import pandas as pd

## 데이터셋 만들기

In [12]:
from torch.utils.data import Dataset
import glob
from PIL import Image # Image.open(path)
test_dict={'몰딩수정':0, '석고수정':1, '훼손':2, '피스':3, '녹오염':4, '가구수정':5, '오염':6, '들뜸':7, '곰팡이':8,
       '창틀,문틀수정':9, '울음':10, '오타공':11, '반점':12, '면불량':13, '터짐':14, '틈새과다':15, '걸레받이수정':16,
       '이음부불량':17, '꼬임':18}
class remodelDataset(Dataset):
    def __init__(self, root, transform):
        self.filepaths = glob.glob(root + '*/*.png')
        self.transform = transform

    def __len__(self):  # len(MyDataset)
        return len(self.filepaths)

    def __getitem__(self, index): # MyDataset[index]

        # (1) image 준비
        image_filepath = self.filepaths[index]
        image = Image.open(image_filepath)         

        
        transformed_image = self.transform(image) # Resize -> To Tensor

        # (2) label 준비
        dir_label = image_filepath.split('/')[-2]
        label_=test_dict[dir_label]
        return transformed_image ,label_
    

In [13]:
trainset = remodelDataset(root ='/kaggle/input/minipj3/train/', transform=transform)

In [24]:
all_img_list = glob.glob('/kaggle/input/minipj3/train/*/*.png')

In [25]:
df = pd.DataFrame(columns=['img_path', 'label'])
df['img_path'] = all_img_list
df['label'] = df['img_path'].apply(lambda x : str(x).split('/')[-2])

In [26]:
from sklearn import preprocessing

In [27]:
le = preprocessing.LabelEncoder()
df['label_num'] = le.fit_transform(df['label'])

In [28]:
df

Unnamed: 0,img_path,label,label_num
0,/kaggle/input/minipj3/train/가구수정/11.png,가구수정,0
1,/kaggle/input/minipj3/train/가구수정/4.png,가구수정,0
2,/kaggle/input/minipj3/train/가구수정/9.png,가구수정,0
3,/kaggle/input/minipj3/train/가구수정/1.png,가구수정,0
4,/kaggle/input/minipj3/train/가구수정/2.png,가구수정,0
...,...,...,...
3452,/kaggle/input/minipj3/train/몰딩수정/19.png,몰딩수정,7
3453,/kaggle/input/minipj3/train/몰딩수정/110.png,몰딩수정,7
3454,/kaggle/input/minipj3/train/몰딩수정/26.png,몰딩수정,7
3455,/kaggle/input/minipj3/train/몰딩수정/79.png,몰딩수정,7


## train  vadlid

In [29]:
from sklearn.model_selection import train_test_split

train_indices, valid_indices = train_test_split(
                            range(len(df)), # X의 index
                             # y
                            stratify=df.label_num, # target의 비율이 train과 valid에 그대로 반영되게
                            test_size= 0.2, random_state=42)

In [30]:
from torch.utils.data import Subset
train_set = Subset(trainset, train_indices)
valid_set = Subset(trainset, valid_indices)

In [31]:
batch_size = 16 # 100 -> 16
# dataloader = DataLoader(데이터셋, 배치사이즈, 셔플여부.....)
trainloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) # 훈련용 50000개의 데이터를 100개씩 준비
validloader = DataLoader(valid_set, batch_size=batch_size, shuffle=False) # 검증용 10000개의 데이터를 100개씩 준비


In [32]:
print(type(trainloader), len(trainloader))
print(type(validloader), len(validloader))

<class 'torch.utils.data.dataloader.DataLoader'> 173
<class 'torch.utils.data.dataloader.DataLoader'> 44


## new 전처리

In [165]:
filepath = glob.glob('/kaggle/input/minipj3/train/*/*.png')

In [166]:
df = pd.DataFrame(columns=['image','label'])
df['image'] = filepath
df['label']=df['image'].str.split('/').str[-2]

df['label'].nunique()

19

In [167]:
from sklearn.preprocessing import LabelEncoder

In [168]:
encoding = LabelEncoder()
df['label_encoding'] = encoding.fit_transform(df['label'])
df['label_encoding'].nunique()

19

In [169]:
train_set, valid_set , _,_ = train_test_split(df,df['label_encoding'],test_size=0.2, stratify=df['label_encoding'],random_state=42)

In [177]:
class MyData(Dataset):
    def __init__(self, image_filepath, label_filepath, transform=None):
        self.image_filepath = image_filepath
        self.label_filepath = label_filepath
        self.transform = transform

    def __len__(self):
        return len(self.image_filepath)

    def __getitem__(self,index):

        image = self.image_filepath[index]
        image = Image.open(image) # .convert('RGB')
        image = np.asarray(image)
        # image = cv2.imread(image)
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']

        if self.label_filepath is not None:
            label = self.label_filepath[index]
            return image, label
        else:
            return image

In [178]:
trainset = MyData(train_set['image'].values, train_set['label_encoding'].values,transform=transform)
validset = MyData(valid_set['image'].values, valid_set['label_encoding'].values,transform=transform)

In [179]:
print(type(trainset), len(trainset))
print(type(validset), len(validset))

<class '__main__.MyData'> 2765
<class '__main__.MyData'> 692


In [180]:
batch_size = 32
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
validloader = DataLoader(validset, batch_size=batch_size, shuffle=False)

print(type(trainloader),len(trainloader))
print(type(validloader), len(validloader))

<class 'torch.utils.data.dataloader.DataLoader'> 87
<class 'torch.utils.data.dataloader.DataLoader'> 22


In [181]:
images, labels = next(iter(trainloader))
images.size(),labels.size()

(torch.Size([32, 3, 400, 400]), torch.Size([32]))

In [182]:
import torch.nn as nn # 파이토치에서 제공하는 다양한 계층 (Linear Layer, ....)
import torch.optim as optim # 옵티마이저 (경사하강법...)
import torch.nn.functional as F 

In [183]:
train_iter = iter(trainloader)
images, labels = next(train_iter)
images.shape

torch.Size([32, 3, 400, 400])

In [184]:
import torch.nn as nn # 파이토치에서 제공하는 다양한 계층 (Linear Layer, ....)
import torch.optim as optim # 옵티마이저 (경사하강법...)
import torch.nn.functional as F

In [185]:
import torchvision.models as models
#https://github.com/pytorch/vision/blob/6db1569c89094cf23f3bc41f79275c45e9fcb3f3/torchvision/models/googlenet.py

In [187]:
model = models.googlenet(pretrained=True, progress=True)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


In [188]:
model

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [189]:
for parameter in model.parameters():
  print(parameter.requires_grad)

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [190]:
for parameter in model.parameters():
  parameter.requires_grad = False # 학습이 안되게 고정

for parameter in model.fc.parameters():
  parameter.requires_grad = True

In [191]:
model.fc=nn.Linear(in_features=1024, out_features=19, bias=True)

In [42]:
from torchsummary import summary

ModuleNotFoundError: No module named 'torchsummary'

In [192]:
model.to(device)
model

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [38]:
summary(model, (3, 32, 32))

----------------------------------------------------------------

        Layer (type)               Output Shape         Param #


            Conv2d-1           [-1, 64, 16, 16]           9,408

       BatchNorm2d-2           [-1, 64, 16, 16]             128

       BasicConv2d-3           [-1, 64, 16, 16]               0

         MaxPool2d-4             [-1, 64, 8, 8]               0

            Conv2d-5             [-1, 64, 8, 8]           4,096

       BatchNorm2d-6             [-1, 64, 8, 8]             128

       BasicConv2d-7             [-1, 64, 8, 8]               0

            Conv2d-8            [-1, 192, 8, 8]         110,592

       BatchNorm2d-9            [-1, 192, 8, 8]             384

      BasicConv2d-10            [-1, 192, 8, 8]               0

        MaxPool2d-11            [-1, 192, 4, 4]               0

           Conv2d-12             [-1, 64, 4, 4]          12,288

      BatchNorm2d-13             [-1, 64, 4, 4]             128

      BasicConv2d-14   

In [44]:
def validate(model, validloader, loss_fn):
  total = 0   
  correct = 0
  valid_loss = 0
  valid_accuracy = 0

  # 전방향 예측을 구할 때는 gradient가 필요가 없음음
  with torch.no_grad():
    for images, labels in validloader: # 이터레이터로부터 next()가 호출되며 미니배치 100개씩을 반환(images, labels)      
      # images, labels : (torch.Size([16, 3, 32, 32]), torch.Size([16]))
      # 0. Data를 GPU로 보내기
      images, labels = images.to(device), labels.to(device)

      # 1. 입력 데이터 준비
      # not Flatten !!
      # images.resize_(images.size()[0], 784)

      # 2. 전방향(Forward) 예측
      logit = model(images) # 예측 점수
      _, preds = torch.max(logit, 1) # 배치에 대한 최종 예측
      # preds = logit.max(dim=1)[1] 
      correct += int((preds == labels).sum()) # 배치 중 맞은 것의 개수가 correct에 누적
      total += labels.shape[0] # 배치 사이즈만큼씩 total에 누적

      loss = loss_fn(logit, labels)
      valid_loss += loss.item() # tensor에서 값을 꺼내와서, 배치의 loss 평균값을 valid_loss에 누적

    valid_accuracy = correct / total
  
  return valid_loss, valid_accuracy

In [196]:
learning_rate = 0.001
# 손실함수
loss_fn = nn.CrossEntropyLoss()

# 옵티마이저(최적화함수, 예:경사하강법)
# optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# 규제의 강도 설정 weight_decay
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.001)

# Learning Rate Schedule
# https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html

# 모니터링하고 있는 값(예:valid_loss)의 최소값(min) 또는 최대값(max) patience 기간동안 줄어들지 않을 때(OnPlateau) lr에 factor(0.1)를 곱해주는 전략
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4, verbose=True)

In [197]:
writer = SummaryWriter()

def train_loop(model, trainloader, loss_fn, epochs, optimizer):  
  steps = 0
  steps_per_epoch = len(trainloader) 
  min_loss = 1000000
  max_accuracy = 0
  trigger = 0
  patience = 7 

  for epoch in range(epochs):
    model.train() # 훈련 모드
    train_loss = 0
    for images, labels in trainloader: # 이터레이터로부터 next()가 호출되며 미니배치를 반환(images, labels)
      steps += 1
      # images, labels : (torch.Size([16, 3, 32, 32]), torch.Size([16]))
      # 0. Data를 GPU로 보내기
      images, labels = images.to(device), labels.to(device)

      # 1. 입력 데이터 준비
      # not Flatten !!
      # images.resize_(images.shape[0], 784) 

      # 2. 전방향(forward) 예측
      predict = model(images) # 예측 점수
      
      loss = loss_fn(predict, labels) # 예측 점수와 정답을 CrossEntropyLoss에 넣어 Loss값 반환

      # 3. 역방향(backward) 오차(Gradient) 전파
      optimizer.zero_grad() # Gradient가 누적되지 않게 하기 위해
      loss.backward() # 모델파리미터들의 Gradient 전파

      # 4. 경사 하강법으로 모델 파라미터 업데이트
      optimizer.step() # W <- W -lr*Gradient

      train_loss += loss.item()
      if (steps % steps_per_epoch) == 0 : 
        model.eval() # 평가 모드 : 평가에서 사용하지 않을 계층(배치 정규화, 드롭아웃)들을 수행하지 않게 하기 위해서
        valid_loss, valid_accuracy = validate(model, validloader, loss_fn)

        # tensorboard 시각화를 위한 로그 이벤트 등록
        writer.add_scalar('Train Loss', train_loss/len(trainloader), epoch+1)
        writer.add_scalar('Valid Loss', valid_loss/len(validloader), epoch+1)
        writer.add_scalars('Train Loss and Valid Loss',
                          {'Train' : train_loss/len(trainloader),
                            'Valid' : valid_loss/len(validloader)}, epoch+1)
        writer.add_scalar('Valid Accuracy', valid_accuracy, epoch+1)
        # -------------------------------------------

        print('Epoch : {}/{}.......'.format(epoch+1, epochs),            
              'Train Loss : {:.3f}'.format(train_loss/len(trainloader)), 
              'Valid Loss : {:.3f}'.format(valid_loss/len(validloader)), 
              'Valid Accuracy : {:.3f}'.format(valid_accuracy)            
              )
        
        # Best model 저장    
        # option 1 : valid_loss 모니터링
        # if valid_loss < min_loss: # 바로 이전 epoch의 loss보다 작으면 저장하기
        #   min_loss = valid_loss
        #   best_model_state = deepcopy(model.state_dict())          
        #   torch.save(best_model_state, 'best_checkpoint.pth')     
        
        # option 2 : valid_accuracy 모니터링      
        if valid_accuracy > max_accuracy : # 바로 이전 epoch의 accuracy보다 크면 저장하기
          max_accuracy = valid_accuracy
          best_model_state = deepcopy(model.state_dict())          
          torch.save(best_model_state, 'best_checkpoint.pth')  
        # -------------------------------------------

        # Early Stopping (조기 종료)
        if valid_loss > min_loss: # valid_loss가 min_loss를 갱신하지 못하면
          trigger += 1
          print('trigger : ', trigger)
          if trigger > patience:
            print('Early Stopping !!!')
            print('Training loop is finished !!')
            writer.flush()   
            return
        else:
          trigger = 0
          min_loss = valid_loss
        # -------------------------------------------

        # Learning Rate Scheduler
        scheduler.step(valid_loss)
        # -------------------------------------------
        
  writer.flush()
  return  

In [198]:
epochs = 55
%time train_loop(model, trainloader, loss_fn, epochs, optimizer)
writer.close()

Epoch : 1/55....... Train Loss : 1.903 Valid Loss : 1.624 Valid Accuracy : 0.512
Epoch : 2/55....... Train Loss : 1.526 Valid Loss : 1.389 Valid Accuracy : 0.590
Epoch : 3/55....... Train Loss : 1.363 Valid Loss : 1.250 Valid Accuracy : 0.639
Epoch : 4/55....... Train Loss : 1.246 Valid Loss : 1.192 Valid Accuracy : 0.639
Epoch : 5/55....... Train Loss : 1.176 Valid Loss : 1.100 Valid Accuracy : 0.655
Epoch : 6/55....... Train Loss : 1.125 Valid Loss : 1.056 Valid Accuracy : 0.669
Epoch : 7/55....... Train Loss : 1.095 Valid Loss : 1.034 Valid Accuracy : 0.682
Epoch : 8/55....... Train Loss : 1.044 Valid Loss : 1.027 Valid Accuracy : 0.673
Epoch : 9/55....... Train Loss : 1.018 Valid Loss : 0.989 Valid Accuracy : 0.679
Epoch : 10/55....... Train Loss : 0.986 Valid Loss : 0.976 Valid Accuracy : 0.694
Epoch : 11/55....... Train Loss : 0.971 Valid Loss : 0.943 Valid Accuracy : 0.694
Epoch : 12/55....... Train Loss : 0.953 Valid Loss : 0.971 Valid Accuracy : 0.699
trigger :  1
Epoch : 13/5

## 세번째 

learning rate=0.001
batchsize = 32
epochs=55

In [124]:
learning_rate = 0.001
# 손실함수
loss_fn = nn.CrossEntropyLoss()

# 옵티마이저(최적화함수, 예:경사하강법)
# optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# 규제의 강도 설정 weight_decay
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.001)

# Learning Rate Schedule
# https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html

# 모니터링하고 있는 값(예:valid_loss)의 최소값(min) 또는 최대값(max) patience 기간동안 줄어들지 않을 때(OnPlateau) lr에 factor(0.1)를 곱해주는 전략
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4, verbose=True)

In [48]:
batch_size = 32 # 100 -> 16
# dataloader = DataLoader(데이터셋, 배치사이즈, 셔플여부.....)
trainloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) # 훈련용 50000개의 데이터를 100개씩 준비
validloader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)

In [129]:
epochs = 55
%time train_loop(model, trainloader, loss_fn, epochs, optimizer)
writer.close()

Epoch : 1/55....... Train Loss : 2.104 Valid Loss : 1.851 Valid Accuracy : 0.471
Epoch : 2/55....... Train Loss : 1.841 Valid Loss : 1.646 Valid Accuracy : 0.513
Epoch : 3/55....... Train Loss : 1.636 Valid Loss : 1.490 Valid Accuracy : 0.540
Epoch : 4/55....... Train Loss : 1.474 Valid Loss : 1.403 Valid Accuracy : 0.582
Epoch : 5/55....... Train Loss : 1.369 Valid Loss : 1.293 Valid Accuracy : 0.610
Epoch : 6/55....... Train Loss : 1.263 Valid Loss : 1.249 Valid Accuracy : 0.624
Epoch : 7/55....... Train Loss : 1.178 Valid Loss : 1.185 Valid Accuracy : 0.630
Epoch : 8/55....... Train Loss : 1.135 Valid Loss : 1.140 Valid Accuracy : 0.639
Epoch : 9/55....... Train Loss : 1.087 Valid Loss : 1.119 Valid Accuracy : 0.642
Epoch : 10/55....... Train Loss : 1.047 Valid Loss : 1.073 Valid Accuracy : 0.663
Epoch : 11/55....... Train Loss : 0.986 Valid Loss : 1.070 Valid Accuracy : 0.656
Epoch : 12/55....... Train Loss : 0.948 Valid Loss : 1.055 Valid Accuracy : 0.679
Epoch : 13/55....... Trai

1.
Epoch : 27/55....... Train Loss : 0.641 Valid Loss : 0.952 Valid Accuracy : 0.684

learning rate=0.001
batchsize = 16
epochs=55


2.
learning rate=0.00001
batchsize = 16
epochs=55
Epoch : 54/55....... Train Loss : 1.491 Valid Loss : 1.509 Valid Accuracy : 0.536


3.
learning rate=0.001
batchsize = 32
epochs=55

위세가지는 전처리 transforms.Compose([transforms.Resize([224, 224]), transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

손실함수: nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [130]:
class MyData(Dataset):
    def __init__(self, image_filepath, label_filepath, transform=None):
        self.image_filepath = image_filepath
        self.label_filepath = label_filepath
        self.transform = transform

    def __len__(self):
        return len(self.image_filepath)

    def __getitem__(self,index):

        image = self.image_filepath[index]
        image = Image.open(image) # .convert('RGB')
         # 알버트 이미지 변환을 하기 위한 작업 

        if self.transform is not None: # 트랜스폼 처리가 존재하면 
            image = self.transform(image)

        if self.label_filepath is not None: # 해당 데이터에 파일 패스가 존재하면 레이블도 함께 받아오기
            label = self.label_filepath[index]
            return image, label
        else:
            return image

In [199]:
test_set = glob.glob('/kaggle/input/minipj3/test/*.png')
test_set.sort()

In [200]:
testset = MyData(test_set, label_filepath= None,transform=transform)

In [201]:
test_set[0]

'/kaggle/input/minipj3/test/000.png'

In [202]:
testloader = DataLoader(testset,batch_size=792, shuffle=False)
images = next(iter(testloader))

In [203]:
def inference(model, test_loader, device):
    model.eval()
    preds = []
    with torch.no_grad():
        for imgs in next(iter(test_loader)):
            imgs = imgs.float().to(device)
            
            pred = model(imgs.unsqueeze(0))
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
    
    preds = le.inverse_transform(preds)
    return preds

In [204]:
preds = inference(model, testloader, device)

In [206]:
test = pd.read_csv('/kaggle/input/minipj3/test.csv')

In [208]:
submit = pd.read_csv('/kaggle/input/minipj3/sample_submission.csv')

In [209]:
submit['label'] = preds

In [210]:
submit

Unnamed: 0,id,label
0,TEST_000,훼손
1,TEST_001,훼손
2,TEST_002,피스
3,TEST_003,훼손
4,TEST_004,오염
...,...,...
787,TEST_787,꼬임
788,TEST_788,훼손
789,TEST_789,훼손
790,TEST_790,오염


In [211]:
submit['label'].nunique()


15

In [212]:
submit['label'].value_counts()

훼손         456
오염         152
꼬임          51
걸레받이수정      23
곰팡이         22
터짐          21
오타공         17
몰딩수정        16
피스          12
면불량          8
들뜸           4
석고수정         3
창틀,문틀수정      3
가구수정         2
이음부불량        2
Name: label, dtype: int64

In [213]:
submit.to_csv('hyeji_third_sub.csv', index=False)

1.
Epoch : 27/55....... Train Loss : 0.641 Valid Loss : 0.952 Valid Accuracy : 0.684

learning rate=0.001
batchsize = 16
epochs=55

transforms.Compose([transforms.Resize([224, 224]), transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


2.
learning rate=0.00001
batchsize = 16
epochs=55
Epoch : 54/55....... 

Train Loss : 1.491 Valid Loss : 1.509 Valid Accuracy : 0.536


3.
learning rate=0.001
batchsize = 32
epochs=55

Epoch : 23/55....... Train Loss : 0.657 Valid Loss : 0.900 Valid Accuracy : 0.698