In [1]:
import pandas as pd
import numpy as np
from PIL import Image
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=5, stride=1, padding=2, padding_mode='zeros')
        self.LRN1 = nn.LocalResponseNorm(size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2, padding_mode='zeros')
        self.LRN2 = nn.LocalResponseNorm(size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1, padding_mode='zeros')
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1, padding_mode='zeros')
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1, padding_mode='zeros')
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.fc1 = nn.Linear(256 * 7 * 7, 4096)
        self.Drop1 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(4096, 2)
        self.Drop2 = nn.Dropout(p=0.5)

    def forward(self, x):
        x = self.pool1(self.LRN1(F.relu(self.conv1(x),inplace=True)))
        x = self.pool2(self.LRN2(F.relu(self.conv2(x),inplace=True)))
        x = F.relu(self.conv3(x),inplace=True)
        x = F.relu(self.conv4(x),inplace=True)
        x = self.pool3(F.relu(self.conv5(x),inplace=True))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x),inplace=True)
        x = self.Drop1(x)
        x = self.fc2(x)
        x = self.Drop2(x)
        return x


In [3]:
def train(net, epoch, optimizer, criterion, train_dataloader):
    print('[ Train epoch: %d ]' % epoch)
    net.train() # 모델을 학습 모드로 설정
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(train_dataloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad() # 기울기(gradient) 초기화

        outputs = net(inputs) # 모델 입력하여 결과 계산
        loss = criterion(outputs, targets) # 손실(loss) 값 계산
        loss.backward() # 역전파를 통해 기울기(gradient) 계산

        optimizer.step() # 계산된 기울기를 이용해 모델 가중치 업데이트
        train_loss += loss.item()
        _, predicted = outputs.max(1)

        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    print('Train accuarcy:', 100. * correct / total)
    print('Train average loss:', train_loss / total)
    return (100. * correct / total, train_loss / total)


def validate(net, epoch, val_dataloader):
    print('[ Validation epoch: %d ]' % epoch)
    net.eval() # 모델을 평가 모드로 설정
    val_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(val_dataloader):
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = net(inputs) # 모델 입력하여 결과 계산
        val_loss += criterion(outputs, targets).item()
        _, predicted = outputs.max(1)

        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    print('Accuarcy:', 100. * correct / total)
    print('Average loss:', val_loss / total)
    return (100. * correct / total, val_loss / total)

In [4]:
class CustomDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file (string): csv 파일의 경로.
            transform (callable, optional): 샘플에 적용될 Optional transform.
        """
        self.df = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.df['img_path'].iloc[idx]
        image = Image.open(img_path)
        if self.transform:
            image = self.transform(image)
        return image

In [5]:
path = os.getcwd()
train_path = os.path.join(path,"train_folder")
val_path = os.path.join(path,"val_folder")
test_path = os.path.join(path,"test")

In [6]:
# 데이터셋 및 데이터로더 준비
# 이미지 전처리 및 임베딩
transforms_train = transforms.Compose([
    transforms.RandomResizedCrop((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # 정규화(normalization)
])

transforms_val = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transforms_test = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


train_dataset = datasets.ImageFolder(train_path, transforms_train)
val_dataset = datasets.ImageFolder(val_path, transforms_val)
test_dataset = datasets.ImageFolder(test_path, transforms_test)

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=True, num_workers=2)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)

In [7]:
# 모델, 손실 함수, 최적화기 초기화
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
file_name = "AlexNet.pt"
num_epochs = 10
learning_rate = 0.001
torch.cuda.manual_seed(42)
net = AlexNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0002)

train_result = []
val_result = []

start_time = time.time() # 시작 시간
for i in range(num_epochs):
    train_acc, train_loss = train(net, i, optimizer, criterion, train_dataloader) # 학습(training)
    val_acc, val_loss = validate(net, i + 1, val_dataloader) # 검증(validation)

    # 학습된 모델 저장하기
    state = {
        'net': net.state_dict()
    }
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    torch.save(state, './checkpoint/' + file_name)
    print(f'Model saved! (time elapsed: {time.time() - start_time})')

    # 현재 epoch에서의 정확도(accuracy)와 손실(loss) 값 저장하기
    train_result.append((train_acc, train_loss))
    val_result.append((val_acc, val_loss))


[ Train epoch: 0 ]
Train accuarcy: 78.1094527363184
Train average loss: 0.023701516549978683
[ Validation epoch: 1 ]
Accuarcy: 100.0
Average loss: 0.05501251916090647
Model saved! (time elapsed: 4.493528366088867)
[ Train epoch: 1 ]
Train accuarcy: 99.50248756218906
Train average loss: 0.022245697714203032
[ Validation epoch: 2 ]
Accuarcy: 100.0
Average loss: 0.04975422223409017
Model saved! (time elapsed: 7.2047834396362305)
[ Train epoch: 2 ]
Train accuarcy: 100.0
Train average loss: 0.01965516864956908
[ Validation epoch: 3 ]
Accuarcy: 100.0
Average loss: 0.04171905914942423
Model saved! (time elapsed: 9.845531463623047)
[ Train epoch: 3 ]
Train accuarcy: 100.0
Train average loss: 0.01648885545445912
[ Validation epoch: 4 ]
Accuarcy: 100.0
Average loss: 0.027362192670504253
Model saved! (time elapsed: 12.446391820907593)
[ Train epoch: 4 ]
Train accuarcy: 100.0
Train average loss: 0.010287872223711726
[ Validation epoch: 5 ]
Accuarcy: 100.0
Average loss: 0.008057402446866035
Model s

In [8]:
# 테스트 데이터로 평가
test_pred = []
threshold = 0.99
with torch.no_grad():
    for inputs, targets in test_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = net(inputs) # 모델 입력하여 결과 계산
        probabilities = F.softmax(outputs, dim=1)
        max_probability, _ = torch.max(probabilities, dim=1)
for prob in max_probability:
    if prob < threshold:
        test_pred.append(1)
    else:
        test_pred.append(0)

In [9]:
test_pred

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [10]:
submit = pd.read_csv('./sample_submission.csv')
submit['label'] = test_pred
submit.head()

Unnamed: 0,id,label
0,TEST_000,0
1,TEST_001,0
2,TEST_002,0
3,TEST_003,0
4,TEST_004,0


In [11]:
submit.to_csv('./save/submit11.csv', index=False)