In [1]:
import torch
# Pick the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

print(device)

cuda:0


In [2]:
import pandas as pd
import numpy as np
import torch
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.ensemble import IsolationForest
from tqdm import tqdm
import torch.optim as optim


# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

import torch.nn as nn

In [15]:
# Dataset that loads images from the paths listed in a CSV file
class CustomDataset(Dataset):
    """Image dataset backed by a CSV file with an ``img_path`` column.

    Each item is an ``(image, target)`` pair. The target is always the
    constant tensor ``[0.]``; it is a placeholder, not a real label.
    """

    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file (string): Path to the CSV file. It must contain an
                ``img_path`` column holding one image path per row.
            transform (callable, optional): Transform applied to every
                loaded image.
        """
        self.df = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) listed in the CSV file."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at positional index ``idx``.

        Args:
            idx (int): Row position in the CSV file.

        Returns:
            tuple: ``(image, target)`` where ``image`` is the RGB image
            (after ``transform`` when one was given) and ``target`` is the
            float tensor ``[0.]``.
        """
        img_path = self.df['img_path'].iloc[idx]
        # Force three channels: grayscale, palette or RGBA files would
        # otherwise produce 1 or 4 channels and break a 3-channel pipeline.
        # The context manager releases the file handle once the pixels have
        # been decoded by convert().
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        target = torch.tensor([0.]).float()
        return image, target

# Image preprocessing: resize to 512x512, convert to a tensor and normalise
# each of the three channels with the given mean / std.
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_data = CustomDataset(csv_file='../data/train.csv', transform=transform)
# Shuffle the training samples every epoch so that mini-batches (and the
# batch statistics computed from them) do not depend on the CSV row order.
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)

test_data = CustomDataset(csv_file='../data/test.csv', transform=transform)
# Keep the test order fixed: predictions are later matched to the rows of the
# submission file by position.
test_loader = DataLoader(test_data, batch_size=16, shuffle=False)

In [42]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd

class CNNAutoencoder(nn.Module):
    """Convolutional autoencoder for square 3-channel images.

    The encoder halves the spatial resolution four times with stride-2
    convolutions (a factor of 16 overall), flattens the resulting
    512-channel feature map and projects it to a ``latent_dim`` vector.
    The decoder mirrors this with a linear layer followed by four stride-2
    transposed convolutions. The final layer has no activation, so outputs
    are unbounded.

    The order of layers inside ``encoder`` and ``decoder`` is fixed so that
    ``state_dict`` keys stay stable.

    Args:
        input_size (int): Height and width of the (square) input images.
            Must be a positive multiple of 16 so that the decoder restores
            exactly the input resolution. Defaults to 512.
        latent_dim (int): Size of the bottleneck vector. Defaults to 1024.

    Raises:
        ValueError: If ``input_size`` is not a positive multiple of 16 or
            ``latent_dim`` is not positive.
    """

    def __init__(self, input_size=512, latent_dim=1024):
        super().__init__()

        if input_size <= 0 or input_size % 16 != 0:
            raise ValueError(
                f"input_size must be a positive multiple of 16, got {input_size}"
            )
        if latent_dim <= 0:
            raise ValueError(f"latent_dim must be positive, got {latent_dim}")

        # Four stride-2 convolutions shrink each side by 2**4 = 16.
        feature_size = input_size // 16
        flat_features = 512 * feature_size * feature_size

        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Flatten(),
            nn.Linear(flat_features, latent_dim),
            nn.ReLU(True)
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, flat_features),
            nn.ReLU(True),
            # Restore the (channels, height, width) layout of the feature map
            nn.Unflatten(1, (512, feature_size, feature_size)),
            nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck vector and decode it back.

        Args:
            x (torch.Tensor): Batch of images shaped
                ``(batch, 3, input_size, input_size)``.

        Returns:
            torch.Tensor: Reconstruction with the same shape as ``x``.
        """
        x = self.encoder(x)
        x = self.decoder(x)
        return x


In [43]:
from torch.optim.lr_scheduler import StepLR

# Build the model on the selected device, then its loss and optimizer
autoencoder = CNNAutoencoder()
autoencoder = autoencoder.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(params=autoencoder.parameters(), lr=0.01)
# Scheduler: multiply the learning rate by 0.9 after every epoch
scheduler = StepLR(optimizer=optimizer, step_size=1, gamma=0.9)

In [44]:
# Training loop
def train_model(dataloader, model, criterion, optimizer, scheduler, epochs=30):
    """Train ``model`` to reconstruct its own inputs.

    Args:
        dataloader: Iterable yielding ``(inputs, target)`` batches. The
            target is ignored; the inputs serve as the reconstruction target.
        model (nn.Module): Model to train. It is switched to training mode.
        criterion (callable): Loss called as ``criterion(outputs, inputs)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        epochs (int): Number of passes over ``dataloader``. Defaults to 30.

    Returns:
        None. One line per epoch is printed with the sample-weighted mean
        loss and the learning rate in effect after the scheduler step.
    """
    model.train()  # enable training-mode behaviour (e.g. batch statistics)

    # Move batches to wherever the model lives instead of relying on a
    # notebook-level global; a parameter-free model leaves batches untouched.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    for epoch in range(epochs):
        running_loss = 0.0
        samples_seen = 0
        for inputs, _ in dataloader:
            if target_device is not None:
                inputs = inputs.to(target_device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by its size so that a smaller final
            # batch does not skew the epoch average.
            batch_size = inputs.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

        scheduler.step()  # update the learning rate once per epoch

        # Average over the samples actually processed: this stays correct
        # with drop_last=True and does not divide by zero on an empty loader.
        epoch_loss = running_loss / samples_seen if samples_seen else float('nan')
        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, LR: {scheduler.get_last_lr()[0]}')

In [45]:
# Run the training loop
train_model(
    dataloader=train_loader,
    model=autoencoder,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=50,
)

Epoch 1/50, Loss: 0.7288, LR: 0.009000000000000001
Epoch 2/50, Loss: 0.2396, LR: 0.008100000000000001
Epoch 3/50, Loss: 0.1937, LR: 0.007290000000000001
Epoch 4/50, Loss: 0.1818, LR: 0.006561000000000002
Epoch 5/50, Loss: 0.1749, LR: 0.005904900000000002
Epoch 6/50, Loss: 0.1679, LR: 0.005314410000000002
Epoch 7/50, Loss: 0.1610, LR: 0.004782969000000002
Epoch 8/50, Loss: 0.1551, LR: 0.004304672100000002
Epoch 9/50, Loss: 0.1495, LR: 0.003874204890000002
Epoch 10/50, Loss: 0.1443, LR: 0.003486784401000002
Epoch 11/50, Loss: 0.1399, LR: 0.003138105960900002
Epoch 12/50, Loss: 0.1352, LR: 0.0028242953648100018
Epoch 13/50, Loss: 0.1300, LR: 0.0025418658283290017


KeyboardInterrupt: 

In [47]:
def anomaly_detection(test_loader, model, threshold):
    """Flag images whose reconstruction error exceeds ``threshold``.

    Args:
        test_loader: Iterable yielding ``(images, target)`` batches; the
            target is ignored.
        model (nn.Module): Reconstruction model. It is switched to
            evaluation mode.
        threshold (float): Images whose mean squared reconstruction error is
            strictly greater than this value are flagged.

    Returns:
        torch.Tensor: 1-D boolean tensor with one entry per image, in the
        order the loader produced them (``True`` = anomaly). Empty when the
        loader yields no batches.
    """
    model.eval()  # evaluation mode

    # Move batches to wherever the model lives instead of relying on a
    # notebook-level global; a parameter-free model leaves batches untouched.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    batch_flags = []  # one boolean tensor per batch
    with torch.no_grad():  # no gradients are needed for scoring
        for images, _ in test_loader:
            if target_device is not None:
                images = images.to(target_device)
            outputs = model(images)
            per_element = nn.functional.mse_loss(outputs, images, reduction='none')
            # Mean squared error per image: average over all non-batch dims
            per_image = per_element.reshape(per_element.size(0), -1).mean(1)
            batch_flags.append(per_image > threshold)

    # torch.cat (like torch.stack) rejects an empty list, so handle the
    # empty loader explicitly.
    if not batch_flags:
        return torch.empty(0, dtype=torch.bool)
    return torch.cat(batch_flags)

# Decision threshold on the per-image reconstruction error (tune per data set)
threshold = 0.1400

# Score the test images with the trained autoencoder
anomalies = anomaly_detection(test_loader=test_loader, model=autoencoder, threshold=threshold)

# Print the verdict for every test image
for i, is_anomaly in enumerate(anomalies):
    print(f'Image {i}: {"Anomaly" if is_anomaly else "Normal"}')

Image 0: Anomaly
Image 1: Normal
Image 2: Normal
Image 3: Anomaly
Image 4: Normal
Image 5: Normal
Image 6: Anomaly
Image 7: Normal
Image 8: Normal
Image 9: Anomaly
Image 10: Normal
Image 11: Anomaly
Image 12: Normal
Image 13: Normal
Image 14: Normal
Image 15: Anomaly
Image 16: Anomaly
Image 17: Anomaly
Image 18: Anomaly
Image 19: Normal
Image 20: Normal
Image 21: Normal
Image 22: Anomaly
Image 23: Normal
Image 24: Normal
Image 25: Normal
Image 26: Anomaly
Image 27: Anomaly
Image 28: Anomaly
Image 29: Anomaly
Image 30: Anomaly
Image 31: Normal
Image 32: Normal
Image 33: Anomaly
Image 34: Anomaly
Image 35: Anomaly
Image 36: Anomaly
Image 37: Normal
Image 38: Anomaly
Image 39: Anomaly
Image 40: Normal
Image 41: Anomaly
Image 42: Normal
Image 43: Anomaly
Image 44: Normal
Image 45: Anomaly
Image 46: Normal
Image 47: Normal
Image 48: Normal
Image 49: Anomaly
Image 50: Normal
Image 51: Anomaly
Image 52: Normal
Image 53: Anomaly
Image 54: Normal
Image 55: Anomaly
Image 56: Normal
Image 57: Nor

In [53]:
# Cast the flags to 32-bit integers, then move them to host memory
anomalies = anomalies.to(torch.int32).cpu()

In [54]:
# Load the submission template and fill in the predicted labels
submission = pd.read_csv("../submission/sample_submission.csv")
# Convert explicitly to a host-side integer NumPy array rather than handing
# pandas a torch tensor: this works whether or not the flags were already
# cast / moved to the CPU, and does not depend on implicit tensor coercion.
labels = anomalies.detach().cpu().numpy().astype(int)
if len(labels) != len(submission):
    raise ValueError(
        f"expected {len(submission)} predictions for the submission file, got {len(labels)}"
    )
submission['label'] = labels

In [55]:
submission

Unnamed: 0,id,label
0,TEST_000,1
1,TEST_001,0
2,TEST_002,0
3,TEST_003,1
4,TEST_004,0
...,...,...
95,TEST_095,0
96,TEST_096,1
97,TEST_097,1
98,TEST_098,1


In [56]:
# Write the filled-in submission without the index column
submission.to_csv(path_or_buf="../submission/submission_5.csv", index=False)

In [47]:
# NOTE(review): `submit` is not defined in any cell visible here (the frame
# built earlier is named `submission`) and this cell's execution count is out
# of order, so it looks like a leftover from an earlier session. On a fresh
# kernel it would presumably raise NameError — confirm whether it should be
# removed or should write `submission` instead.
submit.to_csv("../submission/submission_4.csv",index = False)