In [1]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt
import torch
torch.backends.cudnn.benchmark = True

In [2]:
# 구글 드라이브를 마운트합니다.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# 데이터셋 준비
data_dir = '/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/dataset'

In [4]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, classification_report
import matplotlib.pyplot as plt

# 데이터 로드
train_df = pd.read_csv('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/dataset/TrainData.csv')
valid_df = pd.read_csv('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/dataset/ValidData.csv')
test_df = pd.read_csv('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/dataset/TestData.csv')

# 데이터 경로 설정
image_path = "/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/dataset/images"

# Label Encoding
le = LabelEncoder()
train_df['Class'] = le.fit_transform(train_df['Class'])
valid_df['Class'] = le.transform(valid_df['Class'])
test_df['Class'] = le.transform(test_df['Class'])


In [None]:
le.classes_

array(['AKIEC', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'VASC'], dtype=object)

In [5]:
class SkinDataset(Dataset):
    def __init__(self, dataframe, image_dir, transform=None):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.dataframe.iloc[idx, 0]) + ".jpg"
        image = Image.open(img_name)
        image = np.array(image)  # PIL 이미지를 numpy 배열로 변환
        label = self.dataframe.iloc[idx, 1]
        if self.transform:
            augmented = self.transform(image=image)  # 명명된 인수로 전달
            image = augmented["image"]
        return image, label

# 데이터 증강 정의
train_transform = A.Compose([
    A.CenterCrop(300, 300),             # 이미지의 중앙을 자릅니다
    A.Resize(128, 128),                 # 이미지를 128x128로 크기 조정합니다
    A.Rotate(limit=180, p=0.5),         # 이미지를 -180도에서 180도 사이로 랜덤하게 회전합니다
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=0, p=0.5),  # 수평, 수직 이동 및 확대/축소
    A.HorizontalFlip(p=0.5),            # 이미지를 수평으로 뒤집습니다
    A.VerticalFlip(p=0.5),              # 이미지를 수직으로 뒤집습니다
    A.RandomBrightnessContrast(p=0.2),  # 랜덤하게 밝기 및 대비 조정
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

transform = A.Compose([
    A.CenterCrop(300, 300),
    A.Resize(128, 128),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

train_dataset = SkinDataset(train_df, image_path, transform=train_transform)
valid_dataset = SkinDataset(valid_df, image_path, transform=transform)
test_dataset = SkinDataset(test_df, image_path, transform=transform)

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

Using device: cuda


In [7]:
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
model = models.efficientnet_b3(pretrained=True)
# EfficientNet 모델의 마지막 레이어 수정
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 7)  # 7개의 클래스 출력
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 20
train_losses, valid_losses = [], []
train_accuracies, valid_accuracies = [], []

Downloading: "https://download.pytorch.org/models/efficientnet_b3_rwightman-b3899882.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b3_rwightman-b3899882.pth
100%|██████████| 47.2M/47.2M [00:00<00:00, 148MB/s]


In [None]:
for epoch in range(num_epochs):
    model.train()
    running_loss, correct = 0.0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = correct.double() / len(train_loader.dataset)
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc.item())

    model.eval()
    running_loss, correct = 0.0, 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(valid_loader.dataset)
    epoch_acc = correct.double() / len(valid_loader.dataset)
    valid_losses.append(epoch_loss)
    valid_accuracies.append(epoch_acc.item())

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Train Acc: {train_accuracies[-1]:.4f}, Val Loss: {valid_losses[-1]:.4f}, Val Acc: {valid_accuracies[-1]:.4f}')


Downloading: "https://download.pytorch.org/models/efficientnet_b3_rwightman-b3899882.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b3_rwightman-b3899882.pth
100%|██████████| 47.2M/47.2M [00:00<00:00, 124MB/s]


Epoch 1/20, Train Loss: 0.8256, Train Acc: 0.7086, Val Loss: 0.8158, Val Acc: 0.7560
Epoch 2/20, Train Loss: 0.6922, Train Acc: 0.7496, Val Loss: 0.5692, Val Acc: 0.8000
Epoch 3/20, Train Loss: 0.6456, Train Acc: 0.7749, Val Loss: 0.5825, Val Acc: 0.7900
Epoch 4/20, Train Loss: 0.6008, Train Acc: 0.7854, Val Loss: 0.4883, Val Acc: 0.8250
Epoch 5/20, Train Loss: 0.5715, Train Acc: 0.7936, Val Loss: 0.4984, Val Acc: 0.8360
Epoch 6/20, Train Loss: 0.5440, Train Acc: 0.8056, Val Loss: 0.5106, Val Acc: 0.8190
Epoch 7/20, Train Loss: 0.5187, Train Acc: 0.8177, Val Loss: 0.6019, Val Acc: 0.7890
Epoch 8/20, Train Loss: 0.5102, Train Acc: 0.8145, Val Loss: 0.5290, Val Acc: 0.8260
Epoch 9/20, Train Loss: 0.4949, Train Acc: 0.8221, Val Loss: 0.4432, Val Acc: 0.8370
Epoch 10/20, Train Loss: 0.4830, Train Acc: 0.8276, Val Loss: 0.4473, Val Acc: 0.8460
Epoch 11/20, Train Loss: 0.4585, Train Acc: 0.8340, Val Loss: 0.4802, Val Acc: 0.8410
Epoch 12/20, Train Loss: 0.4345, Train Acc: 0.8409, Val Loss: 0

In [None]:
# 모델 저장
torch.save(model.state_dict(), '/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB3_cls_V1.pth')

In [None]:
# loss, accuracy 저장
# 학습 및 검증 loss와 accuracy 저장
with open('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB3_metrics_V1.pkl', 'wb') as f:
    pickle.dump({
        'train_losses': train_losses,
        'valid_losses': valid_losses,
        'train_accuracies': train_accuracies,
        'valid_accuracies': valid_accuracies
    }, f)

In [None]:
# 추가 학습 진행
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss, correct = 0.0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = correct.double() / len(train_loader.dataset)
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc.item())

    model.eval()
    running_loss, correct = 0.0, 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(valid_loader.dataset)
    epoch_acc = correct.double() / len(valid_loader.dataset)
    valid_losses.append(epoch_loss)
    valid_accuracies.append(epoch_acc.item())

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Train Acc: {train_accuracies[-1]:.4f}, Val Loss: {valid_losses[-1]:.4f}, Val Acc: {valid_accuracies[-1]:.4f}')

Epoch 1/20, Train Loss: 0.3390, Train Acc: 0.8764, Val Loss: 0.4175, Val Acc: 0.8690
Epoch 2/20, Train Loss: 0.3337, Train Acc: 0.8792, Val Loss: 0.4191, Val Acc: 0.8680
Epoch 3/20, Train Loss: 0.3172, Train Acc: 0.8841, Val Loss: 0.4566, Val Acc: 0.8540
Epoch 4/20, Train Loss: 0.3064, Train Acc: 0.8854, Val Loss: 0.4494, Val Acc: 0.8560
Epoch 5/20, Train Loss: 0.2919, Train Acc: 0.8960, Val Loss: 0.4514, Val Acc: 0.8590
Epoch 6/20, Train Loss: 0.2840, Train Acc: 0.8939, Val Loss: 0.4328, Val Acc: 0.8650
Epoch 7/20, Train Loss: 0.2712, Train Acc: 0.9029, Val Loss: 0.4502, Val Acc: 0.8590
Epoch 8/20, Train Loss: 0.2827, Train Acc: 0.8979, Val Loss: 0.4554, Val Acc: 0.8610
Epoch 9/20, Train Loss: 0.2613, Train Acc: 0.9049, Val Loss: 0.4937, Val Acc: 0.8490
Epoch 10/20, Train Loss: 0.2584, Train Acc: 0.9057, Val Loss: 0.4186, Val Acc: 0.8660
Epoch 11/20, Train Loss: 0.2496, Train Acc: 0.9099, Val Loss: 0.5361, Val Acc: 0.8410
Epoch 12/20, Train Loss: 0.2418, Train Acc: 0.9115, Val Loss: 0

In [None]:
# 모델 저장
torch.save(model.state_dict(), '/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB3_cls_V1.pth')

In [None]:
# 추가 학습 진행
num_epochs = 160
for epoch in range(num_epochs):
    model.train()
    running_loss, correct = 0.0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = correct.double() / len(train_loader.dataset)
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc.item())

    model.eval()
    running_loss, correct = 0.0, 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(valid_loader.dataset)
    epoch_acc = correct.double() / len(valid_loader.dataset)
    valid_losses.append(epoch_loss)
    valid_accuracies.append(epoch_acc.item())

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Train Acc: {train_accuracies[-1]:.4f}, Val Loss: {valid_losses[-1]:.4f}, Val Acc: {valid_accuracies[-1]:.4f}')
    # 모델 저장
    torch.save(model.state_dict(), '/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB3_cls_V1.pth')

Epoch 1/160, Train Loss: 0.1905, Train Acc: 0.9298, Val Loss: 0.4779, Val Acc: 0.8460
Epoch 2/160, Train Loss: 0.1755, Train Acc: 0.9362, Val Loss: 0.4964, Val Acc: 0.8580
Epoch 3/160, Train Loss: 0.1967, Train Acc: 0.9308, Val Loss: 0.4770, Val Acc: 0.8570
Epoch 4/160, Train Loss: 0.1837, Train Acc: 0.9355, Val Loss: 0.4344, Val Acc: 0.8670
Epoch 5/160, Train Loss: 0.1768, Train Acc: 0.9361, Val Loss: 0.4884, Val Acc: 0.8690
Epoch 6/160, Train Loss: 0.1729, Train Acc: 0.9378, Val Loss: 0.5030, Val Acc: 0.8760
Epoch 7/160, Train Loss: 0.1643, Train Acc: 0.9414, Val Loss: 0.4995, Val Acc: 0.8650
Epoch 8/160, Train Loss: 0.1669, Train Acc: 0.9418, Val Loss: 0.4877, Val Acc: 0.8650
Epoch 9/160, Train Loss: 0.1600, Train Acc: 0.9422, Val Loss: 0.5126, Val Acc: 0.8480
Epoch 10/160, Train Loss: 0.1609, Train Acc: 0.9419, Val Loss: 0.4648, Val Acc: 0.8870
Epoch 11/160, Train Loss: 0.1537, Train Acc: 0.9464, Val Loss: 0.4413, Val Acc: 0.8690
Epoch 12/160, Train Loss: 0.1520, Train Acc: 0.9476,

In [9]:
# 2. 저장된 모델 가중치 로드
model.load_state_dict(torch.load('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB3_cls_V1.pth'))

<All keys matched successfully>

In [None]:
# 추가 학습 진행
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss, correct = 0.0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = correct.double() / len(train_loader.dataset)
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc.item())

    model.eval()
    running_loss, correct = 0.0, 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(valid_loader.dataset)
    epoch_acc = correct.double() / len(valid_loader.dataset)
    valid_losses.append(epoch_loss)
    valid_accuracies.append(epoch_acc.item())

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Train Acc: {train_accuracies[-1]:.4f}, Val Loss: {valid_losses[-1]:.4f}, Val Acc: {valid_accuracies[-1]:.4f}')
    # 모델 저장
    torch.save(model.state_dict(), '/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB3_cls_V1.pth')

In [None]:
!pip install pickle-mixin

Collecting pickle-mixin
  Downloading pickle-mixin-1.0.2.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pickle-mixin
  Building wheel for pickle-mixin (setup.py) ... [?25l[?25hdone
  Created wheel for pickle-mixin: filename=pickle_mixin-1.0.2-py3-none-any.whl size=5991 sha256=1104a0e67a3fcee4da99ff6913f2548a1711ca130a7dad4125cc51af320caae5
  Stored in directory: /root/.cache/pip/wheels/3e/c6/e9/d1b0a34e1efc6c3ec9c086623972c6de6317faddb2af0a619c
Successfully built pickle-mixin
Installing collected packages: pickle-mixin
Successfully installed pickle-mixin-1.0.2


In [None]:
import pickle

In [None]:
# loss, accuracy 저장
# 학습 및 검증 loss와 accuracy 저장
with open('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB3_metrics_V1.pkl', 'wb') as f:
    pickle.dump({
        'train_losses': train_losses,
        'valid_losses': valid_losses,
        'train_accuracies': train_accuracies,
        'valid_accuracies': valid_accuracies
    }, f)

In [None]:
# 학습 및 검증 결과 불러오기
with open('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB3_metrics_V1.pkl', 'rb') as f:
    metrics = pickle.load(f)
    train_losses = metrics['train_losses']
    valid_losses = metrics['valid_losses']
    train_accuracies = metrics['train_accuracies']
    valid_accuracies = metrics['valid_accuracies']

In [10]:
# 성능 평가
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Confusion Matrix
cm = confusion_matrix(all_labels, all_preds)
print('Confusion Matrix:')
print(cm)

# Class별 Accuracy
report = classification_report(all_labels, all_preds, target_names=le.classes_)
print('Classification Report:')
print(report)

Confusion Matrix:
[[ 23   3   5   0   0   2   0]
 [  1  46   2   0   0   3   0]
 [  9   2  73   0   3  23   0]
 [  0   1   1   8   1   1   0]
 [  2   2   6   1  59  42   0]
 [  0   2   3   2   4 659   1]
 [  0   0   1   0   0   0  14]]
Classification Report:
              precision    recall  f1-score   support

       AKIEC       0.66      0.70      0.68        33
         BCC       0.82      0.88      0.85        52
         BKL       0.80      0.66      0.73       110
          DF       0.73      0.67      0.70        12
         MEL       0.88      0.53      0.66       112
          NV       0.90      0.98      0.94       671
        VASC       0.93      0.93      0.93        15

    accuracy                           0.88      1005
   macro avg       0.82      0.76      0.78      1005
weighted avg       0.88      0.88      0.87      1005

