In [1]:
!pip install torch torchvision torchaudio
!pip install albumentations opencv-python-headless

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [1]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt
import torch
torch.backends.cudnn.benchmark = True

In [2]:
# 구글 드라이브를 마운트합니다.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# 데이터셋 준비
data_dir = '/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/dataset'

In [4]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, classification_report
import matplotlib.pyplot as plt

# 데이터 로드
train_df = pd.read_csv('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/dataset/TrainData.csv')
valid_df = pd.read_csv('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/dataset/ValidData.csv')
test_df = pd.read_csv('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/dataset/TestData.csv')

# 데이터 경로 설정
image_path = "/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/dataset/images"

# Label Encoding
le = LabelEncoder()
train_df['Class'] = le.fit_transform(train_df['Class'])
valid_df['Class'] = le.transform(valid_df['Class'])
test_df['Class'] = le.transform(test_df['Class'])


In [5]:
le.classes_

array(['AKIEC', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'VASC'], dtype=object)

In [6]:
class SkinDataset(Dataset):
    def __init__(self, dataframe, image_dir, transform=None):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.dataframe.iloc[idx, 0]) + ".jpg"
        image = Image.open(img_name)
        label = self.dataframe.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        return image, label

transform = transforms.Compose([
    transforms.CenterCrop((375, 375)),  # 중앙을 기준으로 375x375 크기로 자르기
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = SkinDataset(train_df, image_path, transform=transform)
valid_dataset = SkinDataset(valid_df, image_path, transform=transform)
test_dataset = SkinDataset(test_df, image_path, transform=transform)

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Using device: cpu


In [8]:
# 1. 모델 아키텍처 정의
model = models.efficientnet_b0(pretrained=False)  # pretrained=False로 설정
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 7)  # 7개의 클래스 출력
model = model.to(device)



In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 20
train_losses, valid_losses = [], []
train_accuracies, valid_accuracies = [], []

for epoch in range(num_epochs):
    model.train()
    running_loss, correct = 0.0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = correct.double() / len(train_loader.dataset)
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc.item())

    model.eval()
    running_loss, correct = 0.0, 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(valid_loader.dataset)
    epoch_acc = correct.double() / len(valid_loader.dataset)
    valid_losses.append(epoch_loss)
    valid_accuracies.append(epoch_acc.item())

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Train Acc: {train_accuracies[-1]:.4f}, Val Loss: {valid_losses[-1]:.4f}, Val Acc: {valid_accuracies[-1]:.4f}')

# 모델 저장
torch.save(model.state_dict(), '/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB0_cls_V1.pth')

Using device: cpu


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 48.3MB/s]


Epoch 1/20, Train Loss: 0.7997, Train Acc: 0.7179, Val Loss: 0.6709, Val Acc: 0.7600
Epoch 2/20, Train Loss: 0.6004, Train Acc: 0.7794, Val Loss: 0.5511, Val Acc: 0.8110
Epoch 3/20, Train Loss: 0.5167, Train Acc: 0.8116, Val Loss: 0.5214, Val Acc: 0.8120
Epoch 4/20, Train Loss: 0.4501, Train Acc: 0.8350, Val Loss: 0.5185, Val Acc: 0.8110
Epoch 5/20, Train Loss: 0.3740, Train Acc: 0.8624, Val Loss: 0.7277, Val Acc: 0.7950
Epoch 6/20, Train Loss: 0.3123, Train Acc: 0.8873, Val Loss: 0.5179, Val Acc: 0.8460
Epoch 7/20, Train Loss: 0.2596, Train Acc: 0.9054, Val Loss: 0.5116, Val Acc: 0.8350
Epoch 8/20, Train Loss: 0.2005, Train Acc: 0.9275, Val Loss: 0.7020, Val Acc: 0.8240
Epoch 9/20, Train Loss: 0.1736, Train Acc: 0.9362, Val Loss: 0.5698, Val Acc: 0.8380
Epoch 10/20, Train Loss: 0.1429, Train Acc: 0.9479, Val Loss: 0.5746, Val Acc: 0.8310
Epoch 11/20, Train Loss: 0.1341, Train Acc: 0.9517, Val Loss: 0.6684, Val Acc: 0.8250
Epoch 12/20, Train Loss: 0.1279, Train Acc: 0.9558, Val Loss: 0

In [9]:
# 2. 저장된 모델 가중치 로드
model.load_state_dict(torch.load('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB0_cls_V1.pth'))

<All keys matched successfully>

In [10]:
# 성능 평가
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Confusion Matrix
cm = confusion_matrix(all_labels, all_preds)
print('Confusion Matrix:')
print(cm)

# Class별 Accuracy
report = classification_report(all_labels, all_preds, target_names=le.classes_)
print('Classification Report:')
print(report)

Confusion Matrix:
[[ 17   2  12   0   1   1   0]
 [  7  33  12   0   0   0   0]
 [  2   3  81   0   4  20   0]
 [  2   0   0   7   2   1   0]
 [  2   0  18   0  35  57   0]
 [  0   6  34   1   9 620   1]
 [  0   2   1   1   0   2   9]]
Classification Report:
              precision    recall  f1-score   support

       AKIEC       0.57      0.52      0.54        33
         BCC       0.72      0.63      0.67        52
         BKL       0.51      0.74      0.60       110
          DF       0.78      0.58      0.67        12
         MEL       0.69      0.31      0.43       112
          NV       0.88      0.92      0.90       671
        VASC       0.90      0.60      0.72        15

    accuracy                           0.80      1005
   macro avg       0.72      0.62      0.65      1005
weighted avg       0.80      0.80      0.79      1005



### GPU 학습

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

Using device: cuda


In [28]:
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
model = models.efficientnet_b0(pretrained=True)
# EfficientNet 모델의 마지막 레이어 수정
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 7)  # 7개의 클래스 출력
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 20
train_losses, valid_losses = [], []
train_accuracies, valid_accuracies = [], []

for epoch in range(num_epochs):
    model.train()
    running_loss, correct = 0.0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = correct.double() / len(train_loader.dataset)
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc.item())

    model.eval()
    running_loss, correct = 0.0, 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(valid_loader.dataset)
    epoch_acc = correct.double() / len(valid_loader.dataset)
    valid_losses.append(epoch_loss)
    valid_accuracies.append(epoch_acc.item())

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Train Acc: {train_accuracies[-1]:.4f}, Val Loss: {valid_losses[-1]:.4f}, Val Acc: {valid_accuracies[-1]:.4f}')


Epoch 1/25, Train Loss: 0.8098, Train Acc: 0.7187, Val Loss: 0.5965, Val Acc: 0.7880
Epoch 2/25, Train Loss: 0.6089, Train Acc: 0.7782, Val Loss: 0.5520, Val Acc: 0.7940
Epoch 3/25, Train Loss: 0.5255, Train Acc: 0.8137, Val Loss: 0.5109, Val Acc: 0.8130
Epoch 4/25, Train Loss: 0.4495, Train Acc: 0.8365, Val Loss: 0.5278, Val Acc: 0.8050
Epoch 5/25, Train Loss: 0.3890, Train Acc: 0.8603, Val Loss: 0.5324, Val Acc: 0.8190
Epoch 6/25, Train Loss: 0.3181, Train Acc: 0.8876, Val Loss: 0.6095, Val Acc: 0.7830
Epoch 7/25, Train Loss: 0.2620, Train Acc: 0.9064, Val Loss: 0.5731, Val Acc: 0.8310
Epoch 8/25, Train Loss: 0.2103, Train Acc: 0.9238, Val Loss: 0.5572, Val Acc: 0.8200
Epoch 9/25, Train Loss: 0.1849, Train Acc: 0.9318, Val Loss: 0.6806, Val Acc: 0.8280
Epoch 10/25, Train Loss: 0.1590, Train Acc: 0.9437, Val Loss: 0.6282, Val Acc: 0.8380
Epoch 11/25, Train Loss: 0.1308, Train Acc: 0.9521, Val Loss: 0.5792, Val Acc: 0.8220
Epoch 12/25, Train Loss: 0.1168, Train Acc: 0.9593, Val Loss: 0

In [None]:
# 모델 저장
torch.save(model.state_dict(), '/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB0_cls_V1.pth')

In [None]:
!pip install pickle-mixin

Collecting pickle-mixin
  Downloading pickle-mixin-1.0.2.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pickle-mixin
  Building wheel for pickle-mixin (setup.py) ... [?25l[?25hdone
  Created wheel for pickle-mixin: filename=pickle_mixin-1.0.2-py3-none-any.whl size=5991 sha256=1104a0e67a3fcee4da99ff6913f2548a1711ca130a7dad4125cc51af320caae5
  Stored in directory: /root/.cache/pip/wheels/3e/c6/e9/d1b0a34e1efc6c3ec9c086623972c6de6317faddb2af0a619c
Successfully built pickle-mixin
Installing collected packages: pickle-mixin
Successfully installed pickle-mixin-1.0.2


In [None]:
import pickle

In [None]:
# loss, accuracy 저장
# 학습 및 검증 loss와 accuracy 저장
with open('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB0_metrics_V1.pkl', 'wb') as f:
    pickle.dump({
        'train_losses': train_losses,
        'valid_losses': valid_losses,
        'train_accuracies': train_accuracies,
        'valid_accuracies': valid_accuracies
    }, f)

In [None]:
# 학습 및 검증 결과 불러오기
with open('/content/drive/MyDrive/클라우드컴퓨팅 텀프로젝트/model/EfficientNetB0_metrics_V1.pkl', 'rb') as f:
    metrics = pickle.load(f)
    train_losses = metrics['train_losses']
    valid_losses = metrics['valid_losses']
    train_accuracies = metrics['train_accuracies']
    valid_accuracies = metrics['valid_accuracies']