In [None]:
# Mount Google Drive (Colab only); the dataset and checkpoint paths used below live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import copy
import random

import cv2
import torch
import numpy as np
from torch import nn
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from ipywidgets import interact

# Single seed shared by every random number generator used in this notebook.
random_seed = 2022

# Seed Python, NumPy and PyTorch (CPU and CUDA) with the same value.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(random_seed)

# Request deterministic cuDNN kernels and switch off the cuDNN auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
def list_image_files(data_dir, sub_dir):
    """Collect the image files stored directly inside ``data_dir/sub_dir``.

    Args:
        data_dir: Root directory of a dataset split.
        sub_dir: Name of the class folder inside ``data_dir``.

    Returns:
        Sorted list of paths relative to ``data_dir`` (``sub_dir/file_name``)
        for every entry whose extension is jpeg, jpg or png, matched
        case-insensitively.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder cannot be read.
    """
    image_format = {".jpeg", ".jpg", ".png"}

    images_dir = os.path.join(data_dir, sub_dir)
    image_files = []
    # os.listdir returns entries in arbitrary order; sorting keeps dataset
    # indices stable from one run to the next.
    for file_name in sorted(os.listdir(images_dir)):
        # splitext only reports a real extension, so a file literally named
        # "jpg" or a dotfile such as ".png" is not mistaken for an image.
        extension = os.path.splitext(file_name)[1].lower()
        if extension in image_format:
            image_files.append(os.path.join(sub_dir, file_name))
    return image_files

In [None]:
# Root folder of the training split on the mounted Drive.
data_dir = "/content/drive/MyDrive/lettuce/train/"

# Relative image paths for each of the three class folders.
Bacterial_list, fungal_list, healthy_list = [
    list_image_files(data_dir, class_dir)
    for class_dir in ("Bacterial", "fungal", "healthy")
]

In [None]:
def get_RGB_image(data_dir, file_name):
    """Load an image from disk and return it in RGB channel order.

    Args:
        data_dir: Directory that ``file_name`` is relative to.
        file_name: Path of the image relative to ``data_dir``.

    Returns:
        The image array produced by OpenCV, converted from BGR to RGB.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    image_file = os.path.join(data_dir, file_name)
    image = cv2.imread(image_file)
    # cv2.imread reports a missing or unreadable file by returning None, which
    # would otherwise surface as an opaque cvtColor assertion error.
    if image is None:
        raise FileNotFoundError(f"Could not read image file: {image_file}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [None]:
# The slider may only reach indices that exist in every class list.
min_num_files = min(map(len, (Bacterial_list, fungal_list, healthy_list)))

@interact(index=(0, min_num_files-1))
def show_samples(index=0):
    """Show the ``index``-th image of each class side by side."""
    panels = (
        (131, "Bacterial", Bacterial_list),
        (132, "fungal", fungal_list),
        (133, "healthy", healthy_list),
    )
    # Load all three images before any figure is created.
    loaded = [get_RGB_image(data_dir, file_list[index]) for _, _, file_list in panels]

    plt.figure(figsize=(12, 8))
    for (position, title, _), rgb_image in zip(panels, loaded):
        plt.subplot(position)
        plt.title(title)
        plt.imshow(rgb_image)
    plt.tight_layout()

In [None]:
# Training split location and the class folder names; a sample's label is the
# index of its class folder in class_list.
train_data_dir = "/content/drive/MyDrive/lettuce/train/"
class_list = ["Bacterial", "fungal", "healthy"]

class Chest_dataset(Dataset):
    """Image classification dataset read from one folder per class.

    Images are looked up in the ``Bacterial``, ``fungal`` and ``healthy``
    sub-folders of ``data_dir``. The label of a sample is the position of its
    folder name in the module-level ``class_list``.
    """

    def __init__(self, data_dir, transform=None):
        """Index the image files of every class folder.

        Args:
            data_dir: Root directory containing the class folders.
            transform: Optional callable applied to each RGB image array.
        """
        self.data_dir = data_dir
        Bacterial = list_image_files(data_dir, "Bacterial")
        fungal = list_image_files(data_dir, "fungal")
        healthy = list_image_files(data_dir, "healthy")

        # Relative paths of the form "<class folder>/<file name>".
        self.files_path = Bacterial + fungal + healthy
        self.transform = transform

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.files_path)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            A dict with keys ``"image"`` and ``"target"``. Without a transform
            the image is the raw RGB array and the target a plain int; with a
            transform the image is the transform's output and the target a
            long tensor of shape ``(1,)``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        relative_path = self.files_path[index]
        image_file = os.path.join(self.data_dir, relative_path)
        image = cv2.imread(image_file)
        # cv2.imread returns None instead of raising when the file is unreadable.
        if image is None:
            raise FileNotFoundError(f"Could not read image file: {image_file}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # The first path component is the class folder name.
        target = class_list.index(relative_path.split(os.sep)[0])

        if self.transform:
            image = self.transform(image)
            # Build the integer label directly instead of via a float tensor.
            target = torch.tensor([target], dtype=torch.long)

        return {"image":image, "target":target}

# Dataset without a transform: samples come back as RGB arrays with plain integer labels.
dset = Chest_dataset(train_data_dir)

In [None]:
# Fetch the sample once: every dset[index] access re-reads and decodes the image from disk.
index = 150
sample = dset[index]
plt.title(class_list[sample["target"]])
plt.imshow(sample["image"])

In [None]:
# Preprocessing pipeline: convert the image array to a tensor, resize it to
# 224x224 and normalize every channel with mean 0.5 and std 0.5.
transformer = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5])
])

# Sanity check on one transformed sample. The sample is fetched once because
# each train_dset[index] access reloads and re-transforms the image.
train_dset = Chest_dataset(train_data_dir, transformer)
index = 200
sample = train_dset[index]
image = sample["image"]
label = sample["target"]

print(image.shape, label)

In [None]:
def build_dataloader(train_data_dir, val_data_dir):
    """Create the train and validation loaders.

    Both datasets use the module-level ``transformer``. The train loader
    shuffles, uses batches of 4 and drops the last incomplete batch; the
    validation loader yields single samples in a fixed order.

    Returns:
        Dict with the keys ``"train"`` and ``"val"`` mapping to DataLoaders.
    """
    train_loader = DataLoader(
        Chest_dataset(train_data_dir, transformer),
        batch_size=4,
        shuffle=True,
        drop_last=True,
    )
    val_loader = DataLoader(
        Chest_dataset(val_data_dir, transformer),
        batch_size=1,
        shuffle=False,
        drop_last=False,
    )
    return {"train": train_loader, "val": val_loader}

In [None]:
# Training folder and the folder used for validation (the "test" split), then the loaders for both phases.
train_data_dir = "/content/drive/MyDrive/lettuce/train/"
val_data_dir = "/content/drive/MyDrive/lettuce/test/"
dataloaders = build_dataloader(train_data_dir, val_data_dir)

In [None]:
# Load VGG19 with pretrained weights and print a per-layer summary for a 3x224x224 input on the CPU.
model = models.vgg19(pretrained=True)

# NOTE(review): this import sits mid-notebook; consider moving it to the imports cell.
from torchsummary import summary
summary(model, (3, 224, 224), batch_size=1, device="cpu")


In [None]:
# Replace the VGG19 head: pool each feature map to a single value (512
# features after flattening), then classify with a small MLP.
model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1,1))
model.classifier = nn.Sequential(
    nn.Flatten(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Dropout(0.1),
    # Emit raw class scores: nn.CrossEntropyLoss applies log-softmax itself,
    # so a Sigmoid here would squash the scores before the loss sees them.
    nn.Linear(256, 3),  # len(class_list)
)

In [None]:
def build_vgg19_based_model(device_name='cpu', num_classes=3):
    """Build a pretrained VGG19 with a new classification head.

    The average pool is replaced by a 1x1 adaptive pool and the classifier by
    ``Flatten -> Linear(512, 256) -> ReLU -> Linear(256, num_classes)``.

    Args:
        device_name: Device string understood by ``torch.device`` (for example
            ``"cpu"`` or ``"cuda"``). Defaults to ``"cpu"`` so that calling the
            function without arguments yields a usable model.
        num_classes: Number of output classes.

    Returns:
        The model moved to the requested device. Its output is raw class
        scores (logits) of shape ``(batch, num_classes)``.
    """
    device = torch.device(device_name)
    model = models.vgg19(pretrained=True)
    model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1,1))
    model.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(512, 256),
        nn.ReLU(),
        # No Softmax layer: nn.CrossEntropyLoss expects unnormalized scores and
        # applies log-softmax internally. Softmax has no parameters, so the
        # state-dict keys and the argmax predictions are unaffected.
        nn.Linear(256, num_classes),
    )
    return model.to(device)

In [None]:
# Build the customized model on the CPU and print its per-layer summary for a 3x224x224 input.
model = build_vgg19_based_model(device_name='cpu')

# NOTE(review): this import sits mid-notebook; consider moving it to the imports cell.
from torchsummary import summary
summary(model, (3, 224, 224), batch_size=1, device="cpu")

In [None]:
# Cross-entropy loss averaged over the batch, optimized with SGD (learning rate 1e-3, momentum 0.9).
loss_func = nn.CrossEntropyLoss(reduction="mean")
optimizer = torch.optim.SGD(model.parameters(), lr= 1E-3, momentum=0.9)

@torch.no_grad()
def get_accuracy(image, target, model):
    """Return the fraction of a batch that ``model`` classifies correctly.

    Args:
        image: Batch passed to ``model``; its first dimension is the batch size.
        target: Labels compared element-wise with the predicted class indices.
        model: Callable returning one row of class scores per sample.

    Returns:
        Number of matching predictions divided by the batch size.
    """
    scores = model(image)
    # Index of the highest score in every row.
    predicted = torch.max(scores, dim=1)[1]
    num_hits = (predicted == target).cpu().numpy().sum()
    return num_hits / image.shape[0]

In [None]:
# Device shared by the model and the batches.
device = torch.device("cpu")

train_data_dir = "/content/drive/MyDrive/lettuce/train/"
val_data_dir = "/content/drive/MyDrive/lettuce/test/"

dataloaders = build_dataloader(train_data_dir, val_data_dir)
# Pass the device explicitly so the model is built on the same device the
# batches are moved to.
model = build_vgg19_based_model(device_name=str(device))
loss_func = nn.CrossEntropyLoss(reduction="mean")
optimizer = torch.optim.SGD(model.parameters(), lr= 1E-3, momentum=0.9)

In [None]:
import torch
import torch.optim as optim
import os
import copy

# Function that saves a model
def save_best_model(model_state, model_name, save_dir="/content/drive/MyDrive/lettuce"):
    """Write ``model_state`` to ``save_dir/model_name`` with ``torch.save``.

    The directory is created first when it does not exist yet.
    """
    destination = os.path.join(save_dir, model_name)
    os.makedirs(save_dir, exist_ok=True)
    torch.save(model_state, destination)

# Model training function
def train_one_epoch(dataloaders, model, optimizer, loss_func, device):
    """Run one training pass followed by one validation pass.

    Args:
        dataloaders: Mapping with the keys ``"train"`` and ``"val"``; each value
            is a sized iterable of batches. A batch is a dict whose ``"image"``
            entry is the model input and whose ``"target"`` entry holds the
            labels with a singleton second dimension (squeezed away here).
        model: Network to train and evaluate.
        optimizer: Optimizer stepped once per training batch.
        loss_func: Callable ``loss_func(prediction, target)`` returning a scalar.
        device: Device the batches are moved to.

    Returns:
        ``(losses, accuracies)``: two dicts keyed by phase. The loss is the mean
        of the per-batch losses; the accuracy is the number of correctly
        classified samples divided by the number of samples seen.
    """
    losses = {}
    accuracies = {}
    for phase in ["train", "val"]:
        is_training = phase == "train"
        running_loss = 0.0
        num_correct = 0
        num_samples = 0

        if is_training:
            model.train()
        else:
            model.eval()

        num_batches = len(dataloaders[phase])
        for index, batch in enumerate(dataloaders[phase]):
            image = batch["image"].to(device)
            target = batch["target"].squeeze(1).to(device)

            # Gradients are only tracked while training.
            with torch.set_grad_enabled(is_training):
                prediction = model(image)
                loss = loss_func(prediction, target)

                if is_training:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item()

            # Reuse the forward pass above instead of running the model a second
            # time, and count samples so that batches of different sizes are
            # weighted correctly.
            pred_label = prediction.detach().argmax(dim=1)
            num_correct += (pred_label == target).sum().item()
            num_samples += target.shape[0]

            if is_training and index % 10 == 0:
                print(f"{index}/{num_batches} - Running Loss: {loss.item()}")

        losses[phase] = running_loss / num_batches
        accuracies[phase] = num_correct / num_samples
    return losses, accuracies

# Basic configuration
device = torch.device("cpu")
train_data_dir = "/content/drive/MyDrive/lettuce/train/"
val_data_dir = "/content/drive/MyDrive/lettuce/test/"

dataloaders = build_dataloader(train_data_dir, val_data_dir)
# Pass the device explicitly so the model is built on the same device the
# batches are moved to.
model = build_vgg19_based_model(device_name=str(device))
loss_func = nn.CrossEntropyLoss(reduction="mean")
optimizer = optim.SGD(model.parameters(), lr=1E-3, momentum=0.9)

num_epochs = 2
best_acc = 0.0
# Start from the initial weights so there is always a state dict to save, even
# when the validation accuracy never rises above 0.
best_model = copy.deepcopy(model.state_dict())

# Per-epoch history consumed by the loss/accuracy plots further down.
train_loss, val_loss = [], []
train_accuracy, val_accuracy = [], []

# Training loop
for epoch in range(num_epochs):
    losses, accuracies = train_one_epoch(dataloaders, model, optimizer, loss_func, device)
    train_loss.append(losses["train"])
    val_loss.append(losses["val"])
    train_accuracy.append(accuracies["train"])
    val_accuracy.append(accuracies["val"])
    print(f"{epoch+1}/{num_epochs}-Train Loss: {losses['train']}, Val Loss: {losses['val']}")
    print(f"{epoch+1}/{num_epochs}-Train Acc: {accuracies['train']}, Val Acc: {accuracies['val']}")

    # Keep a copy of the weights with the best validation accuracy so far
    if accuracies["val"] > best_acc:
        best_acc = accuracies["val"]
        best_model = copy.deepcopy(model.state_dict())

# Save the best model
save_best_model(best_model, "best_model.pth")
print("Best model saved successfully.")


In [None]:
import os

# Diagnostic output: current working directory and whether a directory exists.
# NOTE(review): the checked path is under "tomatos", while the checkpoints in this
# notebook are written under "/content/drive/MyDrive/lettuce" — confirm which path is intended.
print("Current working directory:", os.getcwd())
print("Is directory exists:", os.path.isdir("/content/drive/MyDrive/tomatos/save_best_model"))


In [None]:
# Loss (top panel) and accuracy (bottom panel) per epoch for the train and validation phases.
plt.figure(figsize=(6, 5))
curve_panels = (
    (211, "loss", train_loss, val_loss),
    (212, "accuracy", train_accuracy, val_accuracy),
)
for subplot_code, y_label, train_curve, val_curve in curve_panels:
    plt.subplot(subplot_code)
    plt.plot(train_curve, label="train")
    plt.plot(val_curve,  label="val")
    plt.xlabel("epoch")
    plt.ylabel(y_label)
    plt.grid("on")
    plt.legend()
plt.tight_layout()

In [None]:
@torch.no_grad()
def model_predict(image, model):
    """Return the predicted class index for a single image as a Python int.

    The image is passed through ``preprocess_image`` and then the model; the
    index of the largest score along dimension 1 is returned.

    NOTE(review): ``preprocess_image`` is not defined in the visible part of
    this notebook — confirm it is available before this function is called.
    """
    scores = model(preprocess_image(image))
    best_index = torch.max(scores.detach(), dim=1)[1]
    return best_index.squeeze(0).item()

In [None]:
# Load the saved state dict onto the CPU regardless of the device it was saved from.
# NOTE(review): the training cell writes "best_model.pth"; confirm that
# "best_model2.pth" is the checkpoint intended here.
ckpt = torch.load("/content/drive/MyDrive/lettuce/best_model2.pth", map_location="cpu")

# Build the model on the CPU explicitly so it matches the loaded tensors.
model = build_vgg19_based_model(device_name="cpu")
model.load_state_dict(ckpt)
model.eval()