# Freshwater Fish Disease Detection

- Nama : Rizky Maulana Saputra
- NIM : L200220230

## 1. Business Understanding

## 2. Data Understanding

## 3. Data Preparation

In [1]:
!pip install plotly torch torchvision numpy pandas tqdm pillow gdown



In [3]:
import numpy as np
from collections import defaultdict
import torch
import plotly.express as px
import plotly.figure_factory as ff
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report
from torchvision import models
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split, Subset
from torchvision.transforms import v2
import torch.nn as nn
import torch.optim as optim
from google.colab import drive

#### Local Dataset

In [None]:
# Local copy of the dataset, given relative to the notebook.
# Every later cell reads `train_dir`, so the training folder is bound to that
# name; `data_dir` is kept as an alias for code that still uses the old name.
train_dir = '../dataset/Fish Disease/Train'
data_dir = train_dir
test_dir = '../dataset/Fish Disease/Test'

#### Google Drive Dataset (Colab)

In [4]:
# Attach Google Drive at /content/drive; the dataset paths below live under it.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [5]:
# Train and Test folders of the dataset inside the mounted Drive.
train_dir, test_dir = (
    '/content/drive/MyDrive/Dataset/Fish Disease/Train',
    '/content/drive/MyDrive/Dataset/Fish Disease/Test',
)

### Train/Validation Split and Augmentation

In [6]:
# Reproducible per-class train/validation split over the training folder.
SPLIT_SEED = 42          # fixed so the same images land in each split on every run
TRAIN_PER_CLASS = 175    # images per class used for training
VAL_PER_CLASS = 75       # images per class used for validation

split_rng = np.random.default_rng(SPLIT_SEED)

# Loaded without transforms: only the labels are needed to build the index lists.
data = datasets.ImageFolder(
    root=train_dir,
    transform=None
)
targets = np.array(data.targets)

# Group sample indices by class label.
class_indices = defaultdict(list)
for idx, label in enumerate(targets):
    class_indices[label].append(idx)

train_idx, val_idx = [], []

for label, idxs in class_indices.items():
    # Shuffle in place with the seeded generator, then slice the two splits.
    # A class with fewer than TRAIN_PER_CLASS + VAL_PER_CLASS images simply
    # contributes fewer samples (slicing never raises).
    split_rng.shuffle(idxs)
    train_idx.extend(idxs[:TRAIN_PER_CLASS])
    val_idx.extend(idxs[TRAIN_PER_CLASS:TRAIN_PER_CLASS + VAL_PER_CLASS])

In [7]:
# Settings shared by both pipelines: target image size and per-channel
# normalisation (mean/std given per RGB channel).
input_size = (224, 224)
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: resize, light random augmentation, then tensor + normalisation.
train_transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15),
    transforms.ToTensor(),
    normalize,
])

# Validation pipeline: no random augmentation, only resize + tensor + normalisation.
val_transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
    normalize,
])

# Two ImageFolder views of the same directory so each subset gets its own transform.
train_folder = datasets.ImageFolder(train_dir, transform=train_transform)
val_folder = datasets.ImageFolder(train_dir, transform=val_transform)

train_dataset = Subset(train_folder, train_idx)
val_dataset = Subset(val_folder, val_idx)

# Only the training loader reshuffles between epochs.
train_load = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_load = DataLoader(val_dataset, shuffle=False, batch_size=32)

## 4. Modeling

In [8]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

num_classes = 6

# EfficientNetV2-S: replace the last classifier layer with a num_classes-way linear head.
eff_model = models.efficientnet_v2_s(weights='IMAGENET1K_V1')
eff_in_features = eff_model.classifier[1].in_features
eff_model.classifier[1] = nn.Linear(eff_in_features, num_classes)
eff_model = eff_model.to(device)


# ShuffleNetV2 x1.5: same replacement, the head is the `fc` attribute.
shuffle_model = models.shufflenet_v2_x1_5(weights='IMAGENET1K_V1')
shuffle_in_features = shuffle_model.fc.in_features
shuffle_model.fc = nn.Linear(shuffle_in_features, num_classes)
shuffle_model = shuffle_model.to(device)

Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth


100%|██████████| 82.7M/82.7M [00:00<00:00, 196MB/s]


Downloading: "https://download.pytorch.org/models/shufflenetv2_x1_5-3c479a10.pth" to /root/.cache/torch/hub/checkpoints/shufflenetv2_x1_5-3c479a10.pth


100%|██████████| 13.6M/13.6M [00:00<00:00, 23.6MB/s]


In [9]:
# Show the CUDA version reported by the installed PyTorch build.
cuda_version = torch.version.cuda
print(cuda_version)

12.6


In [10]:
class EarlyStopping:
    """Track validation loss, checkpoint the best weights and signal when to stop.

    Call the instance once per epoch with the epoch's validation loss and the
    model. The model's ``state_dict`` is written to ``save_path`` on the first
    call and on every later improvement.

    Args:
        patience: Number of consecutive non-improving calls after which
            ``early_stop`` is set to True.
        min_delta: Margin by which the loss must undercut ``best_loss`` to
            count as an improvement (a loss equal to ``best_loss - min_delta``
            still counts).
        save_path: File the best ``state_dict`` is saved to. Missing parent
            directories are created on save.

    Attributes:
        counter: Consecutive calls without improvement.
        best_loss: Lowest validation loss seen so far (None before the first call).
        early_stop: True once ``counter`` has reached ``patience``.
    """

    def __init__(self, patience=3, min_delta=0.0, save_path='best_model.pt'):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.save_path = save_path

    def _save_checkpoint(self, model):
        """Write ``model.state_dict()`` to ``save_path``, creating its directory if needed."""
        import os  # local import keeps the class self-contained within this cell

        parent = os.path.dirname(self.save_path)
        if parent:
            # torch.save does not create missing directories itself.
            os.makedirs(parent, exist_ok=True)
        torch.save(model.state_dict(), self.save_path)

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and update the stopping state."""
        # First call: nothing to compare against, so this loss is the baseline.
        if self.best_loss is None:
            self.best_loss = val_loss
            self._save_checkpoint(model)
            return

        # Written as "<=" (rather than negating ">") so that a NaN loss compares
        # False and is counted as "no improvement" instead of overwriting the
        # best checkpoint.
        improved = val_loss <= self.best_loss - self.min_delta

        if improved:
            self.best_loss = val_loss
            self._save_checkpoint(model)
            print("Validation improved. Model saved.")
            self.counter = 0
        else:
            self.counter += 1
            print(f"No improvement ({self.counter}/{self.patience})")
            if self.counter >= self.patience:
                self.early_stop = True


In [11]:
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    Returns:
        (mean loss over batches, accuracy in percent).
    """
    training = optimizer is not None
    model.train(training)  # train(False) is the same as eval()

    loss_sum, n_correct, n_seen = 0, 0, 0
    # Gradients are tracked only during the training pass.
    with torch.set_grad_enabled(training):
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            _, predicted = torch.max(outputs, 1)
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

    return loss_sum / len(loader), 100 * n_correct / n_seen


def train_model(model, train_loader, val_loader, epochs=10, lr=1e-5, patience=3, model_name='best_model.pt'):
    """Train ``model`` with Adam and cross-entropy, early-stopping on validation loss.

    Args:
        model: Network to train; expected to already be on the global ``device``.
        train_loader: Loader yielding ``(images, labels)`` training batches.
        val_loader: Loader yielding ``(images, labels)`` validation batches.
        epochs: Maximum number of epochs.
        lr: Adam learning rate.
        patience: Non-improving epochs tolerated before training stops.
        model_name: File name of the checkpoint, stored under ``model/``.

    Returns:
        ``(model, history)``. ``model`` carries the weights of the epoch with
        the best validation loss. ``history`` is a dict with the per-epoch
        lists ``train_loss``, ``val_loss``, ``train_acc`` and ``val_acc``
        (accuracies in percent).
    """
    import os  # local import: only needed to prepare the checkpoint directory

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # One path for both saving and reloading, and make sure its folder exists.
    checkpoint_path = f'model/{model_name}'
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
    early_stopper = EarlyStopping(patience=patience, save_path=checkpoint_path)

    history = {
        "train_loss": [],
        "val_loss": [],
        "train_acc": [],
        "val_acc": [],
    }

    for epoch in range(epochs):
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_acc = _run_epoch(model, val_loader, criterion)

        print(f"Epoch [{epoch+1}/{epochs}] "
              f"Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}% "
              f"| Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["train_acc"].append(train_acc)
        history["val_acc"].append(val_acc)

        # Early stopping check (also checkpoints the best weights).
        early_stopper(val_loss, model)
        if early_stopper.early_stop:
            print("Early stopping activated. Training stopped.")
            break

    # Restore the best weights. Skipped when no epoch ran (epochs=0), because
    # then no checkpoint was written by this call. map_location lets a
    # checkpoint saved on GPU be loaded on a CPU-only machine.
    if early_stopper.best_loss is not None:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    return model, history


In [12]:
def plot_training_plotly(history):
    """Show two plotly line charts: loss per epoch and accuracy per epoch.

    Args:
        history: Dict with equally long lists under the keys ``train_loss``,
            ``val_loss``, ``train_acc`` and ``val_acc`` (one entry per epoch).
    """
    n_epochs = len(history["train_loss"])

    curves = pd.DataFrame({
        "Epoch": list(range(1, n_epochs + 1)),
        "Train Loss": history["train_loss"],
        "Val Loss": history["val_loss"],
        "Train Acc": history["train_acc"],
        "Val Acc": history["val_acc"]
    })

    # (columns to draw, chart title) — loss first, then accuracy.
    panels = (
        (["Train Loss", "Val Loss"], "Training vs Validation Loss"),
        (["Train Acc", "Val Acc"], "Training vs Validation Accuracy"),
    )
    for columns, title in panels:
        figure = px.line(curves, x="Epoch", y=columns, title=title, markers=True)
        figure.show()

In [13]:
# Fine-tune ShuffleNetV2 for at most 200 epochs (early stopping may end it sooner),
# then plot its learning curves.
s_model, history = train_model(
    model=shuffle_model,
    train_loader=train_load,
    val_loader=val_load,
    epochs=200,
    model_name='shuffle_model.pt',
)
plot_training_plotly(history=history)

Epoch [1/200] Train Acc: 19.24% | Val Acc: 27.11% | Train Loss: 1.7894 | Val Loss: 1.7640
Epoch [2/200] Train Acc: 26.48% | Val Acc: 31.33% | Train Loss: 1.7573 | Val Loss: 1.7358
Validation improved. Model saved.
Epoch [3/200] Train Acc: 32.57% | Val Acc: 35.11% | Train Loss: 1.7096 | Val Loss: 1.7072
Validation improved. Model saved.
Epoch [4/200] Train Acc: 38.29% | Val Acc: 39.33% | Train Loss: 1.6827 | Val Loss: 1.6753
Validation improved. Model saved.
Epoch [5/200] Train Acc: 43.24% | Val Acc: 44.89% | Train Loss: 1.6503 | Val Loss: 1.6434
Validation improved. Model saved.
Epoch [6/200] Train Acc: 48.57% | Val Acc: 48.44% | Train Loss: 1.6135 | Val Loss: 1.6086
Validation improved. Model saved.
Epoch [7/200] Train Acc: 51.90% | Val Acc: 54.67% | Train Loss: 1.5718 | Val Loss: 1.5749
Validation improved. Model saved.
Epoch [8/200] Train Acc: 54.86% | Val Acc: 56.22% | Train Loss: 1.5452 | Val Loss: 1.5432
Validation improved. Model saved.
Epoch [9/200] Train Acc: 58.19% | Val Acc:

In [14]:
# Fine-tune EfficientNetV2-S for at most 200 epochs (early stopping may end it sooner),
# then plot its learning curves. Note: this rebinds `history` to the new run.
e_model, history = train_model(
    model=eff_model,
    train_loader=train_load,
    val_loader=val_load,
    epochs=200,
    model_name='eff_model.pt',
)
plot_training_plotly(history=history)

Epoch [1/200] Train Acc: 23.33% | Val Acc: 26.67% | Train Loss: 1.7738 | Val Loss: 1.7398
Epoch [2/200] Train Acc: 31.62% | Val Acc: 36.44% | Train Loss: 1.7015 | Val Loss: 1.6786
Validation improved. Model saved.
Epoch [3/200] Train Acc: 40.86% | Val Acc: 47.11% | Train Loss: 1.6382 | Val Loss: 1.6046
Validation improved. Model saved.
Epoch [4/200] Train Acc: 47.05% | Val Acc: 51.33% | Train Loss: 1.5511 | Val Loss: 1.5229
Validation improved. Model saved.
Epoch [5/200] Train Acc: 55.43% | Val Acc: 57.56% | Train Loss: 1.4478 | Val Loss: 1.4306
Validation improved. Model saved.
Epoch [6/200] Train Acc: 63.05% | Val Acc: 63.33% | Train Loss: 1.3551 | Val Loss: 1.3415
Validation improved. Model saved.
Epoch [7/200] Train Acc: 65.24% | Val Acc: 66.00% | Train Loss: 1.2728 | Val Loss: 1.2646
Validation improved. Model saved.
Epoch [8/200] Train Acc: 73.14% | Val Acc: 71.56% | Train Loss: 1.1752 | Val Loss: 1.1779
Validation improved. Model saved.
Epoch [9/200] Train Acc: 76.00% | Val Acc:

## 5. Evaluation

In [15]:
def plot_confusion_matrix_plotly(model, dataloader, class_names):
    """Predict over ``dataloader``, then show a confusion matrix and print a report.

    Args:
        model: Trained classifier; expected to be on the global ``device``.
        dataloader: Loader yielding ``(images, labels)`` batches.
        class_names: Display name per class. Label ``i`` is assumed to
            correspond to ``class_names[i]`` (as with ``ImageFolder.classes``).
    """
    model.eval()
    preds, targets = [], []

    with torch.no_grad():
        for imgs, labels in dataloader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            _, predicted = torch.max(outputs, 1)

            preds.extend(predicted.cpu().numpy())
            targets.extend(labels.cpu().numpy())

    # Fix the label set explicitly. Without it scikit-learn infers the labels
    # from the data, so a class missing from this loader would shrink the
    # matrix and make it disagree with class_names.
    class_names = list(class_names)
    label_ids = list(range(len(class_names)))

    cm = confusion_matrix(targets, preds, labels=label_ids)

    fig = ff.create_annotated_heatmap(
        z=cm,
        x=class_names,
        y=class_names,
        colorscale="Blues",
        showscale=True
    )

    fig.update_layout(
        title="Confusion Matrix",
        xaxis_title="Predicted",
        yaxis_title="Actual"
    )
    fig.show()

    print("\nClassification Report:")
    print(classification_report(targets, preds, labels=label_ids, target_names=class_names))

In [16]:
# NOTE(review): val_load is the same split train_model used for early stopping,
# and test_dir is never loaded in this notebook — consider reporting final
# metrics on a dataset built from test_dir instead.
class_names = train_dataset.dataset.classes
plot_confusion_matrix_plotly(
    model=s_model,
    dataloader=val_load,
    class_names=class_names,
)


Classification Report:
                                   precision    recall  f1-score   support

            Bacterial Red disease       0.91      0.92      0.91        75
Bacterial diseases - Aeromoniasis       0.99      0.97      0.98        75
           Bacterial gill disease       0.93      0.93      0.93        75
   Fungal diseases Saprolegniasis       0.96      0.92      0.94        75
                     Healthy Fish       0.97      0.91      0.94        75
               Parasitic diseases       0.83      0.92      0.87        75

                         accuracy                           0.93       450
                        macro avg       0.93      0.93      0.93       450
                     weighted avg       0.93      0.93      0.93       450



In [17]:
# NOTE(review): as for the ShuffleNet report, these metrics come from the
# validation split that also drove early stopping, not from test_dir.
class_names = train_dataset.dataset.classes
plot_confusion_matrix_plotly(
    model=e_model,
    dataloader=val_load,
    class_names=class_names,
)


Classification Report:
                                   precision    recall  f1-score   support

            Bacterial Red disease       0.89      0.91      0.90        75
Bacterial diseases - Aeromoniasis       1.00      0.96      0.98        75
           Bacterial gill disease       0.96      0.99      0.97        75
   Fungal diseases Saprolegniasis       0.96      0.97      0.97        75
                     Healthy Fish       0.96      0.97      0.97        75
               Parasitic diseases       0.95      0.92      0.93        75

                         accuracy                           0.95       450
                        macro avg       0.95      0.95      0.95       450
                     weighted avg       0.95      0.95      0.95       450



## 6. Deployment