# Model Training (First Pass)

In [13]:
# Setting up saving model path
import os
import sys

folder_to_save = 'trained_models/first_pass_augmented'
data_source = 'augmented_data_300'

## Generating Raw Pixel Embeddings

In [14]:
import os
import numpy as np
import cv2

# Load images and create raw pixel embeddings
def load_images_and_labels(image_dir):
    images = []
    labels = []
    label_map = {
        'Monkeypox': 0,
        'Chickenpox': 1,
        'Measles': 2,
        'Normal': 3
    }

    for class_name, label in label_map.items():
        class_dir = os.path.join(image_dir, class_name)
        for image_file in os.listdir(class_dir):
            img_path = os.path.join(class_dir, image_file)
            img = cv2.imread(img_path)
            img = cv2.resize(img, (224, 224))  # Resize to 224x224
            img = img.flatten()  # Flatten the image to raw pixel embedding
            images.append(img)
            labels.append(label)
    
    return np.array(images), np.array(labels)

# Load images from the dataset
image_dir = data_source  # Base directory
X, y = load_images_and_labels(image_dir)

## Logistic Regression

In [15]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# First, split the dataset into 85% train+validation and 15% test sets
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

# Next, split the train+validation set into 70% train and 15% validation
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42, stratify=y_train_val)

# Train the logistic regression model with validation-based selection
def train_logistic_regression(X_train, y_train, X_val, y_val):
    best_model = None
    best_acc = 0.0

    # Train Logistic Regression model
    model = LogisticRegression(max_iter=10000, solver='lbfgs', multi_class='multinomial')
    model.fit(X_train, y_train)

    # Validation phase
    val_preds = model.predict(X_val)
    val_acc = accuracy_score(y_val, val_preds)
    print(f'Validation Accuracy: {val_acc:.4f}')

    # Check if this is the best accuracy
    if val_acc > best_acc:
        best_acc = val_acc
        best_model = model  # Save the best model
        print(f"Validation accuracy improved to {val_acc:.4f}, saving model...")

    return best_model, best_acc

# Train the logistic regression model
logistic_regression_best_model, best_val_acc = train_logistic_regression(X_train, y_train, X_val, y_val)

# Evaluate the model on the test set
def evaluate_logistic_regression(model, X_test, y_test):
    test_preds = model.predict(X_test)

    # Calculate accuracy
    test_acc = accuracy_score(y_test, test_preds)
    print(f'Test Accuracy: {test_acc:.4f}')

    # Classification report and confusion matrix
    print("\nClassification Report:\n", classification_report(y_test, test_preds, target_names=['Monkeypox', 'Chickenpox', 'Measles', 'Normal']))
    print("\nConfusion Matrix:\n", confusion_matrix(y_test, test_preds))

# Evaluate the best model on the test set
evaluate_logistic_regression(logistic_regression_best_model, X_test, y_test)



Validation Accuracy: 0.8287
Validation accuracy improved to 0.8287, saving model...
Test Accuracy: 0.8122

Classification Report:
               precision    recall  f1-score   support

   Monkeypox       0.96      0.96      0.96        45
  Chickenpox       0.65      0.69      0.67        45
     Measles       0.67      0.67      0.67        46
      Normal       1.00      0.93      0.97        45

    accuracy                           0.81       181
   macro avg       0.82      0.81      0.82       181
weighted avg       0.82      0.81      0.81       181


Confusion Matrix:
 [[43  0  2  0]
 [ 1 31 13  0]
 [ 0 15 31  0]
 [ 1  2  0 42]]


In [16]:
# Export the model
import joblib
model_file = f'{folder_to_save}/logistic_regression.pkl'

joblib.dump(logistic_regression_best_model, model_file)
print(f"Model saved to {model_file}")

Model saved to trained_models/first_pass_augmented/logistic_regression.pkl


## SVM for Classification

In [17]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# First, split the dataset into 85% train+validation and 15% test sets
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

# Next, split the train+validation set into 70% train and 15% validation
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42, stratify=y_train_val)

# Train the SVM model with validation-based selection
def train_svm(X_train, y_train, X_val, y_val):
    best_model = None
    best_acc = 0.0

    # Train the SVM model
    model = SVC(kernel='linear', C=1, decision_function_shape='ovr')  # Linear kernel
    model.fit(X_train, y_train)

    # Validation phase
    val_preds = model.predict(X_val)
    val_acc = accuracy_score(y_val, val_preds)
    print(f'Validation Accuracy: {val_acc:.4f}')

    # Check if this is the best accuracy
    if val_acc > best_acc:
        best_acc = val_acc
        best_model = model  # Save the best model
        print(f"Validation accuracy improved to {val_acc:.4f}, saving model...")

    return best_model, best_acc

# Train the SVM model
best_svm_model, best_val_acc = train_svm(X_train, y_train, X_val, y_val)

# Evaluate the model on the test set
def evaluate_svm(model, X_test, y_test):
    test_preds = model.predict(X_test)

    # Calculate accuracy
    test_acc = accuracy_score(y_test, test_preds)
    print(f'Test Accuracy: {test_acc:.4f}')

    # Classification report and confusion matrix
    print("\nClassification Report:\n", classification_report(y_test, test_preds, target_names=['Monkeypox', 'Chickenpox', 'Measles', 'Normal']))
    print("\nConfusion Matrix:\n", confusion_matrix(y_test, test_preds))

# Evaluate the best model on the test set
evaluate_svm(best_svm_model, X_test, y_test)

Validation Accuracy: 0.8287
Validation accuracy improved to 0.8287, saving model...
Test Accuracy: 0.8398

Classification Report:
               precision    recall  f1-score   support

   Monkeypox       0.91      0.96      0.93        45
  Chickenpox       0.70      0.78      0.74        45
     Measles       0.76      0.70      0.73        46
      Normal       1.00      0.93      0.97        45

    accuracy                           0.84       181
   macro avg       0.84      0.84      0.84       181
weighted avg       0.84      0.84      0.84       181


Confusion Matrix:
 [[43  1  1  0]
 [ 1 35  9  0]
 [ 2 12 32  0]
 [ 1  2  0 42]]


In [18]:
# Export the model
import joblib
model_file = f'{folder_to_save}/best_svm_model.pkl'

joblib.dump(best_svm_model, model_file)
print(f"Model saved to {model_file}")

Model saved to trained_models/first_pass/best_svm_model.pkl


## Neural Network

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

# Step 1: Load Data from Raw Pixel Embeddings (Already stored as X,y)

# Step 2: Split the dataset into 85% train+validation and 15% test sets
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

# Next, split the train+validation set into 70% train and 15% validation
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42, stratify=y_train_val)

# Step 3: Convert the numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Step 4: Create PyTorch datasets and dataloaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Step 5: Define a simple feed-forward neural network
class SimpleClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(SimpleClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

# Instantiate the model (EfficientNet-B0 embeddings have size 1280)
input_size = X_train.shape[1]  # Should be 1280 for EfficientNet-B0 embeddings
num_classes = 4  # Monkeypox, Chickenpox, Measles, Normal
model = SimpleClassifier(input_size=input_size, num_classes=num_classes)

# Step 6: Move the model to GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)

# Step 7: Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 8: Function to train the model with early stopping based on validation accuracy
def train_model_with_early_stopping(model, criterion, optimizer, num_epochs=10, patience=3):
    best_model_wts = None
    best_acc = 0.0
    epochs_no_improve = 0
    stop_training = False

    for epoch in range(num_epochs):
        if stop_training:
            break

        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Training phase
        model.train()
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Track loss and accuracy
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects.double() / len(train_loader.dataset)

        print(f'Training Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        # Validation phase
        model.eval()
        val_corrects = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                val_corrects += torch.sum(preds == labels.data)

        val_acc = val_corrects.double() / len(val_loader.dataset)
        print(f'Validation Accuracy: {val_acc:.4f}')

        # Check if this is the best accuracy we've seen
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = model.state_dict()  # Save the best model weights
            epochs_no_improve = 0  # Reset the counter if validation improves
            print(f"Validation accuracy improved to {val_acc:.4f}, saving model weights...")
        else:
            epochs_no_improve += 1
            print(f"No improvement. Epochs without improvement: {epochs_no_improve}")

        # Early stopping condition
        if epochs_no_improve >= patience:
            print(f"Early stopping triggered after {patience} epochs of no improvement.")
            stop_training = True

    # Load best model weights
    if best_model_wts is not None:
        model.load_state_dict(best_model_wts)
        print("Loaded best model weights.")

    return model, best_acc

# Step 9: Train the model with early stopping
trained_model, best_val_acc = train_model_with_early_stopping(model, criterion, optimizer, num_epochs=10, patience=3)

# Step 10: Function to evaluate the model on the test set
def evaluate_model(model, dataloader):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.eval()  # Set the model to evaluation mode

    all_preds = []
    all_labels = []
    running_corrects = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            running_corrects += torch.sum(preds == labels.data)

    # Calculate accuracy
    accuracy = running_corrects.double() / len(dataloader.dataset)
    print(f'Test Accuracy: {accuracy:.4f}')

    # Classification report and confusion matrix
    print("\nClassification Report:\n", classification_report(all_labels, all_preds, target_names=['Monkeypox', 'Chickenpox', 'Measles', 'Normal']))
    print("\nConfusion Matrix:\n", confusion_matrix(all_labels, all_preds))

# Step 11: Evaluate the model on the test set
evaluate_model(trained_model, test_loader)


Epoch 1/10
----------
Training Loss: 4174.1250 Acc: 0.4583
Validation Accuracy: 0.7127
Validation accuracy improved to 0.7127, saving model weights...
Epoch 2/10
----------
Training Loss: 183.2566 Acc: 0.7988
Validation Accuracy: 0.7182
Validation accuracy improved to 0.7182, saving model weights...
Epoch 3/10
----------
Training Loss: 340.8406 Acc: 0.7726
Validation Accuracy: 0.7845
Validation accuracy improved to 0.7845, saving model weights...
Epoch 4/10
----------
Training Loss: 177.3305 Acc: 0.8095
Validation Accuracy: 0.8177
Validation accuracy improved to 0.8177, saving model weights...
Epoch 5/10
----------
Training Loss: 77.8544 Acc: 0.8417
Validation Accuracy: 0.7901
No improvement. Epochs without improvement: 1
Epoch 6/10
----------
Training Loss: 81.1716 Acc: 0.8107
Validation Accuracy: 0.5083
No improvement. Epochs without improvement: 2
Epoch 7/10
----------
Training Loss: 93.3748 Acc: 0.8071
Validation Accuracy: 0.8011
No improvement. Epochs without improvement: 3
Early 

In [20]:
# Save the best model
model_file = f'{folder_to_save}/early_stopping_nn.pth'
torch.save(trained_model.state_dict(), model_file)
print(f"Best model saved to {model_file} with validation accuracy: {best_val_acc:.4f}")

Best model saved to trained_models/first_pass_augmented/early_stopping_nn.pth with validation accuracy: 0.8177
