Murat Tut

In [18]:
import torch.nn as nn
import torch
from sklearn.model_selection import train_test_split
from torchvision import models
from torchvision.models import efficientnet_v2_s
from torchvision.transforms import v2
from torchvision import datasets
from tqdm.auto import tqdm
from torch.optim.lr_scheduler import StepLR
import numpy as np
from torchvision.models import EfficientNet_V2_S_Weights, ResNet50_Weights
from vit_pytorch import ViT
from sklearn.metrics import f1_score, cohen_kappa_score
import torch.nn.functional as F

import random


Torch_train_dtype = torch.bfloat16

#PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 # mps de memory overflow hatası alıyorsanız bu satırı ekleyin colabde mps yok


In [19]:
class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, smoothing=0.1):
        super(LabelSmoothingCrossEntropy, self).__init__()
        self.smoothing = smoothing

    def forward(self, inputs, targets):
        log_probs = torch.log_softmax(inputs, dim=-1)
        targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        targets = (1 - self.smoothing) * targets + self.smoothing / inputs.size(1)
        loss = (-targets * log_probs).mean()
        return loss
class DiceLoss(nn.Module):
    def __init__(self):
        super(DiceLoss, self).__init__()

    def forward(self, inputs, targets, epsilon=1e-7):
        inputs = torch.softmax(inputs, dim=1)
        targets_one_hot = torch.zeros_like(inputs).scatter_(1, targets.unsqueeze(1), 1)
        intersection = torch.sum(inputs * targets_one_hot)
        union = torch.sum(inputs + targets_one_hot)
        dice_loss = 1 - (2. * intersection + epsilon) / (union + epsilon)
        return dice_loss
class KappaLoss(nn.Module):
    def __init__(self, num_classes):
        super(KappaLoss, self).__init__()
        self.num_classes = num_classes

    def forward(self, inputs, targets):
        y_pred = torch.softmax(inputs, dim=1)
        targets_one_hot = torch.zeros_like(y_pred).scatter_(1, targets.unsqueeze(1), 1)
        observed = torch.sum(targets_one_hot * y_pred, dim=0)
        expected = torch.sum(y_pred, dim=0) * torch.sum(targets_one_hot, dim=0) / targets.size(0)
        kappa_loss = 1 - torch.sum((observed - expected)**2 / (expected + 1e-7))
        return kappa_loss
class TverskyLoss(nn.Module):
    def __init__(self, alpha=0.5, beta=0.5):
        super(TverskyLoss, self).__init__()
        self.alpha = alpha
        self.beta = beta

    def forward(self, inputs, targets):
        inputs = torch.softmax(inputs, dim=1)
        targets_one_hot = torch.zeros_like(inputs).scatter_(1, targets.unsqueeze(1), 1)
        tp = torch.sum(inputs * targets_one_hot)
        fp = torch.sum(inputs * (1 - targets_one_hot))
        fn = torch.sum((1 - inputs) * targets_one_hot)
        tversky_loss = 1 - (tp / (tp + self.alpha * fp + self.beta * fn + 1e-7))
        return tversky_loss
class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, smoothing=0.1):
        super(LabelSmoothingCrossEntropy, self).__init__()
        self.smoothing = smoothing

    def forward(self, inputs, targets):
        log_probs = torch.log_softmax(inputs, dim=-1)
        targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        targets = (1 - self.smoothing) * targets + self.smoothing / inputs.size(1)
        loss = (-targets * log_probs).mean()
        return loss

In [20]:
def train(model, train_loader, test_loader, num_epochs=None, show_progress=False):
    loss_fn = nn.CrossEntropyLoss(weight=class_weights.to(device))
    #loss_fn = dice_loss
    #loss_fn = LabelSmoothingCrossEntropy()
    #loss_fn = DiceLoss()
    #loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    #optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)
    #scheduler = StepLR(optimizer, step_size=5, gamma=0.1)  # Her *** epoch'ta LR %10'a düşer overfittin ihtimalini azaltmak için yinede cok random bir parametre olmasada olur

    total_accuracy = 0

    # overfitting i engellemek için en iyi modeli seciyor
    best_model = {"model": None, "accuracy": 0, "test_loss": float("inf")}
    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        train_loss = 0.0

        # Training loop
        for inputs, labels in tqdm(train_loader, desc="Training Progress"):

            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Evaluation
        model.eval()  # Set model to evaluation mode
        test_loss = 0.0
        correct = 0
        total = 0
        all_labels = []
        all_predictions = []
        with torch.no_grad(): # Gradients are not calculated in this block
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                test_loss += loss.item()

                # Calculate accuracy
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                # Store labels and predictions for metrics
                all_labels.extend(labels.cpu().numpy())
                all_predictions.extend(predicted.cpu().numpy())

        train_loss /= len(train_loader)  # Average training loss
        test_loss /= len(test_loader)  # Average test loss
        accuracy = 100 * correct / total  # Test accuracy
        total_accuracy += accuracy

        # Calculate F1 score and Cohen's Kappa
        f1 = f1_score(all_labels, all_predictions, average='weighted')
        kappa = cohen_kappa_score(all_labels, all_predictions)


        if show_progress:
            print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%, F1 Score: {f1:.4f}, Cohen's Kappa: {kappa:.4f}")

        #scheduler.step()

        if accuracy >= best_model["accuracy"]:
            best_model["model"] = model
            best_model["accuracy"] = accuracy
            best_model["test_loss"] = test_loss

    print(f"Best model accuracy: {best_model['accuracy']}")

    return best_model["model"], best_model["accuracy"]

In [21]:
def modify_first_conv_layer_efnet(model, num_channels):
    # Get the original first convolutional layer
    original_conv1 = model.features[0][0]

    # Create a new convolutional layer with the specified number of input channels
    new_conv1 = nn.Conv2d(
        in_channels=num_channels,  # Updated input channels
        out_channels=original_conv1.out_channels,
        kernel_size=original_conv1.kernel_size,
        stride=original_conv1.stride,
        padding=original_conv1.padding,
        bias=original_conv1.bias is not None
    )

    # Copy the weights for the first min(3, num_channels) channels
    new_conv1.weight.data[:, :min(3, num_channels), :, :] = original_conv1.weight.data[:, :min(3, num_channels), :, :]

    # Initialize any additional channels with zeros
    if num_channels > 3:
        new_conv1.weight.data[:, 3:num_channels, :, :] = torch.zeros_like(new_conv1.weight.data[:, 3:num_channels, :, :])

    # Replace the original convolutional layer with the modified one
    model.features[0][0] = new_conv1

    return model

def modify_first_conv_layer_resnet(model, num_channels):
    # Get the original first convolutional layer
    original_conv1 = model.conv1

    # Create a new convolutional layer with the specified number of input channels
    new_conv1 = nn.Conv2d(
        in_channels=num_channels,  # Updated input channels
        out_channels=original_conv1.out_channels,
        kernel_size=original_conv1.kernel_size,
        stride=original_conv1.stride,
        padding=original_conv1.padding,
        bias=original_conv1.bias is not None
    )

    # Copy the weights for the first min(3, num_channels) channels
    new_conv1.weight.data[:, :min(3, num_channels), :, :] = original_conv1.weight.data[:, :min(3, num_channels), :, :]

    # Initialize any additional channels with zeros
    if num_channels > 3:
        new_conv1.weight.data[:, 3:num_channels, :, :] = 0

    # Replace the original convolutional layer with the modified one
    model.conv1 = new_conv1

    return model

In [22]:
from skimage.feature import graycomatrix, graycoprops
import cv2
from torchvision.transforms import functional as F

class CustomTransform:
    """Custom transform to add edges detected by Canny as the 4th channel."""
    def __call__(self, image):
        # convert the image to a numpy array
        image_np = np.array(image)
        # remove top 200 pixels
        image_np = image_np_original = image_np[200:, :, :]

        # convert the image to grayscale
        image_np_gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

        # Apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        clahe_image = clahe.apply(image_np_gray)

        # Apply HSV (Hue, Saturation, Value) color space
        hsv_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2HSV)
        h, s, v = cv2.split(hsv_image)

        # Apply Histogram Equalization
        hist_eq = cv2.equalizeHist(image_np_gray)

        # Apply GlCM (Gray-Level Co-occurrence Matrix)
        glcm = graycomatrix(image_np_gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
        contrast = graycoprops(glcm, 'contrast')[0, 0]
        correlation = graycoprops(glcm, 'correlation')[0, 0]
        energy = graycoprops(glcm, 'energy')[0, 0]
        homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]

        contrast_channel = np.full_like(image_np[:, :, 0], contrast)
        correlation_channel = np.full_like(image_np[:, :, 0], correlation)
        energy_channel = np.full_like(image_np[:, :, 0], energy)
        homogeneity_channel = np.full_like(image_np[:, :, 0], homogeneity)

        # Apply sharpening
        """kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]])

        sharpened = cv2.filter2D(image_np_original, -1, kernel)"""

        # apply Canny edge detection
        edges = cv2.Canny(image_np_gray, 50, 150)

        # add edges as the 4th channel
        image_np = np.dstack((image_np, edges))

        # add CLAHE as the 5th channel
        image_np = np.dstack((image_np, clahe_image))

        # add HSV as the 6th, 7th, and 8th channels
        image_np = np.dstack((image_np, h, s, v))

        # add Histogram Equalization as the 9th channel
        image_np = np.dstack((image_np, hist_eq))

        # add GlCM properties as the 10th, 11th, 12th, and 13th channels
        image_np = np.dstack((image_np, contrast_channel, correlation_channel, energy_channel, homogeneity_channel))

        # convert uint8 to float32
        image_np = image_np.astype(np.float32)

        # normalize the image
        image_np = image_np / 255.0

        # applay gray world algorithm
        mean_r = np.mean(image_np[:, :, 0])
        mean_g = np.mean(image_np[:, :, 1])
        mean_b = np.mean(image_np[:, :, 2])
        mean_gray = (mean_r + mean_g + mean_b) / 3

        image_np[:, :, 0] *= mean_gray / mean_r
        image_np[:, :, 1] *= mean_gray / mean_g
        image_np[:, :, 2] *= mean_gray / mean_b

        # convert the image to a tensor
        image = F.to_tensor(image_np)

        # rezise the image
        image = F.resize(image, ([224, 224]))

        # normalize the image
        channel_size = image.size(0)

        if channel_size == 3:
            image = F.normalize(image, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            image = image.to(Torch_train_dtype)
            return image
        elif channel_size > 3:
            mean = [0.485, 0.456, 0.406] + [0.0] * (channel_size - 3)
            std = [0.229, 0.224, 0.225] + [1.0] * (channel_size - 3)
            image = F.normalize(image, mean=mean, std=std)
            image = image.to(Torch_train_dtype)
            return image
        else:
            raise ValueError("The number of channels must be at least 3. for cnn models")

In [23]:
# Load the dataset from local directory
data_dir = "dataset/"

# For colab you can use this path or you can use your own path
#data_dir = "drive/MyDrive/dataset/"
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.mps.is_available():
    device = torch.device("mps") # mps is for apple
else:
    # if you use cpu probably you will get memory error or it will take too long time
    device = torch.device("cpu")

def load_data(data_dir, seed):
    # data augmentations
    #v2.RandomHorizontalFlip(p=0.5),# Yatay çevirme
    #v2.RandomRotation(degrees=15), # Hafif döndürme
    #v2.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Rastgele kaydırma
    #v2.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),  # Renk artırma
    #v2.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 2.0)),  # Gaussian bulanıklık
    #v2.RandomResizedCrop((224, 224), scale=(0.8, 1.0)),  # Rastgele kırpma
    #v2.RandomErasing(p=0.3, scale=(0.02, 0.1), ratio=(0.3, 3.3)),  # Rastgele silme

    transform = v2.Compose([
        # if you want to use the default transforms you can use this line
        #v2.ToImage(),
        #v2.Resize((224, 224)),
        #v2.ToDtype(Torch_train_dtype, scale=True),

        # if you want do not use the default transforms you can use this line
        CustomTransform(),

        # if you used Torch_train_dtype = torch.bfloat16 you will get an error because of augmentation so please use this line Torch_train_dtype = torch.float32
    ])

    dataset = datasets.ImageFolder(data_dir, transform=transform)
    train_dataset, test_dataset = train_test_split(dataset, test_size=0.33, stratify=dataset.targets, random_state=seed)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

    # burada classlar arasında eşitsizlik olduğundan classlar arasında dengesizlik olabilir. Bu durumda classlar arasında dengesizliği gidermek için cross entropy loss fonksiyonuna class_weights parametresi ekledim.
    class_counts = np.bincount(dataset.targets)
    class_weights = 1.0 / torch.tensor(class_counts, dtype=Torch_train_dtype)

    class_weights = class_weights / class_weights.sum() * len(class_counts)
    class_weights = class_weights.to(device)

    return train_dataloader, test_dataloader, class_weights

In [24]:
# Load the pre-trained model
#from vit_pytorch import ViT

models_accuracys = []
best_model = {"model": None, "accuracy": 0}

for epoch in tqdm(range(5)):
    import random
    seed = random.randint(0, 100)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.mps.manual_seed(seed)

    train_dataloader, test_dataloader, class_weights = load_data(data_dir, seed=seed)

    #model = models.resnet50(weights=ResNet50_Weights.DEFAULT).to(device)
    #model = modify_first_conv_layer_resnet(model, 13)
    #model.fc = nn.Linear(model.fc.in_features, 3).to(device)

    model = models.efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT).to(device)
    model = modify_first_conv_layer_efnet(model, 13)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, 3).to(device)

    # for vit we do not need to modify the first layer because it does not have a convolutional layer
    """model = ViT(
        image_size=224,
        patch_size=16,
        num_classes=3,
        dim=512,
        depth=6,
        heads=8,
        mlp_dim=1024,
        channels=13,
        dropout=0.1,
        emb_dropout=0.1)"""

    # Convert to bfloat16 for training
    model = model.to(dtype=Torch_train_dtype, device=device)

    # Train the model
    model, model_accuracy = train(model, train_dataloader, test_dataloader, num_epochs=20, show_progress=False)  # Train for 20 epochs

    if model_accuracy > best_model["accuracy"]:
        best_model["model"] = model
        best_model["accuracy"] = model_accuracy
    models_accuracys.append(model_accuracy)

print(f"Average accuracy: {np.mean(models_accuracys)}")
model = best_model["model"]

  0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Best model accuracy: 90.0


Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Best model accuracy: 90.0


Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Best model accuracy: 91.42857142857143


Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Best model accuracy: 84.28571428571429


Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Best model accuracy: 92.85714285714286
Average accuracy: 89.71428571428571
