In [3]:
!pip install split-folders

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [4]:
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.transforms import transforms
import os
from PIL import Image
import torch
import ssl
import torchvision
import torchvision.models as models
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm
import splitfolders
import csv
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np


def is_folder_empty(folder_path):
    # Check if the folder exists
    if not os.path.exists(folder_path):
        print(f"The folder '{folder_path}' does not exist.")
        return False

    # Check if the folder is empty
    for root, dirs, files in os.walk(folder_path):
        if not dirs and not files:
            print(f"The folder '{root}' is empty.")
            return True  # The folder is empty

    print(f"The folder '{folder_path}' is not empty.")
    return False

def get_subdirectories(folder_path):
    subdirectories = []
    for entry in os.listdir(folder_path):
        entry_path = os.path.join(folder_path, entry)
        if os.path.isdir(entry_path):
            subdirectories.append(entry_path)
    return subdirectories

def calculate_accuracy(y_pred, y):
    top_pred = y_pred.argmax(1, keepdim=True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

ssl._create_default_https_context = ssl._create_unverified_context

if __name__ == '__main__':
    # Define the device to be used for training
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set up the transform to resize and normalize the images
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5]),
    ])

    # Update input folder and output folder paths
    input_folder = r"/kaggle/input/images-highpassfilter-small-zip/IMAGES_HighPassFilter_Small"
    output_folder = r"/kaggle/input/images-split-highpass/IMAGES_Split_HighPass"

    ### Uncomment only for first time. once data is splitted into train and validation, comment it out
    #splitfolders.ratio(input_folder, output_folder, seed=42, ratio=(0.8, 0.2), group_prefix=None)

    # Create datasets for the training and testing sets
    train_dataset = torchvision.datasets.ImageFolder(output_folder + '/train', transform=transform)
    val_dataset = torchvision.datasets.ImageFolder(output_folder + '/val', transform=transform)
    train_size = len(train_dataset)
    val_size = len(val_dataset)

    # Create the data loaders for training and validation
    train_loader = DataLoader(train_dataset, batch_size=5, shuffle=True,num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=5, shuffle=True,num_workers=4)
    list_of_classes = os.listdir(r"/kaggle/input/images-split-highpass/IMAGES_Split_HighPass/train")
    print(list_of_classes)
    classes = list(train_dataset.class_to_idx.keys())
    classes.sort()


    # Define the VGG model
    model = torchvision.models.vgg16(pretrained=True)
    num_features = model.classifier[0].in_features
    model.classifier = nn.Sequential(
        nn.Linear(num_features, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, len(list_of_classes))
    )

    ## uncomment for CPU
    model = model.to(device)

    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    START_LR = 0.0001
    optimizer = optim.Adam(model.parameters(), lr=START_LR)
    model = model.to(device)
    criterion = criterion.to(device)

    ### uncomment for GPU
    model.cuda()
        # Train the model
    for epoch in range(1):
        ### uncomment for GPU
        torch.cuda.empty_cache()
        print('Epoch {}/{}'.format(epoch + 1, 1))
        print('-' * 1)

        running_loss = 0
        running_corrects = 0

        model.train()
        predictions = []
        true_labels = []
        for inputs, labels in tqdm(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            true_labels.extend(labels.cpu().numpy())
            optimizer.zero_grad()
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            predictions.extend(preds.cpu().numpy())
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        epoch_loss = running_loss / train_size
        epoch_acc = running_corrects.double() / train_size
        report_dict = classification_report(true_labels, predictions, target_names=list_of_classes,output_dict=True)
        report_pd = pd.DataFrame(report_dict)
        report_pd.to_csv('training-classification-epoch' + str(epoch + 1) + '.csv')
        cnf_matrix = confusion_matrix(true_labels, predictions)
        df_cm = pd.DataFrame(cnf_matrix / np.sum(cnf_matrix, axis=1)[:, None], index = [i for i in classes],
                        columns = [i for i in classes])
        df_cm.to_csv('confusion-matrix-train-epoch' + str(epoch + 1) + '.csv')
        #acc = matrix.diagonal()/matrix.sum(axis=1)
        FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix) 
        FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
        TP = np.diag(cnf_matrix)
        TN = cnf_matrix.sum() - (FP + FN + TP)
        FP = FP.astype(float)
        FN = FN.astype(float)
        TP = TP.astype(float)
        TN = TN.astype(float)
        ACC = (TP+TN)/(TP+FP+FN+TN)
        TPR = TP/(TP+FN)
        PPV = TP/(TP+FP)
        print("accuracy for all classes in train phase", ACC)
        print("recall for all classes in train phase", TPR)
        print("precision for all classes in train phase", PPV)
        
        pd.DataFrame(ACC, columns=['Accuracy']).to_csv('accuracy-train-epoch' + str(epoch + 1) + '.csv')
        print('Train Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
        
        # Delete data to clear GPU memory
        del outputs
        del preds
        del labels
        del inputs
        torch.cuda.empty_cache()
        
        # Validation phase
        running_loss = 0
        running_corrects = 0
        model.eval()  # set the model to evaluation mode
        predictions = []
        true_labels = []
        for inputs, labels in tqdm(val_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            true_labels.extend(labels.cpu().numpy())
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            predictions.extend(preds.cpu().numpy())
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        epoch_loss = running_loss / val_size
        epoch_acc = running_corrects.double() / val_size
       
        #classification_report(true_labels, predictions, target_names=list_of_classes,output_dict=True)
        report_dict = classification_report(true_labels, predictions, target_names=list_of_classes,output_dict=True)
        report_pd = pd.DataFrame(report_dict)
        report_pd.to_csv('val-classification-epoch' + str(epoch + 1) + '.csv')
        #matrix = confusion_matrix(true_labels, predictions)
        cnf_matrix = confusion_matrix(true_labels, predictions)
        df_cm = pd.DataFrame(cnf_matrix / np.sum(cnf_matrix, axis=1)[:, None], index = [i for i in classes],
                        columns = [i for i in classes])
        df_cm.to_csv('confusion-matrix-val-epoch' + str(epoch + 1) + '.csv')
        #acc = matrix.diagonal()/matrix.sum(axis=1)
        FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix) 
        FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
        TP = np.diag(cnf_matrix)
        TN = cnf_matrix.sum() - (FP + FN + TP)
        FP = FP.astype(float)
        FN = FN.astype(float)
        TP = TP.astype(float)
        TN = TN.astype(float)
        ACC = (TP+TN)/(TP+FP+FN+TN)
        TPR = TP/(TP+FN)
        PPV = TP/(TP+FP)
        pd.DataFrame(ACC, columns=['Accuracy']).to_csv('accuracy-val-epoch' + str(epoch + 1) + '.csv')
        print("accuracy for all classes in validation phase", ACC)
        print("recall for all classes in validation phase", TPR)
        print("precision for all classes in validation phase", PPV)
        print('Val Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
        
        # Delete data to clear GPU memory
        del outputs
        del preds
        del labels
        del inputs
        torch.cuda.empty_cache()
                
        

    # Save the model
    torch.save(model, 'vgg16_model.pth')






['yetgre1', 'moccha1', 'rostur1', 'walsta1', 'ratcis1', 'norfis1', 'macshr1', 'brrwhe3', 'crefra2', 'pabspa1', 'sltnig1', 'cabgre1', 'equaka1', 'sobfly1', 'rindov', 'wlwwar', 'brwwar1', 'gnbcam2', 'carcha1', 'abethr1', 'yertin1', 'spewea1', 'varsun2', 'yebduc1', 'eubeat1', 'hadibi1', 'brcale1', 'litwea1', 'sincis1', 'whbcro2', 'thrnig1', 'bubwar2', 'kvbsun1', 'blbpuf2', 'blakit1', 'colsun2', 'bltapa1', 'gycwar3', 'joygre1', 'greegr', 'vibsta2', 'wtbeat1', 'afrgos1', 'rebfir2', 'yebgre1', 'comsan', 'pygbat1', 'meypar1', 'yelbis1', 'norbro1', 'ndcsun2', 'gybfis1', 'reftin1', 'brobab1', 'refwar2', 'norcro1', 'yebapa1', 'yewgre1', 'palfly2', 'gargan', 'darter3', 'rerswa1', 'augbuz1', 'gyhbus1', 'refcro1', 'witswa1', 'gryapa1', 'pitwhy', 'eaywag1', 'blhgon1', 'yebsto1', 'hipbab1', 'whcpri2', 'spemou2', 'gobsta5', 'blksaw1', 'afecuc1', 'spepig1', 'mabeat1', 'rewsta1', 'rebhor1', 'brtcha1', 'blacuc1', 'brican1', 'rehblu1', 'gobbun1', 'supsta1', 'bkfruw1', 'litswi1', 'spmthr1', 'spwlap1', 'qua

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:01<00:00, 315MB/s] 


Epoch 1/1
-


100%|██████████| 18324/18324 [27:56<00:00, 10.93it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  PPV = TP/(TP+FP)


accuracy for all classes in train phase [0.9994106  0.9889978  0.99813355 0.99917047 0.99861381 0.99460805
 0.99907223 0.9997817  0.995896   0.99745683 0.99481543 0.99998909
 0.99371303 0.99621253 0.9992687  0.99899583 0.99377852 0.99764238
 0.99977079 0.9991923  0.9594512  0.99763147 0.9997817  0.99700932
 0.99783885 0.99407322 0.99835185 0.99704207 0.99548124 0.98271082
 0.9977297  0.9986793  0.99898492 0.99746775 0.99884302 0.99945426
 0.99982536 0.99845009 0.99934511 0.999487   0.99975987 0.99923596
 0.99963981 0.99951975 0.99973804 0.99762055 0.99994543 0.99860289
 0.99841734 0.99849375 0.99893034 0.99951975 0.99995634 0.99647449
 0.99788251 0.99860289 0.99541575 0.99969438 0.99822087 0.998461
 0.98763343 0.99703115 0.97808291 0.9992687  0.99974896 0.99965072
 0.99549215 0.99873387 0.99903949 0.9936912  0.95738829 0.98610535
 0.95964767 0.96928551 0.99008928 0.99993451 0.99843917 0.99660547
 0.99799166 0.99987994 0.99588509 0.99995634 0.99972713 0.99947609
 0.97657666 0.99917047 0

100%|██████████| 4607/4607 [02:34<00:00, 29.86it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  PPV = TP/(TP+FP)


accuracy for all classes in validation phase [0.9993922  0.98411045 0.99813319 0.99917513 0.99869758 0.99839368
 0.99904489 0.99973952 0.9972215  0.99748198 0.99457324 0.99995659
 0.99535469 0.99622297 0.99926196 0.99891465 0.9969176  0.99765564
 0.99973952 0.99917513 0.96418338 0.99769905 0.99973952 0.9966137
 0.9978293  0.9938352  0.99887123 0.99696101 0.99561518 0.97004428
 0.99791612 0.99865416 0.99904489 0.99756881 0.99882782 0.99943562
 0.99982634 0.99848051 0.99930537 0.99956586 0.99973952 0.99921855
 0.99960927 0.99947903 0.99973952 0.99761223 0.99991317 0.99861075
 0.99839368 0.99848051 0.99891465 0.99952245 0.99995659 0.99526786
 0.99822002 0.99856734 0.99683077 0.9996961  0.99826344 0.99848051
 0.98918989 0.99700443 0.97429886 0.99926196 0.99973952 0.99965269
 0.99626639 0.99878441 0.99904489 0.99400886 0.979031   0.98246071
 0.97937831 0.97069549 0.99522445 0.99991317 0.99765564 0.99652687
 0.99795954 0.99986976 0.99266302 0.99995659 0.9996961  0.99947903
 0.99092646 0.9991