# Import libraries

In [1]:
%matplotlib inline
from PIL import Image
from os import listdir
from skimage import data, img_as_float
from skimage import exposure
from skimage.filters import gaussian
from skimage.transform import rotate, AffineTransform, warp
from skimage.util import random_noise
from sklearn.metrics import classification_report, confusion_matrix
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torchvision import transforms, utils, datasets
from tqdm.notebook import tqdm
import matplotlib
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import seaborn as sns
import skimage.io as io
import torch
import torch.nn as nn
import torch.nn.functional
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import warnings
warnings.filterwarnings('ignore')

# User-defined functions (UDF)

In [2]:
def split_data(df, testRatio, y, random_state=None):
    """Split ``df`` into a shuffled, stratified train part and test part.

    Parameters
    ----------
    df : data to split; handed unchanged to ``train_test_split``.
    testRatio : forwarded as ``test_size``.
    y : labels forwarded as ``stratify``.
    random_state : optional seed forwarded to ``train_test_split``. The
        default ``None`` keeps the previous (unseeded) behaviour; pass an
        int to obtain the same split on every run.

    Returns
    -------
    tuple
        ``(train, test)`` exactly as returned by ``train_test_split``.
    """
    from sklearn.model_selection import train_test_split
    train, test = train_test_split(
        df,
        test_size=testRatio,
        shuffle=True,
        stratify=y,
        random_state=random_state,
    )
    return train, test

In [3]:
def Unique_values_from_Column(df, ColumnName):
    """Return the distinct values found in column ``ColumnName`` of ``df`` as a list."""
    column = df[ColumnName]
    distinct = column.unique()
    return distinct.tolist()

In [4]:
def X_ray_image_nameList_perCategory(df, ColumnName, CategoryName):
    """List the distinct ``X_ray_image_name`` values of the rows whose
    ``ColumnName`` entry equals ``CategoryName``."""
    in_category = df[ColumnName] == CategoryName
    image_names = df.loc[in_category, 'X_ray_image_name']
    return image_names.unique().tolist()

In [5]:
def ImagePathList_perCategory(df, ColumnName, CategoryName):
    """List the distinct ``ImagePath`` values of the rows whose ``ColumnName``
    entry equals ``CategoryName``."""
    in_category = df[ColumnName] == CategoryName
    image_paths = df.loc[in_category, 'ImagePath']
    return image_paths.unique().tolist()

In [6]:
def create_directory(Path):
    """Create the directory ``Path`` (with any missing parents) if nothing exists there yet.

    Returns ``True`` when the directory was created by this call and
    ``False`` when the path already existed.
    """
    import os
    if os.path.exists(Path):
        return False
    os.makedirs(Path)
    return True

In [7]:
def get_ListOfFiles(path):
    """Collect the bare file names (no directory part) of every file found
    under ``path``, including files in sub-directories."""
    import os
    return [
        filename
        for _root, _dirs, filenames in os.walk(path)
        for filename in filenames
    ]

In [8]:
def remove_files_from_folder(directoryPath, filesEndWith):
    """Delete every file in ``directoryPath`` whose name matches the glob
    pattern ``filesEndWith`` (for example ``"*.png"``)."""
    import glob, os
    pattern = os.path.join(directoryPath, filesEndWith)
    for matched_path in glob.glob(pattern):
        os.remove(matched_path)

In [9]:
def Folder_perClass(df, ColumnName, SourcePath, targetPath):
    """Copy images from ``SourcePath`` into one sub-folder of ``targetPath`` per label.

    For every distinct value of ``df[ColumnName]`` a folder
    ``targetPath/<label>`` is created, and each image listed for that label
    (column ``X_ray_image_name``) is copied into it unless a file of that
    name is already there.

    The copy is best effort: images that cannot be copied (for example
    because they are not present in ``SourcePath``) are skipped silently.

    Parameters
    ----------
    df : DataFrame with the columns ``ColumnName`` and ``X_ray_image_name``.
    ColumnName : name of the label column.
    SourcePath : directory holding the source images.
    targetPath : directory under which the per-label folders are created.
    """
    import os
    import shutil

    for label in Unique_values_from_Column(df, ColumnName):
        # os.path.join keeps the paths valid on every platform; the previous
        # hard-coded "\\" separators only worked on Windows.
        label_dir = os.path.join(targetPath, label)
        try:
            create_directory(label_dir)
        except OSError:
            # Best effort: if the folder cannot be created the copies below
            # fail and are skipped as well.
            pass
        for imageName in X_ray_image_nameList_perCategory(df, ColumnName, label):
            original = os.path.join(SourcePath, imageName)
            target = os.path.join(label_dir, imageName)
            if os.path.isfile(target):
                continue
            try:
                shutil.copyfile(original, target)
            except OSError:
                # Missing or unreadable source image: skip it. Only
                # file-system errors are ignored, so interrupts and
                # programming errors still surface.
                continue

In [10]:
def Folder_perClass_remove_image_SourcePath(df, ColumnName, SourcePath, targetPath):
    """Move images from ``SourcePath`` into one sub-folder of ``targetPath`` per label.

    Works like a per-label copy, but after an image has been copied
    successfully its source file is deleted. Images for which a file of the
    same name already exists at the target are left untouched.

    The operation is best effort: images that cannot be copied or removed
    (for example because they are not present in ``SourcePath``) are
    skipped silently.

    Parameters
    ----------
    df : DataFrame with the columns ``ColumnName`` and ``X_ray_image_name``.
    ColumnName : name of the label column.
    SourcePath : directory holding the source images.
    targetPath : directory under which the per-label folders are created.
    """
    import os
    import shutil

    for label in Unique_values_from_Column(df, ColumnName):
        # os.path.join keeps the paths valid on every platform; the previous
        # hard-coded "\\" separators only worked on Windows.
        label_dir = os.path.join(targetPath, label)
        try:
            create_directory(label_dir)
        except OSError:
            # Best effort: if the folder cannot be created the copies below
            # fail and are skipped as well.
            pass
        for imageName in X_ray_image_nameList_perCategory(df, ColumnName, label):
            original = os.path.join(SourcePath, imageName)
            target = os.path.join(label_dir, imageName)
            if os.path.isfile(target):
                continue
            try:
                shutil.copyfile(original, target)
                # Delete exactly the file that was copied. Going through a
                # glob pattern built from the file name would misbehave for
                # names containing "[", "*" or "?".
                os.remove(original)
            except OSError:
                # Missing or unreadable source image: skip it. Only
                # file-system errors are ignored, so interrupts and
                # programming errors still surface.
                continue

In [11]:
def organise_data1_separate_folders():
    """Sort the covid-chestxray-dataset images into ``data/train`` and ``data/test``.

    Reads the notebook-level DataFrames ``train`` and ``test`` (they must
    exist before this function is called).

    Step 1 copies every image into a folder named after its ``Label`` value.
    Step 2 moves the images inside the ``Pnemonia`` folder of each split
    into sub-folders named after their ``Label_2`` value.
    """
    import os

    create_directory("data")
    create_directory("data/train")
    create_directory("data/test")

    # Built with os.path.join so the path is valid on every platform; the
    # previous backslash-separated literal only resolved on Windows.
    images_dir = os.path.join(
        "covid-chestxray-dataset-master",
        "covid-chestxray-dataset-master",
        "images",
    )
    splits = (("data/train", train), ("data/test", test))

    # Step 1: one folder per 'Label' value.
    for split_dir, split_df in splits:
        Folder_perClass(split_df, 'Label', images_dir, split_dir)

    # Step 2: refine the Pnemonia folder by 'Label_2'. Source and target are
    # the same directory; images are moved into its sub-folders.
    for split_dir, split_df in splits:
        pneumonia_dir = split_dir + "/Pnemonia"
        Folder_perClass_remove_image_SourcePath(
            split_df, 'Label_2', pneumonia_dir, pneumonia_dir
        )

In [12]:
def organise_data2_separate_folders():
    """Sort the Coronahack-Chest-XRay-Dataset images into ``data/train`` and ``data/test``.

    Reads the notebook-level DataFrames ``train`` and ``test`` (they must
    exist before this function is called).

    Step 1 copies every image into a folder named after its ``Label`` value.
    Both the dataset's own ``train`` and ``test`` directories are searched
    for each split.
    Step 2 moves the images inside the ``Pnemonia`` folder of each split
    into sub-folders named after their ``Label_2`` value.
    """
    import os

    create_directory("data")
    create_directory("data/train")
    create_directory("data/test")

    # Built with os.path.join so the paths are valid on every platform; the
    # previous backslash-separated literals only resolved on Windows.
    dataset_root = os.path.join(
        "Coronahack-Chest-XRay-Dataset",
        "Coronahack-Chest-XRay-Dataset",
    )
    source_dirs = (
        os.path.join(dataset_root, "train"),
        os.path.join(dataset_root, "test"),
    )
    splits = (("data/train", train), ("data/test", test))

    # Step 1: one folder per 'Label' value, looking in both source folders.
    for split_dir, split_df in splits:
        for source_dir in source_dirs:
            Folder_perClass(split_df, 'Label', source_dir, split_dir)

    # Step 2: refine the Pnemonia folder by 'Label_2'. Source and target are
    # the same directory; images are moved into its sub-folders.
    for split_dir, split_df in splits:
        pneumonia_dir = split_dir + "/Pnemonia"
        Folder_perClass_remove_image_SourcePath(
            split_df, 'Label_2', pneumonia_dir, pneumonia_dir
        )

In [13]:
def number_of_elements_in_directory(dirPath):
    """Return how many entries (files and folders) sit directly inside ``dirPath``."""
    import os
    return len(os.listdir(dirPath))

In [14]:
# Plot the images in a single-row grid with one column per entry of images_path_arr.
def plotImages(images_path_arr):
    """Show the images at the given paths side by side in one row.

    Images with a channel axis are reduced to a single channel with a
    weighted sum of their first three channels before being drawn.

    Parameters
    ----------
    images_path_arr : sequence of image file paths. An empty sequence draws
        nothing.
    """
    if len(images_path_arr) == 0:
        # plt.subplots rejects a grid with zero columns.
        return
    # squeeze=False always yields a 2-D array of Axes. Without it a single
    # image makes subplots return a bare Axes, which has no flatten().
    fig, axes = plt.subplots(1, len(images_path_arr), figsize=(20,20), squeeze=False)
    axes = axes.flatten()
    for imagePath, ax in zip(images_path_arr, axes):
        img = mpimg.imread(imagePath)
        if len(img.shape) == 3:
            # Weighted sum of R, G and B; a fourth (alpha) channel is dropped.
            rgb_weights = [0.2989, 0.5870, 0.1140]
            img = np.dot(img[...,:3], rgb_weights)
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

In [15]:
def Remove_duplicated_images(df):
    """Return the rows of ``df`` whose ``ImagePath`` points to an existing file.

    Side effect: a boolean column ``Duplicted`` is written into the passed
    ``df``; it is ``True`` for rows whose ``ImagePath`` is not a file.
    """
    from pathlib import Path

    def _is_not_a_file(image_path):
        return not Path(image_path).is_file()

    df['Duplicted'] = df['ImagePath'].apply(_is_not_a_file)
    file_present = df['Duplicted'] == False
    return df.loc[file_present]

In [16]:
def autolabel(rects):
    """Write each bar's height just above the bar, for bars with a positive height.

    The labels are drawn on the notebook-level Axes named ``ax``, which must
    be defined before this function is called.
    """
    for bar in rects:
        bar_height = bar.get_height()
        if not bar_height > 0:
            continue
        bar_center_x = bar.get_x() + bar.get_width() / 2
        ax.annotate(
            f'{bar_height}',
            xy=(bar_center_x, bar_height),
            xytext=(0, 3),  # shift the text 3 points upwards
            textcoords="offset points",
            ha='center',
            va='bottom',
            fontsize=10,
        )

# Reload data

In [17]:
# Load the image metadata table from the CSV file inside the data folder.
CsvPath='data/data.csv'
df = pd.read_csv(CsvPath)

# Stratified data split

In [18]:
# Replace missing values with empty strings in every column.
df=df.fillna("")
# Hold out 30% of the rows as test set, stratified on "Label_2_Virus_category".
# NOTE(review): no seed is passed to the split, so the train/test membership
# can differ between runs.
train,test=split_data (df, 0.3, df["Label_2_Virus_category"])

In [19]:
# Tag every row with the split it belongs to. assign() returns new frames, so
# the column is added without writing into the objects returned by the split.
train = train.assign(Dataset_Type="train")
test = test.assign(Dataset_Type="test")
# DataFrame.append was removed in pandas 2.0; pd.concat with the same options
# builds the same combined frame.
df = pd.concat([train, test], ignore_index=True, sort=False)
Unique_values_from_Column (df, 'Dataset_Type')

['train', 'test']

In [20]:
create_directory ("data")
# index=False keeps the row index out of the file. This file is the one the
# notebook reads its metadata from, and writing the index would add another
# unnamed column every time the notebook is run.
df.to_csv("data/data.csv", index=False)

# Organize folders

In [21]:
# Sort the images of both source datasets into data/train and data/test,
# one folder per label. Both calls read the global `train` and `test` frames.
organise_data1_separate_folders()
organise_data2_separate_folders()