# **1. LIBRARIES**

In [2]:
# Install OpenCV into the kernel's environment; it provides the `cv2` module imported below.
# NOTE(review): the version is unpinned -- consider pinning it so re-runs are reproducible.
%pip install opencv-python




In [None]:
import os
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt
from fractions import Fraction
from tqdm import tqdm
from matplotlib.pyplot import figure
from sklearn.model_selection import train_test_split

# **2. DATASET**

In [2]:
# Side length, in pixels, of the square that both loaders resize every image and mask to.
img_size = 128

In [3]:
# Shared list of image file names. data_loading_cxr() empties and refills it (sorted) on
# every call, and data_loading_mask() iterates it to load the same-named mask files, so
# the mask loader must be called right after the image loader for the same class.
names = []

def data_loading_cxr(path):
    """Load every chest X-ray in ``path`` as a resized grayscale image.

    Side effect: the module-level ``names`` list is replaced *in place* with the
    sorted file names found in ``path`` (``Thumbs.db`` excluded).
    ``data_loading_mask`` reads that list afterwards, so the two results stay
    index-aligned.

    Args:
        path: Directory containing the image files.

    Returns:
        ``np.ndarray`` built with ``dtype=int`` holding one
        ``img_size`` x ``img_size`` image per file, in sorted file-name order.

    Raises:
        OSError: If a listed file cannot be decoded as an image.
    """
    # Slice assignment mutates the shared list object instead of rebinding it,
    # so data_loading_mask() keeps seeing the refreshed names.
    names[:] = sorted(entry for entry in os.listdir(path) if entry != 'Thumbs.db')

    data = []
    for name in tqdm(names):
        image_path = os.path.join(path, name)
        cxr = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if cxr is None:
            # cv2.imread signals failure by returning None rather than raising;
            # fail here with the file name instead of an opaque error in resize.
            raise OSError(f"Could not read image '{name}' (missing or unreadable): {image_path}")
        data.append(cv2.resize(cxr, (img_size, img_size)))

    # NOTE(review): dtype=int stores the pixels in NumPy's default integer type,
    # which is wider than needed for 8-bit grayscale data. Kept for
    # compatibility with downstream cells.
    return np.array(data, dtype=int)

def data_loading_mask(path):
    """Load the mask belonging to each file name recorded by ``data_loading_cxr``.

    The file names come from the module-level ``names`` list, so this must be
    called after ``data_loading_cxr`` for the same class; mask ``i`` then
    corresponds to image ``i``.

    Args:
        path: Directory containing mask files named like the images.

    Returns:
        ``np.ndarray`` built with ``dtype=int`` holding one
        ``img_size`` x ``img_size`` mask per entry of ``names``, in that order.

    Raises:
        OSError: If the mask for a listed name is missing or cannot be decoded.
    """
    data = []
    for name in tqdm(names):
        mask_path = os.path.join(path, name)
        image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread returns None on failure instead of raising. Stop here
            # with the offending name: carrying on would either crash in resize
            # or pair an image with the wrong mask.
            raise OSError(f"Could not read mask '{name}' (missing or unreadable): {mask_path}")
        # NOTE(review): cv2.resize uses its default interpolation here, which can
        # introduce intermediate gray values at mask edges -- confirm this is
        # intended or binarize downstream.
        data.append(cv2.resize(image, (img_size, img_size)))

    return np.array(data, dtype=int)

In [4]:
# COVID class: load the radiographs first (this also records the file names), then the
# masks that share those names.
covid_images = data_loading_cxr('/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/COVID/images')
covid_masks = data_loading_mask('/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/COVID/masks')

# First cut: the trailing tenth (in load order, since shuffle=False) becomes the test set.
# With shuffle=False, random_state has no influence on the result.
covid_fit_cxr, test_cxr, covid_fit_mask, test_mask = train_test_split(
    covid_images, covid_masks, train_size=9/10, shuffle=False, random_state=42
)

# Second cut: 8/9 of the remaining 90% stays for training, the rest is validation,
# giving roughly an 80/10/10 split overall. COVID is the first class, so the results
# directly initialise the combined arrays that later cells extend.
train_cxr, val_cxr, train_mask, val_mask = train_test_split(
    covid_fit_cxr, covid_fit_mask, train_size=8/9, shuffle=False, random_state=42
)

# Drop the intermediates so the full-size arrays can be garbage collected.
del covid_images, covid_masks, covid_fit_cxr, covid_fit_mask

100%|██████████| 3616/3616 [00:31<00:00, 114.38it/s]
100%|██████████| 3616/3616 [00:21<00:00, 165.84it/s]


In [5]:
# Lung_Opacity class: load images (refreshes the recorded file names) and matching masks.
opacity_images = data_loading_cxr('/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/Lung_Opacity/images')
opacity_masks = data_loading_mask('/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/Lung_Opacity/masks')

# Ordered 90/10 cut for test, then 8/9 of the remainder for train and the rest for val.
opacity_fit_cxr, opacity_test_cxr, opacity_fit_mask, opacity_test_mask = train_test_split(
    opacity_images, opacity_masks, train_size=9/10, shuffle=False, random_state=42
)
opacity_train_cxr, opacity_val_cxr, opacity_train_mask, opacity_val_mask = train_test_split(
    opacity_fit_cxr, opacity_fit_mask, train_size=8/9, shuffle=False, random_state=42
)

# Append this class to the combined splits along the sample axis (axis 0, the default).
# NOTE: re-running this cell appends the class again -- restart from the COVID cell instead.
train_cxr = np.concatenate([train_cxr, opacity_train_cxr])
train_mask = np.concatenate([train_mask, opacity_train_mask])
val_cxr = np.concatenate([val_cxr, opacity_val_cxr])
val_mask = np.concatenate([val_mask, opacity_val_mask])
test_cxr = np.concatenate([test_cxr, opacity_test_cxr])
test_mask = np.concatenate([test_mask, opacity_test_mask])

# Release the per-class intermediates.
del opacity_images, opacity_masks
del opacity_fit_cxr, opacity_fit_mask
del opacity_train_cxr, opacity_train_mask
del opacity_val_cxr, opacity_val_mask
del opacity_test_cxr, opacity_test_mask

100%|██████████| 6012/6012 [00:57<00:00, 105.33it/s]
100%|██████████| 6012/6012 [00:39<00:00, 150.64it/s]


In [6]:
# Viral Pneumonia class: load images (refreshes the recorded file names) and matching masks.
pneumonia_images = data_loading_cxr('/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/Viral Pneumonia/images')
pneumonia_masks = data_loading_mask('/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/Viral Pneumonia/masks')

# Ordered 90/10 cut for test, then 8/9 of the remainder for train and the rest for val.
pneumonia_fit_cxr, pneumonia_test_cxr, pneumonia_fit_mask, pneumonia_test_mask = train_test_split(
    pneumonia_images, pneumonia_masks, train_size=9/10, shuffle=False, random_state=42
)
pneumonia_train_cxr, pneumonia_val_cxr, pneumonia_train_mask, pneumonia_val_mask = train_test_split(
    pneumonia_fit_cxr, pneumonia_fit_mask, train_size=8/9, shuffle=False, random_state=42
)

# Append this class to the combined splits along the sample axis (axis 0, the default).
# NOTE: re-running this cell appends the class again -- restart from the COVID cell instead.
train_cxr = np.concatenate([train_cxr, pneumonia_train_cxr])
train_mask = np.concatenate([train_mask, pneumonia_train_mask])
val_cxr = np.concatenate([val_cxr, pneumonia_val_cxr])
val_mask = np.concatenate([val_mask, pneumonia_val_mask])
test_cxr = np.concatenate([test_cxr, pneumonia_test_cxr])
test_mask = np.concatenate([test_mask, pneumonia_test_mask])

# Release the per-class intermediates.
del pneumonia_images, pneumonia_masks
del pneumonia_fit_cxr, pneumonia_fit_mask
del pneumonia_train_cxr, pneumonia_train_mask
del pneumonia_val_cxr, pneumonia_val_mask
del pneumonia_test_cxr, pneumonia_test_mask

100%|██████████| 1345/1345 [00:12<00:00, 105.06it/s]
100%|██████████| 1345/1345 [00:08<00:00, 154.71it/s]


In [7]:
# Normal class: load images (refreshes the recorded file names) and matching masks.
normal_images = data_loading_cxr('/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/Normal/images')
normal_masks = data_loading_mask('/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/Normal/masks')

# Ordered 90/10 cut for test, then 8/9 of the remainder for train and the rest for val.
normal_fit_cxr, normal_test_cxr, normal_fit_mask, normal_test_mask = train_test_split(
    normal_images, normal_masks, train_size=9/10, shuffle=False, random_state=42
)
normal_train_cxr, normal_val_cxr, normal_train_mask, normal_val_mask = train_test_split(
    normal_fit_cxr, normal_fit_mask, train_size=8/9, shuffle=False, random_state=42
)

# Append this class to the combined splits along the sample axis (axis 0, the default).
# NOTE: re-running this cell appends the class again -- restart from the COVID cell instead.
train_cxr = np.concatenate([train_cxr, normal_train_cxr])
train_mask = np.concatenate([train_mask, normal_train_mask])
val_cxr = np.concatenate([val_cxr, normal_val_cxr])
val_mask = np.concatenate([val_mask, normal_val_mask])
test_cxr = np.concatenate([test_cxr, normal_test_cxr])
test_mask = np.concatenate([test_mask, normal_test_mask])

# Release the per-class intermediates.
del normal_images, normal_masks
del normal_fit_cxr, normal_fit_mask
del normal_train_cxr, normal_train_mask
del normal_val_cxr, normal_val_mask
del normal_test_cxr, normal_test_mask

100%|██████████| 10192/10192 [01:34<00:00, 107.29it/s]
100%|██████████| 10192/10192 [01:04<00:00, 159.14it/s]


In [8]:
# Sanity check: each image array should have the same shape as its mask array.
# One shape per line, in train / val / test order (images first, then masks).
print(
    *(np.shape(split) for split in (train_cxr, train_mask, val_cxr, val_mask, test_cxr, test_mask)),
    sep='\n',
)

(16927, 128, 128)
(16927, 128, 128)
(2119, 128, 128)
(2119, 128, 128)
(2119, 128, 128)
(2119, 128, 128)


In [9]:
# Persist every split as a pair of .npy files (cxr.npy / mask.npy) in its own folder.
_split_exports = (
    ('/kaggle/working/without_convexhull/train', train_cxr, train_mask),
    ('/kaggle/working/without_convexhull/val', val_cxr, val_mask),
    ('/kaggle/working/without_convexhull/test', test_cxr, test_mask),
)
for _split_dir, _split_cxr, _split_mask in _split_exports:
    # exist_ok=True makes this safe to re-run; one call per folder is enough.
    os.makedirs(_split_dir, exist_ok=True)
    np.save(f'{_split_dir}/cxr.npy', _split_cxr)
    np.save(f'{_split_dir}/mask.npy', _split_mask)

# Keep the notebook namespace free of the loop helpers.
del _split_exports, _split_dir, _split_cxr, _split_mask