## X-ray image loading and analysis

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os, cv2, json, shutil
import tensorflow as tf
import seaborn as sns


from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input
from keras.preprocessing import image_dataset_from_directory
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras import models, layers, regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [2]:
# Read the per-image metadata table for the COVID19_SBU dataset
# (path is relative to the notebook's working directory).
metadata = pd.read_csv(filepath_or_buffer='COVID19_SBU/metadata.csv')

# Preview the first rows; head() returns five rows by default.
metadata.head()

Unnamed: 0,id,patient_id,enhanced,series,filename
0,IMG00001,A000801,False,01-03-1901-CHEST AP PORT-62879,A000801/01-03-1901-CHEST AP PORT-62879/1.00000...
1,IMG00002,A000801,True,01-03-1901-CHEST AP PORT-62879,A000801/01-03-1901-CHEST AP PORT-62879/2.00000...
2,IMG00003,A000801,False,01-03-1901-CHEST AP PORT-62879,A000801/01-03-1901-CHEST AP PORT-62879/3.00000...
3,IMG00004,A000801,True,01-03-1901-CHEST AP PORT-62879,A000801/01-03-1901-CHEST AP PORT-62879/4.00000...
4,IMG00005,A000801,False,01-04-1901-CHEST AP PORT-17928,A000801/01-04-1901-CHEST AP PORT-17928/1.00000...


In [3]:
# Show the complete (untruncated) relative path stored for the row labelled 0.
metadata.loc[0, 'filename']

'A000801/01-03-1901-CHEST AP PORT-62879/1.000000-AP-12195/1-1.png'

In [4]:
# Count how many rows are flagged enhanced versus not enhanced.
metadata.enhanced.value_counts()

False    6938
True     6700
Name: enhanced, dtype: int64

In [5]:
# Print the frame summary: index range, per-column non-null counts, dtypes and memory use.
metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13638 entries, 0 to 13637
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   id          13638 non-null  object
 1   patient_id  13638 non-null  object
 2   enhanced    13638 non-null  bool  
 3   series      13638 non-null  object
 4   filename    13638 non-null  object
dtypes: bool(1), object(4)
memory usage: 439.6+ KB


In [6]:
# List the column labels available in the metadata table.
metadata.columns

Index(['id', 'patient_id', 'enhanced', 'series', 'filename'], dtype='object')

In [7]:
# Number of metadata rows per patient, most frequent first.
metadata['patient_id'].value_counts()

A860070    236
A763364    183
A221230    175
A701295    174
A766483    173
          ... 
A327819      1
A685471      1
A974180      1
A776010      1
A031437      1
Name: patient_id, Length: 1365, dtype: int64

In [8]:
# Count distinct patient ids; dropna=False keeps the count identical to
# len(unique()), which would also count a missing value as one entry.
metadata['patient_id'].nunique(dropna=False)

1365

In [9]:
# Keep only the rows flagged as enhanced.
# A boolean mask on the (bool-typed) 'enhanced' column selects rows directly,
# instead of dropping by index label, which would also remove unrelated rows
# if the index ever contained duplicate labels. The original index labels are
# preserved, and .copy() gives an independent frame so later column
# assignments do not raise SettingWithCopyWarning.
# The name `metadata` is deliberately rebound because the following cells
# expect it to hold the filtered table.
metadata = metadata.loc[metadata['enhanced']].copy()
metadata.head()

Unnamed: 0,id,patient_id,enhanced,series,filename
1,IMG00002,A000801,True,01-03-1901-CHEST AP PORT-62879,A000801/01-03-1901-CHEST AP PORT-62879/2.00000...
3,IMG00004,A000801,True,01-03-1901-CHEST AP PORT-62879,A000801/01-03-1901-CHEST AP PORT-62879/4.00000...
5,IMG00006,A000801,True,01-04-1901-CHEST AP PORT-17928,A000801/01-04-1901-CHEST AP PORT-17928/2.00000...
7,IMG00008,A000801,True,01-04-1901-CHEST AP PORT-17928,A000801/01-04-1901-CHEST AP PORT-17928/4.00000...
9,IMG00010,A000801,True,01-04-1901-CHEST AP PORT-38592,A000801/01-04-1901-CHEST AP PORT-38592/2.00000...


In [10]:
# (rows, columns) of the table after the non-enhanced rows were removed.
metadata.shape

(6700, 5)