In [1]:
import os
import re
import requests
from PIL import Image
import pandas as pd
import numpy as np

# Libraries for data visualization
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Deep learning libraries: preprocessing, modeling, evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D,MaxPooling2D, Dropout, Flatten, Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.preprocessing import image
from tensorflow import keras
from tensorflow.keras.optimizers.legacy import Adam, Adamax
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16

from tensorflow.keras import regularizers
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Root of the training split. Each immediate sub-directory is treated as one
# class and its name becomes the label.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
train_data_dir = '/Users/aanshsavla/Desktop/Aansh/AI/ML/DataSets/chest_xray/chest_xray/train'
filepaths = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps the dataframe's
# row order reproducible across runs.
for fold in sorted(os.listdir(train_data_dir)):
    foldpath = os.path.join(train_data_dir, fold)
    # Keep only real class directories. This skips '.DS_Store' as before, and
    # also any other stray file that would make os.listdir(foldpath) raise.
    if fold.startswith('.') or not os.path.isdir(foldpath):
        continue
    for file in sorted(os.listdir(foldpath)):
        # Hidden entries inside a class folder (e.g. a nested '.DS_Store')
        # are not images and must not become dataframe rows.
        if file.startswith('.'):
            continue
        filepaths.append(os.path.join(foldpath, file))
        labels.append(fold)

# One row per image: its path and its class label.
train_df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})

In [3]:
# Root of the validation split. Each immediate sub-directory is treated as one
# class and its name becomes the label.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
val_data_dir = '/Users/aanshsavla/Desktop/Aansh/AI/ML/DataSets/chest_xray/chest_xray/val'
filepaths = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps the dataframe's
# row order reproducible across runs.
for fold in sorted(os.listdir(val_data_dir)):
    foldpath = os.path.join(val_data_dir, fold)
    # Keep only real class directories. This skips '.DS_Store' as before, and
    # also any other stray file that would make os.listdir(foldpath) raise.
    if fold.startswith('.') or not os.path.isdir(foldpath):
        continue
    for file in sorted(os.listdir(foldpath)):
        # Hidden entries inside a class folder (e.g. a nested '.DS_Store')
        # are not images and must not become dataframe rows.
        if file.startswith('.'):
            continue
        filepaths.append(os.path.join(foldpath, file))
        labels.append(fold)

# One row per image: its path and its class label.
val_df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})

In [4]:
# Square input resolution (in pixels) used for the VGG16 input shape and for
# the target size the image generator resizes every picture to.
# NOTE(review): VGG16's canonical input size is 224x224 -- confirm 244 is intentional.
img_width = img_height = 244

In [5]:
# VGG16 with ImageNet weights and without its top (include_top=False), sized
# for img_width x img_height RGB inputs.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_width, img_height, 3),
)

# Print the layer-by-layer architecture of the convolutional base.
conv_base.summary()

2024-02-19 17:59:27.804428: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2024-02-19 17:59:27.804451: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-02-19 17:59:27.804457: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-02-19 17:59:27.804495: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-02-19 17:59:27.804513: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 244, 244, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 244, 244, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 244, 244, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 122, 122, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 122, 122, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 122, 122, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 61, 61, 128)       0     

In [6]:
# Mini-batch size, used both when extracting features and when fitting the
# classifier.
batch_size = 16

# Image generator whose only transformation is multiplying pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1.0 / 255)

def extract_features(df, sample_count):
    """Run the images listed in ``df`` through ``conv_base`` and collect the outputs.

    Images are read with the module-level ``datagen`` in batches of
    ``batch_size`` and resized to ``(img_width, img_height)``.

    Args:
        df: DataFrame with a 'filepaths' column (image paths) and a 'labels'
            column (class names).
        sample_count: Number of samples to extract. It fixes the first
            dimension of both returned arrays. If it exceeds the number of
            images the generator yields per pass, the generator wraps around
            and images repeat.

    Returns:
        Tuple ``(features, labels)``:
            features: array of shape ``(sample_count, *conv_base.output_shape[1:])``.
            labels: one-hot array of shape ``(sample_count, n_classes)``, where
                ``n_classes`` is the number of classes the generator found.

    Raises:
        ValueError: if samples are requested but the generator found no
            valid images.
    """
    # Preprocess data
    generator = datagen.flow_from_dataframe(df,
                                            target_size=(img_width, img_height),
                                            x_col='filepaths', y_col='labels',
                                            batch_size=batch_size,
                                            class_mode='categorical',
                                            color_mode='rgb')

    # Derive the buffer shapes from the model and the data instead of
    # hard-coding (7, 7, 512) and 2, so they cannot drift out of sync.
    features = np.zeros(shape=(sample_count,) + tuple(conv_base.output_shape[1:]))
    labels = np.zeros(shape=(sample_count, len(generator.class_indices)))

    if sample_count <= 0:
        return features, labels
    if generator.n == 0:
        raise ValueError("No valid images found; cannot extract features.")

    # Pass data through convolutional base. A running write offset is used
    # rather than i * batch_size because batches may be shorter than batch_size.
    filled = 0
    for inputs_batch, labels_batch in generator:
        with tf.device("/cpu:0"):
            features_batch = conv_base.predict(inputs_batch)
        # Never write past the end of the pre-allocated buffers.
        take = min(len(features_batch), sample_count - filled)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
        print(filled)
        # The generator loops indefinitely, so stop once the buffers are full.
        if filled >= sample_count:
            break
    return features, labels

In [7]:
# Extract convolutional features for both splits. The sample counts equal the
# number of images the generator reports for each split (5216 train, 16 val).
train_features, train_labels = extract_features(df=train_df, sample_count=5216)
validation_features, validation_labels = extract_features(df=val_df, sample_count=16)

Found 5216 validated image filenames belonging to 2 classes.


2024-02-19 17:59:33.763319: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


16
32
48
64
80
96
112
128
144
160
176
192
208
224
240
256
272
288
304
320
336
352
368
384
400
416
432
448
464
480
496
512
528
544
560
576
592
608
624
640
656
672
688
704
720
736
752
768
784
800
816
832
848
864
880
896
912
928
944
960
976
992
1008
1024
1040
1056
1072
1088
1104
1120
1136
1152
1168
1184
1200
1216
1232
1248
1264
1280
1296
1312
1328
1344
1360
1376
1392
1408
1424
1440
1456
1472
1488
1504
1520
1536
1552
1568
1584
1600
1616
1632
1648
1664
1680
1696
1712
1728
1744
1760
1776
1792
1808
1824
1840
1856
1872
1888
1904
1920
1936
1952
1968
1984
2000
2016
2032
2048
2064
2080
2096
2112
2128
2144
2160
2176
2192
2208
2224
2240
2256
2272
2288
2304
2320
2336
2352
2368
2384
2400


2416
2432
2448
2464
2480
2496
2512
2528
2544
2560
2576
2592
2608
2624
2640
2656
2672
2688
2704
2720
2736
2752
2768
2784
2800
2816
2832
2848
2864
2880
2896
2912
2928
2944
2960
2976
2992
3008
3024
3040
3056
3072
3088
3104
3120
3136
3152
3168
3184
3200
3216
3232
3248
3264
3280
3296
3312
3328
3344
3360
3376
3392
3408
3424
3440
3456
3472
3488
3504
3520
3536
3552
3568
3584
3600
3616
3632
3648
3664
3680
3696
3712
3728
3744
3760
3776
3792
3808
3824
3840
3856
3872
3888
3904
3920
3936
3952
3968
3984
4000
4016
4032
4048
4064
4080
4096
4112
4128
4144
4160
4176
4192
4208
4224
4240
4256
4272
4288
4304
4320
4336
4352
4368
4384
4400
4416
4432
4448
4464
4480
4496
4512
4528
4544
4560
4576
4592
4608
4624
4640
4656
4672
4688
4704
4720
4736
4752
4768
4784


4800
4816
4832
4848
4864
4880
4896
4912
4928
4944
4960
4976
4992
5008
5024
5040
5056
5072
5088
5104
5120
5136
5152
5168
5184
5200
5216
Found 16 validated image filenames belonging to 2 classes.
16


In [8]:
# Small classifier on top of the extracted features: average each of the 512
# feature maps over its 7x7 grid, then a 2-way softmax.
model = Sequential([
    GlobalAveragePooling2D(input_shape=(7, 7, 512)),
    Dense(2, activation='softmax'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 global_average_pooling2d (  (None, 512)               0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense (Dense)               (None, 2)                 1026      
                                                                 
Total params: 1026 (4.01 KB)
Trainable params: 1026 (4.01 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [9]:
# Number of passes over the training features when fitting the classifier.
epochs = 10

In [10]:
# Configure training: Adam with default settings, categorical cross-entropy
# for the one-hot labels, accuracy reported under the name 'acc'.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['acc'],
)

# Train model on the pre-extracted features, pinned to the CPU device.
with tf.device("/cpu:0"):
    history = model.fit(
        x=train_features,
        y=train_labels,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(validation_features, validation_labels),
    )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
