https://www.kaggle.com/code/shivan118/x-ray-detecting-using-cnn

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import tensorflow.keras as kr

In [2]:
batch_size = 32
img_height = 180
img_width = 180


In [3]:
train_ds = kr.utils.image_dataset_from_directory(
    "COVID-19_Radiography_Dataset",
    subset = "training",
    validation_split = 0.2,
    seed = 123,
    image_size = (img_height,img_width)
)
test_ds = kr.utils.image_dataset_from_directory(
    "COVID-19_Radiography_Dataset",
    subset = "validation",
    validation_split = 0.2,
    seed = 123,
    image_size = (img_height,img_width)
)

Found 15153 files belonging to 3 classes.
Using 12123 files for training.
Found 15153 files belonging to 3 classes.
Using 3030 files for validation.


In [4]:
def count(counts, batch):
  features, labels = batch
  class_1 = labels == 1
  class_1 = tf.cast(class_1, tf.int32)

  class_0 = labels == 0
  class_0 = tf.cast(class_0, tf.int32)

  class_2 = labels == 2
  class_2 = tf.cast(class_0, tf.int32)
  counts['class_0'] += tf.reduce_sum(class_0)
  counts['class_1'] += tf.reduce_sum(class_1)
  counts['class_2'] += tf.reduce_sum(class_2)

  return counts

counts = train_ds.take(10).reduce(
    initial_state={'class_0': 0, 'class_1': 0, 'class_2' : 0},
    reduce_func = count)

counts = np.array([counts['class_0'].numpy(),
                   counts['class_1'].numpy(),
                   counts['class_2']]).astype(np.float32)

fractions = counts/counts.sum()
print(fractions)

[0.21276596 0.5744681  0.21276596]


In [5]:
def class_func(features, label):
  return label

In [6]:

resample_ds = (
    train_ds
    .unbatch()
    .rejection_resample(class_func, target_dist=[0.33,0.33,0.33],
                        initial_dist=fractions)
    .batch(10))

Instructions for updating:
Use tf.print instead of tf.Print. Note that tf.print returns a no-output operator that directly prints the output. Outside of defuns or eager mode, this operator will not be executed unless it is directly specified in session.run or used as a control dependency for other operators. This is only a concern in graph mode. Below is an example of how to ensure tf.print executes in graph mode:



In [7]:
balanced_train_ds = resample_ds.map(lambda extra_label, features_and_label: features_and_label)

In [8]:
c_names = train_ds.class_names

In [9]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().prefetch(buffer_size = AUTOTUNE)
val_ds = test_ds.cache().prefetch(buffer_size = AUTOTUNE)

In [10]:
import matplotlib.pyplot as plt
class_names = train_ds.class_names
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
  for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title(class_names[labels[i]])
    plt.axis("off")

AttributeError: '_PrefetchDataset' object has no attribute 'class_names'

In [10]:
from keras.layers import Rescaling,Conv2D,MaxPooling2D,Flatten,Dense,Dropout
num_classes = 4

model = tf.keras.Sequential([
    Rescaling(1./255),
    Conv2D(32,3,activation = "relu"),
    MaxPooling2D(),
    Conv2D(32,3,activation = "relu"),
    MaxPooling2D(),
    Conv2D(32,3,activation = "relu"),
    MaxPooling2D(),
    Flatten(),
    Dense(128,activation = 'relu'),
    Dense(num_classes,activation="softmax")
]
)

In [11]:
def build_model(hp):
    num_classes = 4
    filters = hp.Int("filters",min_value = 8, max_value = 32, step = 4)
    units_1 = hp.Int("units_1",min_value = 32, max_value = 128, step = 32)
    units_2 =  hp.Int("units_2",min_value = 32, max_value = 128, step = 32)
    activation = "relu"
    dropout = hp.Boolean("dropout")
    lr = hp.Float("lr",min_value = 1e-5, max_value = 1e-3,sampling="log")
    model = tf.keras.Sequential([
        Rescaling(1./255),
        Conv2D(filters=filters,kernel_size=3,activation = activation),
        MaxPooling2D(),
        Conv2D(filters=filters,kernel_size=3,activation = activation),
        MaxPooling2D(),
        Conv2D(filters=filters,kernel_size=3,activation = activation),
        MaxPooling2D(),
        Flatten(),
        Dense(units=units_1,activation = activation),
        Dense(units=units_2,activation = activation),
        
    ]
    )  
    if dropout:
        model.add(Dropout(rate=0.25)) 
    model.add(Dense(num_classes,activation="softmax"))
    optimzer = kr.optimizers.Adam(learning_rate=lr)
    model.compile(
    optimizer=optimzer,
    loss = tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"]
    )
    return model

In [12]:
import keras_tuner
tuner = keras_tuner.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10)

Using TensorFlow backend
Reloading Tuner from .\untitled_project\tuner0.json


In [13]:
tuner.search(balanced_train_ds, epochs=2, validation_data=(test_ds))

In [14]:
optimzer = kr.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer="Adam",
    loss = tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"]
)

In [15]:
model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=2
)

Epoch 1/2
 19/379 [>.............................] - ETA: 1:26 - loss: 0.8013 - accuracy: 0.6678

KeyboardInterrupt: 

In [16]:
best_model = tuner.get_best_models()[0]

In [17]:
best_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=2
)

Epoch 1/2


Epoch 2/2


<keras.callbacks.History at 0x147a971be20>

In [127]:

best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 180, 180, 3)       0         
                                                                 
 conv2d (Conv2D)             (None, 178, 178, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 89, 89, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 87, 87, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 43, 43, 32)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 41, 41, 32)        9

In [128]:
y_pred = np.argmax(best_model.predict(val_ds),axis=1)



In [None]:
y_pred

In [None]:
y_labels = np.array(val_ds.as_numpy_iterator())

In [129]:
train_label = np.concatenate([y for x, y in val_ds], axis=0)

test_label = np.concatenate([y for x, y in val_ds], axis=0) 

In [None]:
test_label

In [None]:
y_labels

In [130]:
tf.math.confusion_matrix(labels=test_label,predictions=y_pred)

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 630,  101,    5],
       [  82, 1943,    6],
       [   8,   24,  231]])>

In [None]:
c_names