In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras
from keras.layers import Dense,AveragePooling2D
from keras.models import Model
from keras import layers
import tensorflow as tf
from keras.utils import to_categorical
from sklearn.utils import shuffle
from keras.metrics import CategoricalAccuracy,BinaryAccuracy,Recall,Precision
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
import cv2

In [None]:
# Mount Google Drive at /content/drive so the dataset paths used below resolve.
# force_remount=True is passed so an existing mount is replaced rather than reused.
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

In [None]:
import os
# Sanity check: count the entries in the TB class folder on the mounted Drive.
tb_dir='/content/drive/MyDrive/tbx11k-simplified/Dataset/TB'
len(os.listdir(tb_dir))

In [None]:
import os
# Root of the dataset on the mounted Drive. It is also made the working
# directory; the path is taken from `dataset_url` so the two can never drift apart.
dataset_url = '/content/drive/MyDrive/tbx11k-simplified'
os.chdir(dataset_url)


In [None]:
# Directory that holds one sub-folder per class.
dataset=os.path.join(dataset_url,"Dataset")

In [None]:
# Maps each class folder name to the integer label used for training.
folder_labels = {
    name: index for index, name in enumerate(("TB", "HEALTHY", "SICK"))
}

In [None]:
# Quick check of the label assigned to the TB class.
folder_labels['TB']

In [None]:

# Load at most `max_images_per_class` images from every class folder, resize
# each one to 256x256 and record the integer label of the folder it came from.
max_images_per_class=1000
images,labels=[],[]
for folder in os.listdir(dataset):
  if folder not in folder_labels:
    # Entries without a known label (stray files, hidden folders) would
    # otherwise fail with a KeyError when the label is looked up below.
    print(f"Skipping unlabeled entry: {folder}")
    continue
  count=0
  print(folder)
  for i,filename in enumerate(os.listdir(os.path.join(dataset,folder))):
    img=cv2.imread(os.path.join(dataset,folder,filename))
    if img is None:
      # cv2.imread reports an unreadable / non-image file by returning None
      # instead of raising; cv2.resize would then abort the whole loop.
      print(f"Skipping unreadable file: {os.path.join(folder,filename)}")
      continue
    img=cv2.resize(img,(256,256))
    count=count+1
    # Lightweight progress indicator: print every 100th directory index.
    if i%100==0:
      print(i)
    images.append(img)
    labels.append(folder_labels[folder])
    if count==max_images_per_class:
      break


In [None]:
import pandas as pd
# Number of loaded samples per integer class label.
pd.Series(labels).value_counts()

In [None]:
# The two lists are filled in lockstep, so their lengths should match.
len(images),len(labels)

In [None]:
# Distinct integer labels that were actually loaded.
lab=pd.Series(labels).unique()

In [None]:
# Stack the list of resized images into one NumPy array and show its shape.
image=np.array(images)
print(image.shape)


In [None]:
# Integer class labels as a NumPy array, aligned index-for-index with `image`.
label=np.array(labels)

In [None]:
## split train / test
# Hold out 20% of the samples for testing. Index positions are split (rather
# than the arrays themselves) so the same indices select both images and labels.
# A fixed random_state makes the split reproducible across kernel restarts, and
# stratify=label keeps the class proportions the same in both subsets.
split_seed = 42
indices_train, indices_test = train_test_split(
    list(range(image.shape[0])),
    train_size=0.8,
    test_size=0.2,
    shuffle=True,
    random_state=split_seed,
    stratify=label,
)

x_train = image[indices_train]
y_train = label[indices_train]
x_test = image[indices_test]
y_test = label[indices_test]

x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
# Inspect the integer test labels before one-hot encoding.
y_test

In [None]:
from keras.utils import to_categorical

In [None]:
# One-hot encode the integer class labels. num_classes is passed explicitly so
# both arrays always get one column per known class; without it to_categorical
# infers the width from the largest label present, which can differ between
# the train and test subsets if a class is missing from one of them.
y_train = keras.utils.to_categorical(y_train, num_classes=len(folder_labels))
y_test = keras.utils.to_categorical(y_test, num_classes=len(folder_labels))

y_train.shape, y_test.shape

In [None]:
# Inspect the one-hot encoded training labels.
y_train

In [None]:
# Inspect the one-hot encoded test labels.
y_test

In [None]:
# Shapes of the image and one-hot label arrays after encoding.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
pip install -U tensorflow-addons

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa

In [None]:
# Number of target classes, and the shape of one input image as passed to the
# model's Input layer (256x256 pixels, 3 channels).
num_classes, input_shape = 3, (256, 256, 3)

In [None]:
# Re-check the array shapes before configuring the model.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
# --- Training schedule ---
batch_size, num_epochs = 16, 50

# --- Patch extraction ---
# Input images are resized to image_size x image_size and cut into
# non-overlapping patch_size x patch_size patches.
image_size, patch_size = 256, 8
num_patches = pow(image_size // patch_size, 2)

# --- Transformer encoder ---
projection_dim = 64  # Width of each encoded patch vector
num_heads = 4
transformer_layers = 8
# Dense widths of the MLP inside every Transformer block
transformer_units = [2 * projection_dim, projection_dim]

# --- Classification head ---
mlp_head_units = [2048, 1024]

In [None]:
# Preprocessing + augmentation pipeline that is applied inside the model:
# normalisation, resize to the working resolution, then random horizontal
# flips, small rotations and zooms.
data_augmentation = keras.Sequential(
    name="data_augmentation",
    layers=[
        layers.Normalization(),
        layers.Resizing(image_size, image_size),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
        layers.RandomZoom(height_factor=0.2, width_factor=0.2),
    ],
)
# The Normalization layer is first in the pipeline; fit its mean and variance
# on the training images only.
data_augmentation.layers[0].adapt(x_train)

In [None]:
def mlp(x, hidden_units, dropout_rate):
    """Stack Dense(GELU) + Dropout pairs on top of ``x``.

    Args:
        x: Input tensor to transform.
        hidden_units: Iterable of Dense widths; one Dense/Dropout pair is
            added per entry, in order.
        dropout_rate: Rate used by every Dropout layer.

    Returns:
        The output of the last Dropout layer, or ``x`` unchanged when
        ``hidden_units`` is empty.
    """
    hidden = x
    for width in hidden_units:
        hidden = layers.Dense(width, activation=tf.nn.gelu)(hidden)
        hidden = layers.Dropout(dropout_rate)(hidden)
    return hidden


In [None]:
class Patches(layers.Layer):
    """Keras layer that cuts a batch of images into non-overlapping square patches.

    Args:
        patch_size: Side length (in pixels) of each square patch; also used as
            the stride, so patches do not overlap.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
            Accepting them is required for the layer to be rebuilt from its
            config when a saved model is loaded.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return the patches of ``images`` as ``[batch, n_patches, patch_dims]``."""
        # Dynamic batch size: the static one is unknown while the graph is built.
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        # extract_patches flattens every patch into the last axis.
        patch_dims = patches.shape[-1]
        # Collapse the patch-grid axes into a single sequence axis.
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        """Include ``patch_size`` so the layer can be serialized and reloaded."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size})
        return config

In [None]:
import matplotlib.pyplot as plt

# Visual sanity check: show one random training image and the patches the
# Patches layer extracts from it.
plt.figure(figsize=(4, 4))
# Deliberately NOT named `image`: that global holds the full dataset array, and
# rebinding it to a single sample would break re-running the train/test split.
sample_image = x_train[np.random.choice(range(x_train.shape[0]))]
plt.imshow(sample_image.astype("uint8"))
plt.axis("off")

resized_image = tf.image.resize(
    tf.convert_to_tensor([sample_image]), size=(image_size, image_size)
)
patches = Patches(patch_size)(resized_image)
print(f"Image size: {image_size} X {image_size}")
print(f"Patch size: {patch_size} X {patch_size}")
print(f"Patches per image: {patches.shape[1]}")
print(f"Elements per patch: {patches.shape[-1]}")

# Lay the patches out on an n x n grid matching their position in the image.
n = int(np.sqrt(patches.shape[1]))
plt.figure(figsize=(4, 4))
for i, patch in enumerate(patches[0]):
    ax = plt.subplot(n, n, i + 1)
    patch_img = tf.reshape(patch, (patch_size, patch_size, 3))
    plt.imshow(patch_img.numpy().astype("uint8"))
    plt.axis("off")

In [None]:
class PatchEncoder(layers.Layer):
    """Project flattened patches to ``projection_dim`` and add a position embedding.

    Args:
        num_patches: Number of patches per image; one embedding vector is
            learned for each patch position.
        projection_dim: Output width of both the Dense projection and the
            position embedding.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
            Accepting them is required for the layer to be rebuilt from its
            config when a saved model is loaded.
    """

    def __init__(self, num_patches, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        # Kept only so get_config() can report it.
        self.projection_dim = projection_dim
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        """Return ``projection(patch) + position_embedding(0..num_patches-1)``."""
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        # The position embedding has no batch axis; the addition broadcasts it.
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        """Include the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update(
            {
                "num_patches": self.num_patches,
                "projection_dim": self.projection_dim,
            }
        )
        return config

In [None]:
def create_vit_classifier():
    """Build the (uncompiled) Vision Transformer classifier.

    The model augments the input, splits it into patches, encodes the patches,
    runs them through ``transformer_layers`` pre-norm Transformer blocks and
    classifies the flattened result with an MLP head. All sizes come from the
    notebook-level configuration variables.

    Returns:
        A ``keras.Model`` mapping images of shape ``input_shape`` to a
        probability distribution over ``num_classes`` classes.
    """
    inputs = layers.Input(shape=input_shape)
    # Augment data.
    augmented = data_augmentation(inputs)
    # Create patches.
    patches = Patches(patch_size)(augmented)
    # Encode patches.
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        # Create a multi-head self-attention layer (query and value are both x1).
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, encoded_patches])
        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # MLP.
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
        # Skip connection 2.
        encoded_patches = layers.Add()([x3, x2])

    # Flatten the patch sequence into one feature vector per image
    # ([batch_size, num_patches * projection_dim]).
    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.5)(representation)
    # Add MLP.
    features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
    # Classify outputs. The classes are mutually exclusive and the model is
    # trained with categorical cross-entropy on one-hot targets, so the head
    # must emit a softmax distribution; independent sigmoids do not sum to 1.
    probabilities = layers.Dense(num_classes, activation="softmax")(features)
    # Create the Keras model.
    model = keras.Model(inputs=inputs, outputs=probabilities)
    return model

In [None]:
from matplotlib import pyplot
from sklearn.metrics import precision_score,recall_score,f1_score,accuracy_score,cohen_kappa_score,roc_auc_score,confusion_matrix,classification_report

In [None]:
# compile the model
model = create_vit_classifier()
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy(name="accuracy")],
)
model.summary()
# train the model
# batch_size / num_epochs come from the hyper-parameter cell instead of being
# repeated here as literals, so changing the configuration actually takes effect.
# NOTE(review): the test split doubles as validation data, so the validation
# curves are not independent of the final test score.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=num_epochs,
    validation_data=(x_test, y_test),
)

# evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test loss: {round(loss, 2)}")
print(f"Test accuracy: {round(accuracy * 100, 2)} %")

In [None]:
import matplotlib.pyplot as plt
# Figures are written to <dataset_url>/Results. The directory that gets created
# and the directory the figures are saved to are built from the same variable,
# so saving no longer depends on the current working directory.
output_folder = os.path.join(dataset_url, 'Results')
os.makedirs(output_folder, exist_ok=True)
# summarize history for accuracy
plt.plot(history.history['accuracy'],label="train_acc")
plt.plot(history.history['val_accuracy'],label="val_acc")
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
# savefig must come before show(): show() clears the current figure.
plt.savefig(os.path.join(output_folder, "accuracyplot.png"))
plt.show()
# summarize history for loss
plt.plot(history.history['loss'],label="train_loss")
plt.plot(history.history['val_loss'],label="val_loss")
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.savefig(os.path.join(output_folder, "lossplot.png"))
plt.show()

In [None]:
# Run the trained model on the held-out images (one row of class scores per image).
y_pred=model.predict(x_test)

In [None]:
# Convert per-class scores into predicted class indices. The ndim guard keeps
# the cell idempotent: once y_pred is already a 1-D index vector, re-running
# the cell leaves it untouched instead of failing on axis=1.
if y_pred.ndim > 1:
    y_pred=np.argmax(y_pred,axis=1)

In [None]:
# Inspect the predicted class indices.
y_pred

In [None]:
# There should be exactly one prediction per test sample.
len(y_test),len(y_pred)

In [None]:
# Convert the one-hot test labels back to class indices for the sklearn metrics.
# The ndim guard keeps the cell idempotent: re-running it on the already
# converted 1-D vector is a no-op instead of an axis error.
if y_test.ndim > 1:
    y_test=np.argmax(y_test, axis=1)

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix computed from the true (y_test) and predicted (y_pred) class indices.
cm = confusion_matrix(y_test, y_pred)
cm

In [None]:
import seaborn as sns
# Heatmap of the confusion matrix, saved next to the other result figures.
plt.figure(figsize = (10,7))
ax = sns.heatmap(cm, annot=True, fmt='g')
# sklearn's confusion_matrix puts true classes on the rows and predicted
# classes on the columns; label the axes so the figure is readable on its own.
ax.set(xlabel='Predicted class', ylabel='True class', title='Confusion matrix')
plt.savefig("/content/drive/MyDrive/tbx11k-simplified/Results/CM.png")

In [None]:
from sklearn.metrics import classification_report
# Derive the display names from folder_labels (ordered by class index) so the
# report can never disagree with the label encoding used for training.
classes = [name for name, _ in sorted(folder_labels.items(), key=lambda item: item[1])]
# Passing labels explicitly keeps target_names aligned even if a class happens
# to be absent from both y_test and y_pred.
print(
    classification_report(
        y_test,
        y_pred,
        labels=list(range(len(classes))),
        target_names=classes,
    )
)


In [None]:
# Summarise the training history. The 'accuracy' and 'loss' keys are the
# training-set metrics (the validation ones are stored under 'val_accuracy' /
# 'val_loss'), so the figures printed here describe the fit on the training data.
print(f'The model has a best accuracy of {round(max(history.history["accuracy"])*100,2)}% and a best loss of {round(min(history.history["loss"]),2)}')
 
