In [1]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
import numpy as np
from sklearn.utils import shuffle
from glob import glob
from sklearn.model_selection import train_test_split
from patchify import patchify
import tensorflow as tf
import cv2
from tensorflow.keras.callbacks import EarlyStopping
from vit import ViT

In [2]:
## Setting up hyper parameters.
hp={}
hp["image_size"]=200
hp["num_channels"]=3
hp["patch_size"]=25
hp["num_patches"]=(hp['image_size']**2)//(hp['patch_size']**2)
hp["flat_patches_shape"] = (hp["num_patches"], hp["patch_size"]*hp["patch_size"]*hp["num_channels"])

hp["batch_size"]=32
hp["lr"]=1e-4
hp["num_epochs"]=100
hp["num_classes"]=3
hp["class_names"]=["benign","adenocarcinoma","squamous_cell_carcinoma"]

hp["num_layers"]=12
hp["hidden_dim"]=768
hp["mlp_dim"]=3072
hp["num_heads"]=12
hp["dropout_rate"]=0.1


In [3]:

## Creating directory for storing the files:
def create_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)



def load_data(path, split=0.1):
    images= shuffle(glob(os.path.join(path,"*","*.jpg")))
    split_size=int(len(images)*split)
    train_x,valid_x=train_test_split(images,test_size=split_size,random_state=42)
    return train_x,valid_x


import os

import os
import cv2
import numpy as np
import tensorflow as tf
from glob import glob
from sklearn.utils import shuffle
from tensorflow.keras.callbacks import EarlyStopping
from patchify import patchify

def process_image_label(path):
    path = path.decode()
    if not os.path.exists(path):
        raise FileNotFoundError(f"Image file not found: {path}")

    image = cv2.imread(path, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError(f"Failed to load image: {path}")

    image = cv2.resize(image, (hp["image_size"], hp["image_size"]))
    image = image / 255.0

    ## Preprocessing on patches:
    patch_shape = (hp["patch_size"], hp["patch_size"], hp["num_channels"])
    patches = patchify(image, patch_shape, hp["patch_size"])

    patches = np.reshape(patches, hp["flat_patches_shape"])
    patches = patches.astype(np.float32)

    ## Labels
    path = path.replace("\\", "/")  # Normalize path for Windows
    class_name = os.path.basename(os.path.dirname(path))
    
    if class_name not in hp["class_names"]:
        raise ValueError(f"Class name '{class_name}' not found in class_names!")

    class_idx = hp["class_names"].index(class_name)  
    class_idx = np.array(class_idx, dtype=np.int32) 

    return patches, class_idx

def load_data(path, split=0.1):
    images = glob(os.path.join(path, "*", "*.jpg"))
    if len(images) == 0:
        raise ValueError(f"No images found in path: {path}")

    images = shuffle(images)
    split_size = int(len(images) * split)
    train_x, valid_x = train_test_split(images, test_size=split_size, random_state=42)
    return train_x, valid_x



def parse(path):
    patches,labels=tf.numpy_function(process_image_label,[path],[tf.float32,tf.int32])
    labels=tf.one_hot(labels,hp["num_classes"])

    patches.set_shape(hp["flat_patches_shape"])
    labels.set_shape(hp["num_classes"])

    return patches,labels



## Creating data batches:
def tf_dataset(images, batch=32):
    ds=tf.data.Dataset.from_tensor_slices((images))
    ds=ds.map(parse).batch(batch).prefetch(8)
    return ds





## Seeding
if __name__=="__main__":

    np.random.seed(42)
    tf.random.set_seed(42)


    create_dir("files")

    ## Paths:
    model_path=os.path.join("files","model.h5")
    csv_path=os.path.join("files","log.csv")

    ##Dataset:
    train_path="T:\Lung Cancer (H&E Images)\dataset\Train"
    test_path="T:\Lung Cancer (H&E Images)\dataset\Test"
    train_x,valid_x=load_data(train_path)
    print(f"Train: {len(train_x)} - Valid: {len(valid_x)}")

    train_ds=tf_dataset(train_x,batch=hp["batch_size"])
    valid_ds=tf_dataset(valid_x,batch=hp["batch_size"])
    
    ## Model:
    model = ViT(hp)
    model.compile(
        loss="categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(hp["lr"], clipvalue=1.0),
        metrics=["acc"]
    )
    
    ## Setting Up Early-Stopping:
    callbacks= EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)

    model.fit(
        train_ds,
        epochs=hp["num_epochs"],
        validation_data=valid_ds,
        callbacks=callbacks
    )

Train: 12150 - Valid: 1350

Epoch 1/100
[1m  3/380[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19:12:33[0m 183s/step - acc: 0.3854 - loss: 6.8956

In [None]:
pip install tensorflow-addons




In [63]:
path="T:\Lung Cancer (H&E Images)\dataset\Train\benign\0005.jpg"
category = path.split(os.sep)[-2]
print(category)


dataset
