In [1]:
!nvidia-smi -L

'nvidia-smi' is not recognized as an internal or external command,
operable program or batch file.


In [2]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import numpy as np
import cv2
# import wandb
from glob import glob
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from patchify import patchify
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping
from vit import ViT
# from wandb.keras import WandbCallback

In [3]:
!wandb login d9ae8f452cf7f0c4ed803a7364869721e5dd2f52
# token = d9ae8f452cf7f0c4ed803a7364869721e5dd2f52

'wandb' is not recognized as an internal or external command,
operable program or batch file.


In [4]:
import os

dataset_dir = './Dataset/'  # Replace with the path to your dataset folder

# Get the class names from the subfolder names
class_names = [class_name for class_name in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, class_name))]

# Print the class names
print(len(class_names))
print(class_names)

2
['RoughBark', 'StripeCanker']


In [5]:
""" Hyperparameters """
hp = {}
hp["image_size"] = 200
hp["num_channels"] = 3
hp["patch_size"] = 25
hp["num_patches"] = (hp["image_size"]**2) // (hp["patch_size"]**2)
hp["flat_patches_shape"] = (hp["num_patches"], hp["patch_size"]*hp["patch_size"]*hp["num_channels"])

hp["batch_size"] = 32
hp["lr"] = 1e-4
hp["num_epochs"] = 2
hp["num_classes"] = len(class_names)
hp["class_names"] = class_names

hp["num_layers"] = 12
hp["hidden_dim"] = 768
hp["mlp_dim"] = 3072
hp["num_heads"] = 12
hp["dropout_rate"] = 0.1

In [6]:
def create_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)

def load_data(train_path, test_path):
    split=0.15
    # Load training data
    train_images = shuffle(glob(os.path.join(train_path, "*", "*.jpg")))
    
    # subdirectories = glob(os.path.join(train_path, "*"))
    # print({"subdir" : subdirectories})
    # train_images = shuffle(glob(os.path.join(train_path, "*", "*.jpg")))
    # train_images = shuffle(glob(os.path.join(train_path, "*", "*.jpg"), recursive=True))

    # images_array = os.listdir("/content/drive/MyDrive/Datasets/VerySmallDataset/train/RoughBark")

    # print(subdirectories)
    # Collect images from each subdirectory
    # train_images = []
    # images_array = []
    # for subdir in subdirectories:
    #   # print(subdir)
    #   images_array.extend(os.listdir(subdir))

    # train_images = shuffle(images_array)

    # print(train_images)
    split_size = int(len(train_images) * split)
    train_x, valid_x = train_test_split(train_images, test_size=split_size, random_state=42)

    # Load test data from a different folder
    test_x = shuffle(glob(os.path.join(test_path, "*", "*.jpg")))

    return train_x, valid_x, test_x

In [7]:
def process_image_label(path):
    """ Reading images """
    path = path.decode()
    print(path)
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (hp["image_size"], hp["image_size"]))
    image = image/255.0

    """ Preprocessing to patches """
    patch_shape = (hp["patch_size"], hp["patch_size"], hp["num_channels"])
    patches = patchify(image, patch_shape, hp["patch_size"])

    # patches = np.reshape(patches, (64, 25, 25, 3))
    # for i in range(64):
    #     cv2.imwrite(f"files/{i}.png", patches[i])

    patches = np.reshape(patches, hp["flat_patches_shape"])
    patches = patches.astype(np.float32)

    """ Label """
    # print({"path": path})
    # class_name = path.split("/")[-2]
    # print({"classname": class_name})
    normalized_path = os.path.normpath(path)

    # Split the normalized path
    path_parts = normalized_path.split(os.sep)

    # Extract the second-to-last part as the class name
    class_name = path_parts[-2]

    # print(class_name)
    class_idx = hp["class_names"].index(class_name)
    class_idx = np.array(class_idx, dtype=np.int32)

    return patches, class_idx

In [8]:
def parse(path):
    patches, labels = tf.numpy_function(process_image_label, [path], [tf.float32, tf.int32])
    labels = tf.one_hot(labels, hp["num_classes"])

    patches.set_shape(hp["flat_patches_shape"])
    labels.set_shape(hp["num_classes"])

    return patches, labels

def tf_dataset(images, batch=32):
    ds = tf.data.Dataset.from_tensor_slices((images))
    ds = ds.map(parse).batch(batch).prefetch(8)
    return ds

In [9]:
""" Seeding """
np.random.seed(42)
tf.random.set_seed(42)

""" Directory for storing files """
create_dir("files")

In [10]:
""" Paths """
train_path = "./Dataset/"
test_path = "./Dataset/"
model_path = os.path.join("files", "model_vms.h5")
csv_path = os.path.join("files", "log_vms.csv")

""" Dataset """
train_x, valid_x, test_x = load_data(train_path, test_path)
print(f"Train: {len(train_x)} - Valid: {len(valid_x)} - Test: {len(test_x)}")

Train: 278 - Valid: 48 - Test: 326


In [11]:
train_ds = tf_dataset(train_x, batch=hp["batch_size"])
valid_ds = tf_dataset(valid_x, batch=hp["batch_size"])

""" Model """
model = ViT(hp)
model.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(hp["lr"], clipvalue=1.0),
    metrics=["acc"]
)

In [12]:
wandb.init()

NameError: name 'wandb' is not defined

In [13]:
callbacks = [
    ModelCheckpoint(model_path, monitor='val_loss', verbose=1, save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, min_lr=1e-10, verbose=1),
    CSVLogger(csv_path),
    EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=False),
    # WandbCallback(),
]

history = model.fit(
    train_ds,
    epochs=hp["num_epochs"],
    validation_data=valid_ds,
    callbacks=callbacks
)

Epoch 1/2
./Dataset\StripeCanker\IMG_1573.JPG
./Dataset\RoughBark\IMG_E4862.JPG
./Dataset\RoughBark\IMG_E4815.JPG
./Dataset\RoughBark\IMG_1965.JPG
./Dataset\StripeCanker\IMG_1626.JPG
./Dataset\RoughBark\IMG_E4807.JPG
./Dataset\RoughBark\IMG_E4837.JPG
./Dataset\StripeCanker\IMG_E4886.JPG
./Dataset\RoughBark\IMG_4783.JPG
./Dataset\RoughBark\IMG_E4806.JPG
./Dataset\RoughBark\IMG_2058.JPG
./Dataset\StripeCanker\IMG_1808.JPG
./Dataset\RoughBark\IMG_E4805.JPG
./Dataset\StripeCanker\IMG_E4876.JPG
./Dataset\StripeCanker\IMG_E4889.JPG
./Dataset\RoughBark\IMG_E4810.JPG
./Dataset\StripeCanker\IMG_1679.JPG
./Dataset\RoughBark\IMG_4862.JPG
./Dataset\RoughBark\IMG_4853.JPG
./Dataset\RoughBark\IMG_E4860.JPG
./Dataset\RoughBark\IMG_E4788.JPG
./Dataset\RoughBark\IMG_4813.JPG
./Dataset\StripeCanker\IMG_1779.JPG
./Dataset\StripeCanker\IMG_E4888.JPG
./Dataset\StripeCanker\IMG_1622.JPG
./Dataset\StripeCanker\IMG_1645.JPG
./Dataset\StripeCanker\IMG_4799.JPG
./Dataset\RoughBark\IMG_E4844.JPG
./Dataset\Stripe

In [None]:
wandb.finish()

VBox(children=(Label(value='0.021 MB of 0.021 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [None]:
import pickle
with open('training_history.pkl', 'wb') as file:
    pickle.dump(history.history, file)