In [None]:
# Mount Google Drive at /content/drive (Colab only); later cells read and write under drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import argparse
import os
from glob import glob
from os import listdir, mkdir
from os.path import isfile, exists
from textwrap import dedent
from time import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
from imutils import rotate_bound

In [None]:
def draw_table(data, classes, save_as="table", save_to="images"):
    """
    Render per-class precision and recall as a table and save it as a PNG.

    The table is drawn on the current pyplot figure, written to
    ``{save_to}/{save_as}.png`` and then displayed with ``plt.show()``.

    :param data: Confusion matrix, passed unchanged to ``precision`` and ``recall``.
    :param classes: Sequence of class names; one table row is produced per entry.
    :param save_as: Name of the file (without extension) to save the table as.
    :param save_to: Directory to save the table in; created if it does not exist.
    """
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(save_to, exist_ok=True)

    result = [[precision(label, data), recall(label, data)] for label in range(len(classes))]
    columns = ["precision", "recall"]
    # One row colour per class, sampled from the first half of the BuPu colormap in reverse order.
    colors = plt.cm.BuPu(np.linspace(0, 0.5, len(classes)))[::-1]
    plt.table(colWidths=[.3, .3], cellText=result, cellLoc='center', rowLabels=classes, rowColours=colors, colLabels=columns, loc='center')
    plt.axis('off')
    plt.savefig(f'{save_to}/{save_as}.png')
    plt.show()

In [None]:
def dataSetGenerator(path, resize=False, resize_to=224, percentage=100, dataAugmentation=False):
    """
    Generate an in-memory image dataset from a directory with one sub-folder per class.

    Every immediate sub-directory of ``path`` is treated as a class. Class names are
    sorted so that label indices are reproducible, and plain files lying directly
    under ``path`` are ignored. Each image is read with OpenCV, converted from BGR to
    RGB, optionally resized to a square, and paired with a one-hot label.

    :param str path: Path to the image dataset directory.
    :param bool resize: Whether to resize the images.
    :param int resize_to: Side length in pixels of the square output when ``resize`` is set.
    :param int percentage: Percentage of the collected samples to return, drawn from a
        random permutation of all samples.
    :param bool dataAugmentation: When true, also add the 90/180/270 degree rotations of
        every image plus a left-right flip of each orientation (7 extra samples per image).
    :return: Tuple of images, one-hot labels, and class names, or ``None`` when an
        ``IOError`` was caught and reported.
    :rtype: tuple[np.ndarray, np.ndarray, np.ndarray]
    """
    try:
        start_time = time()
        # Only directories are classes; a stray file must not become a label.
        classes = sorted(name for name in listdir(path) if os.path.isdir(os.path.join(path, name)))
        if not classes:
            print(f"Warning: no class sub-directories found in {path}; the dataset will be empty")
        image_list, labels = [], []

        for class_index, classe in enumerate(classes):
            for filename in glob(f'{path}/{classe}/*'):
                # imread takes an IMREAD_* flag, not a colour-conversion code; convert explicitly.
                img = cv2.imread(filename, cv2.IMREAD_COLOR)
                if img is None:
                    # imread returns None for files it cannot decode.
                    print(f"Warning: skipping unreadable image {filename}")
                    continue
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                if resize:
                    img = cv2.resize(img, (resize_to, resize_to))
                image_list.append(img)
                label = np.zeros(len(classes))
                label[class_index] = 1
                labels.append(label)

                if dataAugmentation:
                    # The unrotated image is already stored above, so angle 0 only adds its mirror.
                    image_list.append(np.fliplr(img))
                    labels.append(label)
                    for angle in (90, 180, 270):
                        rotated = rotate_bound(img, angle)
                        image_list.extend([rotated, np.fliplr(rotated)])
                        labels.extend([label, label])

        # Shuffle, then keep the requested share of the samples.
        indices = np.random.permutation(len(image_list))[:int(len(image_list) * percentage / 100)]
        print(f"\n --- dataSet generated in {np.round(time() - start_time)} seconds --- \n")
        return np.array([image_list[x] for x in indices]), np.array([labels[x] for x in indices]), np.array(classes)

    except IOError as e:
        print(f"I/O error({e.errno}): {e.strerror} \nlike : {path}")


In [None]:
def dataSetToNPY(path, SaveTo="DataSets", resize=True, resize_to=224, percentage=80, dataAugmentation=False):
    """
    Generate an image dataset from a folder of pictures and save it as .npy files
    for fast loading during training and testing.

    The picture folder must follow the structure below so that labels can be
    derived from the class folder names.

    example of pictureFolder: http://weegee.vision.ucmerced.edu/datasets/landuse.html

    picture dataSets
      |
      |----------class-1
      |        .   |-------image-1
      |        .   |         .
      |        .   |         .
      |        .   |         .
      |        .   |-------image-n
      |        .
      |-------class-n

    Five files are written to ``{SaveTo}/{name}/`` where ``name`` is the last
    component of ``path``: ``{name}_dataTrain.npy``, ``{name}_labelsTrain.npy``,
    ``{name}_dataTest.npy``, ``{name}_labelsTest.npy`` and ``{name}_classes.npy``.

    :param str path: the path of the picture dataset folder
    :param str SaveTo: the directory under which the dataset folder is created
        (missing directories are created)
    :param bool resize: whether to resize the pictures
    :param int resize_to: the new side length of the pictures
    :param int or float percentage: percentage of the samples written to the train
        files; the remainder goes to the test files
    :param int or bool dataAugmentation: apply the data augmentation strategy
    :return: None; the dataset is written to .npy files
    """
    try:
        # Drop trailing separators so ".../ham/" still yields the name "ham".
        dataSet_name = path.replace('\\', "/").rstrip("/").split("/")[-1]
        SaveTo = SaveTo + "/" + dataSet_name
        os.makedirs(SaveTo, exist_ok=True)

        generated = dataSetGenerator(path, resize, resize_to, 100, dataAugmentation)
        if generated is None:
            # dataSetGenerator prints the I/O error itself and returns None in that case.
            return
        data, labels, classes = generated

        # Random train/test split at the requested percentage.
        indice = np.random.permutation(len(data))
        split_at = int(len(data) * percentage / 100)
        indice80 = indice[:split_at]
        indice20 = indice[split_at:]
        np.save(SaveTo + "/" + dataSet_name + '_dataTrain.npy', data[indice80])
        np.save(SaveTo + "/" + dataSet_name + '_labelsTrain.npy', labels[indice80])
        np.save(SaveTo + "/" + dataSet_name + '_dataTest.npy', data[indice20])
        np.save(SaveTo + "/" + dataSet_name + '_labelsTest.npy', labels[indice20])
        np.save(SaveTo + "/" + dataSet_name + '_classes.npy', classes)
    except IOError as e:
        print("I/O error({}): {} \nlike : {}".format(e.errno, e.strerror, path))


In [None]:
# List the contents of the output folder on the mounted Drive.
!ls drive/MyDrive/Colab\ Notebooks\ 3/HamDet/output

In [None]:
def picShow(data, labels, classes, just=None, predict=None, autoClose=False, Save_as="pic", save_to="images"):
    """
    Display images in a grid with their true (and optionally predicted) classes.

    The figure is saved once to ``{save_to}/{Save_as}.png`` and then shown.

    :param data: Sequence of images accepted by ``imshow``.
    :param labels: Sequence of label vectors; ``argmax`` of each selects the true class.
    :param classes: Sequence of class names indexed by class id.
    :param just: Number of images to display (all of ``data`` when ``None``; capped at
        ``len(data)``).
    :param predict: Optional sequence of per-class scores; when given, the top class and
        its score (as a percentage) are added to the title, coloured green when it
        matches the true class and red otherwise.
    :param autoClose: When true, show without blocking, wait 10 seconds, then close.
    :param Save_as: Name of the file (without extension) to save the plot as.
    :param save_to: Directory to save the plot in; created if it does not exist.
    """
    fig = plt.figure()
    if just is None:
        just = len(data)
    # Asking for more images than are available must not raise IndexError.
    just = min(just, len(data))

    # add_subplot requires integer grid sizes; rint x ceil of sqrt(n) always has >= n cells.
    side = np.sqrt(just)
    rows, cols = int(np.rint(side)), int(np.ceil(side))

    for i in range(1, just + 1):
        true_out = classes[labels[i - 1].argmax()]
        sub = fig.add_subplot(rows, cols, i)
        title = f"true: {true_out}"
        color = 'black'

        if predict is not None:
            classIndex = predict[i - 1].argmax()
            predict_out = classes[classIndex]
            title += f" predicted: {round(predict[i - 1][classIndex] * 100, 2)} {predict_out}"
            color = 'green' if predict_out == true_out else 'red'

        sub.set_title(title, color=color, fontsize=7, fontweight='bold')
        sub.axis('off')
        sub.imshow(data[i - 1], interpolation='nearest', aspect="auto")

    # Save once, after every panel is drawn, instead of rewriting the file per image.
    os.makedirs(save_to, exist_ok=True)
    fig.savefig(f'{save_to}/{Save_as}.png')

    if autoClose:
        plt.show(block=False)
        plt.pause(10)
        plt.close(fig)
    else:
        plt.show()

In [None]:
def plotFiles(*paths, xlabel='# epochs', ylabel='Error and Accuracy', reduce_each=False, autoClose=False, ff=None, Save_as="plot", save_to="images"):
    """
    Plot the numeric series stored in several text files on one chart.

    Each file is read line by line; every line that parses as a number (after
    stripping NUL characters) becomes one data point, other lines are skipped.
    The chart is saved to ``{save_to}/{Save_as}.png`` and then shown.

    :param paths: Paths to the data files; missing files are reported and skipped.
    :param xlabel: Label for the x-axis.
    :param ylabel: Label for the y-axis.
    :param reduce_each: When truthy, average every ``reduce_each`` consecutive points
        into one (the last group may be shorter).
    :param autoClose: When true, show without blocking, wait 10 seconds, then close.
    :param ff: Custom legend label; when omitted the file name without extension is used.
    :param Save_as: Name of the file (without extension) to save the plot as.
    :param save_to: Directory to save the plot in; created if it does not exist.
    """
    for path in paths:
        if not isfile(path):
            # Report the missing file and carry on with the remaining paths.
            print(f"I/O error: file not found \nlike : {path}")
            continue

        with open(path) as f:
            data = []
            for line in f.read().splitlines():
                # float() accepts decimals and signs, which str.isdigit() rejected.
                try:
                    data.append(float(line.strip('\x00')))
                except ValueError:
                    continue

        if reduce_each:
            step = int(reduce_each)
            # Consecutive, non-overlapping windows of `step` points.
            chunks = (data[start:start + step] for start in range(0, len(data), step))
            resultat = [sum(chunk) / float(len(chunk)) for chunk in chunks]
        else:
            resultat = data

        label = ff if ff else path.replace("\\", '/').split("/")[-1].split(".")[0]
        plt.plot(resultat, label=label)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend(loc='center left')
    # savefig does not create missing directories.
    os.makedirs(save_to, exist_ok=True)
    plt.savefig(f'{save_to}/{Save_as}.png')
    if autoClose:
        plt.show(block=False)
        plt.pause(10)
        plt.close()
    else:
        plt.show()

In [None]:
def plotSubFiles(*paths, xlabel='# epochs', ylabel='Error and Accuracy', reduce_each=False, autoClose=False):
    """
    Plot the numeric series stored in several text files, one subplot per file.

    Each file is read line by line; every line that parses as a number (after
    stripping NUL characters) becomes one data point, other lines are skipped.
    Each subplot is titled with the file name without its extension.

    :param paths: Paths to the data files; missing files are reported and skipped.
    :param xlabel: Label for the x-axis of every subplot.
    :param ylabel: Label for the y-axis of every subplot.
    :param reduce_each: When truthy, average every ``reduce_each`` consecutive points
        into one (the last group may be shorter).
    :param autoClose: When true, show without blocking, wait 10 seconds, then close.
    """
    fig = plt.figure()
    fig.subplots_adjust(hspace=0.5, wspace=0.5)
    sub_index = 1

    # add_subplot requires integer grid sizes; rint x ceil of sqrt(n) always has >= n cells.
    side = np.sqrt(len(paths))
    rows, cols = int(np.rint(side)), int(np.ceil(side))

    for path in paths:
        if not isfile(path):
            # Report the missing file and carry on with the remaining paths.
            print(f"I/O error: file not found \nlike : {path}")
            continue

        with open(path) as f:
            data = []
            for line in f.read().splitlines():
                # float() accepts decimals and signs, which str.isdigit() rejected.
                try:
                    data.append(float(line.strip('\x00')))
                except ValueError:
                    continue

        if reduce_each:
            step = int(reduce_each)
            # Consecutive, non-overlapping windows of `step` points.
            chunks = (data[start:start + step] for start in range(0, len(data), step))
            resultat = [sum(chunk) / float(len(chunk)) for chunk in chunks]
        else:
            resultat = data

        sub = fig.add_subplot(rows, cols, sub_index)
        sub_index += 1
        sub.set_title(path.replace("\\", '/').split("/")[-1].split(".")[0], fontsize=7, fontweight='bold')
        sub.plot(resultat)
        # Label every subplot; pyplot's xlabel/ylabel would only reach the last one.
        sub.set_xlabel(xlabel)
        sub.set_ylabel(ylabel)

    if autoClose:
        plt.show(block=False)
        plt.pause(10)
        plt.close(fig)
    else:
        plt.show()

In [None]:
def plot_histories(histories, keys=["loss", "val_loss"], legend=["Train", "Validation"], xlabel='# epochs', ylabel='Error', autoClose=False, Save_as="plot", save_to="images"):
    """
    Plot selected metrics from one or more training histories on a single chart.

    For every history, ``history.history[key]`` is plotted for each key, in order.
    The chart is saved to ``{save_to}/{Save_as}.png`` and then shown.

    :param histories: Iterable of objects exposing a ``history`` mapping.
    :param keys: Keys to plot from each history mapping (the default list is only read,
        never mutated).
    :param legend: Legend entries, matched to the plotted lines in plotting order.
    :param xlabel: Label for the x-axis.
    :param ylabel: Label for the y-axis.
    :param autoClose: When true, show without blocking, wait 10 seconds, then close.
    :param Save_as: Name of the file (without extension) to save the plot as.
    :param save_to: Directory to save the plot in; created if it does not exist.
    """
    for history in histories:
        for key in keys:
            plt.plot(history.history[key])

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend(legend, loc='upper left')
    # savefig does not create missing directories.
    os.makedirs(save_to, exist_ok=True)
    plt.savefig(f'{save_to}/{Save_as}.png')
    if autoClose:
        plt.show(block=False)
        plt.pause(10)
        plt.close()
    else:
        plt.show()

In [None]:
def precision(label, confusion_matrix):
    """
    Calculate the precision for a given class label.

    Precision is the diagonal entry for ``label`` divided by the sum of column
    ``label`` of the confusion matrix.

    :param int label: Class label (index into the confusion matrix).
    :param confusion_matrix: Square confusion matrix (``np.ndarray`` or nested sequence).
    :return: Precision value; ``0.0`` when the column sums to zero, so a class that
        was never counted in that column does not produce NaN.
    :rtype: float
    """
    matrix = np.asarray(confusion_matrix)
    column_total = matrix[:, label].sum()
    if column_total == 0:
        return 0.0
    return float(matrix[label, label] / column_total)

In [None]:
def recall(label, confusion_matrix):
    """
    Calculate the recall for a given class label.

    Recall is the diagonal entry for ``label`` divided by the sum of row
    ``label`` of the confusion matrix.

    :param int label: Class label (index into the confusion matrix).
    :param confusion_matrix: Square confusion matrix (``np.ndarray`` or nested sequence).
    :return: Recall value; ``0.0`` when the row sums to zero, so a class with no
        entries in that row does not produce NaN.
    :rtype: float
    """
    matrix = np.asarray(confusion_matrix)
    row_total = matrix[label, :].sum()
    if row_total == 0:
        return 0.0
    return float(matrix[label, label] / row_total)

In [None]:

def accuracy(confusion_matrix):
    """
    Calculate the accuracy from the confusion matrix.

    Accuracy is the sum of the diagonal divided by the sum of all entries.

    :param confusion_matrix: Square confusion matrix (``np.ndarray`` or nested sequence).
    :return: Accuracy value; ``0.0`` when the matrix contains no samples.
    :rtype: float
    """
    matrix = np.asarray(confusion_matrix)
    sum_of_all_elements = matrix.sum()
    if sum_of_all_elements == 0:
        return 0.0
    return float(matrix.trace() / sum_of_all_elements)

In [None]:
def draw_confusion_matrix(confusion_matrix, class_labels, Save_as="Confusion Matrix", save_to="images"):
    """
    Draw the confusion matrix as a heat map with cell values, save it and show it.

    Rows are drawn on the y-axis (labelled "True") and columns on the x-axis
    (labelled "Predicted"). The figure is saved to ``{save_to}/{Save_as}.png``.

    :param np.ndarray confusion_matrix: Confusion matrix, indexed as ``[row, column]``.
    :param list class_labels: Class labels used for the tick labels; its length sets
        how many rows and columns are annotated.
    :param Save_as: Name of the file (without extension) to save the figure as.
    :param save_to: Directory to save the figure in; created if it does not exist.
    """
    num_classes = len(class_labels)
    fig, ax = plt.subplots()
    cax = ax.matshow(confusion_matrix, cmap=plt.cm.Blues)
    fig.colorbar(cax)

    # Text coordinates are (x, y) = (column, row), hence the swapped matrix index.
    for i in range(num_classes):
        for j in range(num_classes):
            ax.text(i, j, str(confusion_matrix[j, i]), va='center', ha='center')

    ax.set_xticks(np.arange(num_classes))
    ax.set_xticklabels(class_labels)
    ax.set_yticks(np.arange(num_classes))
    ax.set_yticklabels(class_labels)
    # Label the matrix axes explicitly rather than relying on pyplot's current axes,
    # since a colorbar axes was added to the figure as well.
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')

    # savefig does not create missing directories.
    os.makedirs(save_to, exist_ok=True)
    fig.savefig(f'{save_to}/{Save_as}.png')
    plt.show()

In [None]:
if __name__ == '__main__':

    # Where the class folders live, and where the generated .npy files go.
    path = 'drive/MyDrive/Colab Notebooks 3/HamDet/ham'
    SaveTo = 'drive/MyDrive/Colab Notebooks 3/HamDet/output'

    # Preprocessing and split options forwarded to dataSetToNPY.
    resize, resize_to = True, 224
    percentage, dataAugmentation = 80, False

    # Echo both locations so a wrong path is obvious before the conversion runs.
    print(f"Dataset path: {path}", f"Save to: {SaveTo}", sep="\n")

    dataSetToNPY(
        path,
        SaveTo,
        resize=resize,
        resize_to=resize_to,
        percentage=percentage,
        dataAugmentation=dataAugmentation,
    )


Dataset path: drive/MyDrive/Colab Notebooks 3/HamDet/ham
Save to: drive/MyDrive/Colab Notebooks 3/HamDet/output

 --- dataSet generated in 6.0 seconds --- 



In [None]:
classes_name = "ham"  # Set the dataset name here
batch_size = 10  # Set the batch size here
epochs = 30  # Set the number of epochs here

# NOTE(review): `tf` (used here through the TensorFlow 1.x graph/session API), `vgg16`
# and `append` are not imported or defined anywhere in the visible notebook; they must
# be provided before this cell can run — confirm where they come from.

classes = np.load(f"drive/MyDrive/Colab Notebooks 3/HamDet/output/ham/{classes_name}_classes.npy")
batch = np.load(f"drive/MyDrive/Colab Notebooks 3/HamDet/output/ham/{classes_name}_dataTrain.npy")
labels = np.load(f"drive/MyDrive/Colab Notebooks 3/HamDet/output/ham/{classes_name}_labelsTrain.npy")

# An empty or malformed training array would otherwise surface as an opaque
# "IndexError: tuple index out of range" on batch.shape[1] below.
if batch.ndim != 4 or batch.shape[0] == 0:
    raise ValueError(
        f"Training data for '{classes_name}' has shape {batch.shape}; expected a non-empty "
        "(num_images, height, width, 3) array. Check that the dataset folder contains one "
        "sub-folder per class."
    )

classes_num = len(classes)
rib = batch.shape[1]  # picture side length (images are fed as rib x rib x 3)

# NOTE(review): weights are loaded from this path but saved to 'Weights/VGG16_<name>.npy'
# below, so a later run will not pick up the saved weights — confirm the intended location.
weights_path = f'drive/MyDrive/Colab Notebooks 3/HamDet/output/output/VGG16_{classes_name}.npy'

with tf.device('/cpu:0'):
    # os.cpu_count() works on every platform; the NUMBER_OF_PROCESSORS variable is Windows-only.
    with tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=os.cpu_count() or 1)) as sess:
        images = tf.placeholder(tf.float32, [None, rib, rib, 3])
        true_out = tf.placeholder(tf.float32, [None, classes_num])
        train_mode = tf.placeholder(tf.bool)

        try:
            vgg = vgg16.Vgg16(weights_path, classes_num)
        except Exception as err:
            # Fall back to freshly initialised weights, but say why and which path was tried.
            print(f'{weights_path} Not Exist ({err})')
            vgg = vgg16.Vgg16(None, classes_num)
        vgg.build(images, train_mode)

        # print number of variables used: 143667240 variables, i.e. ideal size = 548MB
        print('number of variables used:', vgg.get_var_count())

        sess.run(tf.global_variables_initializer())

        # test classification before training
        prob = sess.run(vgg.prob, feed_dict={images: batch[:10], train_mode: False})
        picShow(batch[:10], labels[:10], classes, None, prob, True)

        # squared-error cost trained with plain gradient descent
        cost = tf.reduce_sum((vgg.prob - true_out) ** 2)
        train = tf.train.GradientDescentOptimizer(0.0001).minimize(cost)

        # Accuracy must be computed from the network output tensor; `prob` above is a
        # fixed numpy array holding the predictions for the first 10 images only.
        correct_prediction = tf.equal(tf.argmax(vgg.prob, 1), tf.argmax(true_out, 1))
        acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        batche_num = batch.shape[0]
        costs = []
        accs = []
        for epoch in range(epochs):
            # New random mini-batch order every epoch.
            indice = np.random.permutation(batche_num)
            counter = 0
            for i in range(batche_num // batch_size):
                min_batch = indice[i * batch_size:(i + 1) * batch_size]
                cur_cost, _, cur_acc = sess.run([cost, train, acc], feed_dict={images: batch[min_batch], true_out: labels[min_batch], train_mode: True})
                print(f"Epoch: {epoch} Batch: {i} Loss: {cur_cost}")
                accs.append(cur_acc)
                costs.append(cur_cost)
                counter += 1
                if counter % 100 == 0:
                    # Checkpoint every 100 mini-batches.
                    # NOTE(review): `costs`/`accs` are never cleared, so if `append` adds the
                    # whole list to the file each time, earlier values are written repeatedly — confirm.
                    #  save graph data
                    append(costs, f'Data/COST16_{classes_name}.txt')
                    append(accs, f'Data/ACC16_{classes_name}.txt')
                    # save Weights
                    vgg.save_npy(sess, f'Weights/VGG16_{classes_name}.npy')

            #  save graph data
            append(costs, f'Data/COST16_{classes_name}.txt')
            append(accs, f'Data/ACC16_{classes_name}.txt')
            #  save Weights
            vgg.save_npy(sess, f'Weights/VGG16_{classes_name}.npy')

        # test classification again; the true classes should now get higher probabilities
        prob = sess.run(vgg.prob, feed_dict={images: batch[:10], train_mode: False})
        picShow(batch[:10], labels[:10], classes, None, prob)

IndexError: tuple index out of range

In [None]:
import numpy as np

# Read back the class-name array saved for this dataset (classes_name is set in an earlier cell).
data = np.load(
    f"drive/MyDrive/Colab Notebooks 3/HamDet/output/ham/{classes_name}_classes.npy"
)

# Show what was stored, to check that the entries are class names.
print(data)

['Archery (67).jpg' 'Archery (5).jpg' 'Archery (100).jpg' ...
 'Archery (122).jpg' 'Archery (53).jpg' 'Archery (78).jpg']


In [None]:
#Restnet 152


# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install necessary packages (if not already installed)
!pip install tensorflow

from tensorflow.keras.applications import ResNet152
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# Define image data generators for training and validation
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
val_datagen = ImageDataGenerator(rescale=1./255)

# Define image paths for training and validation data (update with your directory structure)
train_dir = "/content/drive/My Drive/Research/Paper2/Anaconda1/Train"
val_dir = "/content/drive/My Drive/Research/Paper2/Anaconda1/Test"

# Set image dimensions
img_width, img_height = 224, 224

# Create training and validation data generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='categorical'  # Adjust for your classification task
)
val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(img_width, img_height),
    batch_size=8,
    class_mode='categorical'  # Adjust for your classification task
)

# Load the pre-trained ResNet152 model without the top layer
base_model = ResNet152(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

# Freeze the pre-trained layers for fine-tuning
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers for classification
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)  # Adjust number of units based on your dataset
predictions = Dense(2, activation='softmax')(x)  # Adjust output layer for your number of classes

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the training data
hist = model.fit(train_generator, epochs=100, validation_data=val_generator)  # Adjust epochs as needed


In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install necessary packages (if not already installed)
!pip install tensorflow

from tensorflow.keras.applications import ResNet152
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# Define image data generators for training and validation
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
val_datagen = ImageDataGenerator(rescale=1./255)

# Define image paths for training and validation data (update with your directory structure)
train_dir = "/content/drive/My Drive/Research/Paper2/Anaconda1/Train"
val_dir = "/content/drive/My Drive/Research/Paper2/Anaconda1/Test"

# Set image dimensions
img_width, img_height = 224, 224

# Create training and validation data generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='categorical'  # Adjust for your classification task
)
val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(img_width, img_height),
    batch_size=8,
    class_mode='categorical'  # Adjust for your classification task
)

# Load the pre-trained ResNet152 model without the top layer
base_model = ResNet152(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

# Freeze the pre-trained layers for fine-tuning
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers for classification
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)  # Adjust number of units based on your dataset
predictions = Dense(2, activation='softmax')(x)  # Adjust output layer for your number of classes

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the training data
hist = model.fit(train_generator, epochs=100, validation_data=val_generator)  # Adjust epochs as needed
