# Import Libraries

In [1]:
import os

import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import Sequential
from keras.layers import Dense, Conv3D, Dropout, Flatten, MaxPooling3D
from keras import models, layers, optimizers
from sklearn.model_selection import train_test_split
from sklearn import metrics

from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

from sklearn.preprocessing import LabelEncoder

import zipfile
import shutil
import tifffile

## Unzip File
### Make sure `zip_file` and `extract_to` point to the correct locations in your environment

In [8]:
def unzip_file(zip_file, extract_to):
    """Extract every member of the zip archive `zip_file` into `extract_to`."""
    with zipfile.ZipFile(zip_file, mode='r') as archive:
        archive.extractall(path=extract_to)

# Archive to unpack and the directory that receives its contents
zip_file, extract_to = '/content/oneill_20240508_133843.zip', '/content'

unzip_file(zip_file=zip_file, extract_to=extract_to)


## Load dataset paths and labels
### Make sure `data_dir` points to the extracted dataset folder

In [9]:
data_dir = '/content/oneill_20240508_133843'

# os.listdir returns entries in arbitrary order, so sort them: the seeded
# train/test split that consumes these lists is only reproducible when the
# input order is stable. Non-directory entries are skipped because each
# category is expected to be a sub-folder that can itself be listed.
categories = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
file_paths = []
labels = []

# Store file paths and their respective labels (the label is the folder name)
for category in categories:
    category_path = os.path.join(data_dir, category)
    files = sorted(os.listdir(category_path))

    for file in files:
        file_paths.append(os.path.join(category_path, file))
        labels.append(category)

# Train-Test Split

In [10]:
# Train-Test split: hold out 20% of the files for testing, keeping the
# per-class proportions of `labels` in both parts
train_files, test_files, train_labels, test_labels = train_test_split(
    file_paths, labels, test_size=0.2, stratify=labels, random_state=42
)

# Split the training data into training and validation sets.
# Stratify here as well so the validation set keeps the same class
# proportions as the training set, matching the first split.
train_files, val_files, train_labels, val_labels = train_test_split(
    train_files, train_labels, test_size=0.2, stratify=train_labels, random_state=42
)

# Make train, test & val folders to store the dataset
train_dir = "/content/train"
test_dir = "/content/test"
val_dir = "/content/val"

# Each split folder gets one sub-folder per category
for split_dir in (train_dir, test_dir, val_dir):
    os.makedirs(split_dir, exist_ok=True)
    for category in categories:
        os.makedirs(os.path.join(split_dir, category), exist_ok=True)

# Store files in respective train/test directories
def move_files(files, labels, destination):
    """Move each file into the sub-folder of `destination` named after its label.

    Args:
        files: Paths of the files to move.
        labels: Label of each file, paired with `files` by position; used as
            the name of the sub-folder under `destination`.
        destination: Root folder that holds one sub-folder per label.

    Failures to move an individual file are printed and the remaining files
    are still processed.
    """
    for file, label in zip(files, labels):
        dest_folder = os.path.join(destination, label)
        try:
            # Without an existing folder shutil.move would rename the file to
            # the folder's path, and later files would overwrite it.
            os.makedirs(dest_folder, exist_ok=True)
            shutil.move(file, dest_folder)
        except OSError as e:
            print(f"Failed to move {file} to {dest_folder}. Error: {e}")



# Move every split's files into its own folder tree
for split_files, split_labels, split_dir in (
    (train_files, train_labels, train_dir),
    (test_files, test_labels, test_dir),
    (val_files, val_labels, val_dir),
):
    move_files(split_files, split_labels, split_dir)

In [11]:
# Load the train & test folder data
def load_tiff_data(data_dir):
    """Load every TIFF stored under `data_dir/<class_name>/` into arrays.

    Args:
        data_dir: Folder containing one sub-folder per class; every file in a
            class sub-folder is read with `tifffile`.

    Returns:
        A tuple `(images, labels)` of numpy arrays. `images` stacks the loaded
        image arrays and `labels` holds, for each image, the integer index of
        its class in the alphabetically sorted list of class folder names.
    """
    # Sort the class folders so the name -> integer mapping is identical on
    # every call; os.listdir order is arbitrary and this function is called
    # separately for the train, test and val folders. Plain files sitting in
    # data_dir are ignored because they cannot be listed as class folders.
    class_names = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    label_map = {name: index for index, name in enumerate(class_names)}

    images = []
    labels = []
    for class_name in class_names:
        class_dir = os.path.join(data_dir, class_name)
        for file_name in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, file_name)
            with tifffile.TiffFile(file_path) as tif:
                images.append(tif.asarray())  # Load the image data
            labels.append(label_map[class_name])

    # Convert lists to numpy arrays
    return np.array(images), np.array(labels, dtype=int)

# Load images and integer labels for the train, test and val folders (in that order)
(train_images, train_labels), (test_images, test_labels), (val_images, val_labels) = [
    load_tiff_data(split_path)
    for split_path in ('/content/train', '/content/test', '/content/val')
]

# One-Hot Encode (Only if classifying using categorical_crossentropy)
# DO NOT RUN if the model uses sparse_categorical_crossentropy

In [None]:
# One-Hot encode labels for classification.
# Keep USE_ONE_HOT_LABELS = False while the model is compiled with
# 'sparse_categorical_crossentropy' (which takes integer labels); set it to
# True only when switching the loss to 'categorical_crossentropy'. The flag
# lets the notebook run top to bottom without having to skip this cell.
USE_ONE_HOT_LABELS = False

# The 1-D check keeps the cell safe to re-run: labels that were already
# encoded are left untouched instead of being encoded a second time.
if USE_ONE_HOT_LABELS and np.ndim(train_labels) == 1:
    num_classes = len(np.unique(train_labels))
    train_labels = tf.keras.utils.to_categorical(train_labels, num_classes)
    test_labels = tf.keras.utils.to_categorical(test_labels, num_classes)
    val_labels = tf.keras.utils.to_categorical(val_labels, num_classes)


# Balance Dataset

In [13]:
from sklearn.utils.class_weight import compute_class_weight

# Compute class weights ('balanced' gives rarer classes a larger weight)
present_classes = np.unique(train_labels)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_labels
)

# Key each weight by its actual class label rather than by its position in
# the weights array, so the mapping stays correct even if the class labels
# present in the training set are not exactly 0..n-1. Plain int/float keep
# the printed dictionary readable.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(present_classes, class_weights)
}
print(f"Class weights = {class_weight_dict}")

Class weights = {0: 3.2222222222222223, 1: 0.5918367346938775}


# Train the Model

In [14]:
# Model input shape: the shape of one training sample followed by a trailing
# axis of size 1 that represents the channels
sample_shape = train_images[0].shape
input_shape = tuple(sample_shape) + (1,)

# Number of output classes = number of distinct values in `labels`
num_labels = np.unique(labels).size
print(f"Input Shape: {input_shape}  |  Num Labels: {num_labels}")

Input Shape: (64, 10, 11, 1)  |  Num Labels: 2


In [None]:
# Create CNN model: three Conv3D + MaxPooling3D stages, then a dense
# classifier head with dropout and a softmax output of size num_labels
# NOTE(review): the Conv3D layers are created without an activation
# argument — confirm that this is intended.
model = Sequential([
    Conv3D(32, kernel_size=(3, 3, 3), padding='same', input_shape=input_shape),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Conv3D(64, kernel_size=(3, 3, 3), padding='same'),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Conv3D(128, kernel_size=(5, 4, 3), padding='same'),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Flatten(),
    Dense(1024, activation=tf.nn.relu),
    Dropout(0.5),
    Dense(1024, activation=tf.nn.relu),
    Dropout(0.5),
    Dense(1024, activation=tf.nn.relu),
    Dropout(0.5),
    Dense(num_labels, activation=tf.nn.softmax),
])

# Compile model (the sparse loss takes integer class labels, not one-hot vectors)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 100 epochs in batches of 32, validating on the validation split
# and weighting the classes with class_weight_dict
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=32,
    epochs=100,
    class_weight=class_weight_dict,
    validation_data=(val_images, val_labels),
)

In [43]:
# Save the model in .hdf5 format.
# Writes the trained `model` to the path below; the loading cell that follows
# reads this same file.
model.save('/content/test_model.hdf5')



In [44]:
from tensorflow.keras.models import load_model

# Load the model back from the saved file.
# load_model rebuilds the model from the file and returns it. The previous
# code assigned the return value of load_weights, which is None for HDF5
# files, leaving `loaded_model` unusable.
loaded_model = load_model('/content/test_model.hdf5')

# Test the Model

In [None]:
# Score the trained model on the test split; evaluate returns the loss
# followed by the compiled metric (accuracy)
evaluation = model.evaluate(test_images, test_labels)
test_loss, test_accuracy = evaluation
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


In [None]:
# Generate predictions for the test set
predictions = model.predict(test_images)

# Convert predictions to class labels (index of the highest-scoring class)
predicted_labels = np.argmax(predictions, axis=1)

# If test_labels are one-hot encoded, convert them back to class labels
#true_labels = np.argmax(test_labels, axis=1)

# Print some of the predictions: at most 19, and never more than the test
# set contains, so a smaller test set does not raise an IndexError
preview_count = min(19, len(predicted_labels))
for i in range(preview_count):
    print(f"True label: {test_labels[i]}, Predicted label: {predicted_labels[i]}")


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Tick labels: the distinct label values found in the training labels
class_names = np.unique(train_labels)

# Create confusion matrix from the true and predicted test labels
cm = confusion_matrix(y_true=test_labels, y_pred=predicted_labels)

# Plot confusion matrix as an annotated heatmap on an explicit axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Pull the per-epoch loss curves out of the training history
history_dict = history.history
loss_values = history_dict['loss']
# Falls back to an empty list when no validation loss was recorded
val_loss_values = history_dict.get('val_loss', [])

# Draw both curves on one explicit axes
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(loss_values, 'o-', label='Training Loss')
ax.plot(val_loss_values, 'o-', label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Pull the per-epoch accuracy curves out of the training history
accuracy_values = history_dict['accuracy']
# Falls back to an empty list when no validation accuracy was recorded
val_accuracy_values = history_dict.get('val_accuracy', [])

# Draw both curves on one explicit axes
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(accuracy_values, 'o-', label='Training Accuracy')
ax.plot(val_accuracy_values, 'o-', label='Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.legend()
ax.grid(True)
plt.show()


# Zip Dataset

In [21]:
import zipfile
import os

# Paths to the folders you want to zip
folder1 = "/content/train"
folder2 = "/content/test"
folder3 = "/content/val"

# Define the name of the output zip file
output_zip = "/content/dataset.zip"

# Create a zip file containing the three folders
with zipfile.ZipFile(output_zip, 'w') as zipf:
    for folder in [folder1, folder2, folder3]:
        # Archive names are taken relative to this folder's own parent, so
        # each folder appears under its own name in the archive even if the
        # folders do not share a parent directory.
        base_dir = os.path.dirname(os.path.normpath(folder))
        # Walk through each folder and add files to the zip
        for root, dirs, files in os.walk(folder):
            for file in files:
                # Create the full path to the file
                full_path = os.path.join(root, file)
                # Add file to the zip file with an appropriate archive name
                archive_name = os.path.relpath(full_path, base_dir)
                zipf.write(full_path, arcname=archive_name)

print(f"Zipped folders into {output_zip}")


Zipped folders into /content/dataset.zip
