In [39]:
import os
import pandas as pd
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models
import numpy as np

# Define paths
folder_path = 'Example Data-20240208T214429Z-001/Example Data/exported'
labels_path = 'Example Data-20240208T214429Z-001/Example Data/ImageLabels.xlsx'
image_name_column = 'Image Name'  

# Load the labels
labels_df = pd.read_excel(labels_path)

# Prepare the data arrays
images = []
valid_labels = []

# Load and preprocess the images
for index, row in labels_df.iterrows():
    image_name = row[image_name_column]
    image_path = os.path.join(folder_path, image_name)
    if os.path.exists(image_path):
        # Open the image
        with Image.open(image_path) as img:
            # Preprocess the image as required, e.g., resize
            img = img.resize((128, 128))  # Example resizing to 128x128
            img = img.convert('RGB')  # Convert to RGB if needed

            # Convert the image to a numpy array and normalize pixel values
            img_array = np.array(img) / 255.0

            # Append the image and label to the lists
            images.append(img_array)
            valid_labels.append(row['Label'])  # Replace 'Label' with your actual label column name
    else:
        print(f"Image {image_name} not found.")

# Convert images to a numpy array
images_array = np.array(images)

# Encode the valid labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(valid_labels)
one_hot_labels = to_categorical(encoded_labels, num_classes=len(label_encoder.classes_))

# Now we can be sure the images and labels match
assert len(images_array) == len(one_hot_labels), "The number of images does not match the number of labels."

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images_array, one_hot_labels, test_size=0.50, random_state=42)

# Define a simple CNN model
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(y_train.shape[1], activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc}")

# Save the model
model.save('my_model.h5')

Epoch 1/10


  super().__init__(


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 170ms/step - accuracy: 0.0188 - loss: 5.2385 - val_accuracy: 0.0402 - val_loss: 5.1084
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 143ms/step - accuracy: 0.0372 - loss: 5.0334 - val_accuracy: 0.0402 - val_loss: 5.1391
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 144ms/step - accuracy: 0.0696 - loss: 4.8993 - val_accuracy: 0.0352 - val_loss: 5.2069
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 156ms/step - accuracy: 0.0732 - loss: 4.8312 - val_accuracy: 0.0251 - val_loss: 5.4152
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 142ms/step - accuracy: 0.0919 - loss: 4.6113 - val_accuracy: 0.0201 - val_loss: 5.1267
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 141ms/step - accuracy: 0.0760 - loss: 4.5876 - val_accuracy: 0.0151 - val_loss: 5.5724
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0



Test accuracy: 0.0



The .h5 file format is commonly used to store trained models in Keras. It contains the architecture of the model, the weights of the model, the training configuration (loss, optimizer), and the state of the optimizer, allowing to resume training exactly where you left off.

In [31]:
from tensorflow.keras.utils import plot_model

# Plot the model architecture
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

You must install pydot (`pip install pydot`) for `plot_model` to work.


In [38]:
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

additional_epochs = 10


# Load the model
model = load_model('my_model.h5')

# Define early stopping and model checkpointing
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
model_checkpoint = ModelCheckpoint('my_model_best.keras', save_best_only=True)

# Continue training the model
history = model.fit(
    X_train,
    y_train,
    epochs=additional_epochs,  # Define how many additional epochs to train
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, model_checkpoint]
)

# Evaluate the model again
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc}")

# To calculate the F1 score,  to make predictions and compare  to the true labels
from sklearn.metrics import classification_report

# Make predictions
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Calculate F1 score
report = classification_report(y_true, y_pred_classes, output_dict=True)
f1_score = report['weighted avg']['f1-score']
print(f"F1 Score: {f1_score}")



Epoch 1/10


ValueError: Unknown variable: <KerasVariable shape=(3, 3, 3, 32), dtype=float32, path=sequential_3/conv2d_9/kernel>. This optimizer can only be called for the variables it was originally built with. When working with a new set of variables, you should recreate a new optimizer instance.