In [1]:
import os
import sys
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Processing all images

In [2]:
%%time
images_True = []
images_False = []
image_dir = "data/images_small/traning/5-day/"
dirist = [x for x in os.listdir(image_dir) if not x.startswith(".")]
for folder_name in dirist:
    folder_dir = os.path.join(image_dir, folder_name)
    for file_name in os.listdir(folder_dir):
        file_dir = os.path.join(folder_dir, file_name)
        label = 1 if file_dir.strip().endswith("True.png") else 0
        img = cv2.imread(file_dir, cv2.IMREAD_GRAYSCALE)
        if label == 1:
            images_True.append(img)
        else:
            images_False.append(img)
images_True = np.array(images_True)
#images_True = images_True / 255.0 # --> Apparently Kelly does not scale betwen 0 and 1

images_False = np.array(images_False)
#images_False = images_False / 255.0

print(f"TRUE: {len(images_True)} images with shape {images_True[0].shape}")
print(f"FALSE: {len(images_False)} images with shape {images_False[0].shape}")

TRUE: 107440 images with shape (32, 15)
FALSE: 85738 images with shape (32, 15)
CPU times: user 2.23 s, sys: 6.4 s, total: 8.63 s
Wall time: 15.3 s


In [3]:
def calculate_memory_usage(image_list):
    total_bytes = sys.getsizeof(image_list)  # Get the size of the list itself
    for img in image_list:
        total_bytes += sys.getsizeof(img)  # Add the size of each image array
    
    total_gb = total_bytes / (1024 ** 3)  # Convert bytes to gigabytes
    print(f"The total memory usage of the image list is approximately {total_gb:.3f} GB.")

calculate_memory_usage(images_True)
calculate_memory_usage(images_False)

The total memory usage of the image list is approximately 0.061 GB.
The total memory usage of the image list is approximately 0.049 GB.


In [4]:
from sklearn.model_selection import train_test_split

# Step 1: Assign labels
true_labels = np.ones(len(images_True))  # Label 1 for True images
false_labels = np.zeros(len(images_False))  # Label 0 for False images

# Step 1.5: Balance the dataset
# As we have 21706 more true images than false we will remove these 
true_labels = true_labels[21706:]
images_True = images_True[21706:]

# Step 2: Combine images and labels
images = np.concatenate([images_True, images_False], axis=0)
labels = np.concatenate([true_labels, false_labels], axis=0)

# Step 3: Shuffle and split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.3, random_state=42, stratify=labels
)

# Output the results
print(f"Training set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")
print(f"Image shape: {X_train[0].shape}")

Training set: 120030 samples
Test set: 51442 samples
Image shape: (32, 15)


### Calculate some metris to make sure that the traning data is balances with respect to the labels

In [5]:
len(true_labels)

85734

In [6]:
len(false_labels)

85738

In [7]:
y_train.mean()

0.4999916687494793

# Building the model

In [8]:
from tensorflow.keras import models, layers
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.initializers import GlorotUniform  # Xavier initializer

def build_cnn_model(input_shape=(32, 15)):
    model = models.Sequential([
        # First Convolutional Block
        layers.Conv2D(64, (5, 3), padding='same', kernel_initializer=GlorotUniform(), input_shape=(*input_shape, 1)),
        layers.BatchNormalization(),  # Batch normalization
        layers.LeakyReLU(alpha=0.1),  # Leaky ReLU with alpha=0.1
        layers.MaxPooling2D((2, 1)),  # Pooling with size (2, 1)
        
        # Second Convolutional Block
        layers.Conv2D(128, (5, 3), padding='same', kernel_initializer=GlorotUniform()),
        layers.BatchNormalization(),  # Batch normalization
        layers.LeakyReLU(alpha=0.1),  # Leaky ReLU with alpha=0.1
        layers.MaxPooling2D((2, 1)),  # Pooling with size (2, 1)
        
        # Flatten and Fully Connected Layers
        layers.Flatten(),
        layers.Dense(1, kernel_initializer=GlorotUniform()),  # Fully connected layer
        layers.Dropout(0.5),  # 50% dropout
        layers.Activation('sigmoid')  # Binary classification (True/False)
    ])
    
    # Use Adam optimizer with a learning rate of 0.1
    optimizer = Adam(learning_rate=0.1)
    
    # Compile the model
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model


In [9]:
# Build the model
model = build_cnn_model()

# Summary of the model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 15, 64)        1024      
                                                                 
 batch_normalization (Batch  (None, 32, 15, 64)        256       
 Normalization)                                                  
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 32, 15, 64)        0         
                                                                 
 max_pooling2d (MaxPooling2  (None, 16, 15, 64)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 15, 128)       123008    
                                                                 
 batch_normalization_1 (Bat  (None, 16, 15, 128)       5

# Traning the model

In [None]:
# Create an EarlyStopping callback
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=2,
    restore_best_weights=True
)

# Store the callback in a list
callbacks = [early_stopping]

# Train the model with early stopping callback
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=50,
    validation_split=0.3,
    callbacks=callbacks
).history

In [None]:
model.save("my_model.h5")  # Save the model to a file

In [None]:
from tensorflow.keras.models import load_model

loaded_model = load_model("my_model.h5")  # Load the model from the file