In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Processing all images

In [2]:
%%time
images_True = []
images_False = []
image_dir = "data/images/traning/20-day/"
dirist = [x for x in os.listdir(image_dir) if not x.startswith(".")]
for folder_name in dirist:
    folder_dir = os.path.join(image_dir, folder_name)
    for file_name in os.listdir(folder_dir):
        file_dir = os.path.join(folder_dir, file_name)
        label = 1 if file_dir.strip().endswith("True.png") else 0
        img = cv2.imread(file_dir, cv2.IMREAD_GRAYSCALE)
        if label == 1:
            images_True.append(img)
        else:
            images_False.append(img)
images_True = np.array(images_True)
images_True = images_True / 255.0

images_False = np.array(images_False)
images_False = images_False / 255.0

print(f"TRUE: {len(images_True)} images with shape {images_True[0].shape}")
print(f"FALSE: {len(images_False)} images with shape {images_False[0].shape}")

TRUE: 32091 images with shape (125, 60)
FALSE: 22101 images with shape (125, 60)
CPU times: user 1.57 s, sys: 1.8 s, total: 3.37 s
Wall time: 4.79 s


In [3]:
import sys

def calculate_memory_usage(image_list):
    total_bytes = sys.getsizeof(image_list)  # Get the size of the list itself
    for img in image_list:
        total_bytes += sys.getsizeof(img)  # Add the size of each image array
    
    total_gb = total_bytes / (1024 ** 3)  # Convert bytes to gigabytes
    print(f"The total memory usage of the image list is approximately {total_gb:.3f} GB.")

# Example usage
calculate_memory_usage(images_True)
calculate_memory_usage(images_False)

The total memory usage of the image list is approximately 1.797 GB.
The total memory usage of the image list is approximately 1.238 GB.


In [4]:
import numpy as np
from sklearn.model_selection import train_test_split

# Step 1: Assign labels
true_labels = np.ones(len(images_True))  # Label 1 for True images
false_labels = np.zeros(len(images_False))  # Label 0 for False images

# Step 2: Combine images and labels
images = np.concatenate([images_True, images_False], axis=0)
labels = np.concatenate([true_labels, false_labels], axis=0)

# Step 3: Shuffle and split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.3, random_state=42, stratify=labels
)

# Output the results
print(f"Training set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")
print(f"Image shape: {X_train[0].shape}")

Training set: 37934 samples
Test set: 16258 samples
Image shape: (125, 60)


In [27]:
y_train.mean()

0.592160067485633

In [29]:
y_test.mean()

0.5922007627014393

# Build and train model

In [8]:
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl


def build_model(
    input_shape=(125, 60, 1),
    output_shape=1,
    learning_rate=0.1,
    seed=42
):
    tf.random.set_seed(seed)

    # Build the neural network layer by layer
    inputs = tfkl.Input(shape=input_shape, name='Input')

    x = tfkl.Conv2D(filters=64, kernel_size=3, padding='same', name='conv1')(inputs)
    x = tfkl.Activation('relu', name='act1')(x)
    x = tfkl.MaxPooling2D(pool_size=2, name='mp1')(x)

    x = tfkl.Conv2D(filters=128, kernel_size=3, padding='same', name='conv2')(x)
    x = tfkl.Activation('relu', name='act2')(x)
    x = tfkl.MaxPooling2D(pool_size=2, name='mp2')(x)
    
    x = tfkl.Conv2D(filters=256, kernel_size=3, padding='same', name='conv3')(x)
    x = tfkl.Activation('relu', name='act3')(x)
    x = tfkl.MaxPooling2D(pool_size=2, name='mp3')(x)


    x = tfkl.Flatten(name='flatten')(x)

    x = tfkl.Dense(units=output_shape, name='dense')(x)
    outputs = tfkl.Activation('softmax', name='softmax')(x)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=inputs, outputs=outputs, name='CNN')

    # Compile the model
    loss = tfk.losses.BinaryCrossentropy()
    optimizer = tfk.optimizers.legacy.Adam(learning_rate)
    metrics = ['accuracy']
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    # Return the model
    return model

In [9]:
# Build the model
model = build_model()

# Summary of the model
model.summary()

Model: "CNN"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input (InputLayer)          [(None, 125, 60, 1)]      0         
                                                                 
 conv1 (Conv2D)              (None, 125, 60, 64)       640       
                                                                 
 act1 (Activation)           (None, 125, 60, 64)       0         
                                                                 
 mp1 (MaxPooling2D)          (None, 62, 30, 64)        0         
                                                                 
 conv2 (Conv2D)              (None, 62, 30, 128)       73856     
                                                                 
 act2 (Activation)           (None, 62, 30, 128)       0         
                                                                 
 mp2 (MaxPooling2D)          (None, 31, 15, 128)       0       

In [13]:
# Create an EarlyStopping callback
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=3,
    restore_best_weights=True
)

# Store the callback in a list
callbacks = [early_stopping]

# Train the model with early stopping callback
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=50,
    validation_split=0.2,
    callbacks=callbacks
).history

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50


In [14]:
# Calculate and print the final validation accuracy
final_val_accuracy = round(max(history['val_accuracy'])* 100, 2)
print(f'Final validation accuracy: {final_val_accuracy}%')

# Save the trained model to a file with the accuracy included in the filename
model_filename = 'models/20-day_'+str(final_val_accuracy)+'.keras'
model.save(model_filename)

# Delete the model to free up resources
del model

Final validation accuracy: 60.0%


# Checking Test accuracy

In [21]:
# Load the saved model
model = tfk.models.load_model('models/20-day_60.0.keras')

In [22]:
# Predict labels for the entire test set
test_predictions = model.predict(X_test, verbose=0)

# Display the shape of the predictions
print("Predictions Shape:", predictions.shape)

Predictions Shape: (16258, 1)


In [23]:
print("y_test Shape:", y_test.shape)

y_test Shape: (16258,)


In [25]:
test_predictions.mean()

1.0

# Out of Sample

In [15]:
%%time
images_True = []
images_False = []
image_dir = "data/images/out_of_sample/20-day/"
dirist = [x for x in os.listdir(image_dir) if not x.startswith(".")]
for folder_name in dirist:
    folder_dir = os.path.join(image_dir, folder_name)
    for file_name in os.listdir(folder_dir):
        file_dir = os.path.join(folder_dir, file_name)
        label = 1 if file_dir.strip().endswith("True.png") else 0
        img = cv2.imread(file_dir, cv2.IMREAD_GRAYSCALE)
        if label == 1:
            images_True.append(img)
        else:
            images_False.append(img)
images_True = np.array(images_True)
images_True = images_True / 255.0

images_False = np.array(images_False)
images_False = images_False / 255.0

print(f"TRUE: {len(images_True)} images with shape {images_True[0].shape}")
print(f"FALSE: {len(images_False)} images with shape {images_False[0].shape}")

TRUE: 11529 images with shape (125, 60)
FALSE: 9987 images with shape (125, 60)
CPU times: user 645 ms, sys: 829 ms, total: 1.47 s
Wall time: 2.04 s


In [16]:
# Step 1: Assign labels
true_labels = np.ones(len(images_True))  # Label 1 for True images
false_labels = np.zeros(len(images_False))  # Label 0 for False images

# Step 2: Combine images and labels
test_images = np.concatenate([images_True, images_False], axis=0)
test_labels = np.concatenate([true_labels, false_labels], axis=0)

In [19]:
print("Test_labels Shape:", test_labels.shape)

Test_labels Shape: (21516,)
