# Classification using Deep Learning with Histogram data

---

### Reading the data

First, we'll load the saved image and label data from the NumPy files.

In [None]:
import numpy as np

# Google Drive folder that holds the exported .npy arrays
base_path = '/content/drive/MyDrive/cubsat'

# Read the image and label arrays of the original train/validation splits
train_images, train_labels, val_images, val_labels = (
    np.load(f'{base_path}/{stem}.npy')
    for stem in ('train_images', 'train_labels', 'val_images', 'val_labels')
)

# Sanity check: report the shape of every array that was just loaded
print("\n".join(
    f"{array_name} shape: {array.shape}"
    for array_name, array in (
        ("train_images", train_images),
        ("train_labels", train_labels),
        ("val_images", val_images),
        ("val_labels", val_labels),
    )
))


In [None]:
import numpy as np

# Overview of the image arrays loaded in the previous cell
print("=== Image Characteristics ===")
for split_name, split_images in (("training", train_images), ("validation", val_images)):
    print(f"Shape of {split_name} set: {split_images.shape} (Samples, Height, Width, Channels)")

# Axes 1 and 2 of the image array are read as (height, width)
image_resolution = train_images.shape[1:3]
print(f"Image resolution: {image_resolution} pixels")

# The leading axis indexes the samples
for split_name, split_images in (("training", train_images), ("validation", val_images)):
    print(f"Number of images in {split_name} set: {split_images.shape[0]}")

# Storage type and value range of the training pixels
print(f"Data type of images: {train_images.dtype}")
lowest_value, highest_value = train_images.min(), train_images.max()
print(f"Minimum and maximum values in training dataset: {lowest_value} to {highest_value}")

# Samples per class in the training set
unique_labels_train, counts_train = np.unique(train_labels, return_counts=True)
print("\n=== Class Distribution in Training Set ===")
for position, label in enumerate(unique_labels_train):
    count = counts_train[position]
    print(f"Class {label}: {count} samples")

# Samples per class in the validation set
unique_labels_val, counts_val = np.unique(val_labels, return_counts=True)
print("\n=== Class Distribution in Validation Set ===")
for position, label in enumerate(unique_labels_val):
    count = counts_val[position]
    print(f"Class {label}: {count} samples")

## Combining training and validation sets

In [None]:
import numpy as np
import os

# Folder that caches the merged (train + validation) arrays
cached_data_dir = '/content/drive/MyDrive/cubsat'
os.makedirs(cached_data_dir, exist_ok=True)

# Cache file locations
all_images_path = os.path.join(cached_data_dir, 'all_images.npy')
all_labels_path = os.path.join(cached_data_dir, 'all_labels.npy')

# The cache is only usable when BOTH files are present
cache_is_complete = all(
    os.path.exists(cache_file) for cache_file in (all_images_path, all_labels_path)
)

if not cache_is_complete:
    print("Combined files do not exist. Concatenating data...")
    # Stack the validation samples after the training samples
    all_images = np.concatenate([train_images, val_images], axis=0)
    all_labels = np.concatenate([train_labels, val_labels], axis=0)

    # Persist the merged arrays so later runs can skip this step
    np.save(all_images_path, all_images)
    np.save(all_labels_path, all_labels)
    print("Data concatenated and saved.")
else:
    print("Combined files already exist. Loading from disk...")
    all_images = np.load(all_images_path)
    all_labels = np.load(all_labels_path)

# Summary of the merged image array
print("=== Image Characteristics ===")
print(f"Shape of combined dataset: {all_images.shape} (Samples, Height, Width, Channels)")
print(f"Image resolution: {all_images.shape[1:3]} pixels")
print(f"Total number of images: {all_images.shape[0]}")
print(f"Data type of images: {all_images.dtype}")
print(f"Minimum and maximum values in combined dataset: {all_images.min()} to {all_images.max()}")

# Samples per class in the merged dataset
unique_labels, counts = np.unique(all_labels, return_counts=True)
print("\n=== Class Distribution in Combined Dataset ===")
for position, label in enumerate(unique_labels):
    print(f"Class {label}: {counts[position]} samples")

print(f"\nData saved in: {cached_data_dir}")


In [None]:
import numpy as np

# Location of the merged image array written by the previous cell
all_images_path = '/content/drive/MyDrive/cubsat/all_images.npy'

# Reload it from disk to confirm the saved file is readable
all_images = np.load(all_images_path)

# Report its dimensions and storage type
print(f"Shape: {all_images.shape}")
print(f"Dtype: {all_images.dtype}")


PLOTTING RANDOM SAMPLES FROM EACH CLASS

In [None]:
import numpy as np
import plotly.express as px
import random

# Locations of the merged arrays
all_images_path = '/content/drive/MyDrive/cubsat/all_images.npy'
all_labels_path = '/content/drive/MyDrive/cubsat/all_labels.npy'

# Read images and labels
all_images = np.load(all_images_path)
all_labels = np.load(all_labels_path)

# Distinct class labels, in sorted order
unique_classes = np.unique(all_labels)

# One figure per class, each showing a randomly drawn member of that class
for cls in unique_classes:
    # Positions of every sample carrying this label
    members = np.nonzero(all_labels == cls)[0]
    # Draw one of them at random (unseeded, so it changes between runs)
    picked = random.choice(members)

    # Cast to uint8 before handing the pixels to Plotly
    fig = px.imshow(all_images[picked].astype(np.uint8)).update_layout(title=f"Class {cls}")
    fig.show()


## APPLYING DATA AUGMENTATION FOR CLASS BALANCING

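Random oversampling duplicates minority-class images until every class has as many samples as the largest class. After balancing, we show a random image from each class of the balanced dataset: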

In [None]:
import numpy as np
import os
from imblearn.over_sampling import RandomOverSampler
from sklearn.utils import shuffle

# Folder with the merged (unbalanced) arrays
base_dir = '/content/drive/MyDrive/cubsat'
all_images_path = os.path.join(base_dir, 'all_images.npy')
all_labels_path = os.path.join(base_dir, 'all_labels.npy')

# Sub-folder that receives the balanced arrays
ros_dir = os.path.join(base_dir, "RandomOversampling")
os.makedirs(ros_dir, exist_ok=True)

# Cache files for the balanced arrays
ros_images_path = os.path.join(ros_dir, "all_images_ros.npy")
ros_labels_path = os.path.join(ros_dir, "all_labels_ros.npy")

# The unbalanced data is read even when the cache below is used
all_images = np.load(all_images_path)
all_labels = np.load(all_labels_path)

# Recompute only when at least one of the cached files is missing
balanced_cache_ready = all(
    os.path.exists(cache_file) for cache_file in (ros_images_path, ros_labels_path)
)

if not balanced_cache_ready:
    print("Applying Random Oversampling...")

    # The sampler receives one flat feature row per image
    n_images, img_height, img_width, img_channels = all_images.shape
    flat_images = all_images.reshape(n_images, -1)

    # Resample with a fixed seed
    sampler = RandomOverSampler(sampling_strategy='auto', random_state=42)
    flat_resampled, y_resampled = sampler.fit_resample(flat_images, all_labels)

    # Back to (samples, height, width, channels)
    X_resampled = flat_resampled.reshape(-1, img_height, img_width, img_channels)

    # Shuffle images and labels together, again with a fixed seed
    X_resampled, y_resampled = shuffle(X_resampled, y_resampled, random_state=42)

    # Persist the balanced arrays
    np.save(ros_images_path, X_resampled)
    np.save(ros_labels_path, y_resampled)
    print("Random Oversampling applied and saved.")
else:
    print("Random Oversampled files already exist. Loading from disk...")
    X_resampled = np.load(ros_images_path)
    y_resampled = np.load(ros_labels_path)

# Samples per class after balancing
unique_labels, counts = np.unique(y_resampled, return_counts=True)
print("\n=== Class Distribution after Random Oversampling ===")
for position, label in enumerate(unique_labels):
    print(f"Class {label}: {counts[position]} samples")

print(f"\nBalanced data saved in: {ros_dir}")


In [None]:
import numpy as np
import os
import plotly.express as px
import random

# Locations of the balanced arrays
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'
ros_images_path = os.path.join(ros_dir, "all_images_ros.npy")
ros_labels_path = os.path.join(ros_dir, "all_labels_ros.npy")

# Read the balanced images and labels
images = np.load(ros_images_path)
labels = np.load(ros_labels_path)

# Distinct class labels, in sorted order
unique_classes = np.unique(labels)

# Plotly helpers for a one-row grid of images
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# One panel per class, titled with the class label
n_classes = len(unique_classes)
panel_titles = [f"Class {cls}" for cls in unique_classes]
fig = make_subplots(rows=1, cols=n_classes, subplot_titles=panel_titles)

# Fill each panel with a randomly drawn image of its class (subplot columns are 1-based)
for column, cls in enumerate(unique_classes, start=1):
    members = np.nonzero(labels == cls)[0]
    picked = random.choice(members)
    fig.add_trace(go.Image(z=images[picked].astype(np.uint8)), row=1, col=column)

# Figure width grows with the number of classes
fig.update_layout(
    height=300,
    width=200 * n_classes,
    title_text="Random Example from Each Class (Balanced Dataset)",
)
fig.show()


## Splitting the balanced set into 80% for training and 20% for validation

In [None]:
import numpy as np
import os
import random

# Locations of the balanced (randomly oversampled) arrays
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'
ros_images_path = os.path.join(ros_dir, "all_images_ros.npy")
ros_labels_path = os.path.join(ros_dir, "all_labels_ros.npy")

# Load data
images = np.load(ros_images_path)
labels = np.load(ros_labels_path)

# Identify unique classes
unique_classes = np.unique(labels)

# Per-class pieces of each split; concatenated after the loop
train_images, train_labels = [], []
val_images, val_labels = [], []

# Seeded generator so the split is reproducible
rng = np.random.default_rng(42)

# NOTE(review): the balanced set contains exact duplicates created by random
# oversampling, so copies of one image can land in both the training and the
# validation split. Validation metrics are therefore optimistic; splitting
# before oversampling would avoid this.

# Split every class 80/20 so both splits keep the same class proportions
for cls in unique_classes:
    # rng.permutation returns a shuffled copy of this class's indices.
    # Shuffling `indices.tolist()` in place would only reorder a temporary
    # list and leave `indices` itself in sorted order.
    indices = rng.permutation(np.where(labels == cls)[0])

    split_idx = int(0.8 * len(indices))
    train_idx = indices[:split_idx]
    val_idx = indices[split_idx:]

    train_images.append(images[train_idx])
    train_labels.append(labels[train_idx])
    val_images.append(images[val_idx])
    val_labels.append(labels[val_idx])

# Concatenate the per-class pieces into single arrays
train_images = np.concatenate(train_images, axis=0)
train_labels = np.concatenate(train_labels, axis=0)
val_images = np.concatenate(val_images, axis=0)
val_labels = np.concatenate(val_labels, axis=0)

# Save files
np.save(os.path.join(ros_dir, "train_images_ros.npy"), train_images)
np.save(os.path.join(ros_dir, "train_labels_ros.npy"), train_labels)
np.save(os.path.join(ros_dir, "val_images_ros.npy"), val_images)
np.save(os.path.join(ros_dir, "val_labels_ros.npy"), val_labels)

print("Sets saved successfully!")


GENERATING RANDOM IMAGES OF EACH CLASS PER SET

In [None]:
import numpy as np
import os
import random
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Read the training split of the balanced dataset
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'
train_images = np.load(os.path.join(ros_dir, "train_images_ros.npy"))
train_labels = np.load(os.path.join(ros_dir, "train_labels_ros.npy"))

# Distinct class labels and how many there are
unique_classes = np.unique(train_labels)
n_classes = len(unique_classes)

# One panel per class, titled with the class label
panel_titles = [f"Class {cls}" for cls in unique_classes]
fig = make_subplots(rows=1, cols=n_classes, subplot_titles=panel_titles)

# Fill each panel with a randomly drawn training image of its class (columns are 1-based)
for column, cls in enumerate(unique_classes, start=1):
    members = np.nonzero(train_labels == cls)[0]
    picked = random.choice(members)
    fig.add_trace(go.Image(z=train_images[picked].astype(np.uint8)), row=1, col=column)

fig.update_layout(
    height=300,
    width=200 * n_classes,
    title_text="Training: Random image per class",
)
fig.show()


In [None]:
import numpy as np
import os
import random
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Read the validation split of the balanced dataset
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'
val_images = np.load(os.path.join(ros_dir, "val_images_ros.npy"))
val_labels = np.load(os.path.join(ros_dir, "val_labels_ros.npy"))

# Distinct class labels and how many there are
unique_classes = np.unique(val_labels)
n_classes = len(unique_classes)

# One panel per class, titled with the class label
panel_titles = [f"Class {cls}" for cls in unique_classes]
fig = make_subplots(rows=1, cols=n_classes, subplot_titles=panel_titles)

# Fill each panel with a randomly drawn validation image of its class (columns are 1-based)
for column, cls in enumerate(unique_classes, start=1):
    members = np.nonzero(val_labels == cls)[0]
    picked = random.choice(members)
    fig.add_trace(go.Image(z=val_images[picked].astype(np.uint8)), row=1, col=column)

fig.update_layout(
    height=300,
    width=200 * n_classes,
    title_text="Validation: Random image per class",
)
fig.show()


In [None]:
import numpy as np
import os
import collections

# Folder with the saved splits
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'

# Only the label arrays are needed to count classes
train_labels = np.load(os.path.join(ros_dir, "train_labels_ros.npy"))
val_labels = np.load(os.path.join(ros_dir, "val_labels_ros.npy"))

# Tally how often each label occurs in each split
train_counts = collections.Counter(train_labels)
val_counts = collections.Counter(val_labels)

# Report the tallies in ascending label order
print("Class distribution in TRAINING set:")
for cls in sorted(train_counts):
    print(f"Class {cls}: {train_counts[cls]} images")

print("\nClass distribution in VALIDATION set:")
for cls in sorted(val_counts):
    print(f"Class {cls}: {val_counts[cls]} images")


In [None]:
import numpy as np
import os

# Folder with the saved splits, and the cache file for the histogram bin edges
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'
bin_edges_path = os.path.join(ros_dir, 'bin_edges_ros.npy')

# Read both splits (the edges themselves are derived from the training images only)
train_images = np.load(os.path.join(ros_dir, 'train_images_ros.npy'))
train_labels = np.load(os.path.join(ros_dir, 'train_labels_ros.npy'))
val_images = np.load(os.path.join(ros_dir, 'val_images_ros.npy'))
val_labels = np.load(os.path.join(ros_dir, 'val_labels_ros.npy'))

# Compute the edges only when no cached copy exists
if not os.path.exists(bin_edges_path):
    print("bin_edges file not found. Calculating quantiles...")

    # 11 evenly spaced quantile levels from 0 to 1 -> 10 decile intervals
    quantile_levels = np.linspace(0, 1, num=11)

    # One edge array per colour channel (0, 1, 2), from all training pixels of that channel
    bin_edges = [
        np.quantile(train_images[:, :, :, channel].ravel(), quantile_levels)
        for channel in range(3)
    ]

    # Cache the edges for later cells
    np.save(bin_edges_path, bin_edges)
    print("Quantiles calculated and bin_edges saved.")
else:
    print("bin_edges file already exists. Loading from disk...")
    bin_edges = np.load(bin_edges_path, allow_pickle=True)

# Show the edges of every channel
print("\n=== Bin Edges per Channel ===")
for channel in range(len(bin_edges)):
    edges = bin_edges[channel]
    print(f"Channel {channel} (R={channel==0}, G={channel==1}, B={channel==2}): {edges}")


In [None]:
import numpy as np
import os
import gc

# Folder with the saved splits
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'

# Locations of the split arrays and of the cached bin edges
train_images_path = os.path.join(ros_dir, 'train_images_ros.npy')
train_labels_path = os.path.join(ros_dir, 'train_labels_ros.npy')
val_images_path = os.path.join(ros_dir, 'val_images_ros.npy')
val_labels_path = os.path.join(ros_dir, 'val_labels_ros.npy')
bin_edges_path = os.path.join(ros_dir, 'bin_edges_ros.npy')

# Read both splits
print("Loading training and validation data...")
train_images, train_labels = np.load(train_images_path), np.load(train_labels_path)
val_images, val_labels = np.load(val_images_path), np.load(val_labels_path)

# Reuse cached bin edges when available; otherwise derive them from the training pixels
if not os.path.exists(bin_edges_path):
    print("Calculating bin_edges (quantiles)...")
    quantile_levels = np.linspace(0, 1, num=11)  # 11 levels -> decile boundaries
    # One edge array per colour channel (0, 1, 2)
    bin_edges = [
        np.quantile(train_images[:, :, :, channel].ravel(), quantile_levels)
        for channel in range(3)
    ]
    np.save(bin_edges_path, bin_edges)
    print("bin_edges saved to disk.")
else:
    print("bin_edges_ros.npy already exists. Loading...")
    bin_edges = np.load(bin_edges_path, allow_pickle=True)

# Show the edges of every channel (informational only)
print("\n=== Bin Edges per Channel ===")
for ch in range(len(bin_edges)):
    print(f"Channel {ch}: {bin_edges[ch]}")

# Turn one image into a feature vector of per-channel bin proportions
def image_to_histogram(image, bin_edges):
    """Return the concatenated, normalized histograms of the first three channels.

    Args:
        image: array indexed as image[row, col, channel]; channels 0, 1 and 2 are used.
        bin_edges: indexable by channel; bin_edges[c] is the edge array passed to
            np.histogram for channel c.

    Returns:
        1-D float array holding, for channel 0, then 1, then 2, the fraction of
        that channel's pixels counted in each bin.
    """
    per_channel = []
    for channel in range(3):
        values = image[:, :, channel].ravel()
        counts = np.histogram(values, bins=bin_edges[channel])[0]
        # Divide by the pixel count so the entries are proportions, not raw counts
        per_channel.extend(counts / len(values))
    return np.array(per_channel)

# Training images -> one histogram feature row per image
print("\nConverting training images...")
train_histograms = np.array(
    [image_to_histogram(train_image, bin_edges) for train_image in train_images]
)
print("Training histogram created.")

# The raw training images are no longer needed; release them
del train_images
gc.collect()
print("train_images removed from memory.")

# Validation images -> histogram feature rows, using the same bin edges
print("\nConverting validation images...")
val_histograms = np.array(
    [image_to_histogram(val_image, bin_edges) for val_image in val_images]
)
print("Validation histogram created.")

# Release the raw validation images as well
del val_images
gc.collect()
print("val_images removed from memory.")

# Persist both feature matrices next to the splits
print("\nSaving histograms to disk...")
for file_name, feature_matrix in (
    ("train_histograms_ros.npy", train_histograms),
    ("val_histograms_ros.npy", val_histograms),
):
    np.save(os.path.join(ros_dir, file_name), feature_matrix)
print("Histograms saved successfully.")


---

### Train CubeSatNet DNN model

We will define and train a Dense Neural Network (DNN) model.

Baseline model: its training history is kept in `history` for the plots in the following cells.

In [None]:
import os  # needed for os.path.join below; previously relied on an earlier cell's import

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

# Base path
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'

# Load histograms and labels
train_histograms = np.load(os.path.join(ros_dir, "train_histograms_ros.npy"))
val_histograms = np.load(os.path.join(ros_dir, "val_histograms_ros.npy"))
train_labels = np.load(os.path.join(ros_dir, "train_labels_ros.npy"))
val_labels = np.load(os.path.join(ros_dir, "val_labels_ros.npy"))

# Number of classes, taken from the distinct training labels
n_classes = len(np.unique(train_labels))

# One-hot encode labels
# NOTE(review): to_categorical expects integer labels in 0..n_classes-1 — confirm the label files satisfy this
train_labels_cat = to_categorical(train_labels, num_classes=n_classes)
val_labels_cat = to_categorical(val_labels, num_classes=n_classes)

# Define model: two hidden ReLU layers, softmax over the classes;
# the input width is the number of histogram features per image
model = Sequential([
    Dense(128, activation='relu', input_shape=(train_histograms.shape[1],)),
    Dense(64, activation='relu'),
    Dense(n_classes, activation='softmax')
])

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Training with early stopping: stop after 3 epochs without a val_accuracy
# improvement and restore the best weights
history = model.fit(
    train_histograms, train_labels_cat,
    validation_data=(val_histograms, val_labels_cat),
    epochs=30,
    batch_size=32,
    callbacks=[EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True)]
)

# Evaluate model on the validation split
loss, acc = model.evaluate(val_histograms, val_labels_cat, verbose=0)
print(f"\nFinal validation accuracy: {acc:.4f}")


In [None]:
import plotly.graph_objects as go

# === Accuracy Plot ===
# Train and validation accuracy per epoch, y-axis pinned to [0, 1]
fig_acc = go.Figure(data=[
    go.Scatter(y=history.history['accuracy'], mode='lines+markers', name='Train'),
    go.Scatter(y=history.history['val_accuracy'], mode='lines+markers', name='Validation'),
])
accuracy_layout = dict(
    title="Accuracy Evolution",
    xaxis_title="Epoch",
    yaxis_title="Accuracy",
    yaxis=dict(range=[0, 1]),
    template="plotly_white",
)
fig_acc.update_layout(**accuracy_layout)
fig_acc.show()

# === Loss Plot ===
# Train and validation loss per epoch; the y-axis is also pinned to [0, 1],
# so loss values above 1 fall outside the visible range
fig_loss = go.Figure(data=[
    go.Scatter(y=history.history['loss'], mode='lines+markers', name='Train'),
    go.Scatter(y=history.history['val_loss'], mode='lines+markers', name='Validation'),
])
loss_layout = dict(
    title="Loss Evolution",
    xaxis_title="Epoch",
    yaxis_title="Loss",
    yaxis=dict(range=[0, 1]),
    template="plotly_white",
)
fig_loss.update_layout(**loss_layout)
fig_loss.show()


In [None]:
import plotly.graph_objects as go

# === Accuracy Plot ===
# Same curves as the previous cell, but the y-axis range is left to Plotly
fig_acc = go.Figure(data=[
    go.Scatter(y=history.history[metric_key], mode='lines+markers', name=legend_name)
    for metric_key, legend_name in (('accuracy', 'Train'), ('val_accuracy', 'Validation'))
])
fig_acc.update_layout(
    title="Accuracy Evolution",
    xaxis_title="Epoch",
    yaxis_title="Accuracy",
    template="plotly_white",
)
fig_acc.show()

# === Loss Plot ===
# Train and validation loss per epoch, automatic y-axis range
fig_loss = go.Figure(data=[
    go.Scatter(y=history.history[metric_key], mode='lines+markers', name=legend_name)
    for metric_key, legend_name in (('loss', 'Train'), ('val_loss', 'Validation'))
])
fig_loss.update_layout(
    title="Loss Evolution",
    xaxis_title="Epoch",
    yaxis_title="Loss",
    template="plotly_white",
)
fig_loss.show()


Optimizing the model: hyperparameter search with Keras Tuner

In [None]:
import numpy as np
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from kerastuner.tuners import RandomSearch
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tensorflow.keras.optimizers import Adam, SGD, RMSprop

# Base path: folder the histogram features and labels are loaded from;
# the tuned model is also saved here at the end of this cell
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'

# 1. Function to load data
def load_data(ros_dir):
    """Load the histogram features and labels of both splits from `ros_dir`.

    Args:
        ros_dir: folder containing train_histograms_ros.npy, val_histograms_ros.npy,
            train_labels_ros.npy and val_labels_ros.npy.

    Returns:
        Tuple (train_X, val_X, train_y, val_y) of the arrays read from those files.
    """
    file_names = (
        "train_histograms_ros.npy",
        "val_histograms_ros.npy",
        "train_labels_ros.npy",
        "val_labels_ros.npy",
    )
    train_X, val_X, train_y, val_y = (
        np.load(os.path.join(ros_dir, file_name)) for file_name in file_names
    )
    return train_X, val_X, train_y, val_y

# 2. Model creation function with hyperparameter search
def build_model_hp(hp):
    """Build and compile one candidate dense classifier from tuner hyperparameters.

    The layer widths, activations, dropout rates, learning rate and optimizer
    are all drawn from `hp`. The function reads the module-level names
    `train_X` (for the input width) and `n_classes` (for the output width),
    so both must be defined before the tuner calls it.

    Args:
        hp: hyperparameter object supplied by the tuner (provides Int, Float, Choice).

    Returns:
        The compiled Sequential model.
    """
    # Hyperparameters are requested in the same order as the layers are listed
    network = Sequential([
        Dense(
            units=hp.Int('units_hidden1', min_value=64, max_value=256, step=32),
            activation=hp.Choice('activation1', ['relu', 'tanh']),
            input_shape=(train_X.shape[1],),
        ),
        # A dropout rate of 0.0 is part of the search range, i.e. dropout is optional
        Dropout(hp.Float('dropout1', min_value=0.0, max_value=0.5, step=0.1)),
        Dense(
            units=hp.Int('units_hidden2', min_value=32, max_value=128, step=32),
            activation=hp.Choice('activation2', ['relu', 'tanh']),
        ),
        Dropout(hp.Float('dropout2', min_value=0.0, max_value=0.5, step=0.1)),
        Dense(n_classes, activation='softmax'),
    ])

    # Learning rate is sampled on a log scale; any name other than
    # 'adam' or 'sgd' falls through to RMSprop
    step_size = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
    chosen_name = hp.Choice('optimizer', ['adam', 'sgd', 'rmsprop'])
    optimizer_class = {'adam': Adam, 'sgd': SGD}.get(chosen_name, RMSprop)

    network.compile(
        optimizer=optimizer_class(learning_rate=step_size),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# 3. Plot with Plotly
def plot_history_plotly(history):
    """Show accuracy (left panel) and loss (right panel) curves of a training run.

    Args:
        history: object whose `history` attribute maps 'accuracy', 'val_accuracy',
            'loss' and 'val_loss' to per-epoch values (as returned by `fit`).
    """
    fig = make_subplots(rows=1, cols=2, subplot_titles=("Accuracy", "Loss"))

    # (history key, legend label, subplot column) — added in this order
    curve_specs = (
        ('accuracy', 'Train Accuracy', 1),
        ('val_accuracy', 'Validation Accuracy', 1),
        ('loss', 'Train Loss', 2),
        ('val_loss', 'Validation Loss', 2),
    )
    for metric_key, legend_name, column in curve_specs:
        curve = go.Scatter(
            y=history.history[metric_key], mode='lines+markers', name=legend_name
        )
        fig.add_trace(curve, row=1, col=column)

    layout_options = dict(
        title="Training History",
        xaxis_title="Epoch",
        xaxis2_title="Epoch",
        yaxis_title="Accuracy",
        yaxis2_title="Loss",
        template="plotly_white",
        width=1000,
        height=400,
    )
    fig.update_layout(**layout_options)
    fig.show()

# 4. Execute pipeline: load the features and one-hot encode the labels
train_X, val_X, train_y_raw, val_y_raw = load_data(ros_dir)
n_classes = len(np.unique(train_y_raw))
train_y, val_y = (
    to_categorical(raw_labels, num_classes=n_classes)
    for raw_labels in (train_y_raw, val_y_raw)
)

# 5. Initialize tuner: random search over 20 configurations, one run each,
#    ranked by validation accuracy
tuner = RandomSearch(
    build_model_hp,
    objective='val_accuracy',
    max_trials=20,
    executions_per_trial=1,
    directory='keras_tuner_dir',
    project_name='cubsat_tuning',
)

tuner.search_space_summary()

# 6. Train with early stopping (3 epochs without val_accuracy improvement)
callbacks = [EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True)]

# The search and the final fit below share the same training settings
shared_fit_options = dict(
    validation_data=(val_X, val_y),
    epochs=30,
    batch_size=32,
    callbacks=callbacks,
    verbose=1,
)

tuner.search(train_X, train_y, **shared_fit_options)

# 7. Best model found by the search
best_model = tuner.get_best_models(num_models=1)[0]

# 8. Train final best model (continues from the model returned by the tuner)
history = best_model.fit(train_X, train_y, **shared_fit_options)

# 9. Evaluate and save
loss, acc = best_model.evaluate(val_X, val_y, verbose=0)
print(f"\nFinal validation accuracy: {acc:.4f}")

# Save best model next to the data
final_model_path = os.path.join(ros_dir, "optimized_best_model.keras")
best_model.save(final_model_path)
print(f"Model saved at: {final_model_path}")

# 10. Visualize history of the final fit
plot_history_plotly(history)


In [None]:
import numpy as np
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical

# Location of the tuned model saved by the previous cell
model_path = '/content/drive/MyDrive/cubsat/RandomOversampling/optimized_best_model.keras'

# Restore it from disk
model = load_model(model_path)

# Read the histogram features and raw labels of both splits again
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'
train_X, val_X, train_y_raw, val_y_raw = (
    np.load(os.path.join(ros_dir, file_name))
    for file_name in (
        "train_histograms_ros.npy",
        "val_histograms_ros.npy",
        "train_labels_ros.npy",
        "val_labels_ros.npy",
    )
)

# One-hot encode the labels; the class count comes from the training labels
n_classes = len(np.unique(train_y_raw))
train_y = to_categorical(train_y_raw, num_classes=n_classes)
val_y = to_categorical(val_y_raw, num_classes=n_classes)

# Continue training for a fixed 50 epochs (no early-stopping callback here)
retrain_options = dict(
    validation_data=(val_X, val_y),
    epochs=50,
    batch_size=32,
    verbose=1,
)
history = model.fit(train_X, train_y, **retrain_options)

# Save the retrained model under a new file name
new_path = os.path.join(ros_dir, "optimized_best_model_plus50epochs.keras")
model.save(new_path)
print(f"Retrained model saved at: {new_path}")


In [None]:
import plotly.graph_objects as go

# === Accuracy Plot ===
# Curves of the 50-epoch retraining run, y-axis pinned to [0, 1]
accuracy_curves = [
    go.Scatter(y=history.history[metric_key], mode='lines+markers', name=legend_name)
    for metric_key, legend_name in (('accuracy', 'Train'), ('val_accuracy', 'Validation'))
]
fig_acc = go.Figure(data=accuracy_curves)
fig_acc.update_layout(
    title="Accuracy Evolution",
    xaxis_title="Epoch",
    yaxis_title="Accuracy",
    yaxis=dict(range=[0, 1]),
    template="plotly_white",
)
fig_acc.show()

# === Loss Plot ===
# The y-axis is pinned to [0, 1] here too, so loss values above 1 are not visible
loss_curves = [
    go.Scatter(y=history.history[metric_key], mode='lines+markers', name=legend_name)
    for metric_key, legend_name in (('loss', 'Train'), ('val_loss', 'Validation'))
]
fig_loss = go.Figure(data=loss_curves)
fig_loss.update_layout(
    title="Loss Evolution",
    xaxis_title="Epoch",
    yaxis_title="Loss",
    yaxis=dict(range=[0, 1]),
    template="plotly_white",
)
fig_loss.show()


TESTING THE MODEL WITH THE TEST SET

In [None]:
import numpy as np
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix
import plotly.figure_factory as ff

# Paths: test arrays live in the dataset root; bin edges and the retrained
# model live in the RandomOversampling folder
ros_dir = '/content/drive/MyDrive/cubsat/RandomOversampling'
test_images_path = '/content/drive/MyDrive/cubsat/test_images.npy'
test_labels_path = '/content/drive/MyDrive/cubsat/test_labels.npy'
bin_edges_path = os.path.join(ros_dir, 'bin_edges_ros.npy')
# Join only the file name: os.path.join discards `ros_dir` when the second
# argument is an absolute path, which made the join misleading.
model_path = os.path.join(ros_dir, 'optimized_best_model_plus50epochs.keras')

# Load data and model
test_images = np.load(test_images_path)
test_labels = np.load(test_labels_path)
model = load_model(model_path)
# The same bin edges that were used for the training histograms
bin_edges = np.load(bin_edges_path, allow_pickle=True)

# Histogram extraction function (same computation as used for the training features)
def image_to_histogram(image, bin_edges):
    """Return the concatenated, normalized histograms of the first three channels.

    Args:
        image: array indexed as image[row, col, channel]; channels 0, 1 and 2 are used.
        bin_edges: indexable by channel; bin_edges[c] is the edge array passed to
            np.histogram for channel c.

    Returns:
        1-D float array holding, for channel 0, then 1, then 2, the fraction of
        that channel's pixels counted in each bin.
    """
    per_channel = []
    for channel in range(3):
        values = image[:, :, channel].ravel()
        counts = np.histogram(values, bins=bin_edges[channel])[0]
        # Divide by the pixel count so the entries are proportions, not raw counts
        per_channel.extend(counts / len(values))
    return np.array(per_channel)

# Test images -> histogram feature rows
test_histograms = np.array(
    [image_to_histogram(test_image, bin_edges) for test_image in test_images]
)

# Class scores per image, then the index of the highest score as the predicted label
predictions = model.predict(test_histograms)
predicted_labels = np.argmax(predictions, axis=1)

# Confusion matrix of true vs. predicted labels
conf_matrix = confusion_matrix(test_labels, predicted_labels)
# Tick labels "Class 0" .. "Class k-1", one per matrix row
labels = [f"Class {class_index}" for class_index in range(conf_matrix.shape[0])]

# Annotated heatmap of the matrix
heatmap_options = dict(
    z=conf_matrix,
    x=labels,
    y=labels,
    colorscale='Blues',
    showscale=True,
)
fig = ff.create_annotated_heatmap(**heatmap_options)
fig.update_layout(
    title="Confusion Matrix - Test Set",
    xaxis_title="Predicted Label",
    yaxis_title="True Label",
)
fig.show()


In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the test set, 4 decimal places
report = classification_report(test_labels, predicted_labels, digits=4)

# Heading and report, one per line
print("Classification Report:", report, sep="\n")


In [None]:
import time
import os
import psutil
import tracemalloc
from sklearn.metrics import accuracy_score, f1_score

# Start measuring time, memory, and CPU
start_time = time.time()
# NOTE(review): tracemalloc only traces allocations made through Python's
# allocator, so memory allocated natively by the model backend is likely
# missing from the peak reported below.
tracemalloc.start()
process = psutil.Process(os.getpid())
# Priming call: with interval=None the first reading is a meaningless value
# that must be ignored; it only marks the start of the measurement window.
cpu_percent_start = psutil.cpu_percent(interval=None)

# Prediction
predictions = model.predict(test_histograms)
predicted_labels = np.argmax(predictions, axis=1)

# Metrics
accuracy = accuracy_score(test_labels, predicted_labels)
f1 = f1_score(test_labels, predicted_labels, average='weighted')

# End measurements
# This reading is the system-wide CPU utilisation since the priming call,
# i.e. already the average over the measured window.
cpu_percent_end = psutil.cpu_percent(interval=None)
_, peak_memory = tracemalloc.get_traced_memory()
tracemalloc.stop()
end_time = time.time()

# Size on disk of the model that is actually being evaluated (the file the
# test cell loads). 'dnn_histogram_model.keras' is not written anywhere in
# this notebook.
model_file_path = os.path.join(ros_dir, 'optimized_best_model_plus50epochs.keras')
model_code_size = os.path.getsize(model_file_path) / 1024  # in KB

# Formatted print
print("Evaluation Time:        {:.4f} seconds".format(end_time - start_time))
print("Peak Memory Usage:      {:.2f} MB".format(peak_memory / (1024 * 1024)))
print("Average CPU Usage:      {:.2f}%".format(cpu_percent_end))
print("Algorithm Code Size:    {:.2f} KB".format(model_code_size))
print("Accuracy:               {:.4f}".format(accuracy))
print("F1 Score:               {:.4f}".format(f1))
