# IndabaX HACKATHON

![IMS_Logo.png](attachment:5036202f-d31d-4fb2-80c0-2df537859253.png)

Author: Francois Naude

Contact: francois.naude@imseismology.org

In this guided hackathon you will implement and tune a model that is critical to the safety of mine workers and also helps keep mines running. You will try to produce a model that is able to give seismologists (people who analyse seismic events, like earthquakes) all the information they need to locate the origin and size of seismic events so that they can send rescue teams or give warnings of unsafe areas. Let's get started!

In [None]:
### Get the packages to equip you for building and testing models

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gzip
import tensorflow as tf
from tensorflow.keras.losses import KLDivergence

### Fetch support functions to feed our model
%run ./Utils.ipynb

In [None]:
### Auto-detect environment and setup paths
import sys
import os

# Colab is recognised by the 'google.colab' package already being loaded in this interpreter.
IN_COLAB = 'google.colab' in sys.modules

# Each environment only decides the base folder and the path separator;
# every data/model folder is then derived from those two values below.
if IN_COLAB:
    print("Running in Google Colab")
    # Mount Google Drive so the dataset folders become visible under /content/drive
    from google.colab import drive
    drive.mount('/content/drive')

    BASE_DIR = "/content/drive/MyDrive/HacakthonIMS/"
    _SEP = "/"
else:
    print("Running locally")
    # Local (Windows) location of the hackathon folder
    BASE_DIR = "c:\\Users\\Admin\\OneDrive - Durban University of Technology\\Desktop\\HackathonIMS\\"
    _SEP = "\\"

# Data folders (the trailing separator is kept so a file name can be appended directly)
TRAIN_DIR = BASE_DIR + "trailTrain" + _SEP
VAL_DIR = BASE_DIR + "Validate1000" + _SEP
TEST_DIR = BASE_DIR + "Test1000" + _SEP
CHALLENGE_DIR = BASE_DIR + "Challenge" + _SEP

# Folder that receives the model checkpoint and the pickled training history
MODEL_DIR = BASE_DIR + "models" + _SEP
os.makedirs(MODEL_DIR, exist_ok=True)

# Files written and read by the training / continue-training cells
MODEL_CHECKPOINT_PATH = os.path.join(MODEL_DIR, "model_checkpoint.keras")
HISTORY_PATH = os.path.join(MODEL_DIR, "training_history.pkl")

print(f"Training directory: {TRAIN_DIR}")
print(f"Model checkpoint path: {MODEL_CHECKPOINT_PATH}")
print(f"History path: {HISTORY_PATH}")

### Inspect the data

In [None]:
### COMPLETE FOR HACKATHON
def InspectFunction(X,labels,fileIdx,w = 8192):
    """Plot the first ``w`` samples of one seismogram and mark its P and S picks.

    Parameters
    ----------
    X : sliceable, plottable object (the caller passes a 3-column DataFrame)
        Seismogram samples; only ``X[:w]`` is drawn.
    labels : sequence
        ``labels[0]`` is read as the P pick and ``labels[1]`` as the S pick.
        NOTE(review): presumably sample indices into the seismogram - confirm
        against the label format used by the data generator.
    fileIdx : int
        Index of the file being inspected; shown in the plot title.
    w : int
        Number of leading samples to plot.
    """
    pIdx = labels[0]
    sIdx = labels[1]

    plt.plot(X[:w])

    # Mark each pick with a dashed vertical line, skipping picks outside the plotted window.
    drewPick = False
    for pickIdx, pickName, pickColor in ((pIdx, "P", "green"), (sIdx, "S", "red")):
        if 0 <= pickIdx < w:
            plt.axvline(x=pickIdx, color=pickColor, linestyle="--",
                        label=f"{pickName} pick ({pickIdx})")
            drewPick = True

    plt.title(f"Seismogram {fileIdx}")
    plt.xlabel("Sample")
    plt.ylabel("Amplitude")
    if drewPick:
        # Only request a legend when at least one labelled line exists.
        plt.legend()
    plt.show()


In [None]:
# Point the inspection and training cells at the training folder chosen by the
# environment-setup cell (TRAIN_DIR).
dataDir = TRAIN_DIR
print(f"Using data directory: {dataDir}")

In [None]:
## List of files within the directory (sorted so the order is stable between runs)
fileList = sorted(os.listdir(dataDir))

## Read in n files (CHANGE ME)
startFile = 0
numFiles = 5
w = 8192   # length of signal (Seismogram)

## Iterate through files and plot each one with InspectFunction().
## enumerate(start=startFile) makes fileIdx the file's position in fileList,
## not its position inside the selected slice.
for fileIdx, fileName in enumerate(fileList[startFile:(startFile + numFiles)], start=startFile):
    ## Read in the file as a dataframe df
    filePath = os.path.join(dataDir, fileName)
    with gzip.open(filePath, 'rt') as file:
        ## The first line holds the comma-separated p and s labels
        firstLine = file.readline().strip()
        ## The remaining lines hold the seismogram as columns of x,y,z
        df = pd.read_csv(file, header=None, engine='python')

    ## Extract information from the dataframe
    labels = np.array(firstLine.split(','), dtype=int)
    X = df.iloc[0:, :3]   # keep the first three columns only

    ## Inspect a single example using InspectFunction; pass w so the
    ## "length of signal" setting above actually controls the plotted window
    InspectFunction(X, labels, fileIdx, w)

### Build a data generator

In [None]:
### Initialize a training data generator (CHOOSE PARAMETERS)
# DataGenerator is not defined in this notebook; presumably it comes from Utils.ipynb (%run in the first cell).
# batchSize is reused by the validation and test generators further down.
batchSize = 1
trainGen = DataGenerator(dataDir, batch_size = batchSize, max_files = 100) # "max_files = None " uses all the files
# trainGen.total_len()

### Inspect the data generator

In [None]:
### Plot the seismogram and labels of every file in one batch
batchNumber = 2  # Batch number

# Look the batch up once. Every trainGen[...] access asks the generator for the batch again,
# which presumably re-reads the files; if the generator shuffles or augments, two separate
# lookups are also not guaranteed to return matching data and labels.
batch = trainGen[batchNumber]
batchData, batchLabels = batch[0], batch[1]

for fileNumber in range(batchSize):   # File number in batch (max is batch_size - 1)
    singleSeismogramData = batchData[fileNumber][0]
    singleSeismogramLabel = batchLabels[fileNumber][0]

    plt.plot(singleSeismogramData, alpha = 0.5)
    plt.plot(singleSeismogramLabel*20)    # Use the scalar for visual purposes
    plt.xlim(0,8192)                      # Trim plot for visual inspection eg. 4096 instead of 8192
    plt.show()

In [None]:
### BUILD a validation dataset
valDir = VAL_DIR
print(f"Using validation directory: {valDir}")

# Create validation data generator: same batch size as training, max_files=100
# (per the note on trainGen, max_files=None would use every file in the folder)
valGen = DataGenerator(valDir, batch_size=batchSize, max_files=100)


### Build a model

In [None]:
### Build your custom model if you are feeling confident (You can skip this)
def CustomModel(input_size=(1, 8192, 3)): # Keep the same output size.
    """Return a template model made of a single 3-filter convolution.

    ``input_size`` is the shape handed to the input layer. The one Conv2D layer
    uses "same" padding and 3 filters; it is a placeholder meant to be replaced
    with a real architecture.
    """
    kernel_shape = (1, 7)
    net_input = Input(shape=input_size)

    ##### Example:
    # The 3 is the number of channels, which for the final layer is 3
    net_output = Conv2D(3, kernel_shape, activation="relu", padding="same")(net_input)
    ##### replace this block

    return Model(inputs=net_input, outputs=net_output)

In [None]:
# Inputs and Outputs: (1,8192,3)
# UNetModel is not defined in this notebook; presumably it comes from Utils.ipynb.
model = UNetModel()  # change to your own custom model, or keep default "UNetModel()"
# Compile with Adam (learning rate 1e-4, gradient clipping via clipvalue=1.0) and a mean-squared-error loss.
# The KLDivergence line is kept as a ready-to-swap alternative loss.
model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate = 0.0001, clipvalue = 1.0),
            loss='mse'
            # loss=KLDivergence()
)

In [None]:
### Auto-check for existing model and load if available
import pickle
from tensorflow.keras.models import load_model

# Stays None unless a saved history is found next to an existing checkpoint.
previous_history = None

if os.path.exists(MODEL_CHECKPOINT_PATH):
    print("Found existing model checkpoint. Loading...")
    # Replaces the freshly compiled model from the previous cell with the saved one.
    model = load_model(MODEL_CHECKPOINT_PATH)
    print("Model loaded successfully!")

    # Load training history if it exists
    if os.path.exists(HISTORY_PATH):
        print("Found existing training history. Loading...")
        # NOTE: unpickling can execute arbitrary code; only load history files you wrote yourself.
        with open(HISTORY_PATH, 'rb') as f:
            previous_history = pickle.load(f)
        print("Previous training history loaded!")
        # `or` also covers a stored-but-empty val_loss list, on which min() would raise.
        print(f"Previous best val_loss: {min(previous_history.get('val_loss') or [float('inf')])}")
    else:
        print("No previous training history found.")

else:
    print("No existing model checkpoint found. Will start training from scratch.")

# Print the layer-by-layer summary that the header announces.
print("Model summary:")
model.summary()

### Train a model

## ✅ Step 1: Save model weights and training history

In [None]:
# Add checkpoint using automatic path detection

from tensorflow.keras.callbacks import ModelCheckpoint

# Writes the model to MODEL_CHECKPOINT_PATH during fit(); with save_best_only=True
# a file is only written when the monitored val_loss improves.
checkpoint = ModelCheckpoint(MODEL_CHECKPOINT_PATH,
                             save_best_only=True,
                             monitor='val_loss',
                             verbose=1)

print(f"Model checkpoint will be saved to: {MODEL_CHECKPOINT_PATH}")

## ✅ Step 2: Save the training history object manually

### Initial Training

In [None]:
# Training with proper validation data generator

from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Halve the learning rate after 3 epochs without val_loss improvement (never below 1e-6).
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)
# Stop after 5 epochs without val_loss improvement and roll back to the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

# `checkpoint` is the ModelCheckpoint callback created in the previous cell.
# `history` is consumed by the save/compare cells after training.
history = model.fit(
    trainGen,
    validation_data=valGen,  # Use proper validation data
    epochs=10,
    callbacks=[reduce_lr, early_stop, checkpoint]
)

### Continue Training

In [None]:
import os
import pickle

# HISTORY_PATH comes from the environment-setup cell (it points inside MODEL_DIR).
# It is deliberately not redefined here: a bare 'training_history.pkl' would resolve
# against the current working directory, so the history written under MODEL_DIR would
# not be found and every later save would land in a different place.

# Load previous training history if it exists (for continue training)
if os.path.exists(HISTORY_PATH):
    # NOTE: unpickling can execute arbitrary code; only load history files you wrote yourself.
    with open(HISTORY_PATH, 'rb') as f:
        previous_history = pickle.load(f)
    print("Previous training history loaded!")
    # `or` also covers a stored-but-empty val_loss list, on which min() would raise.
    print(f"Previous best val_loss: {min(previous_history.get('val_loss') or [float('inf')]):.6f}")
    print(f"Total previous epochs: {len(previous_history.get('loss', []))}")
else:
    print("No previous training history found.")
    previous_history = None


In [None]:
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

# MODEL_CHECKPOINT_PATH comes from the environment-setup cell (it points inside MODEL_DIR).
# It is deliberately not redefined here: a bare "model_checkpoint.keras" would resolve
# against the current working directory, so the checkpoint written by the initial
# training would not be found and new checkpoints would be written somewhere else.

# Continue training - load model automatically if checkpoint exists
if os.path.exists(MODEL_CHECKPOINT_PATH):
    print("Loading model from checkpoint...")
    model = load_model(MODEL_CHECKPOINT_PATH)
    print("✅ Model loaded successfully for continued training!")
else:
    # Training still proceeds below with whatever `model` is currently in memory.
    print("⚠️ No checkpoint found. Make sure to run initial training first.")

# Set up callbacks for continued training (same settings as the initial training cell)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)
checkpoint = ModelCheckpoint(MODEL_CHECKPOINT_PATH, save_best_only=True, monitor='val_loss', verbose=1)

# Continue training with more epochs
print("Starting continued training...")
history = model.fit(
    trainGen,
    validation_data=valGen,  # Use proper validation data
    epochs=10,               # continue with more epochs
    callbacks=[reduce_lr, early_stop, checkpoint]
)

print("✅ Continued training completed!")


### After training

### 🎯 Training Workflow with Smart Model Saving

**New Features:**
1. **Performance Comparison**: Automatically compares current training with previous training history
2. **Visual Analysis**: Shows side-by-side plots of current vs previous training performance
3. **Improvement Detection**: Calculates whether the model improved and by how much
4. **Interactive Decision**: Lets you choose whether to save the model based on performance
5. **Automatic Backup**: Creates timestamped backups when saving
6. **Smart Restoration**: Loads previous best model if you choose not to save

**Workflow:**
1. Train your model (initial or continued training)
2. Review the performance comparison charts
3. Check the improvement analysis
4. Decide whether to save the model
5. If saved: New model becomes the checkpoint
6. If not saved: Previous best model is restored

**Recommendation Logic:**
- ✅ **Save** if validation loss improved
- ❌ **Don't save** if validation loss got worse
- 🤔 **Your choice** for mixed results

In [None]:
import pickle
import matplotlib.pyplot as plt
import numpy as np

# Save training history using automatic path
def save_history(history, previous_history=None, history_path=None):
    """Merge the current session's metrics with earlier ones and pickle the result.

    Parameters
    ----------
    history : object with a ``history`` dict attribute
        The object returned by ``model.fit``; maps metric name -> per-epoch values.
    previous_history : dict or None
        Metrics from earlier sessions in the same layout. When given, each metric's
        earlier values come first, followed by the current session's values. Metrics
        present on only one side are kept as they are.
    history_path : str or None
        File to write. Defaults to the notebook-level ``HISTORY_PATH``.

    Returns
    -------
    dict
        The merged history. Its lists are new objects, so changing the result
        never alters ``history.history`` or ``previous_history``.
    """
    current = history.history
    earlier = previous_history if previous_history is not None else {}

    # Current-session metrics first (prefixed by any earlier values) ...
    combined_history = {
        key: list(earlier.get(key, [])) + list(values)
        for key, values in current.items()
    }
    # ... then metrics that only exist in the earlier history.
    for key, values in earlier.items():
        if key not in combined_history:
            combined_history[key] = list(values)

    target_path = HISTORY_PATH if history_path is None else history_path
    with open(target_path, 'wb') as f:
        pickle.dump(combined_history, f)

    print(f"Training history saved to: {target_path}")
    return combined_history

# Training History Comparison and Model Saving Decision
def _decorate_loss_axes(ax, title):
    """Give a loss subplot its title, axis labels, legend and grid."""
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.grid(True, alpha=0.3)


def compare_training_performance(current_history, previous_history=None):
    """Plot the current session against earlier training and summarise the change.

    Draws a 2x2 figure: current-session curves, previous-history curves, both
    laid end to end, and a text panel with the key numbers and a save
    recommendation.

    Parameters
    ----------
    current_history : object with a ``history`` dict attribute
        Result of the latest ``model.fit``; must contain ``'loss'`` and may
        contain ``'val_loss'``.
    previous_history : dict or None
        Metrics from earlier sessions (``'loss'`` / ``'val_loss'`` lists).

    Returns
    -------
    tuple of (float, float)
        ``(current_best_val_loss, previous_best_val_loss)``: the minimum
        validation loss of the current session and of the previous history.
        Either is ``float('inf')`` when no validation loss is available.
    """
    # Create subplots for comparison
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('Training Performance Comparison', fontsize=16)

    # Current training metrics
    current_loss = current_history.history['loss']
    current_val_loss = current_history.history.get('val_loss', [])

    # Top-left: current session
    axes[0, 0].plot(current_loss, 'b-', label='Current Training Loss', linewidth=2)
    if current_val_loss:
        axes[0, 0].plot(current_val_loss, 'r-', label='Current Validation Loss', linewidth=2)
    _decorate_loss_axes(axes[0, 0], 'Current Training Session')

    # Performance metrics
    current_best_val_loss = min(current_val_loss) if current_val_loss else float('inf')
    current_final_loss = current_loss[-1] if current_loss else float('inf')

    improvement_text = "Current Session:\n"
    improvement_text += f"Final Loss: {current_final_loss:.6f}\n"
    improvement_text += f"Best Val Loss: {current_best_val_loss:.6f}\n"
    improvement_text += f"Epochs: {len(current_loss)}\n"

    # Stays at infinity unless the previous history recorded validation losses.
    prev_best_val_loss = float('inf')

    if previous_history is not None:
        # Previous training metrics
        prev_loss = previous_history.get('loss', [])
        prev_val_loss = previous_history.get('val_loss', [])

        # Top-right: previous history
        axes[0, 1].plot(prev_loss, 'g-', label='Previous Training Loss', linewidth=2)
        if prev_val_loss:
            axes[0, 1].plot(prev_val_loss, 'orange', label='Previous Validation Loss', linewidth=2)
        _decorate_loss_axes(axes[0, 1], 'Previous Training History')

        # Compare performance
        if prev_val_loss:
            prev_best_val_loss = min(prev_val_loss)
        prev_final_loss = prev_loss[-1] if prev_loss else float('inf')

        improvement_text += "\nPrevious Best:\n"
        improvement_text += f"Final Loss: {prev_final_loss:.6f}\n"
        improvement_text += f"Best Val Loss: {prev_best_val_loss:.6f}\n"
        improvement_text += f"Total Epochs: {len(prev_loss)}\n"

        # Determine if there's improvement
        val_loss_improved = current_best_val_loss < prev_best_val_loss
        loss_improved = current_final_loss < prev_final_loss

        improvement_text += "\n📊 IMPROVEMENT ANALYSIS:\n"
        improvement_text += f"Val Loss: {'✅ IMPROVED' if val_loss_improved else '❌ WORSE'} "
        improvement_text += f"({prev_best_val_loss:.6f} → {current_best_val_loss:.6f})\n"
        improvement_text += f"Final Loss: {'✅ IMPROVED' if loss_improved else '❌ WORSE'} "
        improvement_text += f"({prev_final_loss:.6f} → {current_final_loss:.6f})\n"

        # Overall recommendation: either metric improving is enough to recommend saving
        overall_improved = val_loss_improved or loss_improved
        recommendation = "💾 RECOMMEND SAVING" if overall_improved else "⚠️ CONSIDER NOT SAVING"
        improvement_text += f"\n🎯 {recommendation}"

        # Bottom-left: previous epochs followed by the current ones on a shared epoch axis
        resume_epoch = len(prev_loss)
        axes[1, 0].plot(range(resume_epoch), prev_loss, 'g-', label='Previous Training', linewidth=2)
        axes[1, 0].plot(range(resume_epoch, resume_epoch + len(current_loss)),
                        current_loss, 'b-', label='Current Training', linewidth=2)
        if prev_val_loss and current_val_loss:
            val_resume_epoch = len(prev_val_loss)
            axes[1, 0].plot(range(val_resume_epoch), prev_val_loss, 'orange',
                            label='Previous Validation', linewidth=2)
            axes[1, 0].plot(range(val_resume_epoch, val_resume_epoch + len(current_val_loss)),
                            current_val_loss, 'r-', label='Current Validation', linewidth=2)
        axes[1, 0].axvline(x=resume_epoch, color='black', linestyle='--', alpha=0.5, label='Training Resumed')
        _decorate_loss_axes(axes[1, 0], 'Complete Training History')

    else:
        # No earlier sessions: placeholder text top-right, current curves repeated bottom-left
        axes[0, 1].text(0.5, 0.5, 'No Previous Training History',
                        horizontalalignment='center', verticalalignment='center',
                        transform=axes[0, 1].transAxes, fontsize=14)
        axes[0, 1].set_title('Previous Training History')
        axes[1, 0].plot(current_loss, 'b-', label='Current Training Loss', linewidth=2)
        if current_val_loss:
            axes[1, 0].plot(current_val_loss, 'r-', label='Current Validation Loss', linewidth=2)
        _decorate_loss_axes(axes[1, 0], 'Training History')

        improvement_text += "\n🎯 💾 RECOMMEND SAVING (First Training)"

    # Bottom-right: text summary
    axes[1, 1].text(0.05, 0.95, improvement_text, transform=axes[1, 1].transAxes,
                    fontsize=11, verticalalignment='top', fontfamily='monospace',
                    bbox=dict(boxstyle="round,pad=0.3", facecolor="lightblue", alpha=0.8))
    axes[1, 1].set_xlim(0, 1)
    axes[1, 1].set_ylim(0, 1)
    axes[1, 1].axis('off')
    axes[1, 1].set_title('Performance Summary')

    plt.tight_layout()
    plt.show()

    # Return the previous BEST (minimum) validation loss, matching the name the
    # caller binds it to, rather than the last recorded value.
    return current_best_val_loss, prev_best_val_loss

# Save the current training history
# NOTE(review): this writes HISTORY_PATH before the save/discard prompt in the next cell,
# so the history file is updated even if the model is later discarded - confirm this is intended.
final_history = save_history(history, previous_history)
print(f"Total epochs trained: {len(final_history['loss'])}")
print(f"Best val_loss: {min(final_history.get('val_loss', [float('inf')]))}")

# Compare performance (shows the 2x2 comparison figure and returns the two validation-loss values)
current_best_val_loss, previous_best_val_loss = compare_training_performance(history, previous_history)

print("=" * 80)
print("🎯 TRAINING PERFORMANCE ANALYSIS COMPLETE")
print("=" * 80)


In [None]:
# Interactive Model Saving Decision
def save_model_decision(current_history, previous_history=None):
    """Report whether validation loss improved and ask the user whether to save.

    Parameters
    ----------
    current_history : object with a ``history`` dict attribute
        Result of the latest ``model.fit``; ``'val_loss'`` is read if present.
    previous_history : dict or None
        Metrics from earlier sessions; ``'val_loss'`` is read if present.
        Without it the session is treated as a first training run (improved).

    Returns
    -------
    bool
        True if the user answered 'y'/'yes', False for 'n'/'no'. Any other
        answer repeats the prompt.
    """
    # Calculate improvement metrics
    current_val_loss = current_history.history.get('val_loss', [])
    current_best_val_loss = min(current_val_loss) if current_val_loss else float('inf')

    prev_val_loss = previous_history.get('val_loss') if previous_history else None
    if prev_val_loss:
        prev_best_val_loss = min(prev_val_loss)
        improved = current_best_val_loss < prev_best_val_loss
        if prev_best_val_loss != 0:
            improvement_pct = ((prev_best_val_loss - current_best_val_loss) / prev_best_val_loss) * 100
        else:
            # A relative change is undefined against a baseline of exactly 0;
            # dividing here would raise ZeroDivisionError.
            improvement_pct = None
    else:
        improved = True  # First training session
        improvement_pct = 0
        prev_best_val_loss = float('inf')

    print("\n" + "="*60)
    print("🤖 MODEL SAVING DECISION SYSTEM")
    print("="*60)

    if improved:
        print("✅ PERFORMANCE IMPROVED!")
        if improvement_pct is not None and improvement_pct > 0:
            print(f"📈 Validation Loss Improved by {improvement_pct:.2f}%")
        print(f"🎯 Previous Best Val Loss: {prev_best_val_loss:.6f}")
        print(f"🚀 Current Best Val Loss: {current_best_val_loss:.6f}")
        print("💡 Recommendation: SAVE THE MODEL")
    else:
        print("❌ PERFORMANCE DID NOT IMPROVE")
        if improvement_pct is None:
            print("📉 Validation Loss Got Worse (previous best was 0, so no percentage is available)")
        else:
            print(f"📉 Validation Loss Got Worse by {abs(improvement_pct):.2f}%")
        print(f"🎯 Previous Best Val Loss: {prev_best_val_loss:.6f}")
        print(f"📉 Current Best Val Loss: {current_best_val_loss:.6f}")
        print("💡 Recommendation: DO NOT SAVE THE MODEL")

    print("\n" + "-"*60)

    # Get user decision; keep asking until the answer is recognisable
    while True:
        decision = input("Do you want to save this model? (y/n): ").lower().strip()
        if decision in ['y', 'yes']:
            return True
        elif decision in ['n', 'no']:
            return False
        else:
            print("Please enter 'y' for yes or 'n' for no.")

# Get user decision
save_model = save_model_decision(history, previous_history)

if save_model:
    print("\n💾 SAVING MODEL AND TRAINING HISTORY...")

    # Save the model (it's already saved by checkpoint, but let's make sure)
    model.save(MODEL_CHECKPOINT_PATH)
    print(f"✅ Model saved to: {MODEL_CHECKPOINT_PATH}")

    # Merge with the previous history and write HISTORY_PATH through save_history(),
    # so the merge rules live in one place (it also prints the path it wrote).
    combined_history = save_history(history, previous_history)

    print(f"📊 Total epochs in history: {len(combined_history['loss'])}")
    print(f"🎯 Best validation loss: {min(combined_history.get('val_loss') or [float('inf')]):.6f}")

    # Save a backup with timestamp
    import datetime
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    # Split off only the final extension: str.replace would also rewrite any earlier
    # '.keras' / '.pkl' in the path (e.g. a '.keras' folder) and would leave the name
    # unchanged - overwriting the original - for any other extension.
    model_root, model_ext = os.path.splitext(MODEL_CHECKPOINT_PATH)
    history_root, history_ext = os.path.splitext(HISTORY_PATH)
    backup_model_path = f"{model_root}_backup_{timestamp}{model_ext}"
    backup_history_path = f"{history_root}_backup_{timestamp}{history_ext}"

    model.save(backup_model_path)
    with open(backup_history_path, 'wb') as f:
        pickle.dump(combined_history, f)

    print(f"💾 Backup saved: {backup_model_path}")
    print(f"💾 Backup history: {backup_history_path}")

else:
    print("\n❌ MODEL NOT SAVED")
    print("The current model weights and training history will not be saved.")
    print("Previous best model remains unchanged.")
    if previous_history:
        print(f"Previous best validation loss: {min(previous_history.get('val_loss') or [float('inf')]):.6f}")

    # Load back the model stored at the checkpoint path, if there is one.
    # NOTE(review): the ModelCheckpoint callback used during fit() writes to this same path,
    # so the file may already hold weights from the session being discarded - confirm.
    if os.path.exists(MODEL_CHECKPOINT_PATH):
        print("🔄 Loading previous best model...")
        model = load_model(MODEL_CHECKPOINT_PATH)
        print("✅ Previous best model loaded.")

print("\n" + "="*60)
print("🎉 TRAINING SESSION COMPLETE!")
print("="*60)

### Analyse training

In [None]:
# Analyze training history
# The merged multi-session history is preferred; it only exists if the save cell
# above stored the model. Otherwise fall back to the latest fit() result.
if 'combined_history' not in locals():
    loss = history.history['loss']
    val_loss = history.history.get('val_loss', [])
    print(f"Analyzing current training history with {len(loss)} epochs")
else:
    loss = combined_history['loss']
    val_loss = combined_history.get('val_loss', [])
    print(f"Analyzing combined training history with {len(loss)} epochs")

# Headline numbers; the validation figures are skipped when no validation loss was recorded.
print(f"Final training loss: {loss[-1]:.6f}")
if val_loss:
    print(f"Final validation loss: {val_loss[-1]:.6f}")
    print(f"Best validation loss: {min(val_loss):.6f}")
    # +1 turns the 0-based list position into a 1-based epoch number
    print(f"Best validation loss achieved at epoch: {val_loss.index(min(val_loss)) + 1}")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Uses the 'loss' and 'val_loss' lists prepared by the previous cell
# loss = [...]  # Training loss values
# val_loss = [...]  # Validation loss values (if available)

plt.figure(figsize=(12, 5))

# Left panel: training and validation loss, with the best validation epoch marked
plt.subplot(1, 2, 1)
plt.plot(loss, 'b-', label='Training Loss', linewidth=2)
if val_loss:
    plt.plot(val_loss, 'r-', label='Validation Loss', linewidth=2)
    best_val_epoch = val_loss.index(min(val_loss))
    plt.plot(best_val_epoch, min(val_loss), 'ro', markersize=10, label=f'Best Val Loss: {min(val_loss):.6f}')
plt.title('Training Progress')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)

# Right panel: validation loss trend
plt.subplot(1, 2, 2)
if not val_loss:
    plt.text(0.5, 0.5, 'No validation loss data available',
             horizontalalignment='center', verticalalignment='center',
             transform=plt.gca().transAxes, fontsize=12)
else:
    # Trailing moving average (up to 5 epochs) for a smoother curve;
    # early epochs average over however many values exist so far.
    window_size = min(5, len(val_loss))
    if window_size > 1:
        val_loss_smooth = [
            np.mean(val_loss[max(0, epoch - window_size + 1):epoch + 1])
            for epoch in range(len(val_loss))
        ]
        plt.plot(val_loss_smooth, 'g-', label='Validation Loss (Moving Avg)', linewidth=2)
    plt.plot(val_loss, 'r-', alpha=0.5, label='Validation Loss', linewidth=1)
    plt.title('Validation Loss Trend')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

### Predict on a test set

In [None]:
# Use automatically detected test directory (TEST_DIR from the environment-setup cell)
testDir = TEST_DIR

# This is a challenge testSet of 6 seismograms
challengeDir = CHALLENGE_DIR

print(f"Test directory: {testDir}")
print(f"Challenge directory: {challengeDir}")

# Choose which directory to use for testing: testDir is what the prediction cell reads
# testDir = challengeDir  # Uncomment this line to use challenge data instead

In [None]:
### Predict on the test files using a dedicated test generator (testGen), not trainGen.
# Create test data generator
testGen = DataGenerator(testDir, batch_size=batchSize, max_files=None)  # Use all test files
print(f"Test generator created with {testGen.total_len()} samples")

# Make predictions on test data
Predictions = model.predict(testGen)
print(f"Predictions shape: {Predictions.shape}")

### Analyse predictions

In [None]:
# Show the dimensions of the prediction array before it is indexed in the next cell
print(Predictions.shape)

In [None]:
# Plot the first two output channels of up to five predictions.
# NOTE(review): the two channels are presumably the P and S pick outputs - confirm against Utils.
# Bounding by len(Predictions) avoids an IndexError when fewer than five files were predicted.
numToPlot = min(5, len(Predictions))
for seismogramNum in range(numToPlot):
    plt.plot(Predictions[seismogramNum][0][:,:2])
    plt.show()

### Model Performance

In [None]:
# Model Performance Evaluation
# Negative means the Pick is after the label (Late) and Positive is Early
# resultsHistogram is not defined in this notebook; presumably it comes from Utils.ipynb.
# The broad except is a deliberate best-effort: any failure is reported and the notebook keeps running.
try:
    fig = resultsHistogram(Preds=Predictions, dataGen=testGen)
    print("Performance histogram generated successfully!")
except Exception as e:
    print(f"Error generating histogram: {e}")
    print("Make sure the resultsHistogram function is available in Utils.ipynb")

In [None]:
### Still feeling curious?
# Explore the Utils notebook to customise further. Remember to save the Utils notebook to a new checkpoint
# and to run the first cell of this notebook: or run the command " %run ./Utils.ipynb " in a cell to update your changes.

In [None]:
# Configuration Summary
# Prints every path chosen by the environment-setup cell and whether the
# checkpoint / history files currently exist on disk.
summary_rule = "=" * 60
environment_name = 'Google Colab' if IN_COLAB else 'Local'

print(summary_rule)
print("ENVIRONMENT CONFIGURATION SUMMARY")
print(summary_rule)
print(f"Environment: {environment_name}")
print(f"Training directory: {TRAIN_DIR}")
print(f"Validation directory: {VAL_DIR}")
print(f"Test directory: {TEST_DIR}")
print(f"Challenge directory: {CHALLENGE_DIR}")
print(f"Model checkpoint: {MODEL_CHECKPOINT_PATH}")
print(f"Training history: {HISTORY_PATH}")
print(f"Model exists: {os.path.exists(MODEL_CHECKPOINT_PATH)}")
print(f"History exists: {os.path.exists(HISTORY_PATH)}")
print(summary_rule)

# Show model summary if a model has been created or loaded in this session
if 'model' not in locals():
    print("\nNo model loaded yet. Run the model creation cells first.")
else:
    print("\nModel Summary:")
    model.summary()