### Imports


In [1]:
import numpy as np
import pandas as pd

import sys
import os



## Preprossessing

In [2]:
# Import from src/ and config/
from src.preprocessing import preprocess_aachen_dataset
from config.defaults import Config

In [3]:
# Load the default configuration
config = Config()

# Preprocess the dataset for classification (CNN)
preprocessed_classification = preprocess_aachen_dataset(
    data_path=config.data_path,
    test_cell_count=config.test_cell_count,
    random_state=config.random_state,
    log_transform=config.log_transform,
    classification=True  # Explore classification (CNN) output
)

# Preprocess the dataset for regression (LSTM)
preprocessed_regression = preprocess_aachen_dataset(
    data_path=config.data_path,
    test_cell_count=config.test_cell_count,
    random_state=config.random_state,
    log_transform=config.log_transform,
    classification=False  # Explore regression (LSTM) output
)

# Explore classification (CNN) output
print("# Classification (CNN) Output Exploration")
print("X_train shape:", preprocessed_classification["X_train"].shape)  # Expected: (n_samples, 120, 1)
print("X_val shape:", preprocessed_classification["X_val"].shape)      # Expected: (n_samples, 120, 1)
print("X_test shape:", preprocessed_classification["X_test"].shape)    # Expected: (n_samples, 120, 1)
print("y_train shape:", preprocessed_classification["y_train"].shape)  # Expected: (n_samples, 7) for one-hot encoding
print("y_max:", preprocessed_classification["y_max"])                 # Maximum RUL for scaling
print("label_mapping:", preprocessed_classification["label_mapping"])  # RUL bin mappings
print("max_sequence_length:", preprocessed_classification["max_sequence_length"])  # Should be 120 for classification
print("Sample X_train[0]:\n", preprocessed_classification["X_train"][0])  # First sequence
print("Sample y_train[0]:\n", preprocessed_classification["y_train"][0])  # First one-hot label

# Explore regression (LSTM) output
print("\n# Regression (LSTM) Output Exploration")
print("X_train shape:", preprocessed_regression["X_train"].shape)     # Expected: (n_samples, max_seq_len, 1)
print("X_val shape:", preprocessed_regression["X_val"].shape)         # Expected: (n_samples, max_seq_len, 1)
print("X_test shape:", preprocessed_regression["X_test"].shape)       # Expected: (n_samples, max_seq_len, 1)
print("y_train shape:", preprocessed_regression["y_train"].shape)     # Expected: (n_samples,)
print("y_max:", preprocessed_regression["y_max"])                     # Maximum RUL for scaling
print("max_sequence_length:", preprocessed_regression["max_sequence_length"])  # Maximum sequence length
print("Sample X_train[0] shape:", preprocessed_regression["X_train"][0].shape)  # First sequence dimensions
print("Sample y_train[0]:", preprocessed_regression["y_train"][0])     # First normalized RUL

# Additional exploration: Class distribution for classification (if applicable)
if preprocessed_classification["label_mapping"]:
    y_train_classes = np.argmax(preprocessed_classification["y_train"], axis=1)
    print("\nClassification Class Distribution (Training):\n", pd.Series(y_train_classes).value_counts())

# Additional exploration: RUL statistics for regression
print("\nRegression RUL Statistics (Training):")
print("Mean RUL (normalized):", np.mean(preprocessed_regression["y_train"]))
print("Std RUL (normalized):", np.std(preprocessed_regression["y_train"]))
print("Min RUL (normalized):", np.min(preprocessed_regression["y_train"]))
print("Max RUL (normalized):", np.max(preprocessed_regression["y_train"]))

# Classification (CNN) Output Exploration
X_train shape: (4513, 120, 1)
X_val shape: (1129, 120, 1)
X_test shape: (391, 120, 1)
y_train shape: (4513, 7)
y_max: 1000
label_mapping: {'0-200': 0, '200-300': 1, '300-400': 2, '400-500': 3, '500-600': 4, '600-700': 5, '700+': 6}
max_sequence_length: 120
Sample X_train[0]:
 [[0.7065943 ]
 [0.70419202]
 [0.70179161]
 [0.69939201]
 [0.69699363]
 [0.69459688]
 [0.69220215]
 [0.68980987]
 [0.68742044]
 [0.68503425]
 [0.68265173]
 [0.68027327]
 [0.67789927]
 [0.67553016]
 [0.67316633]
 [0.67080819]
 [0.66845615]
 [0.66611061]
 [0.66377198]
 [0.66144066]
 [0.65911706]
 [0.6568016 ]
 [0.65449467]
 [0.65219668]
 [0.64990803]
 [0.64762914]
 [0.64536041]
 [0.64310225]
 [0.64085506]
 [0.63861924]
 [0.63639522]
 [0.63418338]
 [0.63198415]
 [0.62980354]
 [0.6276634 ]
 [0.6255628 ]
 [0.62349819]
 [0.62146602]
 [0.61946276]
 [0.61748485]
 [0.61552876]
 [0.61359094]
 [0.61166785]
 [0.60975594]
 [0.60785167]
 [0.60595149]
 [0.60405187]
 [0.60214925]
 [0.60024

## Model build

In [4]:
from src.models import load_preprocessed_data, build_lstm_model, train_lstm_model
from src.evaluation import evaluate_lstm_model



In [5]:
# Add thesis_experiment/ to sys.path for imports (running from thesis_experiment/)
sys.path.append(os.path.abspath(os.getcwd()))

# Load configuration
config = Config()
model_type = "regression"  # Fixed for LSTM regression

# Load preprocessed data
X_train, X_val, X_test, y_train, y_val, y_test, metadata = load_preprocessed_data(
    model_type, config.eol_capacity
)

# Build and train the LSTM model
model = build_lstm_model((metadata["max_sequence_length"], 1), config)
history = train_lstm_model(model, X_train, y_train, X_val, y_val, config)

# Evaluate the model
test_loss, test_mae = evaluate_lstm_model(model, X_test, y_test, metadata["y_max"])

# Print training and evaluation results for verification
print("Training completed successfully!")
print(f"Training Loss: {history['loss'][-1]:.4f}")
print(f"Validation Loss: {history['val_loss'][-1]:.4f}")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test MAE (rescaled): {test_mae:.4f}")

2025-02-28 11:49:02,701 - INFO - Loaded preprocessed data for regression with EOL 0.65


2025-02-28 11:49:02,808 - INFO - LSTM model built for regression with config: Config(data_path='data/raw/Degradation_Prediction_Dataset_ISEA.mat', eol_capacity=0.65, test_cell_count=3, random_state=42, log_transform=False, classification=False, seq_len=120, train_split_ratio=0.8, val_split_ratio=0.2, lstm_units=32, dropout_rate=0.2, dense_units=16, learning_rate=0.001, clipnorm=1.0, patience=15, batch_size=32, epochs=50)


NameError: name 'datetime' is not defined