In [25]:
import sys
import os
import glob
import pandas as pd
import numpy as np
import torch

## data to check

In [2]:
# Read the four sample arrays straight from disk so their layout can be inspected.
X_train, X_test = (
    np.load('../sample_training_data/X_train.npy'),
    np.load('../sample_training_data/X_test.npy'),
)
y_train, y_test = (
    np.load('../sample_training_data/y_train.npy'),
    np.load('../sample_training_data/y_test.npy'),
)

# Report the shape of every array, one per line.
print(
    f'X_train ... {X_train.shape}',
    f'X_test ... {X_test.shape}',
    f'y_train ... {y_train.shape}',
    f'y_test ... {y_test.shape}',
    sep='\n',
)

X_train ... (17, 2500, 7)
X_test ... (4, 2500, 7)
y_train ... (17, 2500, 3)
y_test ... (4, 2500, 3)


### input features

In [3]:
# Labels applied, in order, to the seven columns of the last axis of X_train.
# NOTE(review): in the recorded preview of this frame the "Coolant temperature"
# column reads ~147 and the "exhaust gases" column reads ~-6, which looks
# swapped/shifted for physical temperatures -- verify the label order against
# the dataset documentation.
input_feature_names = [
    'Engine speed (rpm)',
    'Engine torque (Nm)',
    'Engine throttle (%)',
    'Coolant temperature downstream the engine (°C)',
    'Ambient temperature (°C)',
    'Temperature of exhaust gases upstream the muffler (°C)',
    'Temperature of oil in sump (°C)',
]

# Tabulate the first training case (index 0 on the leading axis).
one_train_case_input_data = pd.DataFrame(X_train[0], columns=input_feature_names)

In [4]:
# Preview the first five rows of the labelled inputs (5 is head()'s default).
one_train_case_input_data.head(n=5)

Unnamed: 0,Engine speed (rpm),Engine torque (Nm),Engine throttle (%),Coolant temperature downstream the engine (°C),Ambient temperature (°C),Temperature of exhaust gases upstream the muffler (°C),Temperature of oil in sump (°C)
0,2057.327642,17.814633,17.584523,147.500324,23.303821,-6.030186,-3.794343
1,2055.507159,17.318986,17.311834,148.64017,23.495001,-6.023304,-3.769398
2,2049.333314,17.418549,17.344103,148.81681,23.024075,-6.02052,-3.881337
3,2035.073482,17.045133,17.200031,150.587196,23.157055,-6.01797,-3.897206
4,2043.999238,17.271089,17.276462,150.251657,22.628901,-6.014743,-3.973748


### output targets

In [5]:
# Labels applied, in order, to the three columns of the last axis of y_train.
output_target_names = [
    'Species NOx (ppm)',
    'Species CO (ppm)',
    'Species UHC (ppm)',
]

# Tabulate the targets of the first training case (index 0 on the leading axis).
one_train_case_output_data = pd.DataFrame(y_train[0], columns=output_target_names)

In [6]:
# Preview the first five rows of the labelled targets (5 is head()'s default).
one_train_case_output_data.head(n=5)

Unnamed: 0,Species NOx (ppm),Species CO (ppm),Species UHC (ppm)
0,669.098823,10613.403525,722.069942
1,691.257433,10760.932632,722.126015
2,681.596385,10753.549182,723.018856
3,690.803062,10857.214719,728.95324
4,698.436983,10749.40209,730.281801


### Data processing
 - Load the data
 - Normalize the data
 - Convert it to sequential data
 - Create train and test data loaders for training

In [7]:
# Make the project's ``src`` directory (a sibling of this notebook's folder)
# importable. The path is resolved against the current working directory, so
# the notebook is expected to be run from its own folder.
src_dir = os.path.abspath('../src')
if src_dir not in sys.path:
    # Guarded so that re-running this cell does not stack duplicate entries.
    sys.path.append(src_dir)

from data_preprocessing import DataPreprocessor, DataLoaderFact, DataPaths

In [9]:
# Point the preprocessor at the sample data folder and echo the resolved
# X_train path as a quick sanity check.
paths = DataPaths(base_dir="../sample_training_data")
print(paths.X_train)
data_preprocessor = DataPreprocessor(paths=paths)

# Run the preprocessing step.
# NOTE: this rebinds X_train / X_test / y_train / y_test, which the earlier
# cells loaded as raw arrays; from here on the names refer to the processed
# data, so the earlier preview cells must not be re-run after this one.
X_train, X_test, y_train, y_test = data_preprocessor.data_processing()

# Report the shape of every processed array, one per line.
print(
    f'X_train ... {X_train.shape}',
    f'X_test ... {X_test.shape}',
    f'y_train ... {y_train.shape}',
    f'y_test ... {y_test.shape}',
    sep='\n',
)

../sample_training_data/X_train.npy
X_train ... (17, 484, 80, 7)
X_test ... (4, 484, 80, 7)
y_train ... (17, 484, 5, 3)
y_test ... (4, 484, 5, 3)


### Data seq-to-seq transformation
- First, the data is processed into sequence-to-sequence form
- Then it is converted into train and test loaders for training

In [13]:
# Wrap the processed arrays in train / test data loaders.
train_loader, test_loader = DataLoaderFact.create_loaders(X_train, X_test, y_train, y_test)

# Walk both loaders once and print the tensor shapes of every batch.
loader_reports = (
    ("Train Loader Batch Shapes:",
     "Training data is converted to batches for better processing",
     train_loader),
    ("\nTest Loader Batch Shapes:",
     "All Testing data is processed in one batch for results",
     test_loader),
)
for heading, note, loader in loader_reports:
    print(heading)
    print(note)
    for batch_idx, (inputs, targets) in enumerate(loader):
        print(f"Batch {batch_idx + 1} - inputs: {inputs.shape}, targets: {targets.shape}")

Train Loader Batch Shapes:
Training data is converted to batches for better processing
Batch 1 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 2 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 3 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 4 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 5 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 6 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 7 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 8 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 9 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 10 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 11 - inputs: torch.Size([256, 80, 7]), targets: torch.Size([256, 5, 3])
Batch 12 - inputs: torch.Size([256, 80, 7]), targets: torch.Size

### Detailed inspection of batches

In [16]:
# Make the project's ``src`` directory (a sibling of this notebook's folder)
# importable. The path is resolved against the current working directory, so
# the notebook is expected to be run from its own folder.
src_dir = os.path.abspath('../src')
if src_dir not in sys.path:
    # Guarded so that re-running this cell (or an earlier cell that already
    # added the same directory) does not stack duplicate entries.
    sys.path.append(src_dir)

from training_evaluation_plotting import inspect_batches

In [17]:
# Sanity-check the training loader before training by inspecting two batches.
batches_to_inspect = 2
inspect_batches(train_loader, num_batches=batches_to_inspect)


Inspecting 2 batches from train_loader
Total batches: 33
Batch size: 256

Batch 1:
  Input shape: torch.Size([256, 80, 7]) (batch_size, seq_len, features)
  Target shape: torch.Size([256, 5, 3])
  Input dtype: torch.float32, Target dtype: torch.float32
  Input range: [-1.8166, 2.8380]
  Target range: [-1.9229, 3.0508]

  Sample Sequence (First in batch):
  Input[0, 0]: [ 0.5923 -0.1345  0.325   0.4554  1.3228  1.405   1.4792]
  Input[0, -1]: [ 0.6516 -0.0642  0.0785  0.5094  1.2192  1.4044  1.4515]
  Target[0, 0]: [ 0.2353 -0.8115 -1.1769]
----------------------------------------

Batch 2:
  Input shape: torch.Size([256, 80, 7]) (batch_size, seq_len, features)
  Target shape: torch.Size([256, 5, 3])
  Input dtype: torch.float32, Target dtype: torch.float32
  Input range: [-1.8166, 3.1850]
  Target range: [-1.9620, 2.9383]

  Sample Sequence (First in batch):
  Input[0, 0]: [0.9069 0.5328 0.3916 0.8513 1.4555 1.408  1.4594]
  Input[0, -1]: [0.6953 0.431  0.5347 0.9309 1.4823 1.4048 1.5

### training the model

In [21]:
from training_evaluation_plotting import train_model

# Seed the NumPy and PyTorch random number generators before training so that
# a Restart & Run All starts from the same random state each time.
# (Bit-exact repeatability on CUDA may additionally require deterministic
# algorithm settings; this only fixes the seeds.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Hyper-parameters handed to train_model; the evaluation cell reads the same
# dict when it rebuilds the network for loading a checkpoint.
model_params = {
    'input_dim': 7,
    'output_dim': 3,
    'hidden_dim': 128,
    'num_layers': 2,
    'dropout': 0.2,
    'learning_rate': 0.0001,
    'num_epochs': 50,
    'output_length': 5
}

# Train the model.
# NOTE(review): the test loader is passed as the second argument and the
# recorded log reports a "Val Loss" per epoch -- confirm the test set is not
# being used for model selection / early stopping inside train_model.
model, train_losses, val_losses = train_model(
    train_loader,
    test_loader,
    model_params=model_params
)

Using device: cuda
Epoch [1/50], Train Loss: 0.1734, Val Loss: 0.2048
Epoch [2/50], Train Loss: 0.1559, Val Loss: 0.1385
Epoch [3/50], Train Loss: 0.0959, Val Loss: 0.0684
Epoch [4/50], Train Loss: 0.0520, Val Loss: 0.0637
Epoch [5/50], Train Loss: 0.0400, Val Loss: 0.0723
Epoch [6/50], Train Loss: 0.0355, Val Loss: 0.0756
Epoch [7/50], Train Loss: 0.0336, Val Loss: 0.0692
Epoch [8/50], Train Loss: 0.0324, Val Loss: 0.0640
Epoch [9/50], Train Loss: 0.0318, Val Loss: 0.0602
Epoch [10/50], Train Loss: 0.0315, Val Loss: 0.0591
Epoch [11/50], Train Loss: 0.0309, Val Loss: 0.0541
Epoch [12/50], Train Loss: 0.0306, Val Loss: 0.0565
Epoch [13/50], Train Loss: 0.0306, Val Loss: 0.0552
Epoch [14/50], Train Loss: 0.0306, Val Loss: 0.0567
Epoch [15/50], Train Loss: 0.0304, Val Loss: 0.0563
Epoch [16/50], Train Loss: 0.0301, Val Loss: 0.0541
Epoch [17/50], Train Loss: 0.0300, Val Loss: 0.0554
Epoch [18/50], Train Loss: 0.0299, Val Loss: 0.0541
Epoch [19/50], Train Loss: 0.0297, Val Loss: 0.0549
Ep

### evaluate the model

In [26]:
from training_evaluation_plotting import evaluate_model, load_model

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Locate saved checkpoints (path is relative to the current working directory).
model_paths = glob.glob("saved_models/*.pth")
if not model_paths:
    raise FileNotFoundError("No saved model found in 'saved_models/' directory.")

# glob returns matches in a file-system dependent order, so indexing [0] could
# pick any checkpoint when several exist. Choose the most recently modified
# file instead and report it, so the evaluated model is unambiguous.
model_path = max(model_paths, key=os.path.getmtime)
print(f"Loading model checkpoint: {model_path}")

# Rebuild the network with the hyper-parameters from the training cell
# (model_params must already be defined) and load the checkpoint weights.
model = load_model(model_path, model_params['input_dim'],
                   model_params['output_dim'],
                   model_params['hidden_dim'],
                   model_params['num_layers'],
                   model_params['dropout'],
                   model_params['output_length'], device)

# Evaluate on the test loader; returns the predictions and matching targets.
predictions, actuals = evaluate_model(model, test_loader, device)

  model.load_state_dict(torch.load(model_path, map_location=device))


### Data transformation for the metrics (R2, RMSE, MAE)

In [31]:
from training_evaluation_plotting import inverse_transform, reshape_data, calculate_metrics

# Undo the target scaling.
# The results are bound to NEW names on purpose: the original cell overwrote
# `actuals` / `predictions`, so every re-run of the cell applied the inverse
# scaling once more to already unscaled values.
# NOTE(review): the RMSE/MAE values recorded below this cell are orders of
# magnitude above the ppm ranges shown for the targets earlier in the
# notebook; repeated inverse scaling is one possible cause -- re-check the
# numbers after a Restart & Run All.
sc = data_preprocessor.y_scaler
n_outputs = model_params['output_dim']
actuals_unscaled = inverse_transform(reshape_data(actuals, (-1, n_outputs)), sc)
predictions_unscaled = inverse_transform(reshape_data(predictions, (-1, n_outputs)), sc)
print(f'shpae of actuals: {actuals_unscaled.shape}')
print(f'shpae of predictions: {predictions_unscaled.shape}')

# Reshape to (test case, time step, output) for plotting. The sizes are
# derived from the data rather than hard-coded as (4, 2420, 3).
n_test_cases = y_test.shape[0]
steps_per_case, leftover = divmod(actuals_unscaled.shape[0], n_test_cases)
if leftover:
    raise ValueError(
        f"{actuals_unscaled.shape[0]} evaluated steps cannot be split evenly "
        f"across {n_test_cases} test cases."
    )
reshaped_for_metrics = (n_test_cases, steps_per_case, n_outputs)
actuals_final = reshape_data(actuals_unscaled, reshaped_for_metrics)
preds_final = reshape_data(predictions_unscaled, reshaped_for_metrics)
print(f"Shapes: actuals {actuals_final.shape}, predictions {preds_final.shape}")

# Metrics calculation.
# NOTE(review): as in the original cell, the flat 2-D arrays (not the
# per-case reshaped ones) are passed together with the total row count --
# confirm this is the input layout calculate_metrics expects.
total_steps = actuals_unscaled.shape[0]
metrics = calculate_metrics(actuals_unscaled, predictions_unscaled, total_steps)

# Print metrics; labels follow the target columns (NOx, CO, UHC).
gases = ['NOx', 'CO', 'UHC']
for idx, gas in enumerate(gases):
    print(f"\nMetrics for {gas}:")
    print(f"R2: { [val[idx] for val in metrics['R2']] }")
    print(f"RMSE: { [val[idx] for val in metrics['RMSE']] }")
    print(f"MAE: { [val[idx] for val in metrics['MAE']] }")

shpae of actuals: (9680, 3)
shpae of predictions: (9680, 3)
Shapes: actuals (4, 2420, 3), predictions (4, 2420, 3)

Metrics for NO:
R2: [-0.132, -0.107, -1.092, -7.387]
RMSE: [13090613.0, 11182808.0, 13384478.0, 26551918.0]
MAE: [11054394.0, 9098706.0, 10895646.0, 24945130.0]

Metrics for CO:
R2: [-0.23, -0.082, -3.946, -0.094]
RMSE: [3387234560.0, 3786436608.0, 5515828736.0, 1746410368.0]
MAE: [2733583872.0, 2978628352.0, 4956944384.0, 1466064640.0]

Metrics for UHC:
R2: [-1.275, -0.042, -0.944, -49.541]
RMSE: [3329665.75, 4093633.25, 2430890.5, 4099068.0]
MAE: [2900034.0, 3257103.25, 1994135.5, 4058273.75]


### Plotting the results to a folder

In [51]:
from training_evaluation_plotting import range_with_floats, get_formatter, create_plots
import matplotlib
%matplotlib inline

# Plot results
time_series = range_with_floats(0, 800, 0.2)[:2420]
# Formatters
formatter_y = get_formatter((-4, 4))
create_plots(
    data_indices=[1, 0],
    titles=['-15$^\\circ$C', '-7$^\\circ$C'],
    output_filename='plots1.png',
    actuals_final=actuals_final,
    preds_final=preds_final,
    metrics=metrics,
    time_seconds=time_series,
    formatter_y=formatter_y
)

create_plots(
    data_indices=[2, 3],
    titles=['0$^\\circ$C', '23$^\\circ$C'],
    output_filename='plots2.png',
    actuals_final=actuals_final,
    preds_final=preds_final,
    metrics=metrics,
    time_seconds=time_series,
    formatter_y=formatter_y)