# Environment Setup

In [1]:
# --- MacOS Validator ---
import sys
sys.path.insert(0, './setup_validation')

from macos_validator import main
results = main(run_benchmark=False)

#  --- Path Setup ---
import torch
from pathlib import Path

project_root = Path.cwd()
sys.path.insert(0, './src')


=== DEPENDENCY STATUS ===
 ✓ h5py                 v3.15.1         HDF5 file format support
 ✓ matplotlib           v3.10.7         Plotting library
 ✓ memory_profiler      v0.61.0         Memory profiling tool
 ✓ numpy                v1.26.4         Numerical computing library
 ✓ pandas               v2.3.3          Data manipulation library
 ✓ scipy                v1.16.3         Scientific computing library
 ✓ seaborn              v0.13.2         Statistical visualization library
 ✓ sklearn              v1.7.2          Scikit-learn ML library
 ✓ torch                v2.9.1          PyTorch deep learning framework
 ✓ torchaudio           v2.9.1          PyTorch audio utilities
 ✓ torchvision          v0.24.1         PyTorch vision utilities
 ✓ tqdm                 v4.67.1         Progress bar library
 ✓ yaml                 v6.0.3          Config manipulation library

=== GPU/ACCELERATOR STATUS ===
Platform: arm64
Apple Silicon: Yes

PyTorch MPS:
 Installed: Yes
 Version: 2.9.1
 MPS 

# Config Loading

In [3]:
from src.utils import load_config
config = load_config("config.yaml")
print (config)

{'data_profile': 'data_paper', 'dataset_name': 'sample', 'paths': {'datasetdir': 'data', 'csv_pattern': '*.csv', 'extracted_datadir': 'data_extracted', 'hdf5_filename': 'data_paper_sample.h5', 'checkpoint_dir': 'checkpoints'}, 'dataset': {'rated_capacity_ah': 155.0, 'num_cells': 96, 'voltage_window_mv': [3900, 4050]}, 'processing': {'config_file': '{data_profile}.yaml', 'parallel_batch_size': 6}, 'ocv_calibration': {'min_rest_hours': 1.0, 'max_soc_start': 0.6, 'ocv_table_path': None}, 'charging': {'status_value': 3, 'accept_partial_charges': True, 'full_charge_soc_threshold': 0.99, 'full_charge_voltage_v': 4.24}, 'quality_checks': {'min_samples_in_window': 20, 'max_gap_seconds': 60, 'current_bounds_a': [0, 220], 'current_sign_convention': 'physical', 'voltage_smoothing': 'interpolation'}, 'soh_calculation': {'soh_bounds': [0.35, 1.25], 'min_abs_delta_soc': 0.03}}


# TESTS

In [4]:
# In main.ipynb
from src.data_extract import extract_all_features
from src.utils import load_config
import pandas as pd

config = load_config("config.yaml")
df = pd.read_csv("data/sample/vin1.csv")  # Use one actual file
features = extract_all_features(df, config)
print(features)

None


In [None]:
from src.data_process import get_vehicle_paths
from src.utils import load_config
from src.data_extract import extract_all_features
import pandas as pd

config = load_config("config.yaml")
csv_dir = Path(config['paths']['datasetdir']) / config['dataset_name']
vehicle_paths = get_vehicle_paths(csv_dir, "*.csv")[:5]  # Only first 5

for path in vehicle_paths:
    print(f"\n{'='*50}")
    print(f"Processing: {path.stem}")
    print('='*50)
    
    df = pd.read_csv(path)
    df['vehicleid'] = path.stem
    
    features = extract_all_features(df, config)
    if features:
        print(f"✅ SUCCESS: {len(features['soh_labels'])} samples")
    else:
        print(f"❌ FAILED: No valid segments")

# Data Process & H5 Packaging

## SAMPLE

In [1]:
# -- Paper Sample Feature Extrction --
from src.data_process import extraction_pipeline

config_path = "config.yaml"
extraction_pipeline(config_path)

  from .autonotebook import tqdm as notebook_tqdm


checkpoints/data_paper_sample_checkpoint.json
Data Profile: data_paper
Dataset: sample
Found 300 vehicle files


Processing batches: 100%|██████████| 50/50 [01:37<00:00,  1.94s/it]


EXTRACTION COMPLETE
Total vehicles processed: 300
Successful extractions: 5
No valid segments: 295
Errors: 0
Total samples extracted: 8
SOH range: 0.518 to 0.832
HDF5 file closed safely





In [2]:
# -- Sample Sample Feature Extrction --
from src.data_process import extraction_pipeline

config_path = "config.yaml"
extraction_pipeline(config_path)

checkpoints/data_sample_sample_checkpoint.json
Data Profile: data_sample
Dataset: sample
Found 300 vehicle files


Processing batches:   0%|          | 0/50 [00:00<?, ?it/s]

Processing batches: 100%|██████████| 50/50 [02:08<00:00,  2.57s/it]


EXTRACTION COMPLETE
Total vehicles processed: 300
Successful extractions: 24
No valid segments: 276
Errors: 0
Total samples extracted: 51
SOH range: 0.308 to 1.210
HDF5 file closed safely





In [6]:
# -- Strict Sample Feature Extrction --
from src.data_process import extraction_pipeline

config = "config.yaml"
extraction_pipeline(config)

checkpoints/data_strict_sample_checkpoint.json
Data Profile: data_strict
Dataset: sample
Found 300 vehicle files


Processing batches: 100%|██████████| 50/50 [01:31<00:00,  1.83s/it]


EXTRACTION COMPLETE
Total vehicles processed: 300
Successful extractions: 1
No valid segments: 299
Errors: 0
Total samples extracted: 2
SOH range: 0.512 to 0.515
HDF5 file closed safely





In [7]:
# -- Lenient Sample Feature Extrction --
from src.data_process import extraction_pipeline

config = "config.yaml"
extraction_pipeline(config)

checkpoints/data_lenient_sample_checkpoint.json
Data Profile: data_lenient
Dataset: sample
Found 300 vehicle files


Processing batches:   0%|          | 0/50 [00:00<?, ?it/s]

Processing batches: 100%|██████████| 50/50 [02:42<00:00,  3.25s/it]


EXTRACTION COMPLETE
Total vehicles processed: 300
Successful extractions: 61
No valid segments: 239
Errors: 0
Total samples extracted: 136
SOH range: 0.206 to 1.280
HDF5 file closed safely





In [8]:
# -- Current Sample Feature Extrction --
from src.data_process import extraction_pipeline

config = "config.yaml"
extraction_pipeline(config)

checkpoints/data_current_sample_checkpoint.json
Data Profile: data_current
Dataset: sample
Found 300 vehicle files


Processing batches: 100%|██████████| 50/50 [01:51<00:00,  2.23s/it]


EXTRACTION COMPLETE
Total vehicles processed: 300
Successful extractions: 12
No valid segments: 288
Errors: 0
Total samples extracted: 22
SOH range: 0.353 to 1.072
HDF5 file closed safely





In [9]:
# -- Voltage Sample Feature Extrction --
from src.data_process import extraction_pipeline

config = "config.yaml"
extraction_pipeline(config)

checkpoints/data_voltage_sample_checkpoint.json
Data Profile: data_voltage
Dataset: sample
Found 300 vehicle files


Processing batches: 100%|██████████| 50/50 [01:57<00:00,  2.34s/it]


EXTRACTION COMPLETE
Total vehicles processed: 300
Successful extractions: 15
No valid segments: 285
Errors: 0
Total samples extracted: 27
SOH range: 0.353 to 1.072
HDF5 file closed safely





In [10]:
# -- Temporal Sample Feature Extrction --
from src.data_process import extraction_pipeline

config = "config.yaml"
extraction_pipeline(config)

checkpoints/data_temporal_sample_checkpoint.json
Data Profile: data_temporal
Dataset: sample
Found 300 vehicle files


Processing batches: 100%|██████████| 50/50 [01:39<00:00,  1.99s/it]


EXTRACTION COMPLETE
Total vehicles processed: 300
Successful extractions: 3
No valid segments: 297
Errors: 0
Total samples extracted: 3
SOH range: 0.408 to 0.457
HDF5 file closed safely





In [None]:
# -- Final Sample Feature Extrction --
from src.data_process import extraction_pipeline

config = "config.yaml"
extraction_pipeline(config)

# Training

In [None]:
from src.train import model_pipeline

config = "config.yaml"
trained_model, test_results = model_pipeline(config)