<a href="https://colab.research.google.com/github/Kshitijasharma/Li_ion_battery_ML/blob/main/B0006.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import pandas as pd
import scipy.io as sio
import numpy as np


# Load the .mat file
def load_mat_file(file_name, key):
    """Loads the .mat file and extracts the specified key."""
    mat_file = sio.loadmat(file_name)
    if key not in mat_file:
        raise KeyError(f"Key '{key}' not found in the .mat file.")
    return mat_file[key]


def flatten_data(data):
    """Flattens data to ensure it is 1-dimensional."""
    return np.array(data).ravel()


def process_cycle(cycle_id, cycle_data, features_cols, cycles_cols):
    """Processes a single cycle and returns a DataFrame."""
    # Initialize a DataFrame for the current cycle
    cycle_type = cycle_data[0]
    if cycle_type not in features_cols:
        print(f"Unknown cycle type: {cycle_type}, skipping...")
        return None

    # Get features for the specific cycle type
    features = features_cols[cycle_type]
    features_x_cycle = cycle_data[-1]
    tmp = pd.DataFrame()

    # Populate features
    for feature, data in zip(features, features_x_cycle):
        flattened_data = flatten_data(data)
        tmp[feature] = pd.Series(flattened_data)

    # Add columns common to the cycle measurements
    tmp['id_cycle'] = cycle_id
    for k, col in enumerate(cycles_cols):
        tmp[col] = cycle_data[k]

    return cycle_type, tmp


def to_df(mat_db):
    """Returns one pd.DataFrame per cycle type."""
    # Features common for every cycle
    cycles_cols = ['type', 'ambient_temperature', 'time']

    # Features monitored during the cycle
    features_cols = {
        'charge': ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                   'Current_charge', 'Voltage_charge', 'Time'],
        'discharge': ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                      'Current_charge', 'Voltage_charge', 'Time', 'Capacity'],
        'impedance': ['Sense_current', 'Battery_current', 'Current_ratio',
                      'Battery_impedance', 'Rectified_impedance', 'Re', 'Rct']
    }

    # Define one pd.DataFrame per cycle type
    dfs = {key: [] for key in features_cols.keys()}

    # Get every cycle
    num_cycles = mat_db[0][0][0].shape[1]
    print(f'Number of cycles: {num_cycles}')
    cycles = [[row.flat[0] for row in line] for line in mat_db[0][0][0][0]]

    # Process each cycle
    for cycle_id, cycle_data in enumerate(cycles):
        result = process_cycle(cycle_id, cycle_data, features_cols, cycles_cols)
        if result is not None:
            cycle_type, tmp = result
            dfs[cycle_type].append(tmp)

    # Concatenate the lists of DataFrames into final DataFrames
    return {cycle_type: pd.concat(dfs[cycle_type], ignore_index=True) for cycle_type in dfs}


# Main Execution
mat_db = load_mat_file('B0006.mat', 'B0006')
dfs = to_df(mat_db)

# Display the first few rows of the 'charge' DataFrame
print(dfs['charge'].head())


Number of cycles: 616
   Voltage_measured  Current_measured  Temperature_measured  Current_charge  \
0          3.864624          0.000082             24.682214          -0.001   
1          3.469113         -4.059185             24.695407          -4.060   
2          3.994806          1.513750             24.711491           1.506   
3          4.005888          1.511389             24.739672           1.506   
4          4.012944          1.510817             24.753180           1.506   

   Voltage_charge    Time  id_cycle    type  ambient_temperature    time  
0          -0.007   0.000         0  charge                   24  2008.0  
1           1.558   2.532         0  charge                   24  2008.0  
2           4.710   5.500         0  charge                   24  2008.0  
3           4.726   8.344         0  charge                   24  2008.0  
4           4.737  11.125         0  charge                   24  2008.0  


CNN

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Use 'discharge' cycle data (since it contains Capacity)
df = dfs['discharge']

# Drop missing values
df = df.dropna()

# Select relevant features
features = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 'Time',
            'Voltage_charge', 'Current_charge', 'ambient_temperature']
X = df[features].values
y = df['Capacity'].values

# Reshape X for CNN (CNN expects 3D input: samples, timesteps, features)
X = X.reshape(X.shape[0], X.shape[1], 1)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[1])).reshape(X_test.shape)

# CNN Model
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    Conv1D(filters=32, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1)  # Regression output
])

# Compile Model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

# Train Model
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), verbose=1)

# Predictions
y_pred_cnn = model.predict(X_test)

# Evaluation metrics
mae_cnn = mean_absolute_error(y_test, y_pred_cnn)
mse_cnn = mean_squared_error(y_test, y_pred_cnn)
rmse_cnn = np.sqrt(mse_cnn)
r2_cnn = r2_score(y_test, y_pred_cnn)

print("\nConvolutional Neural Network (CNN) Performance:")
print(f"Mean Absolute Error (MAE): {mae_cnn:.4f}")
print(f"Mean Squared Error (MSE): {mse_cnn:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse_cnn:.4f}")
print(f"R² Score: {r2_cnn:.4f}")
print(f"Model Accuracy: {r2_cnn * 100:.2f}%")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 43ms/step - loss: 2.1267 - mae: 1.4325 - val_loss: 1.3388 - val_mae: 1.1210
Epoch 2/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.8907 - mae: 0.8916 - val_loss: 0.3051 - val_mae: 0.4800
Epoch 3/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.3390 - mae: 0.4032 - val_loss: 0.3217 - val_mae: 0.4219
Epoch 4/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.2431 - mae: 0.3555 - val_loss: 0.1682 - val_mae: 0.3362
Epoch 5/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.1878 - mae: 0.3264 - val_loss: 0.1427 - val_mae: 0.3131
Epoch 6/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.1372 - mae: 0.2965 - val_loss: 0.1126 - val_mae: 0.2794
Epoch 7/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.1243 - mae: 0

KNN


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Use 'discharge' cycle data (since it contains Capacity)
df = dfs['discharge']

# Drop missing values
df = df.dropna()

# Select relevant features
features = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 'Time',
            'Voltage_charge', 'Current_charge', 'ambient_temperature']
X = df[features].values
y = df['Capacity'].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# KNN Model
knn = KNeighborsRegressor(n_neighbors=5, weights='distance', metric='euclidean')  # K=5, weighted by distance
knn.fit(X_train, y_train)

# Predictions
y_pred_knn = knn.predict(X_test)

# Evaluation metrics
mae_knn = mean_absolute_error(y_test, y_pred_knn)
mse_knn = mean_squared_error(y_test, y_pred_knn)
rmse_knn = np.sqrt(mse_knn)
r2_knn = r2_score(y_test, y_pred_knn)

# Print performance metrics
print("\nK-Nearest Neighbors (KNN) Regression Performance:")
print(f"Mean Absolute Error (MAE): {mae_knn:.4f}")
print(f"Mean Squared Error (MSE): {mse_knn:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse_knn:.4f}")
print(f"R² Score: {r2_knn:.4f}")
print(f"Model Accuracy: {r2_knn * 100:.2f}%")



K-Nearest Neighbors (KNN) Regression Performance:
Mean Absolute Error (MAE): 0.0942
Mean Squared Error (MSE): 0.0145
Root Mean Squared Error (RMSE): 0.1206
R² Score: 0.7758
Model Accuracy: 77.58%


FNN

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Preprocess the dataset (using the 'charge' DataFrame from your earlier processing)
charge_df = dfs['charge'].dropna()  # Remove rows with missing data

# Define the target variable (binary classification)
charge_df['label'] = (charge_df['Voltage_measured'] > charge_df['Voltage_measured'].mean()).astype(int)

# Define features and target variable
features = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge']
X = charge_df[features]
y = charge_df['label']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the Feedforward Neural Network (FNN)
model = Sequential([
    Dense(64, activation='relu', input_dim=X_train.shape[1]),  # First hidden layer with 64 neurons
    Dropout(0.2),  # Dropout layer to prevent overfitting
    Dense(32, activation='relu'),  # Second hidden layer with 32 neurons
    Dropout(0.2),  # Dropout layer to prevent overfitting
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate the model on the test data
y_pred = (model.predict(X_test) > 0.5).astype(int)  # Predict labels (0 or 1) based on threshold of 0.5

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Print evaluation metrics
print("Feedforward Neural Network Performance Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared (R²): {r2:.4f}")


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 4ms/step - accuracy: 0.9942 - loss: 0.0221 - val_accuracy: 0.9994 - val_loss: 0.0011
Epoch 2/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 3ms/step - accuracy: 0.9995 - loss: 0.0013 - val_accuracy: 0.9998 - val_loss: 7.1120e-04
Epoch 3/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3ms/step - accuracy: 0.9996 - loss: 0.0011 - val_accuracy: 0.9994 - val_loss: 0.0015
Epoch 4/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 3ms/step - accuracy: 0.9997 - loss: 9.1479e-04 - val_accuracy: 0.9999 - val_loss: 1.9881e-04
Epoch 5/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 3ms/step - accuracy: 0.9998 - loss: 6.2775e-04 - val_accuracy: 0.9998 - val_loss: 4.2802e-04
Epoch 6/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3ms/step - accuracy: 0.9997 - loss: 7.1280e-04 - val_accuracy: 0.9997 - va

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import scipy.io as sio

# Load the .mat file
def load_mat_file(file_name, key):
    """Loads the .mat file and extracts the specified key."""
    mat_file = sio.loadmat(file_name)
    if key not in mat_file:
        raise KeyError(f"Key '{key}' not found in the .mat file.")
    return mat_file[key]

# Function to flatten nested lists/arrays
def flatten_data(data):
    return np.array(data).ravel()

# Function to process individual cycle
def process_cycle(cycle_id, cycle_data, features_cols, cycles_cols):
    cycle_type = cycle_data[0]
    if cycle_type not in features_cols:
        print(f"Unknown cycle type: {cycle_type}, skipping...")
        return None

    features = features_cols[cycle_type]
    features_x_cycle = cycle_data[-1]
    tmp = pd.DataFrame()

    for feature, data in zip(features, features_x_cycle):
        flattened_data = flatten_data(data)
        tmp[feature] = pd.Series(flattened_data)

    tmp['id_cycle'] = cycle_id
    for k, col in enumerate(cycles_cols):
        tmp[col] = cycle_data[k]

    return cycle_type, tmp

# Function to process the full dataset
def process_mat_to_df(mat_db):
    cycles_cols = ['type', 'ambient_temperature', 'time']
    features_cols = {
        'charge': ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                   'Current_charge', 'Voltage_charge', 'Time'],
        'discharge': ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                      'Current_charge', 'Voltage_charge', 'Time', 'Capacity'],
        'impedance': ['Sense_current', 'Battery_current', 'Current_ratio',
                      'Battery_impedance', 'Rectified_impedance', 'Re', 'Rct']
    }

    dfs = {key: [] for key in features_cols.keys()}
    num_cycles = mat_db[0][0][0].shape[1]
    print(f'Number of cycles: {num_cycles}')
    cycles = [[row.flat[0] for row in line] for line in mat_db[0][0][0][0]]

    for cycle_id, cycle_data in enumerate(cycles):
        result = process_cycle(cycle_id, cycle_data, features_cols, cycles_cols)
        if result is not None:
            cycle_type, tmp = result
            dfs[cycle_type].append(tmp)

    return {cycle_type: pd.concat(dfs[cycle_type], ignore_index=True) for cycle_type in dfs}

# Load and process B0006.mat dataset
mat_db = load_mat_file('B0006.mat', 'B0006')
dfs = process_mat_to_df(mat_db)

# Use 'discharge' cycle data (since it contains Capacity)
df = dfs['discharge'].dropna()

# Select features (Avoid using 'Time' if it's highly correlated with Capacity)
features = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge']
X = df[features].values
y = df['Capacity'].values

# Train-test split (Ensure shuffling)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

# Standardize features
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

# Standardize target variable
scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).ravel()

# Build Fully Connected Neural Network (FNN)
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),  # Prevent overfitting
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1)  # Regression output
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), verbose=1)

# Predictions
y_pred_fnn = model.predict(X_test)

# Inverse transform predictions to get original scale
y_pred_fnn = scaler_y.inverse_transform(y_pred_fnn)
y_test_original = scaler_y.inverse_transform(y_test.reshape(-1, 1))

# Evaluation metrics
mae_fnn = mean_absolute_error(y_test_original, y_pred_fnn)
mse_fnn = mean_squared_error(y_test_original, y_pred_fnn)
rmse_fnn = np.sqrt(mse_fnn)
r2_fnn = r2_score(y_test_original, y_pred_fnn)

# Print results
print("\nFully Connected Neural Network (FNN) Performance:")
print(f"Mean Absolute Error (MAE): {mae_fnn:.4f}")
print(f"Mean Squared Error (MSE): {mse_fnn:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse_fnn:.4f}")
print(f"R² Score: {r2_fnn:.4f}")
print(f"Model Accuracy: {r2_fnn * 100:.2f}%")


Number of cycles: 616
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - loss: 1.1696 - mae: 0.8968 - val_loss: 1.0728 - val_mae: 0.8664
Epoch 2/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.9336 - mae: 0.8227 - val_loss: 0.8888 - val_mae: 0.8000
Epoch 3/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.7796 - mae: 0.7566 - val_loss: 0.7494 - val_mae: 0.7362
Epoch 4/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.6768 - mae: 0.7053 - val_loss: 0.6563 - val_mae: 0.6836
Epoch 5/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.6006 - mae: 0.6758 - val_loss: 0.5704 - val_mae: 0.6280
Epoch 6/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.5378 - mae: 0.6133 - val_loss: 0.5076 - val_mae: 0.5768
Epoch 7/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.4484 - mae: 0.5563 - val

RNN (LSTM approach)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Preprocess the dataset (using the 'charge' DataFrame from your earlier processing)
charge_df = dfs['charge'].dropna()  # Remove rows with missing data

# Define the target variable (binary classification)
charge_df['label'] = (charge_df['Voltage_measured'] > charge_df['Voltage_measured'].mean()).astype(int)

# Define features and target variable
features = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge']
X = charge_df[features].values
y = charge_df['label'].values

# Reshape data to 3D for LSTM [samples, time_steps, features]
# Here, we're treating each sample as a sequence of length 1 (just for demonstration)
X = np.reshape(X, (X.shape[0], 1, X.shape[1]))

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[2])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[2])).reshape(X_test.shape)

# Build the LSTM model
model = Sequential([
    LSTM(64, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate the model on the test data
y_pred = (model.predict(X_test) > 0.5).astype(int)  # Predict labels (0 or 1) based on threshold of 0.5

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Print evaluation metrics
print("LSTM Performance Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared (R²): {r2:.4f}")


  super().__init__(**kwargs)


Epoch 1/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 6ms/step - accuracy: 0.9927 - loss: 0.0407 - val_accuracy: 0.9993 - val_loss: 0.0014
Epoch 2/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 6ms/step - accuracy: 0.9996 - loss: 0.0012 - val_accuracy: 0.9999 - val_loss: 5.0636e-04
Epoch 3/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 6ms/step - accuracy: 0.9996 - loss: 9.0681e-04 - val_accuracy: 0.9995 - val_loss: 0.0011
Epoch 4/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 6ms/step - accuracy: 0.9996 - loss: 8.5265e-04 - val_accuracy: 1.0000 - val_loss: 2.9657e-04
Epoch 5/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 6ms/step - accuracy: 0.9998 - loss: 6.5785e-04 - val_accuracy: 0.9999 - val_loss: 2.4842e-04
Epoch 6/10
[1m10824/10824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 6ms/step - accuracy: 0.9997 - loss: 6.0520e-04 - val_accura



[1m3383/3383[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step
LSTM Performance Metrics:
Accuracy: 1.0000
Mean Absolute Error (MAE): 0.0000
Mean Squared Error (MSE): 0.0000
Root Mean Squared Error (RMSE): 0.0030
R-squared (R²): 0.9999


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Load & preprocess dataset
charge_df = dfs['charge'].dropna()

# Ensure balanced classes
threshold = charge_df['Voltage_measured'].quantile(0.5)  # Use median instead of mean
charge_df['label'] = (charge_df['Voltage_measured'] > threshold).astype(int)

# Define features & target
features = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge']
X = charge_df[features].values
y = charge_df['label'].values

# Apply time steps (Use 5 previous readings)
time_steps = 5
X_seq, y_seq = [], []
for i in range(len(X) - time_steps):
    X_seq.append(X[i:i+time_steps])
    y_seq.append(y[i+time_steps])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42, stratify=y_seq)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[2])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[2])).reshape(X_test.shape)

# Build the LSTM model
model = Sequential([
    LSTM(64, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Predictions
y_pred = (model.predict(X_test) > 0.5).astype(int)

# Evaluation Metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("\nLSTM Model Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


  super().__init__(**kwargs)


Epoch 1/10
[1m13530/13530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 11ms/step - accuracy: 0.9098 - loss: 0.2202 - val_accuracy: 0.9289 - val_loss: 0.1618
Epoch 2/10
[1m13530/13530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 10ms/step - accuracy: 0.9359 - loss: 0.1535 - val_accuracy: 0.9398 - val_loss: 0.1388
Epoch 3/10
[1m13530/13530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 11ms/step - accuracy: 0.9420 - loss: 0.1439 - val_accuracy: 0.9526 - val_loss: 0.1148
Epoch 4/10
[1m13530/13530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 11ms/step - accuracy: 0.9482 - loss: 0.1223 - val_accuracy: 0.9571 - val_loss: 0.1022
Epoch 5/10
[1m13530/13530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 11ms/step - accuracy: 0.9513 - loss: 0.1137 - val_accuracy: 0.9605 - val_loss: 0.0942
Epoch 6/10
[1m13530/13530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 10ms/step - accuracy: 0.9557 - loss: 0.1076 - val_accuracy: 0.8967 - val

SVM Code

In [None]:
from sklearn.svm import LinearSVC  # Use LinearSVC for faster training
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Preprocess the dataset (using the 'charge' DataFrame from your earlier processing)
charge_df = dfs['charge'].dropna()  # Remove rows with missing data

# Define the target variable (binary classification)
charge_df['label'] = (charge_df['Voltage_measured'] > charge_df['Voltage_measured'].mean()).astype(int)

# Define features and target variable
features = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge']
X = charge_df[features]
y = charge_df['label']

# Split the dataset into training and testing sets (use a smaller subset if needed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the LinearSVC model (faster for large datasets)
svm_model = LinearSVC(max_iter=1000, random_state=42)
svm_model.fit(X_train, y_train)

# Predict on the test set
y_pred = svm_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Print evaluation metrics
print("SVM Performance Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared (R²): {r2:.4f}")


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import scipy.io as sio
import numpy as np

# Load the .mat file
def load_mat_file(file_name, key):
    mat_file = sio.loadmat(file_name)
    if key not in mat_file:
        raise KeyError(f"Key '{key}' not found in the .mat file.")
    return mat_file[key]

def flatten_data(data):
    return np.array(data).ravel()

def process_cycle(cycle_id, cycle_data, features_cols, cycles_cols):
    cycle_type = cycle_data[0]
    if cycle_type not in features_cols:
        print(f"Unknown cycle type: {cycle_type}, skipping...")
        return None

    features = features_cols[cycle_type]
    features_x_cycle = cycle_data[-1]
    tmp = pd.DataFrame()

    for feature, data in zip(features, features_x_cycle):
        flattened_data = flatten_data(data)
        tmp[feature] = pd.Series(flattened_data)

    tmp['id_cycle'] = cycle_id
    for k, col in enumerate(cycles_cols):
        tmp[col] = cycle_data[k]

    return cycle_type, tmp

def to_df(mat_db):
    cycles_cols = ['type', 'ambient_temperature', 'time']
    features_cols = {
        'charge': ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                   'Current_charge', 'Voltage_charge', 'Time'],
        'discharge': ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                      'Current_charge', 'Voltage_charge', 'Time', 'Capacity'],
        'impedance': ['Sense_current', 'Battery_current', 'Current_ratio',
                      'Battery_impedance', 'Rectified_impedance', 'Re', 'Rct']
    }

    dfs = {key: [] for key in features_cols.keys()}
    num_cycles = mat_db[0][0][0].shape[1]
    print(f'Number of cycles: {num_cycles}')
    cycles = [[row.flat[0] for row in line] for line in mat_db[0][0][0][0]]

    for cycle_id, cycle_data in enumerate(cycles):
        result = process_cycle(cycle_id, cycle_data, features_cols, cycles_cols)
        if result is not None:
            cycle_type, tmp = result
            dfs[cycle_type].append(tmp)

    return {cycle_type: pd.concat(dfs[cycle_type], ignore_index=True) for cycle_type in dfs}

# Main Execution
mat_db = load_mat_file('B0006.mat', 'B0006')
dfs = to_df(mat_db)

# Prepare the 'charge' DataFrame for logistic regression
charge_df = dfs['charge'].dropna()  # Remove rows with missing data

# Example: Create a binary classification label based on a feature (e.g., Voltage_measured > threshold)
charge_df['label'] = (charge_df['Voltage_measured'] > charge_df['Voltage_measured'].mean()).astype(int)

# Define features and labels
X = charge_df[['Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge']]
y = charge_df['label']

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train logistic regression
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Number of cycles: 616
Accuracy: 0.9967478172495033
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99     20249
           1       1.00      1.00      1.00     87986

    accuracy                           1.00    108235
   macro avg       0.99      1.00      0.99    108235
weighted avg       1.00      1.00      1.00    108235



In [None]:
import pandas as pd
import scipy.io as sio
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, classification_report, mean_absolute_error,
    mean_squared_error, r2_score
)

# Load the .mat file
def load_mat_file(file_name, key):
    mat_file = sio.loadmat(file_name)
    if key not in mat_file:
        raise KeyError(f"Key '{key}' not found in the .mat file.")
    return mat_file[key]

def flatten_data(data):
    return np.array(data).ravel()

def process_cycle(cycle_id, cycle_data, features_cols, cycles_cols):
    cycle_type = cycle_data[0]
    if cycle_type not in features_cols:
        print(f"Unknown cycle type: {cycle_type}, skipping...")
        return None

    features = features_cols[cycle_type]
    features_x_cycle = cycle_data[-1]
    tmp = pd.DataFrame()

    for feature, data in zip(features, features_x_cycle):
        flattened_data = flatten_data(data)
        tmp[feature] = pd.Series(flattened_data)

    tmp['id_cycle'] = cycle_id
    for k, col in enumerate(cycles_cols):
        tmp[col] = cycle_data[k]

    return cycle_type, tmp

def to_df(mat_db):
    cycles_cols = ['type', 'ambient_temperature', 'time']
    features_cols = {
        'charge': ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                   'Current_charge', 'Voltage_charge', 'Time'],
        'discharge': ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                      'Current_charge', 'Voltage_charge', 'Time', 'Capacity'],
        'impedance': ['Sense_current', 'Battery_current', 'Current_ratio',
                      'Battery_impedance', 'Rectified_impedance', 'Re', 'Rct']
    }

    dfs = {key: [] for key in features_cols.keys()}
    num_cycles = mat_db[0][0][0].shape[1]
    print(f'Number of cycles: {num_cycles}')
    cycles = [[row.flat[0] for row in line] for line in mat_db[0][0][0][0]]

    for cycle_id, cycle_data in enumerate(cycles):
        result = process_cycle(cycle_id, cycle_data, features_cols, cycles_cols)
        if result is not None:
            cycle_type, tmp = result
            dfs[cycle_type].append(tmp)

    return {cycle_type: pd.concat(dfs[cycle_type], ignore_index=True) for cycle_type in dfs}

# Load & process dataset
mat_db = load_mat_file('B0006.mat', 'B0006')
dfs = to_df(mat_db)

# Prepare the 'charge' DataFrame
charge_df = dfs['charge'].dropna()

# Binary classification label based on median instead of mean (for better balance)
threshold = charge_df['Voltage_measured'].median()
charge_df['label'] = (charge_df['Voltage_measured'] > threshold).astype(int)

# Define features and labels
X = charge_df[['Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge']]
y = charge_df['label']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train logistic regression
model = LogisticRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation Metrics
accuracy = accuracy_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Print results
print("\nLogistic Regression Model Performance 🔹")
print(f"Accuracy: {accuracy:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f" Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared (R²): {r2:.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Number of cycles: 616

Logistic Regression Model Performance 🔹
Accuracy: 0.9107
Mean Absolute Error (MAE): 0.0893
 Mean Squared Error (MSE): 0.0893
Root Mean Squared Error (RMSE): 0.2988
R-squared (R²): 0.6430

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.89      0.91     54118
           1       0.89      0.93      0.91     54117

    accuracy                           0.91    108235
   macro avg       0.91      0.91      0.91    108235
weighted avg       0.91      0.91      0.91    108235

