# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load datasets: each source is read right after its path is declared.
# Both paths are relative, so they resolve against the current working directory.
xlsx_path = 'ev_battery_data.xlsx'
xlsx_data = pd.read_excel(xlsx_path)

erroneous_csv_path = 'Erroneous_Gassing_Dataset.csv'
erroneous_data = pd.read_csv(erroneous_csv_path)

# Data Preprocessing

# Raw spreadsheet header -> short name used for the rest of the pipeline.
# The keys double as the list of columns to keep, so selection and renaming
# cannot drift apart.
column_aliases = {
    'Battery Temperature (Start) [°C]': 'Temperature_Start',
    'Battery Temperature (End)': 'Temperature_End',
    'Battery State of Charge (Start)': 'SOC_Start',
    'Battery State of Charge (End)': 'SOC_End',
}
relevant_columns = list(column_aliases)

# Keep only the relevant columns, discard rows with any missing value among
# them, then apply the short names.
filtered_xlsx_data = (
    xlsx_data[relevant_columns]
    .dropna()
    .rename(columns=column_aliases)
)

# Min-max scale every retained column (default feature range).
scaler = MinMaxScaler()
normalized_xlsx_features = scaler.fit_transform(filtered_xlsx_data)

# Extract features and target from the Erroneous Gassing Dataset.
# Incomplete rows are dropped ONCE, over exactly the columns that are used, so
# every feature row stays paired with its own target value. Dropping NaNs
# separately for features (two columns) and target (all columns) can leave the
# two arrays with different lengths and mismatched rows.
erroneous_clean = erroneous_data.dropna(
    subset=['Temperature', 'State of Charge', 'Gassing']
)
erroneous_features = erroneous_clean[['Temperature', 'State of Charge']].values
erroneous_target = erroneous_clean['Gassing'].values

# Adjust xlsx data length to match the CSV dataset: repeat the xlsx rows
# end-to-end and cut the result to the number of CSV rows.
if len(normalized_xlsx_features) == 0:
    # Without this guard the repeat count below fails with a bare
    # ZeroDivisionError that hides the real cause.
    raise ValueError(
        "No complete rows left in the xlsx data; cannot align it with the CSV dataset."
    )
n_rows = len(erroneous_features)
repeats = n_rows // len(normalized_xlsx_features) + 1
erroneous_aligned_features = np.tile(normalized_xlsx_features, (repeats, 1))[:n_rows]

# Combine normalized xlsx features with CSV features (column-wise).
# NOTE(review): the CSV features are appended unscaled while the xlsx features
# are min-max scaled — confirm this is intended.
erroneous_combined_features = np.hstack([erroneous_aligned_features, erroneous_features])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
(
    erroneous_X_train,
    erroneous_X_test,
    erroneous_y_train,
    erroneous_y_test,
) = train_test_split(
    erroneous_combined_features,
    erroneous_target,
    test_size=0.2,
    random_state=42,
)

# LSTM input is (samples, timesteps, features): insert a length-1 timestep
# axis between the sample axis and the feature axis.
erroneous_X_train = np.expand_dims(erroneous_X_train, axis=1)
erroneous_X_test = np.expand_dims(erroneous_X_test, axis=1)

# Define and compile the LSTM model
def create_lstm_model(input_shape):
    """Build and compile a small LSTM regression network.

    Args:
        input_shape: Shape of a single sample, forwarded to the first LSTM
            layer as ``input_shape`` (the caller in this file passes
            ``(timesteps, features)``).

    Returns:
        A compiled ``Sequential`` model: LSTM(32) -> Dropout(0.2) ->
        Dense(16, relu) -> Dense(1, linear), trained with the Adam optimizer
        on MSE loss and reporting MAE.
    """
    network = Sequential()
    # Only the final LSTM state is passed on (return_sequences=False).
    network.add(LSTM(32, input_shape=input_shape, return_sequences=False))
    network.add(Dropout(0.2))
    network.add(Dense(16, activation='relu'))
    # Single linear unit: output layer for regression.
    network.add(Dense(1, activation='linear'))
    network.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return network

# Per-sample shape: everything after the leading sample axis,
# i.e. (timesteps, features).
input_shape = erroneous_X_train.shape[1:]
erroneous_model = create_lstm_model(input_shape)

# Train the model
# NOTE(review): the held-out test split is also used as validation data, so the
# per-epoch validation metrics are computed on the same rows as the final
# evaluation below.
erroneous_model.fit(
    erroneous_X_train,
    erroneous_y_train,
    epochs=10,
    batch_size=32,
    validation_data=(erroneous_X_test, erroneous_y_test),
    verbose=1,
)

# Evaluate the model on the held-out split (returns the loss, then the MAE metric)
erroneous_loss, erroneous_mae = erroneous_model.evaluate(
    erroneous_X_test,
    erroneous_y_test,
    verbose=0,
)
print(f"Erroneous Dataset - Loss (MSE): {erroneous_loss}, MAE: {erroneous_mae}")

# Save the model
erroneous_model.save("erroneous_gassing_model.h5")
