In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.ensemble import RandomForestClassifier
import joblib


In [2]:
dataset = pd.read_csv("battery_data_full1.csv", index_col=None)
df = dataset

# Rename columns to match your dataset
df.rename(columns={
    'Battery type (N)': 'N',
    'BATTERY.LIFE (Month)': 'BATTERY.LIFE_MONTH',
    'CAPACITY(AH)': 'CAPACITY_AH'
}, inplace=True)

# Encoding categorical variable (STATUS)
label_encoder = LabelEncoder()
df['STATUS'] = label_encoder.fit_transform(df['STATUS'])

# Separate features (X) and target variable (y)
X = df.drop(['STATUS'], axis=1)
y = df['STATUS']

# Print X (features) and y (target) for verification
print("Features (X):")
print(X)
print("\nTarget (y):")
print(y)

Features (X):
      N  VOLTAGE  CCA  S.GRAVITY  BATTERY.LIFE_MONTH  CAPACITY_AH
0    40     12.8  320      1.275                   6           40
1    40     12.8  120      1.275                   6           20
2    40     11.0  120      1.150                   6           20
3    50     12.8  375      1.275                   8           50
4    50     10.0  150      1.280                   8           10
..  ...      ...  ...        ...                 ...          ...
57  100     12.6  725      1.270                  11           88
58  100     12.7  775      1.275                  12           93
59  100     11.2  575      1.230                  10           83
60  100     12.2  675      1.260                  11           78
61  100     12.8  825      1.270                  12           88

[62 rows x 6 columns]

Target (y):
0     2
1     1
2     0
3     2
4     1
     ..
57    2
58    2
59    1
60    0
61    2
Name: STATUS, Length: 62, dtype: int32


In [3]:
dataset.columns

Index(['N', 'VOLTAGE', 'CCA', 'S.GRAVITY', 'BATTERY.LIFE_MONTH', 'CAPACITY_AH',
       'STATUS'],
      dtype='object')

In [4]:
indep_X = dataset.drop(['STATUS'], axis=1)
dep_Y = dataset['STATUS']

# Model Creation and feature_selection

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.ensemble import RandomForestClassifier
import joblib

def split_scalar(X, y):
    """Split data into train and test sets and perform scaling."""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test, scaler

def accuracy_prediction(model, X_test, y_test):
    """Calculate accuracy score for the model."""
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)

# Load dataset
dataset = pd.read_csv("battery_data_full1.csv", index_col=None)
df = dataset

# Rename columns to match your dataset
df.rename(columns={
    'Battery type (N)': 'N',
    'BATTERY.LIFE (Month)': 'BATTERY.LIFE_MONTH',
    'CAPACITY(AH)': 'CAPACITY_AH'
}, inplace=True)

# Encoding categorical variable (STATUS)
label_encoder = LabelEncoder()
df['STATUS'] = label_encoder.fit_transform(df['STATUS'])

# Separate features (X) and target variable (y)
X = df.drop(['STATUS'], axis=1)
y = df['STATUS']

# Feature selection using SelectKBest
selector = SelectKBest(f_classif, k=6)
X_selected = selector.fit_transform(X, y)
selected_columns = X.columns[selector.get_support()]

# Split and scale data
X_train, X_test, y_train, y_test, scaler = split_scalar(X[selected_columns], y)

# Define parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],
    
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Train Random Forest model with GridSearchCV
grid_rf = GridSearchCV(RandomForestClassifier(random_state=0), param_grid, refit=True, verbose=3, n_jobs=-1, scoring='accuracy')
grid_rf.fit(X_train, y_train)

# Get the best parameters and the best score
best_params = grid_rf.best_params_
best_score = grid_rf.best_score_
print(f"Best Parameters: {best_params}")
print(f"Best Training Accuracy: {best_score}")

# Predict on the test set
y_pred = grid_rf.predict(X_test)

# Evaluate the model
print("\nEvaluation on Test Set:")
print("Accuracy Score:", accuracy_score(y_test, y_pred))

# Print confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)

# Print classification report
clf_report = classification_report(y_test, y_pred)
print("\nClassification Report:")
print(clf_report)

# Calculate and print F1 Score (weighted)
f1 = f1_score(y_test, y_pred, average='weighted')
print("\nF1 Score (weighted):", f1)



Fitting 5 folds for each of 27 candidates, totalling 135 fits
Best Parameters: {'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Best Training Accuracy: 0.9577777777777777

Evaluation on Test Set:
Accuracy Score: 1.0

Confusion Matrix:
[[4 0 0]
 [0 3 0]
 [0 0 6]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         6

    accuracy                           1.00        13
   macro avg       1.00      1.00      1.00        13
weighted avg       1.00      1.00      1.00        13


F1 Score (weighted): 1.0


In [6]:
import pickle

# Assuming your GridSearchCV object is 'grid_dt' and you've already saved the best model
# Save the best model
filename = "best_battery_health_model_rf.sav"
pickle.dump(grid_rf.best_estimator_, open(filename, 'wb'))



In [7]:
import pickle
filename = "best_battery_health_model_rf.sav"
# Load the final model
loaded_model = pickle.load(open(filename, 'rb'))

In [8]:

def get_categorical_prediction(prediction):
    if prediction == 0:
        return "ok"
    elif prediction == 1:
        return "notok"
    else:
        return "Charge_retest"

# Example of using the function with input
N = float(input("N: "))
VOLTAGE = float(input("VOLTAGE: "))
CCA = float(input("CCA: "))
S_GRAVITY = float(input("S.GRAVITY: "))
BATTERY_LIFE_MONTH = float(input("BATTERY.LIFE_MONTH: "))
CAPACITY_AH = float(input("CAPACITY_AH: "))

# Predict the future maintenance requirement
future_prediction = loaded_model.predict([[N, VOLTAGE, CCA, S_GRAVITY, BATTERY_LIFE_MONTH, CAPACITY_AH]])
predicted_category = get_categorical_prediction(future_prediction[0])

print(f"Future Battery Health: {predicted_category}")

N:  40
VOLTAGE:  12.8
CCA:  320
S.GRAVITY:  1.275
BATTERY.LIFE_MONTH:  6
CAPACITY_AH:  40


Future Battery Health: Charge_retest


In [None]:
# Example input
N = 40
VOLTAGE = 12.8
CCA = 320
S_GRAVITY = 1.275
BATTERY_LIFE_MONTH = 6
CAPACITY_AH = 40


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.ensemble import RandomForestClassifier
import joblib

def split_scalar(X, y):
    """Split data into train and test sets and perform scaling."""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test, scaler

def accuracy_prediction(model, X_test, y_test):
    """Calculate accuracy score for the model."""
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)

# Load dataset
dataset = pd.read_csv("battery_data_full1.csv", index_col=None)
df = dataset

# Rename columns to match your dataset
df.rename(columns={
    'Battery type (N)': 'N',
    'BATTERY.LIFE (Month)': 'BATTERY.LIFE_MONTH',
    'CAPACITY(AH)': 'CAPACITY_AH'
}, inplace=True)

# Encoding categorical variable (STATUS)
label_encoder = LabelEncoder()
df['STATUS'] = label_encoder.fit_transform(df['STATUS'])

# Separate features (X) and target variable (y)
X = df.drop(['STATUS'], axis=1)
y = df['STATUS']

# Feature selection using SelectKBest
selector = SelectKBest(f_classif, k=6)
X_selected = selector.fit_transform(X, y)
selected_columns = X.columns[selector.get_support()]

# Split and scale data
X_train, X_test, y_train, y_test, scaler = split_scalar(X[selected_columns], y)

# Define parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Train Random Forest model with GridSearchCV
grid_rf = GridSearchCV(RandomForestClassifier(random_state=0), param_grid, refit=True, verbose=3, n_jobs=-1, scoring='accuracy')
grid_rf.fit(X_train, y_train)

# Get the best parameters and the best score
best_params = grid_rf.best_params_
best_score = grid_rf.best_score_
print(f"Best Parameters: {best_params}")
print(f"Best Training Accuracy: {best_score}")

# Save the best model and scaler
joblib.dump(grid_rf.best_estimator_, 'best_battery_health_model_rf.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Predict on the test set
y_pred = grid_rf.predict(X_test)

# Evaluate the model
print("\nEvaluation on Test Set:")
print("Accuracy Score:", accuracy_score(y_test, y_pred))

# Print confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)

# Print classification report
clf_report = classification_report(y_test, y_pred)
print("\nClassification Report:")
print(clf_report)

# Calculate and print F1 Score (weighted)
f1 = f1_score(y_test, y_pred, average='weighted')
print("\nF1 Score (weighted):", f1)

# Cross-Validation Score
cv_scores = cross_val_score(grid_rf.best_estimator_, X_selected, y, cv=5, scoring='accuracy')
print("\nCross-Validation Scores:")
print(cv_scores)
print(f"Mean CV Accuracy: {cv_scores.mean()}")

# Predict new data
def get_categorical_prediction(prediction):
    if prediction == 0:
        return "ok"
    elif prediction == 1:
        return "notok"
    elif prediction == 2:
        return "Charge_retest"
    else:
        return "Unknown"

# Load the best model and scaler
loaded_model = joblib.load('best_battery_health_model_rf.pkl')
loaded_scaler = joblib.load('scaler.pkl')

# Example input
N = 40
VOLTAGE = 12.8
CCA = 320
S_GRAVITY = 1.275
BATTERY_LIFE_MONTH = 6
CAPACITY_AH = 40

# Predict the future maintenance requirement
input_data = [[N, VOLTAGE, CCA, S_GRAVITY, BATTERY_LIFE_MONTH, CAPACITY_AH]]
input_data_scaled = loaded_scaler.transform(input_data)
future_prediction = loaded_model.predict(input_data_scaled)
predicted_category = get_categorical_prediction(future_prediction[0])

print(f"Future Battery Health: {predicted_category}")
