In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt

In [2]:
dataset = pd.read_csv("preprocessed_Battery_data.csv", index_col=None)
df2 = dataset
df2 = pd.get_dummies(df2, drop_first=True)

indep_X = df2.drop(['Maintenance Required_Yes', 'Charge Needed_Yes', 'Status_Low Charge', 'Status_Weak Battery'], axis=1)
dep_Y = df2['Maintenance Required_Yes']

In [3]:
dataset.columns

Index(['Battery Voltage (V)', 'Battery Age (Years)', 'Battery Age (Months)',
       'Specific Gravity', 'Capacity Range (Ah)', 'Open Circuit Voltage (V)',
       'Visual Inspection_Damage present', 'Visual Inspection_No damage',
       'Starting_Smooth cranking', 'Starting_Struggling cranking',
       'Lights and Electronics_Flickering lights',
       'Lights and Electronics_Normal', 'Maintenance History_Neglected',
       'Maintenance History_No maintenance',
       'Maintenance History_Regular maintenance', 'Acid Level Check_Normal',
       'Charge Needed_Yes', 'Maintenance Required_Yes', 'Status_Low Charge',
       'Status_Weak Battery'],
      dtype='object')

In [4]:
indep_X = dataset.drop(['Maintenance Required_Yes', 'Charge Needed_Yes', 'Status_Low Charge', 'Status_Weak Battery'], axis=1)
dep_Y = dataset['Maintenance Required_Yes']

# Feature Selection

# Recursive Feature Elimination

In [5]:
def split_scalar(indep_X,dep_Y):
        X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size = 0.25, random_state = 0)

        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)    
        return X_train, X_test, y_train, y_test
    

In [6]:
def r2_prediction(regressor,X_test,y_test):
    y_pred = regressor.predict(X_test)
    from sklearn.metrics import r2_score
    r2=r2_score(y_test,y_pred)
    return r2

In [7]:
def Linear(X_train,y_train,X_test):       
        from sklearn.linear_model import LinearRegression
        regressor = LinearRegression()
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return  r2 

In [8]:
def Decision(X_train,y_train,X_test):
        from sklearn.tree import DecisionTreeRegressor
        regressor = DecisionTreeRegressor(random_state = 0)
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return  r2 

In [9]:
def random(X_train,y_train,X_test):       
        from sklearn.ensemble import RandomForestRegressor
        regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return  r2

In [10]:
def xgboost(X_train,y_train,X_test):       
        from xgboost import XGBRegressor
        regressor = XGBRegressor(n_jobs=5,learning_rate=0.1,max_depth=10,random_state=1)
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return r2

In [11]:
def rfeFeature(indep_X, dep_Y, n):
    rfelist = []
    colnames_list = []  
    # List to store column names for each model
    r2_values = []  
    # List to store R2 values for each model

    from sklearn.linear_model import LinearRegression
    lin = LinearRegression()

    from sklearn.tree import DecisionTreeRegressor
    dec = DecisionTreeRegressor(random_state=0)

    from sklearn.ensemble import RandomForestRegressor
    rf = RandomForestRegressor(n_estimators=10, random_state=0)

    from xgboost import XGBRegressor
    xgb = XGBRegressor(n_jobs=5, learning_rate=0.1, max_depth=10, random_state=1)

    rfemodellist = [lin, dec, rf, xgb]

    for model in rfemodellist:
        log_rfe = RFE(estimator=model, n_features_to_select=n)
        log_fit = log_rfe.fit(indep_X, dep_Y)
        log_rfe_feature = log_fit.transform(indep_X)
        rfelist.append(log_rfe_feature)

        # Get the column names selected by RFE
        selected_columns = [col for col, selected in zip(indep_X.columns, log_rfe.support_) if selected]
        colnames_list.append(selected_columns)

        # Fit the model and calculate and store the R2 value
        X_train, X_test, y_train, y_test = split_scalar(pd.DataFrame(log_rfe_feature), dep_Y)
        model.fit(X_train, y_train)  # Fit the model
        r2 = r2_prediction(model, X_test, y_test)
        r2_values.append(r2)

    return rfelist, colnames_list, r2_values

# Call the function with your data
rfelist, colnames_list, r2_values = rfeFeature(indep_X, dep_Y, 5)

# Print the selected column names and R2 values for each model
for model_name, selected_columns, r2_value in zip(["Linear", "Decision", "Random", "XGBoost"], colnames_list, r2_values):
    print(f"Model: {model_name}")
    print("Selected Columns:", selected_columns)
    print(f"R2 Value: {r2_value}\n")

  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):


Model: Linear
Selected Columns: ['Specific Gravity', 'Open Circuit Voltage (V)', 'Lights and Electronics_Flickering lights', 'Maintenance History_Neglected', 'Maintenance History_Regular maintenance']
R2 Value: 0.8564596395591606

Model: Decision
Selected Columns: ['Open Circuit Voltage (V)', 'Maintenance History_Neglected', 'Maintenance History_No maintenance', 'Maintenance History_Regular maintenance', 'Acid Level Check_Normal']
R2 Value: 1.0

Model: Random
Selected Columns: ['Battery Voltage (V)', 'Battery Age (Years)', 'Open Circuit Voltage (V)', 'Maintenance History_Neglected', 'Maintenance History_Regular maintenance']
R2 Value: 0.982

Model: XGBoost
Selected Columns: ['Open Circuit Voltage (V)', 'Maintenance History_Neglected', 'Maintenance History_No maintenance', 'Maintenance History_Regular maintenance', 'Acid Level Check_Normal']
R2 Value: 0.999999845175724

