In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt

In [2]:
import pandas as pd

# Load the dataset
dataset1 = pd.read_csv("predictive_maintenance.csv")

# Perform one-hot encoding for the "Type" column
type_dummies = pd.get_dummies(dataset1['Type'], prefix='Type')

# Concatenate the encoded columns back to the original DataFrame
df2 = pd.concat([dataset1, type_dummies], axis=1)

# Drop the original "Type" column if needed
df2.drop('Type', axis=1, inplace=True)



In [3]:
dataset=df2
dataset

Unnamed: 0,UDI,Product ID,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type,Type_H,Type_L,Type_M
0,1,M14860,298.1,308.6,1551,42.8,0,0,No Failure,0,0,1
1,2,L47181,298.2,308.7,1408,46.3,3,0,No Failure,0,1,0
2,3,L47182,298.1,308.5,1498,49.4,5,0,No Failure,0,1,0
3,4,L47183,298.2,308.6,1433,39.5,7,0,No Failure,0,1,0
4,5,L47184,298.2,308.7,1408,40.0,9,0,No Failure,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,M24855,298.8,308.4,1604,29.5,14,0,No Failure,0,0,1
9996,9997,H39410,298.9,308.4,1632,31.8,17,0,No Failure,1,0,0
9997,9998,M24857,299.0,308.6,1645,33.4,22,0,No Failure,0,0,1
9998,9999,H39412,299.0,308.7,1408,48.5,25,0,No Failure,1,0,0


#  Define the table each column

Battery Voltage (V): The voltage of the battery in volts (V).

Battery Age (Years): The age of the battery in years.

Battery Age (Months): The age of the battery in months.

Specific Gravity: Specific gravity is a measure of the density of a battery's electrolyte compared to the density of water. It indicates the state of charge
of the battery.
    
Capacity Range (Ah): The capacity range of the battery in ampere-hours (Ah). It indicates the amount of charge that a battery can store.

Open Circuit Voltage (V): The voltage of the battery when there is no load connected to it, measured in volts (V).

Visual Inspection_Damage present: Indicates if there is any visible damage present on the battery (1 for Yes, 0 for No).
                                                                                                   
Visual Inspection_No damage: Indicates if there is no visible damage present on the battery (1 for Yes, 0 for No).
                                                                                             
Starting_Smooth cranking: Indicates if the battery provides smooth cranking during starting (1 for Yes, 0 for No).
                                                                                             
Starting_Struggling cranking: Indicates if the battery struggles during starting (1 for Yes, 0 for No).
                                                                                  
Lights and Electronics_Flickering lights: Indicates if there are flickering lights or electronics when the battery is in use (1 for Yes, 0 for No).
                                                                                                                              
Lights and Electronics_Normal: Indicates if lights and electronics work normally when the battery is in use (1 for Yes, 0 for No).
                                                                                                             
Maintenance History_Neglected: Indicates if the battery maintenance has been neglected (1 for Yes, 0 for No).
                                                                                        
Maintenance History_No maintenance: Indicates if there has been no maintenance done on the battery (1 for Yes, 0 for No).
                                                                                                    
Maintenance History_Regular maintenance: Indicates if regular maintenance has been performed on the battery (1 for Yes, 0 for No).
                                                                                                             
Acid Level Check_Normal: Indicates if the acid level check is normal (1 for Yes, 0 for No).
                                                                                                             
Charge Needed_Yes: Indicates if the battery needs to be charged (1 for Yes, 0 for No).
                                                                                                             
Maintenance Required_Yes: Indicates if maintenance is required for the battery (1 for Yes, 0 for No).
                                                                                                    
Status_Low Charge: Indicates if the battery status is low charge (1 for Yes, 0 for No).
                                                                                                    
Status_Weak Battery: Indicates if the battery status is weak (1 for Yes, 0 for No).

In [4]:
dataset.columns

Index(['UDI', 'Product ID', 'Air temperature [K]', 'Process temperature [K]',
       'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]', 'Target',
       'Failure Type', 'Type_H', 'Type_L', 'Type_M'],
      dtype='object')

In [5]:
# Define independent variables (features) and dependent variable (target)
indep_X = df2.drop(["UDI", "Product ID", "Failure Type"], axis=1)
dep_Y = df2['Failure Type']


# Feature Selection

# Recursive Feature Elimination

In [6]:
def split_scalar(indep_X,dep_Y):
        X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size = 0.25, random_state = 0)

        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)    
        return X_train, X_test, y_train, y_test
    

In [7]:
def r2_prediction(regressor,X_test,y_test):
    y_pred = regressor.predict(X_test)
    from sklearn.metrics import r2_score
    r2=r2_score(y_test,y_pred)
    return r2

In [8]:
def Linear(X_train,y_train,X_test):       
        from sklearn.linear_model import LinearRegression
        regressor = LinearRegression()
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return  r2 

In [9]:
def Decision(X_train,y_train,X_test):
        from sklearn.tree import DecisionTreeRegressor
        regressor = DecisionTreeRegressor(random_state = 0)
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return  r2 

In [10]:
def random(X_train,y_train,X_test):       
        from sklearn.ensemble import RandomForestRegressor
        regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return  r2

In [11]:
def xgboost(X_train,y_train,X_test):       
        from xgboost import XGBRegressor
        regressor = XGBRegressor(n_jobs=5,learning_rate=0.1,max_depth=10,random_state=1)
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return r2

In [14]:
import pandas as pd
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import LabelEncoder

def rfeFeature(indep_X, dep_Y, n):
    rfelist = []
    colnames_list = []  
    r2_values = []  
    
    lin = LinearRegression()
    dec = DecisionTreeRegressor(random_state=0)
    rf = RandomForestRegressor(n_estimators=10, random_state=0)
    xgb = XGBRegressor(n_jobs=5, learning_rate=0.1, max_depth=10, random_state=1)

    rfemodellist = [lin, dec, rf, xgb]

    for model in rfemodellist:
        log_rfe = RFE(estimator=model, n_features_to_select=n)
        log_fit = log_rfe.fit(indep_X, dep_Y)
        log_rfe_feature = log_fit.transform(indep_X)
        rfelist.append(log_rfe_feature)

        selected_columns = [col for col, selected in zip(indep_X.columns, log_rfe.support_) if selected]
        colnames_list.append(selected_columns)

        X_train, X_test, y_train, y_test = train_test_split(pd.DataFrame(log_rfe_feature), dep_Y, test_size=0.3, random_state=42)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        r2 = r2_score(y_test, y_pred)
        r2_values.append(r2)

    return rfelist, colnames_list, r2_values

# Define independent variables (features) and dependent variable (target)
indep_X = df2.drop(["UDI", "Product ID", "Failure Type"], axis=1)
dep_Y = df2['Failure Type']

# Encode the dependent variable
label_encoder = LabelEncoder()
dep_Y_encoded = label_encoder.fit_transform(dep_Y)


# Call the function with your data
rfelist, colnames_list, r2_values = rfeFeature(indep_X,dep_Y_encoded, 6)

# Print the selected column names and R2 values for each model
for model_name, selected_columns, r2_value in zip(["Linear", "Decision", "Random", "XGBoost"], colnames_list, r2_values):
    print(f"Model: {model_name}")
    print("Selected Columns:", selected_columns)
    print(f"R2 Value: {r2_value}\n")


Model: Linear
Selected Columns: ['Air temperature [K]', 'Process temperature [K]', 'Target', 'Type_H', 'Type_L', 'Type_M']
R2 Value: 0.2913038074891299

Model: Decision
Selected Columns: ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]', 'Target']
R2 Value: 0.5174045706457437

Model: Random
Selected Columns: ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]', 'Target']
R2 Value: 0.7402880449770044

Model: XGBoost
Selected Columns: ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]', 'Target']
R2 Value: 0.6781777122082393



# Model Creation

In [22]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import pickle

# Now you can use rfelist and colnames_list to train the Decision Tree model with the selected features
# Perform GridSearchCV for hyperparameter tuning
X_train, X_test, y_train, y_test = train_test_split(rfelist[1],dep_Y_encoded, test_size=0.25, random_state=0)
param_grid_rf = {
    'n_estimators': [10, 50],
    'criterion': ['gini', 'entropy'],
    'max_features': ['sqrt', 'log2'],
    'bootstrap': [True, False]
}

# Reduce n_jobs to 1 to avoid memory issues
grid_rf = GridSearchCV(RandomForestClassifier(), param_grid_rf, refit=True, verbose=3, n_jobs=1, scoring='f1_weighted')
grid_rf.fit(X_train, y_train)

re_rf = grid_rf.cv_results_
y_predict_rf = grid_rf.predict(X_test)

cm_rf = confusion_matrix(y_test, y_predict_rf)
clf_report_rf = classification_report(y_test, y_predict_rf)
f1_macro_rf = f1_score(y_test, y_predict_rf, average='weighted')

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END bootstrap=True, criterion=gini, max_features=sqrt, n_estimators=10;, score=0.994 total time=   0.0s
[CV 2/5] END bootstrap=True, criterion=gini, max_features=sqrt, n_estimators=10;, score=0.993 total time=   0.0s
[CV 3/5] END bootstrap=True, criterion=gini, max_features=sqrt, n_estimators=10;, score=0.992 total time=   0.0s
[CV 4/5] END bootstrap=True, criterion=gini, max_features=sqrt, n_estimators=10;, score=0.992 total time=   0.0s
[CV 5/5] END bootstrap=True, criterion=gini, max_features=sqrt, n_estimators=10;, score=0.990 total time=   0.0s
[CV 1/5] END bootstrap=True, criterion=gini, max_features=sqrt, n_estimators=50;, score=0.993 total time=   0.2s
[CV 2/5] END bootstrap=True, criterion=gini, max_features=sqrt, n_estimators=50;, score=0.995 total time=   0.2s
[CV 3/5] END bootstrap=True, criterion=gini, max_features=sqrt, n_estimators=50;, score=0.992 total time=   0.2s
[CV 4/5] END bootstrap=True, criter

[CV 3/5] END bootstrap=False, criterion=entropy, max_features=log2, n_estimators=10;, score=0.992 total time=   0.0s
[CV 4/5] END bootstrap=False, criterion=entropy, max_features=log2, n_estimators=10;, score=0.992 total time=   0.0s
[CV 5/5] END bootstrap=False, criterion=entropy, max_features=log2, n_estimators=10;, score=0.992 total time=   0.0s
[CV 1/5] END bootstrap=False, criterion=entropy, max_features=log2, n_estimators=50;, score=0.994 total time=   0.3s
[CV 2/5] END bootstrap=False, criterion=entropy, max_features=log2, n_estimators=50;, score=0.994 total time=   0.3s
[CV 3/5] END bootstrap=False, criterion=entropy, max_features=log2, n_estimators=50;, score=0.993 total time=   0.2s
[CV 4/5] END bootstrap=False, criterion=entropy, max_features=log2, n_estimators=50;, score=0.992 total time=   0.3s
[CV 5/5] END bootstrap=False, criterion=entropy, max_features=log2, n_estimators=50;, score=0.993 total time=   0.3s


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [24]:

print(cm_rf)

[[  26    0    0    0    0    0]
 [   1 2410    1    1    0    0]
 [   0    0   22    2    0    1]
 [   1    0    0   20    0    0]
 [   0    4    0    0    0    0]
 [   0    0    2    0    0    9]]


In [None]:
print(clf_report )

In [None]:
from sklearn.metrics import roc_auc_score
roc_auc_score(y_test,grid.predict_proba(X_test)[:,1])


In [None]:
table=pd.DataFrame.from_dict(re)
table

In [None]:
import pickle


In [None]:
# Save the best model
filename = "Decision_tree_BATTERY_final.sav"
pickle.dump(grid, open(filename, 'wb'))


In [None]:
# Load the saved model
loaded_model = pickle.load(open(filename, 'rb'))


# Battery input data

In [None]:
def get_categorical_prediction(prediction):
    return "Yes" if prediction == 1 else "No"
# Make predictions
Open_Circuit_Voltage_input = float(input("Open Circuit Voltage (V): "))
Maintenance_History_Neglected_input = float(input("Maintenance History_Neglected: "))
Maintenance_History_No_maintenance_input = float(input("Maintenance History_No maintenance: "))
Maintenance_History_Regular_maintenance_input = float(input("Maintenance History_Regular maintenance: "))
Acid_Level_Check_Normal_input = float(input("Acid Level Check_Normal: "))

# Predict the future maintenance requirement
future_prediction = loaded_model.predict([[Open_Circuit_Voltage_input,Maintenance_History_Neglected_input,Maintenance_History_No_maintenance_input,Maintenance_History_Regular_maintenance_input,Acid_Level_Check_Normal_input]])

# Convert numerical prediction to categorical label
future_prediction_categorical = get_categorical_prediction(future_prediction[0])

# Maintenance Required

In [None]:
# Print the categorical prediction
print("Future Prediction (Maintenance Required):", future_prediction_categorical)
