**Random Forest Classifier - Machines 90 to 100**

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

In [2]:
# Import our input dataset
all_tele_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Project/Resources/all_tele_main.csv')
all_tele_df = all_tele_df.sort_values(by=['datetime']).drop_duplicates(subset=['datetime', 'machineid'], keep='first').reset_index(drop=True)
all_tele_df.head()

  all_tele_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Project/Resources/all_tele_main.csv')


Unnamed: 0,datetime,machineid,machinemodel,machineage,volt,rotate,pressure,vibration,errorid,compmaint,compfail
0,2015-01-01 06:00:00,34,model4,10,181.859394,483.215401,82.463735,37.849856,0,comp2,0
1,2015-01-01 06:00:00,41,model4,9,162.314459,424.540949,129.113722,52.591978,0,0,0
2,2015-01-01 06:00:00,39,model4,0,150.138449,421.747438,100.174935,40.367037,0,0,0
3,2015-01-01 06:00:00,10,model3,10,158.421261,500.830885,119.750673,45.571344,0,0,0
4,2015-01-01 06:00:00,45,model3,14,191.64967,383.200891,85.630577,52.169106,0,0,0


In [None]:
all_tele_df.count()

datetime        876100
machineid       876100
machinemodel    876100
machineage      876100
volt            876100
rotate          876100
pressure        876100
vibration       876100
errorid         876100
compmaint       876100
compfail        876100
dtype: int64

In [3]:
#remove unecessary columns
all_tele_df = all_tele_df.drop(columns=['datetime'])
all_tele_df.head()

Unnamed: 0,machineid,machinemodel,machineage,volt,rotate,pressure,vibration,errorid,compmaint,compfail
0,34,model4,10,181.859394,483.215401,82.463735,37.849856,0,comp2,0
1,41,model4,9,162.314459,424.540949,129.113722,52.591978,0,0,0
2,39,model4,0,150.138449,421.747438,100.174935,40.367037,0,0,0
3,10,model3,10,158.421261,500.830885,119.750673,45.571344,0,0,0
4,45,model3,14,191.64967,383.200891,85.630577,52.169106,0,0,0


In [4]:
# filter for Machine 90 to 100 only
all_tele_df = all_tele_df.loc[(all_tele_df['machineid'] >= 90)]

In [5]:
# use get_dummies to convert categorical variables
all_tele_df = pd.get_dummies(all_tele_df)
all_tele_df

Unnamed: 0,machineid,machineage,volt,rotate,pressure,vibration,compfail,machinemodel_model1,machinemodel_model2,machinemodel_model3,...,errorid_error2,errorid_error3,errorid_error4,errorid_error5,compmaint_0,compmaint_0.1,compmaint_comp1,compmaint_comp2,compmaint_comp3,compmaint_comp4
19,98,20,153.300953,453.352244,86.073228,47.791685,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
34,99,14,168.596133,384.747105,110.921131,41.944692,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
40,96,10,155.273542,440.499900,113.503483,33.131131,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
53,100,5,161.587466,399.879713,105.314528,38.559006,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
55,97,14,143.613163,495.457168,96.905947,32.684637,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
876061,97,14,159.607756,458.143799,100.945230,40.011599,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
876068,99,14,168.439623,427.990029,107.899979,44.193151,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
876072,91,17,165.002636,473.410700,75.559302,38.272293,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
876086,98,20,165.717790,501.520194,114.553412,37.696504,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0


In [6]:
# split preprocessed data into feature and target arrays
y = all_tele_df.compfail.values
X = all_tele_df.drop(columns='compfail').values

# split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [7]:
# Creating StandardScaler instance
scaler = StandardScaler()

In [8]:
# Fitting Standard Scaller
X_scaler = scaler.fit(X_train)

In [9]:
# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [10]:
# Create a random forest classifier
rf_model = RandomForestClassifier(n_estimators=500, random_state=78)

In [11]:
# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

In [12]:
# Making predictions using the testing data
predictions = rf_model.predict(X_test_scaled)

In [13]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, predictions)

In [14]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,24067,1
Actual 1,18,7


Accuracy Score : 0.9992113892001826
Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     24068
           1       0.88      0.28      0.42        25

    accuracy                           1.00     24093
   macro avg       0.94      0.64      0.71     24093
weighted avg       1.00      1.00      1.00     24093

