In [32]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score , classification_report
from sklearn.ensemble import RandomForestClassifier
from scipy.stats import randint
from imblearn.over_sampling import SMOTE


data = pd.read_csv('predictive_maintenance.csv')
df = pd.DataFrame(data)

#separate X and y
X = df.drop(columns = ['Target'])
y = df['Target']

#get dummies
categorical_columns = X.select_dtypes(include=['object']).columns.tolist()
numerical_columns = X.select_dtypes(include=['int64','float64']).columns.tolist()

df_encoded = pd.get_dummies(X,columns=categorical_columns)

X = pd.concat([df_encoded,df[numerical_columns]],axis=1)

#scale
scaler = StandardScaler()

X = scaler.fit_transform(X)

#split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

#define a model
classifier = RandomForestClassifier()

param_dist = {
    'n_estimators': randint(50, 200),
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': randint(2, 11),
    'min_samples_leaf': randint(1, 5),
    'max_features': [1.0, 'sqrt'],
    'random_state': [42]
}

random_search = RandomizedSearchCV(classifier,param_distributions = param_dist, cv = 5, scoring = 'accuracy')

random_search.fit(X_train,y_train)

best_params = random_search.best_params_

best_classifier = RandomForestClassifier(**best_params)

best_classifier.fit(X_train,y_train)

#make predictions
y_train_pred = best_classifier.predict(X_train)
y_test_pred = best_classifier.predict(X_test)

#check accuracy
train_accuracy = accuracy_score(y_train,y_train_pred)
test_accuracy = accuracy_score(y_test,y_test_pred)

#cross Validation score
cv = cross_val_score(best_classifier,X,y,cv = 5,scoring='accuracy')

print('train accuracy = ',train_accuracy)
print('test accuracy = ',test_accuracy)
print('Cross Val accuracy = ',np.mean(cv))

print('Classification Report: ')
print(classification_report(y_test,y_test_pred))

train accuracy =  0.9994172494172494
test accuracy =  0.963
Cross Val accuracy =  0.8897
Classification Report: 
              precision    recall  f1-score   support

           0       0.99      0.97      0.98      1939
           1       0.44      0.75      0.55        61

    accuracy                           0.96      2000
   macro avg       0.72      0.86      0.77      2000
weighted avg       0.98      0.96      0.97      2000

