In [41]:
import wandb
import sklearn
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pyprojroot import here
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, recall_score, precision_score,f1_score, mean_squared_error
from sklearn.multiclass import OneVsOneClassifier
from sklearn.multioutput import MultiOutputClassifier

In [42]:
# Training split: scaled features and their multilabel targets.
X_train = pd.read_csv(here('data/processed/X_train_scaled.csv'))
y_train = pd.read_csv(here('data/processed/Y_train.csv'))

# Validation split, loaded the same way.
X_val = pd.read_csv(here('data/processed/X_val_scaled.csv'))
y_val = pd.read_csv(here('data/processed/Y_val.csv'))

# Sanity check: print (rows, columns) of each frame, in load order.
for frame in (X_train, y_train, X_val, y_val):
    print(frame.shape)

(7499, 6)
(7499, 6)
(1501, 6)
(1501, 6)


In [43]:
# Baseline model, no hyperparameter tuning.
# MultiOutputClassifier clones the base estimator and fits one independent
# LogisticRegression per target column of y_train.
#
# `multi_class='ovr'` is deliberately not passed: the parameter is deprecated
# in scikit-learn >= 1.5 (FutureWarning, scheduled for removal) and is
# redundant here because each per-column problem is solved on its own.
# NOTE(review): this assumes every target column is binary (0/1), as the
# TP/FP/FN/TN cell further down also does; for a binary target the default
# setting resolves to the same one-vs-rest fit.
ovr_lgr = LogisticRegression(max_iter=1000, solver='lbfgs')
multilabel = MultiOutputClassifier(ovr_lgr)
multilabel.fit(X_train, y_train)

# Hard 0/1 predictions for the validation split, one column per target.
preds = multilabel.predict(X_val)




In [44]:
# Overall accuracy on the validation split. For multilabel targets
# accuracy_score is the subset (exact-match) accuracy: a row only counts as
# correct when every label in that row is predicted correctly.
accuracy = accuracy_score(y_true=y_val, y_pred=preds)
print(accuracy)

0.9653564290473018


In [45]:
# Start a Weights & Biases run that will hold this baseline's metrics.
wandb.init(
    project="predictive-maintenance",
    name="simple_multilabel_LogisticRegression_baseline",
)

[34m[1mwandb[0m: Currently logged in as: [33mjubacochran-berkeley[0m ([33mw207-predictive-maintenance[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [46]:
# Overall (exact-match) accuracy, recomputed so this cell can run on its own.
accuracy = accuracy_score(y_true=y_val, y_pred=preds)

# Per-label accuracy: fraction of validation rows where the prediction equals
# the true value, computed column by column.
label_matches = y_val == preds
mode_failures = label_matches.mean(axis=0)
print(mode_failures)
# These scores are high, but accuracy counts true negatives as well, so read
# them together with the TP/FP/FN/TN counts logged further down.

Machine failure    0.969354
TWF                0.995336
HDF                0.992005
PWF                0.998001
OSF                0.993338
RNF                0.998001
dtype: float64


In [47]:
# Log the overall accuracy and every per-label accuracy to W&B.
#
# All values go into ONE wandb.log call: by default each wandb.log call
# advances the run's step, so logging label by label would scatter a single
# evaluation across several steps.
if isinstance(y_val, pd.DataFrame):
    label_names = list(y_val.columns)
else:
    label_names = [f"label_{i}" for i in range(len(mode_failures))]

# "Mode Failure Accuracy" holds the overall accuracy computed above; the key
# is kept unchanged so existing W&B charts keep working.
accuracy_metrics = {"Mode Failure Accuracy": accuracy}
for label_name, acc in zip(label_names, mode_failures):
    accuracy_metrics[f"accuracy/{label_name}"] = float(acc)

wandb.log(accuracy_metrics)

In [48]:
# Micro-averaged precision, recall and F1 on the validation split.
# zero_division=0 makes a score 0 (instead of warning) when its denominator
# is zero.
micro_avg = {"average": "micro", "zero_division": 0}

micro_p = precision_score(y_val, preds, **micro_avg)
micro_r = recall_score(y_val, preds, **micro_avg)
# F1 is of particular interest here because the class distribution is uneven.
micro_f1 = f1_score(y_val, preds, **micro_avg)

In [49]:
# Send the three micro-averaged scores to W&B in a single log call.
micro_metrics = {
    "micro/precision": micro_p,
    "micro/recall": micro_r,
    "micro/f1": micro_f1,
}
wandb.log(micro_metrics)

In [50]:
# Per-label confusion counts (TP / FP / FN / TN) on the validation split.
# Work on a plain array so the comparisons with `preds` are positional.
y_true = y_val.values if isinstance(y_val, pd.DataFrame) else y_val

# Each result has one entry per label (sum over rows, axis=0).
# The == 1 / == 0 tests assume labels and predictions are encoded as 0/1.
tp = np.logical_and(preds == 1, y_true == 1).sum(axis=0)
fp = np.logical_and(preds == 1, y_true == 0).sum(axis=0)
fn = np.logical_and(preds == 0, y_true == 1).sum(axis=0)
tn = np.logical_and(preds == 0, y_true == 0).sum(axis=0)

if isinstance(y_val, pd.DataFrame):
    label_names = list(y_val.columns)
else:
    label_names = [f"label_{j}" for j in range(y_true.shape[1])]

# Collect every count first and log once: by default each wandb.log call
# advances the run's step, so logging inside the loop would spread one
# evaluation over one step per label.
confusion_counts = {}
for j, label in enumerate(label_names):
    confusion_counts[f"{label}/TP"] = int(tp[j])
    confusion_counts[f"{label}/FP"] = int(fp[j])
    confusion_counts[f"{label}/FN"] = int(fn[j])
    confusion_counts[f"{label}/TN"] = int(tn[j])

wandb.log(confusion_counts)

In [51]:
# Mean squared error between the true 0/1 labels and the predicted
# probabilities (for binary labels this is the Brier score).
#
# Each fitted per-label estimator supplies one probability column; column 1 of
# predict_proba is taken as the probability of the positive class.
# NOTE(review): assumes est.classes_ is [0, 1] for every estimator - confirm.
probas_val = np.column_stack([est.predict_proba(X_val)[:, 1] for est in multilabel.estimators_])

# Overall value: mean_squared_error averages over all label columns by default.
mse_overall = mean_squared_error(y_true, probas_val)

if isinstance(y_val, pd.DataFrame):
    label_names = list(y_val.columns)
else:
    label_names = [f"label_{j}" for j in range(y_true.shape[1])]

# Gather the overall and per-label values and log them in ONE call: by default
# each wandb.log call advances the run's step.
mse_metrics = {"mse/overall": mse_overall}
for j, label in enumerate(label_names):
    mse_metrics[f"mse/{label}"] = mean_squared_error(y_true[:, j], probas_val[:, j])

wandb.log(mse_metrics)


In [52]:
# Finish the W&B run started by wandb.init above; the run history/summary
# tables are printed as this cell's output.
wandb.finish()

0,1
HDF/FN,▁
HDF/FP,▁
HDF/TN,▁
HDF/TP,▁
Machine failure/FN,▁
Machine failure/FP,▁
Machine failure/TN,▁
Machine failure/TP,▁
Mode Failure Accuracy,▁
OSF/FN,▁

0,1
HDF/FN,11
HDF/FP,1
HDF/TN,1482
HDF/TP,7
Machine failure/FN,41
Machine failure/FP,5
Machine failure/TN,1446
Machine failure/TP,9
Mode Failure Accuracy,0.96536
OSF/FN,9
