In [1]:
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Load data
df = pd.read_csv('data.csv')

In [3]:
df.head()

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,0,0,0,0,0
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,0,0,0,0,0
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,0,0,0,0,0
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,0,0,0,0,0
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,0,0,0,0,0


In [4]:
# Drop ID
df.drop(['UDI'], axis=1, inplace=True)

In [5]:
# Encode categorical variables
le = LabelEncoder()
df['Product ID'] = le.fit_transform(df['Product ID'])
df['Type'] = le.fit_transform(df['Type'])

In [6]:
df.head()

Unnamed: 0,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
0,7003,2,298.1,308.6,1551,42.8,0,0,0,0,0,0,0
1,1003,1,298.2,308.7,1408,46.3,3,0,0,0,0,0,0
2,1004,1,298.1,308.5,1498,49.4,5,0,0,0,0,0,0
3,1005,1,298.2,308.6,1433,39.5,7,0,0,0,0,0,0
4,1006,1,298.2,308.7,1408,40.0,9,0,0,0,0,0,0


In [7]:
# Define X and y
X = df.drop('Machine failure', axis=1)
y = df['Machine failure']

In [8]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [9]:
# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Logistic Regression model with balanced class weights
model = LogisticRegression(class_weight='balanced', random_state=42, max_iter=1000)
model.fit(X_train, y_train)

In [11]:
# Predict and evaluate
y_pred = model.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Confusion Matrix:
 [[2428    0]
 [   2   70]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2428
           1       1.00      0.97      0.99        72

    accuracy                           1.00      2500
   macro avg       1.00      0.99      0.99      2500
weighted avg       1.00      1.00      1.00      2500



In [12]:
import dagshub

mlflow.set_tracking_uri('https://dagshub.com/Arsalan3043/Predictive-Maintenance-Capstone-proj.mlflow')
dagshub.init(repo_owner='Arsalan3043', repo_name='Predictive-Maintenance-Capstone-proj', mlflow=True)

# mlflow.set_experiment("Logistic Regression Baseline")
mlflow.set_experiment("Logistic Regression Baseline")

<Experiment: artifact_location='mlflow-artifacts:/65cd4980f58a4f99899d6bfcbc125f60', creation_time=1748541003793, experiment_id='0', last_update_time=1748541003793, lifecycle_stage='active', name='Logistic Regression Baseline', tags={}>

In [13]:
import mlflow
import logging
import os
import time
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

logging.info("Starting Mlflow run...")

with mlflow.start_run():
    start_time = time.time()

    try:
        logging.info("Logging preprocessing parameters...")
        mlflow.log_param("scaling", "StandardScaler")
        mlflow.log_param("categorical_encoding", "LabelEncoder")
        mlflow.log_param("test_size", 0.25)

         # Initialize Logistic Regression
        logging.info("Initializing Logistic Regression...")
        model = LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)

        # Train model
        logging.info("Training model...")
        model.fit(X_train, y_train)
        logging.info("Model training complete")

        # Log model hyperparameters
        logging.info("logging model parameters")
        mlflow.log_param("model", "Logistic Regression")
        mlflow.log_param("class_weight", "balanced")
        mlflow.log_param("max_iter", 1000)

        # Predictions
        logging.info("Predicting...")
        y_pred = model.predict(X_test)

        # Evaluation
        logging.info("Calculating Evaluation metrics...")
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        # Log metrics
        logging.info("logging evaluation metrics...")
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)

        # Log model
        logging.info("Logging model to MLflow...")
        mlflow.sklearn.log_model(model, "model")

        # Execution time
        end_time = time.time()
        logging.info(f"Model training and logging completed in {end_time - start_time:.2f} seconds.")

        # Print the results for verification
        logging.info(f"Accuracy: {accuracy}")
        logging.info(f"Precision: {precision}")
        logging.info(f"Recall: {recall}")
        logging.info(f"F1 Score: {f1}")

    except Exception as e:
        logging.error(f"An error occurred: {e}", exc_info=True)

2025-06-01 09:43:14,787 - INFO - Starting Mlflow run...
2025-06-01 09:43:15,889 - INFO - Logging preprocessing parameters...
2025-06-01 09:43:18,316 - INFO - Initializing Logistic Regression...
2025-06-01 09:43:18,319 - INFO - Training model...
2025-06-01 09:43:18,353 - INFO - Model training complete
2025-06-01 09:43:18,353 - INFO - logging model parameters
2025-06-01 09:43:21,795 - INFO - Predicting...
2025-06-01 09:43:21,802 - INFO - Calculating Evaluation metrics...
2025-06-01 09:43:21,838 - INFO - logging evaluation metrics...
2025-06-01 09:43:24,977 - INFO - Logging model to MLflow...
2025-06-01 09:43:44,781 - INFO - Model training and logging completed in 28.89 seconds.
2025-06-01 09:43:44,782 - INFO - Accuracy: 0.9992
2025-06-01 09:43:44,783 - INFO - Precision: 1.0
2025-06-01 09:43:44,785 - INFO - Recall: 0.9722222222222222
2025-06-01 09:43:44,787 - INFO - F1 Score: 0.9859154929577465


🏃 View run wise-crab-361 at: https://dagshub.com/Arsalan3043/Predictive-Maintenance-Capstone-proj.mlflow/#/experiments/0/runs/f5dd57fd94a2421d80874e432575d8ee
🧪 View experiment at: https://dagshub.com/Arsalan3043/Predictive-Maintenance-Capstone-proj.mlflow/#/experiments/0
