In [1]:
import mlflow
import mlflow.sklearn
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler


In [2]:
# Run Scripts
%run scripts/data_preprocessing.py
%run scripts/data_exploration.py
%run scripts/data_preparation.py
%run scripts/model_training.py
%run scripts/model_evaluation.py



In [3]:
# Read the data
data = pd.read_csv("Data/MachineFailureData.csv")
print("Data Read Successfully")
# Explore the data
explore_data(data)
print("Data Explored Successfully")
# Preprocess the data
data = preprocess_data(data)
print("Data Preprocessed Successfully")
# Prepare the data
X_train , X_test, y_train, y_test = prepare_data(data)
print("Data Prepared Successfully")


# Standard Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Train the model
# Train Model 
parameters = {
    "n_estimators": 100,
    "max_depth": 3,
    "min_samples_split": 10,
    "min_samples_leaf": 4,
    "bootstrap": True,
    "oob_score": False,
    "random_state": 42,
}

rf_model = train_model(X_train , y_train,parameters)
print("Model Trained Successfully")
# Get the parameters of the trained model
params = rf_model.get_params()

# Evaluate the model
y_pred = rf_model.predict(X_test)
print("Model Evaluated Successfully")
# Score the model
mae, mse, rmse, r2, accuracy, classifier_report = score_model(rf_model,X_test,y_test,y_pred)
print("Model Scored Successfully")
# Set the experiment name
experiment_name = "RandomForestClassifier"

# If this is not set, a unique name will be auto-generated for a run
run_name = "30% Split"

# Define an artifact path that the model will be saved to.
artifact_path = "splits"

# Check if the experiment already exists; if not, create it
try:
    mlflow.create_experiment(experiment_name)
except mlflow.exceptions.MlflowException:
    print(f"Experiment '{experiment_name}' already exists.")
# Set the experiment

mlflow.set_experiment(experiment_name)

# Start a run within the specified experiment
with mlflow.start_run(run_name=run_name) as run:
    print("MLflow:")
    # Log the model parameters
    mlflow.log_params(params)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", classifier_report[str(1)]['precision'])
    mlflow.log_metric("recall",classifier_report[str(1)]['recall'] )
    mlflow.log_metric("f1_score",classifier_report[str(1)]['f1-score'] )
    mlflow.log_metric("support",classifier_report[str(1)]['support'])
    mlflow.log_metric("mae", mae)
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    
    # Save the model
    mlflow.sklearn.log_model(rf_model, "rf_model")


Data Read Successfully
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   UDI                      10000 non-null  int64  
 1   Product ID               10000 non-null  object 
 2   Type                     10000 non-null  object 
 3   Air temperature [K]      10000 non-null  float64
 4   Process temperature [K]  10000 non-null  float64
 5   Rotational speed [rpm]   10000 non-null  int64  
 6   Torque [Nm]              10000 non-null  float64
 7   Tool wear [min]          10000 non-null  int64  
 8   Machine failure          10000 non-null  int64  
 9   TWF                      10000 non-null  int64  
 10  HDF                      10000 non-null  int64  
 11  PWF                      10000 non-null  int64  
 12  OSF                      10000 non-null  int64  
 13  RNF                      10000 non-null  int64  
dtype

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh()

All git commands will error until this is rectified.

$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - error|e|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



MLflow:


SyntaxError: invalid syntax (2558674368.py, line 1)

In [8]:
git.refresh()

NameError: name 'git' is not defined