In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import mlflow
import mlflow.sklearn
from azureml.core import Workspace
from azureml.core import Experiment

In [2]:
# Load the raw dataset; the path is relative to the notebook's working directory.
dataset_path = "../data/energy_requirement_dataset.csv"
df = pd.read_csv(dataset_path)

In [3]:
# Features are every column except the label column.
target_column = "Energy_Requirement"
X = df.drop(columns=[target_column])

# Binary-encode the label: "Yes" -> 1, any other value -> 0.
y = df[target_column].eq("Yes").astype("int64")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:

# Write the held-out feature rows (X_test only: no label column, no index) to CSV.
# NOTE(review): presumably consumed by a separate prediction step — confirm.
prediction_file = "../data/prediction_data.csv"
X_test.to_csv(path_or_buf=prediction_file, index=False)

In [5]:
# Load the Azure ML workspace and point MLflow at the tracking URI it reports.
ws = Workspace.from_config()
tracking_uri = ws.get_mlflow_tracking_uri()
mlflow.set_tracking_uri(tracking_uri)

In [6]:
# Select the MLflow experiment that the training runs are recorded under.
# The call is left as the cell's last expression so the Experiment object is displayed.
experiment_name = "energy-requirement-prediction"
mlflow.set_experiment(experiment_name=experiment_name)

2024/12/02 11:11:20 INFO mlflow.tracking.fluent: Experiment with name 'energy-requirement-prediction' does not exist. Creating a new experiment.


<Experiment: artifact_location='', creation_time=1733137880649, experiment_id='fe4a8323-1fb3-49a4-a510-45464a5acfe8', last_update_time=None, lifecycle_stage='active', name='energy-requirement-prediction', tags={}>

In [7]:
# Candidate classifiers keyed by display name; all share one seed so results are repeatable.
# Insertion order is the order in which the models are later iterated.
model_seed = 42
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=model_seed),
    "Logistic Regression": LogisticRegression(max_iter=200, random_state=model_seed),
    "Decision Tree": DecisionTreeClassifier(random_state=model_seed),
    "Support Vector Machine": SVC(probability=True, random_state=model_seed),
}

In [8]:
# Train every candidate model, evaluate it on the held-out split, and record
# the result as one MLflow run per model.
for model_name, model in models.items():
    # Name the run after the model so runs are identifiable in the tracking UI
    # instead of carrying an auto-generated name.
    with mlflow.start_run(run_name=model_name):
        # Also keep the model name as a parameter, as before.
        mlflow.log_param("model_name", model_name)

        # Fit on the training split.
        model.fit(X_train, y_train)

        # Predict labels for the held-out split.
        y_pred = model.predict(X_test)

        # Score the predictions against the true held-out labels.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        # Send all metrics in a single call rather than one request per metric;
        # metric keys are unchanged.
        mlflow.log_metrics(
            {
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1_score": f1,
            }
        )

        # Store the fitted estimator with the run under the "models" artifact path.
        mlflow.sklearn.log_model(model, artifact_path="models")

        print(f"Logged {model_name}: Accuracy={accuracy:.2f}, F1 Score={f1:.2f}")



Logged Random Forest: Accuracy=0.94, F1 Score=0.94
🏃 View run busy_heart_0y4f64c3 at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/9c15fe3b-278f-4875-b39a-d756406c9775/resourceGroups/dp-100/providers/Microsoft.MachineLearningServices/workspaces/dp_100_learning/#/experiments/fe4a8323-1fb3-49a4-a510-45464a5acfe8/runs/b4c17d25-8267-429f-a9e5-5afb87073519
🧪 View experiment at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/9c15fe3b-278f-4875-b39a-d756406c9775/resourceGroups/dp-100/providers/Microsoft.MachineLearningServices/workspaces/dp_100_learning/#/experiments/fe4a8323-1fb3-49a4-a510-45464a5acfe8
Logged Logistic Regression: Accuracy=0.83, F1 Score=0.81
🏃 View run silver_queen_fhth8lc2 at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/9c15fe3b-278f-4875-b39a-d756406c9775/resourceGroups/dp-100/providers/Microsoft.MachineLearningServices/workspaces/dp_100_learning/#/experiments/fe4a8323-1fb3-49a4-a510-45464a5acfe8/runs/99a6e7bb-7efd-42

In [9]:
# Tell the reader that every run has finished and where to look for the results.
completion_message = (
    f"Experiment '{experiment_name}' completed. Check Azure ML Studio for results."
)
print(completion_message)

Experiment 'energy-requirement-prediction' completed. Check Azure ML Studio for results.
