In [1]:
import sys
sys.path.append('../src/')

In [2]:
from train import *

In [3]:
# Driver cell: prepare the data, persist the test-split features, then train
# and log every candidate classifier through the experiment manager.
if __name__ == "__main__":
    # ---- Configuration ---------------------------------------------------
    EXPERIMENT_NAME = "energy-requirement-prediction"
    WORKSPACE_CONFIG = "./config.json"
    DATA_PATH = "../data/energy_data/input_data/energy_data.csv"
    PREDICTION_FILE = "../data/energy_data/input_data/prediction_data.csv"
    # Single seed shared by every estimator below so runs are repeatable.
    RANDOM_STATE = 44

    # ---- Step 1: load, preprocess and split the data ---------------------
    data_prep = DataPreparation(DATA_PATH)
    df = data_prep.load_data()
    X, y = data_prep.preprocess_data(df)
    X_train, X_test, y_train, y_test = data_prep.split_data(X, y)

    # ---- Step 2: write the test-split features out as prediction data ----
    DataSaver.save_data(X_test, PREDICTION_FILE)

    # ---- Step 3: build the candidate models and run the experiment -------
    # Insertion order is the order in which the models are handed to the
    # experiment manager, so it is kept stable.
    models = {}
    models["Random Forest"] = RandomForestClassifier(
        n_estimators=100, random_state=RANDOM_STATE
    )
    models["Logistic Regression"] = LogisticRegression(
        max_iter=200, random_state=RANDOM_STATE
    )
    models["Decision Tree"] = DecisionTreeClassifier(random_state=RANDOM_STATE)
    models["Support Vector Machine"] = SVC(
        probability=True, random_state=RANDOM_STATE
    )

    experiment_manager = ExperimentManager(EXPERIMENT_NAME, WORKSPACE_CONFIG)
    experiment_manager.train_and_log_models(
        models, X_train, y_train, X_test, y_test
    )

Data loaded successfully from ../data/energy_data/input_data/energy_data.csv
Data preprocessing completed.
Data split into training and testing sets.
Prediction data saved at: ../data/energy_data/input_data/prediction_data.csv
Experiment 'energy-requirement-prediction' is set up in MLflow.
Training and logging model: Random Forest
Model: Random Forest, Metrics: {'accuracy': 0.95, 'precision': 0.9431818181818182, 'recall': 0.9431818181818182, 'f1_score': 0.9431818181818182}
Training and logging model: Logistic Regression
Model: Logistic Regression, Metrics: {'accuracy': 0.835, 'precision': 0.8089887640449438, 'recall': 0.8181818181818182, 'f1_score': 0.8135593220338984}
Training and logging model: Decision Tree
Model: Decision Tree, Metrics: {'accuracy': 0.895, 'precision': 0.8850574712643678, 'recall': 0.875, 'f1_score': 0.88}
Training and logging model: Support Vector Machine
Model: Support Vector Machine, Metrics: {'accuracy': 0.92, 'precision': 0.9090909090909091, 'recall': 0.909090

  from google.protobuf import service as _service
