In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import os
import sys

# --- File Paths Setup --- #
# All data/model locations are resolved relative to one anchor directory.
try:
    # Running as a regular script: anchor on the directory of this file.
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is undefined in interactive sessions (e.g., Jupyter Notebook),
    # so fall back to the current working directory.
    script_dir = os.getcwd()

data_file_path = os.path.join(script_dir, "..", "data", "pump_data.csv")
model_file_path = os.path.join(script_dir, "..", "scripts", "pump_activation_model.pkl")

# --- Dataset Loading --- #
# Fail early with an explicit path when the CSV is missing.
if not os.path.exists(data_file_path):
    raise FileNotFoundError(f"Dataset not found: {data_file_path}")

data = pd.read_csv(data_file_path)

# --- Feature Engineering --- #
# Guard clause: the three feature columns and the target column must all exist.
if not {"Soil Moisture", "Temperature", "Air Humidity", "Pump Data"}.issubset(data.columns):
    raise ValueError("Dataset does not contain required columns: Soil Moisture, Temperature, Air Humidity, Pump Data.")

# Features and target variable
X = data[["Soil Moisture", "Temperature", "Air Humidity"]]
y = data["Pump Data"]

# --- Split Dataset --- #
# 80/20 train/test split; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Model Training --- #
def train_model(X_train, y_train):
    """
    Train the Random Forest model and save it.

    The fitted model is written to the module-level ``model_file_path``;
    the parent directory is created first if it does not exist yet.

    :param X_train: Training features passed to ``fit``.
    :param y_train: Training target values passed to ``fit``.
    :return: The fitted RandomForestClassifier.
    """
    # Fixed seed so repeated training runs build the same forest.
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # joblib.dump does not create missing directories; without this a fresh
    # checkout lacking the target folder fails only after training finishes.
    model_dir = os.path.dirname(model_file_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    # Save the model
    joblib.dump(model, model_file_path)
    print(f"Model saved as {model_file_path}")
    return model

# --- Prediction Function --- #
def predict(data):
    """
    Predict pump activation (0=Off, 1=On) using the trained model.

    The model is loaded from the module-level ``model_file_path`` on every
    call, so it must have been trained and saved beforehand.

    :param data: A dictionary mapping each feature name to a single value, or
        a DataFrame containing the feature columns. Additional columns are
        ignored and column order does not matter.
    :return: Predicted pump status (0 or 1) for the first row of the input.
    :raises FileNotFoundError: If the saved model file does not exist.
    :raises ValueError: If ``data`` is neither a dictionary nor a DataFrame,
        or if a required feature column is missing.
    """
    # Load the model
    if not os.path.exists(model_file_path):
        raise FileNotFoundError(f"Model file not found: {model_file_path}")

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files that come from a trusted location.
    model = joblib.load(model_file_path)

    # Convert input data to DataFrame
    if isinstance(data, dict):
        # index=[0] turns a dict of scalars into a single-row frame.
        data = pd.DataFrame(data, index=[0])
    elif not isinstance(data, pd.DataFrame):
        raise ValueError("Input data must be a dictionary or DataFrame.")

    # Ensure correct feature columns
    expected_columns = ["Soil Moisture", "Temperature", "Air Humidity"]
    if not set(expected_columns).issubset(data.columns):
        raise ValueError(f"Input data must contain these columns: {expected_columns}")

    # The check above accepts supersets in any order, but the model expects
    # exactly the training columns in training order; select them explicitly
    # so extra or re-ordered columns cannot cause a feature mismatch.
    features = data[expected_columns]

    # Make prediction
    prediction = model.predict(features)
    return prediction[0]

# --- Evaluation --- #
def evaluate_model(model, X_test, y_test):
    """
    Print the model's accuracy on the supplied test data.

    :param model: Fitted estimator exposing ``predict``.
    :param X_test: Test features passed to ``model.predict``.
    :param y_test: True target values compared against the predictions.
    :return: None; the accuracy is printed with two decimal places.
    """
    accuracy = accuracy_score(y_test, model.predict(X_test))
    print(f"Model Accuracy: {accuracy:.2f}")

# --- Main Program --- #
if __name__ == "__main__":
    if len(sys.argv) != 4:
        # No (or the wrong number of) feature values on the command line:
        # retrain, report accuracy, then ask for one reading interactively.
        print("Training the model...")
        trained_model = train_model(X_train, y_train)
        print("Evaluating the model...")
        evaluate_model(trained_model, X_test, y_test)

        # Collect the three feature values from the user, in model order.
        soil_moisture = float(input("Enter Soil Moisture: "))
        temperature = float(input("Enter Temperature: "))
        air_humidity = float(input("Enter Air Humidity: "))
        user_input = {
            "Soil Moisture": soil_moisture,
            "Temperature": temperature,
            "Air Humidity": air_humidity,
        }

        # Predict with the model that was just saved to disk.
        prediction = predict(user_input)
        print(f"Predicted Pump Activation: {prediction} (0=Off, 1=On)")
    else:
        # Exactly three command-line arguments: treat them as soil moisture,
        # temperature and air humidity, and predict with the saved model.
        try:
            input_data = dict(
                zip(
                    ("Soil Moisture", "Temperature", "Air Humidity"),
                    map(float, sys.argv[1:4]),
                )
            )
            result = predict(input_data)
            print(f"Pump Activation Prediction: {result} (0=Off, 1=On)")
        except Exception as err:
            # Any failure (bad number, missing model, ...) is reported as text.
            print(f"Error: {err}")



Training the model...
Model saved as e:\Epic22\ML\models\..\scripts\pump_activation_model.pkl
Evaluating the model...
Model Accuracy: 1.00
Predicted Pump Activation: 1 (0=Off, 1=On)
