In [11]:
# Step 1: Import necessary libraries
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
import joblib

# Step 2: Load and preprocess data
# The CSV is looked up relative to the process's current working directory.
current_dir = os.getcwd()
path = os.path.join(current_dir, './plant_growth_data.csv')

# `data` keeps the frame exactly as read from disk; every later step works
# on the independent copy `df`.
data = pd.read_csv(filepath_or_buffer=path, encoding="ISO-8859-1")
df = data.copy(deep=True)

def preprocessing(df):
    """Integer-encode text columns and split features from the target.

    Every column with object or string dtype (including the target, if it is
    textual) is converted to a pandas categorical and replaced by its integer
    category codes. Missing values are encoded as ``-1``.

    The input frame is left untouched; encoding is done on a copy.

    NOTE: the category -> code mapping is derived from the values present in
    ``df`` itself, so two calls on different frames may assign different
    codes to the same label.

    Args:
        df: DataFrame containing the feature columns and a
            ``'Growth_Milestone'`` target column.

    Returns:
        Tuple ``(X, y)``: ``X`` is the encoded frame without the
        ``'Growth_Milestone'`` column, ``y`` is that column as a Series.

    Raises:
        KeyError: If ``df`` has no ``'Growth_Milestone'`` column.
    """
    # Work on a copy so the caller's frame (often a slice produced by a
    # train/test split) is never modified behind its back.
    encoded = df.copy()

    for column in encoded.columns:
        values = encoded[column]
        # Text may be stored as plain ``object`` or as a dedicated pandas
        # string dtype; both need to be turned into numeric codes.
        is_text = (
            pd.api.types.is_object_dtype(values)
            or pd.api.types.is_string_dtype(values)
        )
        if is_text:
            encoded[column] = values.astype('category').cat.codes

    X = encoded.drop(columns='Growth_Milestone')
    y = encoded['Growth_Milestone']
    return X, y

# Encode the categorical columns ONCE on the full dataset so that the train
# and test partitions share the same category -> code mapping. Encoding each
# partition separately derives the mapping from whatever labels happen to be
# present in that partition, so the same label can end up with different
# integer codes in train and test.
# A copy is passed so the module-level `df` is never modified.
X_all, y_all = preprocessing(df.copy())

# Split the rows exactly as before (same random_state), then pick the
# matching encoded rows by index label. `df` comes straight from read_csv
# with the default index, so the labels are unique.
trainset, testset = train_test_split(df, test_size=0.2, random_state=0)
X_train, y_train = X_all.loc[trainset.index], y_all.loc[trainset.index]
X_test, y_test = X_all.loc[testset.index], y_all.loc[testset.index]

# Step 3: Train the model
# 10-fold cross-validated logistic regression with L2 regularisation,
# selecting the regularisation strength by accuracy.
clf = LogisticRegressionCV(cv=10, random_state=0, scoring='accuracy', penalty='l2').fit(X_train, y_train)

# Save the trained model
model_path = os.path.join(current_dir, 'ml_model.pkl')
joblib.dump(clf, model_path)

def predict(data_array):
    """Classify a single sample with the model persisted at ``model_path``.

    Args:
        data_array: Feature values for one sample, ordered like the columns
            of the module-level ``X_train``.

    Returns:
        A dict with two entries:
        ``"prediction"`` - the predicted class as an ``int``;
        ``"probabilities"`` - a dict mapping ``"class_<i>"`` to a ``float``,
        where ``<i>`` is the position of the entry in the model's
        ``predict_proba`` output (not necessarily the class label itself).
    """
    # The estimator is re-read from disk on every call.
    estimator = joblib.load(model_path)

    # One-row frame labelled with the training column names, so the model
    # receives input with the same feature names it was fitted on.
    sample = pd.DataFrame([data_array], columns=list(X_train.columns))

    label = estimator.predict(sample)[0]
    class_probs = estimator.predict_proba(sample)[0]

    # Cast NumPy scalars to plain Python types for the returned dict.
    scores = {}
    for position, prob in enumerate(class_probs):
        scores[f"class_{position}"] = float(prob)

    return {"prediction": int(label), "probabilities": scores}


# Test the prediction function
# Six placeholder feature values (1..6), one per expected input column.
test_data = list(range(1, 7))  # Example input
print(f"Prediction: {predict(test_data)}")

Prediction: {'prediction': 1, 'probabilities': {'class_0': 0.37641837176371895, 'class_1': 0.623581628236281}}
