In [0]:
%pip install mlflow prophet scikit-learn pandas
dbutils.library.restartPython()

In [0]:
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from mlflow.models.signature import infer_signature 

# 1. Load Data
# Pull the whole table into a pandas DataFrame. `spark` is not defined in
# this cell; presumably it is the session injected by the notebook runtime.
pdf = spark.table("agriculture.silver.crop_recommendation").toPandas()

# 2. Prepare Features
# The feature list is named once so the training frame, the logged signature
# and the input example are all derived from the same columns.
feature_columns = ['nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall']
X = pdf[feature_columns]
y = pdf['crop_name']

# Split data: 20% is held out for evaluation; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. MLflow Tracking
mlflow.set_experiment("/Users/skmanjunath16@gmail.com/Agriculture_Experiments")
# NOTE: despite its name, this value is used as the *run* name below; the
# experiment itself is the one selected by set_experiment() above.
experiment_name = "Agronomist_Viability_Model"

# Hyperparameters live in one dict so the values logged to MLflow are exactly
# the values the model is built with. random_state is pinned so that, together
# with the seeded split, repeated runs train the same forest.
rf_params = {"n_estimators": 100, "max_depth": 10, "random_state": 42}

with mlflow.start_run(run_name=experiment_name) as run:
    # Record the configuration on the run before training.
    mlflow.log_params(rf_params)

    # Train Model
    rf = RandomForestClassifier(**rf_params)
    rf.fit(X_train, y_train)

    # Evaluate on the held-out split
    predictions = rf.predict(X_test)
    acc = accuracy_score(y_test, predictions)
    print(f"Agronomist Model Accuracy: {acc:.2f}")

    # Persist the score on the run; printing alone leaves the tracked run
    # without any evaluation metric to compare against other runs.
    mlflow.log_metric("test_accuracy", acc)

    # 4. Create the Signature (The "Contract")
    # Inputs and outputs are paired from the same rows: `predictions` was
    # produced from X_test, so X_test is the matching input sample.
    signature = infer_signature(X_test, predictions)

    # 5. Log & Register with Signature
    mlflow.sklearn.log_model(
        sk_model=rf,
        name="model",
        registered_model_name="agriculture.silver.crop_classifier",
        signature=signature,
        input_example=X_train.head(1),
    )


print("Agronomist Model trained, signed, and registered successfully.")