In [12]:
# -------------------- IMPORT LIBRARIES --------------------
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

# -------------------- LOAD DATA --------------------
# Replace with your CSV path if saved locally
df = pd.read_csv("data/irrigation_prediction.csv")  # <-- your dataset file

# -------------------- DEFINE FEATURES & TARGET --------------------
target = "Irrigation_Need"
features = [
    "Soil_Type","Soil_pH","Soil_Moisture","Organic_Carbon","Electrical_Conductivity",
    "Temperature_C","Humidity","Rainfall_mm","Sunlight_Hours","Wind_Speed_kmh",
    "Crop_Type","Crop_Growth_Stage","Season","Irrigation_Type","Water_Source",
    "Field_Area_hectare","Mulching_Used","Previous_Irrigation_mm","Region"
]

X = df[features]
y = df[target]

# -------------------- LABEL ENCODING FOR CATEGORICAL FEATURES --------------------
cat_cols = X.select_dtypes(include="object").columns.tolist()
encoder_dict = {}

for col in cat_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    encoder_dict[col] = le

# Encode target variable
target_le = LabelEncoder()
y = target_le.fit_transform(y)

# Save target encoder
joblib.dump(target_le, "target_encoder.pkl")

# -------------------- SCALE NUMERICAL FEATURES --------------------
num_cols = X.select_dtypes(include=np.number).columns.tolist()
scaler = StandardScaler()
X[num_cols] = scaler.fit_transform(X[num_cols])

# -------------------- SPLIT DATA --------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# -------------------- TRAIN XGBOOST CLASSIFIER --------------------
model = XGBClassifier(
    objective='multi:softmax',
    num_class=len(np.unique(y)),
    eval_metric='mlogloss',
    use_label_encoder=False,
    random_state=42
)
model.fit(X_train, y_train)

# -------------------- EVALUATE MODEL --------------------
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# -------------------- SAVE ARTIFACTS --------------------
joblib.dump(model, "irrigation_model.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(encoder_dict, "encoder_dict.pkl")

print("\n✅ Model, Scaler, and Encoders saved successfully!")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_index

Accuracy: 0.9965

Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.98        59
           1       1.00      1.00      1.00      1204
           2       1.00      0.99      1.00       737

    accuracy                           1.00      2000
   macro avg       0.99      1.00      0.99      2000
weighted avg       1.00      1.00      1.00      2000


Confusion Matrix:
 [[  59    0    0]
 [   0 1202    2]
 [   2    3  732]]

✅ Model, Scaler, and Encoders saved successfully!
