In [2]:
import warnings

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# NOTE: the `imblearn` module is distributed on PyPI as `imbalanced-learn`.
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
warnings.filterwarnings("ignore")

# === Load CSV ===
# Placeholder path: point this at the real sensor dataset before running.
df = pd.read_csv("your_file.csv")  # Change to your actual filename

# === Rename sensor columns ===
# Replace the generic sensor_N headers with descriptive feature names.
sensor_rename_map = {
    'sensor_0': 'soil_moisture_1',
    'sensor_1': 'soil_moisture_2',
    'sensor_2': 'soil_moisture_3',
    'sensor_3': 'air_temperature',
    'sensor_4': 'humidity',
    'sensor_5': 'light_intensity',
    'sensor_6': 'soil_temperature',
    'sensor_7': 'rain_sensor',
    'sensor_8': 'ph_sensor',
    'sensor_9': 'water_level',
    'sensor_10': 'wind_speed',
    'sensor_11': 'wind_direction',
    'sensor_12': 'battery_voltage',
    'sensor_13': 'solar_radiation',
    'sensor_14': 'leaf_wetness',
    'sensor_15': 'evapotranspiration',
    'sensor_16': 'canopy_temperature',
    'sensor_17': 'chlorophyll',
    'sensor_18': 'water_salinity',
    'sensor_19': 'nitrate_level',
}
df = df.rename(columns=sensor_rename_map)

# === Rename parcel to zones ===
parcel_rename_map = {
    0: 'rice_field_zone',
    1: 'wheat_crop_zone',
    2: 'vegetable_plot_zone'
}
zone_labels = df['parcel'].map(parcel_rename_map)
# Series.map yields NaN for any id missing from the mapping; fail loudly here
# instead of letting NaN labels reach the encoder and the models.
unmapped_parcels = df.loc[zone_labels.isna(), 'parcel'].unique()
if len(unmapped_parcels) > 0:
    raise ValueError(
        f"Unexpected or missing 'parcel' values: {list(unmapped_parcels)}; "
        f"expected one of {list(parcel_rename_map)}"
    )
df['parcel'] = zone_labels

# === Feature & Label Split ===
# Every column except the target is used as a feature.
X = df.drop(columns=['parcel'])
y = df['parcel']

# === Encode Labels ===
# The encoder maps zone names to integer ids 0..n_classes-1; le.classes_
# holds the names in id order and is reused for reports and plots.
le = LabelEncoder()
y_encoded = le.fit_transform(y)
class_ids = list(range(len(le.classes_)))

# === Train-Test Split ===
# Stratify so both splits keep the original class proportions; a plain random
# split can leave a minority class under-represented in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# === SMOTE Oversampling ===
# Only the training split is resampled; the test split stays untouched so the
# reported metrics reflect the real class distribution.
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

# === Random Forest ===
rf = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
rf.fit(X_train_res, y_train_res)
rf_preds = rf.predict(X_test)
print("🌳 Random Forest Classification Report:")
# Passing labels explicitly keeps the report aligned with target_names even
# when a class is absent from y_test or from the predictions.
print(classification_report(y_test, rf_preds, labels=class_ids, target_names=le.classes_))

# === XGBoost ===
# use_label_encoder is deprecated in recent XGBoost releases, and
# scale_pos_weight only applies to binary objectives, so neither is passed
# for this multi-class problem.
xgb = XGBClassifier(eval_metric='mlogloss', random_state=42)
xgb.fit(X_train_res, y_train_res)
xgb_preds = xgb.predict(X_test)
print("⚡ XGBoost Classification Report:")
print(classification_report(y_test, xgb_preds, labels=class_ids, target_names=le.classes_))

# === Confusion Matrix ===
def plot_confusion(y_true, y_pred, model_name):
    """Show a labelled confusion-matrix heatmap for one model.

    Args:
        y_true: Encoded ground-truth class ids (as produced by the
            module-level label encoder ``le``).
        y_pred: Encoded predicted class ids, same length as ``y_true``.
        model_name: Human-readable model name used in the figure title.
    """
    # Fix the label set to every class the encoder knows, so the matrix is
    # always n_classes x n_classes and lines up with the tick labels even if
    # some class never occurs in y_true / y_pred.
    class_ids = list(range(len(le.classes_)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    plt.figure(figsize=(6, 5))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=le.classes_,
        yticklabels=le.classes_,
    )
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(f"{model_name} Confusion Matrix")
    plt.tight_layout()
    plt.show()

# Draw one confusion matrix per trained model, Random Forest first.
for _model_label, _model_preds in (
    ("Random Forest", rf_preds),
    ("XGBoost", xgb_preds),
):
    plot_confusion(y_test, _model_preds, _model_label)

# === Feature Importance Plot ===
def plot_feature_importance(model, feature_names, title):
    """Show a bar chart of a fitted model's feature importances.

    Bars are ordered from the most to the least important feature.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        feature_names: Indexable collection of feature names, in the same
            order as the columns the model was trained on.
        title: Figure title.
    """
    scores = model.feature_importances_
    # argsort is ascending; reverse it to rank the largest importance first.
    ranking = scores.argsort()[::-1]
    bar_positions = range(len(scores))
    ranked_names = [feature_names[idx] for idx in ranking]

    plt.figure(figsize=(10, 6))
    plt.title(title)
    plt.bar(bar_positions, scores[ranking], align="center", color="green")
    plt.xticks(bar_positions, ranked_names, rotation=45)
    plt.tight_layout()
    plt.show()

# Rank the features each fitted model relied on.
plot_feature_importance(rf, X.columns, "🌳 Random Forest Feature Importance")
plot_feature_importance(xgb, X.columns, "⚡ XGBoost Feature Importance")

# === Cross-Validation ===
# Cross-validating on the already-oversampled data leaks information: SMOTE
# synthesises points from neighbours that can end up in the validation fold,
# which inflates the score. Wrapping SMOTE and the model in an imblearn
# pipeline resamples only the training part of every fold, and validation is
# done on original (non-synthetic) rows. cross_val_score clones the pipeline,
# so the fitted rf / xgb objects exported below are left untouched.
rf_cv_pipeline = ImbPipeline([("smote", sm), ("model", rf)])
xgb_cv_pipeline = ImbPipeline([("smote", sm), ("model", xgb)])
rf_cv_scores = cross_val_score(rf_cv_pipeline, X_train, y_train, cv=5)
xgb_cv_scores = cross_val_score(xgb_cv_pipeline, X_train, y_train, cv=5)

print(f"🔁 RF CV Accuracy (5-Fold): {rf_cv_scores.mean():.2f} ± {rf_cv_scores.std():.2f}")
print(f"🔁 XGB CV Accuracy (5-Fold): {xgb_cv_scores.mean():.2f} ± {xgb_cv_scores.std():.2f}")

# === Export Trained Models ===
# Persist both fitted models plus the label encoder, which is needed to turn
# predicted class ids back into zone names at inference time.
joblib.dump(rf, "random_forest_model.pkl")
joblib.dump(xgb, "xgboost_model.pkl")
joblib.dump(le, "label_encoder.pkl")

print("✅ Models and Label Encoder saved for real-time usage.")


ModuleNotFoundError: No module named 'imblearn'