In [9]:
import pandas as pd
import numpy as np
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
import joblib

# 📦 Load dataset
df = pd.read_csv("smart_replanting_dataset.csv")

# 🧱 Split features and target
X = df.drop(["Label", "Grouped_Label"], axis=1)
y = df["Grouped_Label"]

# 🔤 One-hot encode categorical features
X_encoded = pd.get_dummies(X, columns=["Region", "Soil_Type"])

# 🏷️ Encode target
le = LabelEncoder()
y_encoded = le.fit_transform(y)
df_encoded = pd.DataFrame(X_encoded)
df_encoded["Target"] = y_encoded

# 📊 Upsample each class to match the largest class
class_counts = df_encoded["Target"].value_counts()
max_size = class_counts.max()

dfs = []
for _, group in df_encoded.groupby("Target"):
    dfs.append(group.sample(max_size, replace=True, random_state=42))
df_upsampled = pd.concat(dfs)

# 🧪 Final features and labels
X_bal = df_upsampled.drop("Target", axis=1)
y_bal = df_upsampled["Target"]

# 🔄 Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_bal)

# ➕ Add Polynomial Features (degree 2)
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X_scaled)

# 🎲 Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_poly, y_bal, test_size=0.2, stratify=y_bal, random_state=42
)

# 🧠 Train Naive Bayes
model = GaussianNB()
model.fit(X_train, y_train)

# 🧪 Evaluation
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"✅ Accuracy: {acc:.2f}")
print("\n📊 Classification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_, zero_division=0))






✅ Accuracy: 0.57

📊 Classification Report:
                  precision    recall  f1-score   support

DroughtResistant       0.48      1.00      0.65        13
      FastGrower       0.00      0.00      0.00        14
        Hardwood       0.67      0.14      0.24        14
        Mangrove       0.88      1.00      0.93        14
       Medicinal       0.47      1.00      0.64        14
       ShadeTree       0.57      0.29      0.38        14

        accuracy                           0.57        83
       macro avg       0.51      0.57      0.47        83
    weighted avg       0.51      0.57      0.47        83

