In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.utils import resample
import joblib
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import RMSprop

random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# -------------------- STEP 1: Load & Preprocess Data --------------------
df = pd.read_csv("smart_replanting_dataset.csv")

X = df.drop(["Label", "Grouped_Label"], axis=1)
y = df["Grouped_Label"]

# One-hot encode categorical columns
X_encoded = pd.get_dummies(X, columns=["Region", "Soil_Type"])

# Label encode target
le = LabelEncoder()
y_encoded = le.fit_transform(y)
df_encoded = X_encoded.copy()
df_encoded["Target"] = y_encoded

# ⚖️ Moderate upsampling (not too extreme)
mean_size = int(df_encoded["Target"].value_counts().mean() * 1.5)
df_balanced = pd.concat([
    group.sample(mean_size, replace=True, random_state=42)
    for _, group in df_encoded.groupby("Target")
])

X_bal = df_balanced.drop("Target", axis=1)
y_bal = df_balanced["Target"]

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_bal)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_bal, test_size=0.2, stratify=y_bal, random_state=42
)

# Convert targets to categorical for Keras
num_classes = len(le.classes_)
y_train_cat = tf.keras.utils.to_categorical(y_train, num_classes)
y_test_cat = tf.keras.utils.to_categorical(y_test, num_classes)

# -------------------- STEP 2: Build & Train Model --------------------
model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.4),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    Dense(num_classes, activation='softmax')
])

from tensorflow.keras.losses import CategoricalCrossentropy
loss_fn = CategoricalCrossentropy(label_smoothing=0.05)

model.compile(
    optimizer=RMSprop(learning_rate=0.0002),
    loss=loss_fn,
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    X_train, y_train_cat,
    epochs=252,
    batch_size=2,
    validation_split=0.2,
    verbose=0
)

# -------------------- STEP 3: Evaluate --------------------

y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

acc = accuracy_score(y_test, y_pred)
print(f"\n✅ Accuracy: {acc:.2f}")
print("\n📊 Classification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_, zero_division=0))

# -------------------- STEP 4: Save for Streamlit --------------------

model.save("ann_model.keras")  # Recommended Keras format
joblib.dump(scaler, "ann_scaler.pkl")
joblib.dump(le, "ann_label_encoder.pkl")
joblib.dump(X_bal.columns.tolist(), "ann_feature_columns.pkl")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step

✅ Accuracy: 0.60

📊 Classification Report:
                  precision    recall  f1-score   support

DroughtResistant       0.60      1.00      0.75        12
      FastGrower       0.71      0.38      0.50        13
        Hardwood       0.42      0.38      0.40        13
        Mangrove       0.81      1.00      0.90        13
       Medicinal       0.50      0.46      0.48        13
       ShadeTree       0.50      0.38      0.43        13

        accuracy                           0.60        77
       macro avg       0.59      0.60      0.58        77
    weighted avg       0.59      0.60      0.57        77



['ann_feature_columns.pkl']