#  04_model_training.ipynb
## Train a Random Forest model using terrain features (DEM, Slope, Aspect)


# v1 and v2
## v1-Too safe — predicts only non-fire everywhere (100% non-fire bias).
## v2-Balanced — detects some fires with improved precision, but still misses many fires

## Step 1: Import required libraries


In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib

## Step 2: Load feature matrix and labels

In [2]:
X = np.load('C:/Users/Siddharth Tomar/forest-fire-simulation/data/processed/X_train_final.npy')  # Shape: (n_samples, 3)
y = np.load('C:/Users/Siddharth Tomar/forest-fire-simulation/data/processed/y_train_final.npy')  # Shape: (n_samples,)


In [3]:
print(f" Input Feature Matrix Shape: {X.shape}")
print(f" Labels Shape: {y.shape}")
print(f" Fire Pixels: {np.sum(y == 1)}")
print(f" Non-Fire Pixels: {np.sum(y == 0)}")


 Input Feature Matrix Shape: (379135, 3)
 Labels Shape: (379135,)
 Fire Pixels: 11957
 Non-Fire Pixels: 367178


## Step 3: Split into training and testing sets

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

##  Step 4: Initialize and train Random Forest

In [18]:
clf = RandomForestClassifier(
    n_estimators=200,         # more trees → better generalization
    max_depth=15,             # limit depth to reduce overfitting
    min_samples_split=5,      # avoid splits on tiny noisy subsets
    min_samples_leaf=3,       # same idea
    random_state=42,
    n_jobs=-1                 # use all cores
)

clf.fit(X_train, y_train)
print("✅ Model training complete!")


✅ Model training complete!


## Step 5: Evaluate model on test set

In [None]:
y_pred = clf.predict(X_test)

print("\n📈 Classification Report:")
print(classification_report(y_test, y_pred))

print("🧾 Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

## Step 6: Save the trained model to disk

In [17]:
joblib.dump(clf, 'C:/Users/Siddharth Tomar/forest-fire-simulation/model/training/random_forest_terrain_only_v2_balanced.pkl')
print("Model saved to model/training/random_forest_terrain_only.pkl")

Model saved to model/training/random_forest_terrain_only.pkl


# Above is the basic code to train the bot now below i will just update the code to test different ways to updaet the code  

# v3
## Aggressive — catches most fires but creates too many false fire alarms

In [21]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import joblib

# Load full data
X = np.load("C:/Users/Siddharth Tomar/forest-fire-simulation/data/processed/X_train_final.npy")
y = np.load("C:/Users/Siddharth Tomar/forest-fire-simulation/data/processed/y_train_final.npy")

# Separate fire and non-fire pixels
fire_mask = y == 1
nonfire_mask = y == 0

X_fire = X[fire_mask]
y_fire = y[fire_mask]

X_nonfire = X[nonfire_mask]
y_nonfire = y[nonfire_mask]

# Downsample non-fire to match fire
np.random.seed(42)
indices = np.random.choice(len(X_nonfire), size=len(X_fire), replace=False)
X_nonfire_down = X_nonfire[indices]
y_nonfire_down = y_nonfire[indices]

# Combine balanced dataset
X_balanced = np.vstack([X_fire, X_nonfire_down])
y_balanced = np.concatenate([y_fire, y_nonfire_down])

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42)

# Train model with tuned hyperparameters
clf = RandomForestClassifier(
    n_estimators=200,
    max_depth=15,
    min_samples_split=5,
    min_samples_leaf=3,
    random_state=42,
    n_jobs=-1
)
clf.fit(X_train, y_train)



In [22]:
# Evaluate on original full test set (not balanced)
X_full_test = X
y_full_test = y
y_pred = clf.predict(X_full_test)

print("\n📈 Classification Report:")
print(classification_report(y_full_test, y_pred, digits=2))

print("\n🧾 Confusion Matrix:")
print(confusion_matrix(y_full_test, y_pred))



📈 Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.38      0.54    367178
           1       0.04      0.84      0.08     11957

    accuracy                           0.39    379135
   macro avg       0.51      0.61      0.31    379135
weighted avg       0.96      0.39      0.53    379135


🧾 Confusion Matrix:
[[137854 229324]
 [  1942  10015]]


In [24]:

# ✅ Save model
joblib.dump(clf, "C:/Users/Siddharth Tomar/forest-fire-simulation/model/training/random_forest_v3_downsampled_tuned.pkl")
print("\nSaved: v3 model to disk.")



Saved: v3 model to disk.


# v4 
## Highly sensitive — detects 85% of fire pixels but wrongly flags thousands of safe areas as fire-prone

In [25]:
#  Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import joblib  # For saving model
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

#  Load the feature matrix and labels
X = np.load("C:/Users/Siddharth Tomar/forest-fire-simulation/data/processed/X_train_final.npy")
y = np.load("C:/Users/Siddharth Tomar/forest-fire-simulation/data/processed/y_train_final.npy")

print(" Loaded Data")
print(" Feature shape:", X.shape)
print(" Labels shape:", y.shape)
print(" Fire pixels:", np.sum(y == 1))
print(" Non-fire pixels:", np.sum(y == 0))


 Loaded Data
 Feature shape: (379135, 3)
 Labels shape: (379135,)
 Fire pixels: 11957
 Non-fire pixels: 367178


In [27]:

#  Train Random Forest Classifier (v4 - hybrid approach)
clf = RandomForestClassifier(
    n_estimators=200,             # Number of trees
    max_depth=18,                 # Slightly restricted tree depth to prevent overfitting
    min_samples_leaf=4,           # Minimum leaf size to add regularization
    min_samples_split=5,          # Minimum samples to split node
    class_weight='balanced_subsample',  # Dynamic balancing at tree level
    n_jobs=-1,                    # Use all CPU cores
    random_state=42               # For reproducibility
)

print("🚀 Training model v4 (Hybrid)...")
clf.fit(X, y)
print("✅ Model training complete!")


🚀 Training model v4 (Hybrid)...
✅ Model training complete!


In [28]:

# 📈 Evaluate the model
y_pred = clf.predict(X)

print("\n📈 Classification Report:")
print(classification_report(y, y_pred))

print("\n🧾 Confusion Matrix:")
print(confusion_matrix(y, y_pred))



📈 Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.58      0.73    367178
           1       0.06      0.85      0.12     11957

    accuracy                           0.59    379135
   macro avg       0.53      0.72      0.43    379135
weighted avg       0.96      0.59      0.71    379135


🧾 Confusion Matrix:
[[213937 153241]
 [  1757  10200]]


In [29]:

# 💾 Save model to disk
model_path = "C:/Users/Siddharth Tomar/forest-fire-simulation/model/training/model_v4_mixOf_v2&v3.pkl"
joblib.dump(clf, model_path)
print(f"✅ Model v4 saved to: {model_path}")


✅ Model v4 saved to: C:/Users/Siddharth Tomar/forest-fire-simulation/model/training/model_v4_mixOf_v2&v3.pkl


# v5
## Balanced-aggressive model with high fire recall (~77%) and moderate false positives — best trade-off so far for real fire detection

In [30]:
# 📦 Import packages
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import numpy as np

# 📥 Load feature matrix and labels
X = np.load("C:/Users/Siddharth Tomar/forest-fire-simulation/data/processed/X_train_final.npy")  # shape: (379135, 3)
y = np.load("C:/Users/Siddharth Tomar/forest-fire-simulation/data/processed/y_train_final.npy")  # shape: (379135,)

# 🎯 Create and train RandomForestClassifier (v5)
clf_v5 = RandomForestClassifier(
    n_estimators=300,
    max_depth=14,
    min_samples_leaf=5,
    min_samples_split=10,
    class_weight='balanced_subsample',
    random_state=42,
    n_jobs=-1
)

print("🚀 Training model v5...")
clf_v5.fit(X, y)
print("✅ Training complete!")


🚀 Training model v5...
✅ Training complete!


In [31]:

# 🧪 Evaluate performance
y_pred = clf_v5.predict(X)
print("\n📈 Classification Report:")
print(classification_report(y, y_pred, digits=4))

print("🧾 Confusion Matrix:")
print(confusion_matrix(y, y_pred))



📈 Classification Report:
              precision    recall  f1-score   support

           0     0.9868    0.5628    0.7168    367178
           1     0.0542    0.7691    0.1012     11957

    accuracy                         0.5693    379135
   macro avg     0.5205    0.6659    0.4090    379135
weighted avg     0.9574    0.5693    0.6974    379135

🧾 Confusion Matrix:
[[206637 160541]
 [  2761   9196]]


In [32]:

# 💾 Save the trained model
joblib.dump(clf_v5, "C:/Users/Siddharth Tomar/forest-fire-simulation/model/training/model_v5_mixOf_v2&v4.pkl")
print("💾 Saved model as model_v5.pkl")


💾 Saved model as model_v5.pkl
