In [1]:
# Phase 1 – Loading dataset & processing (India filter + basic features)

import pandas as pd

# Load global active fire data (from NASA FIRMS via Kaggle)
df = pd.read_csv("india_fires.csv")
print(f"Global rows: {len(df)}")

# Filter only India by latitude/longitude bounding box
df_india = df[
    (df["latitude"].between(8, 37)) &
    (df["longitude"].between(68, 97))
]
print(f"India rows: {len(df_india)}")

# Add simple weather-style placeholder features
df_india = df_india.copy()
df_india["temp"] = 35       # Placeholder temperature
df_india["humidity"] = 30   # Placeholder humidity

# Save Phase 1 processed file
df_india.to_csv("india_fires_final.csv", index=False)
print("--- Phase 1 DONE: india_fires_final.csv saved ---")



Global rows: 142181
India rows: 142181
--- Phase 1 DONE: india_fires_final.csv saved ---


In [2]:
# Phase 2 – Create fire_risk label and inspect features

import pandas as pd

df = pd.read_csv("india_fires_final.csv")

# Inspect intensity-related columns
print(df[["brightness", "frp"]].head())

# Create fire_risk from brightness (binary label)
df["fire_risk"] = (df["brightness"] > 330).astype(int)
print("fire_risk counts (brightness-based):")
print(df["fire_risk"].value_counts())

# Time feature (month)
df["acq_date"] = pd.to_datetime(df["acq_date"])
df["month"] = df["acq_date"].dt.month

# Define features and target
features = ["latitude", "longitude", "brightness", "temp", "humidity", "month"]
X = df[features]
y = df["fire_risk"]

print("\nFeatures used:", features)
print("X shape:", X.shape)
print("y distribution:\n", y.value_counts())


   brightness   frp
0      326.15  3.64
1      333.58  3.09
2      331.49  2.53
3      332.08  3.74
4      332.10  6.05
fire_risk counts (brightness-based):
fire_risk
1    98110
0    44071
Name: count, dtype: int64

Features used: ['latitude', 'longitude', 'brightness', 'temp', 'humidity', 'month']
X shape: (142181, 6)
y distribution:
 fire_risk
1    98110
0    44071
Name: count, dtype: int64


In [3]:
# Phase 2 – Save labeled dataset and train a baseline model

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

df = pd.read_csv("india_fires_final.csv")

# Ensure fire_risk is created (brightness-based)
df["fire_risk"] = (df["brightness"] > 330).astype(int)
print("fire_risk counts:")
print(df["fire_risk"].value_counts())

# Time feature
df["acq_date"] = pd.to_datetime(df["acq_date"])
df["month"] = df["acq_date"].dt.month

# Save labeled dataset
df.to_csv("india_fires_labeled.csv", index=False)
print(" Saved india_fires_labeled.csv with fire_risk target.")

# Simple prediction pipeline (baseline model)
features = ["latitude", "longitude", "brightness", "temp", "humidity", "month"]
X = df[features]
y = df["fire_risk"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Baseline Accuracy:", acc)


fire_risk counts:
fire_risk
1    98110
0    44071
Name: count, dtype: int64
 Saved india_fires_labeled.csv with fire_risk target.
Baseline Accuracy: 1.0


In [4]:
# Phase 2 – Evaluation: confusion matrix and classification report

from sklearn.metrics import classification_report, confusion_matrix

print("Baseline Accuracy:", acc)

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Baseline Accuracy: 1.0

Confusion Matrix:
[[ 8766     0]
 [    0 19671]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      8766
           1       1.00      1.00      1.00     19671

    accuracy                           1.00     28437
   macro avg       1.00      1.00      1.00     28437
weighted avg       1.00      1.00      1.00     28437



In [5]:
# Phase 2 – Feature importance (model interpretability)

import numpy as np

importances = model.feature_importances_
for name, imp in sorted(zip(features, importances), key=lambda x: x[1], reverse=True):
    print(f"{name}: {imp:.3f}")


brightness: 0.891
longitude: 0.070
latitude: 0.031
month: 0.008
temp: 0.000
humidity: 0.000


In [6]:
# Phase 3 – Train Random Forest and save model for UI

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib  # pip install joblib

# 1) Load and prepare data
df = pd.read_csv("india_fires_final.csv")

df["fire_risk"] = (df["brightness"] > 330).astype(int)
df["acq_date"] = pd.to_datetime(df["acq_date"])
df["month"] = df["acq_date"].dt.month

features = ["latitude", "longitude", "brightness", "temp", "humidity", "month"]
X = df[features]
y = df["fire_risk"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 2) Train Random Forest for deployment
model = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,
    min_samples_split=5,
    random_state=42,
    n_jobs=-1,
)
model.fit(X_train, y_train)

# 3) Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# 4) Save model and feature list
joblib.dump(model, "fire_risk_rf.joblib")
joblib.dump(features, "features.joblib")
print("Saved fire_risk_rf.joblib and features.joblib")


Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      8766
           1       1.00      1.00      1.00     19671

    accuracy                           1.00     28437
   macro avg       1.00      1.00      1.00     28437
weighted avg       1.00      1.00      1.00     28437

Saved fire_risk_rf.joblib and features.joblib


In [29]:
import streamlit as st
import numpy as np
import joblib

# Load model and feature names
model = joblib.load("fire_risk_rf.joblib")
features = joblib.load("features.joblib")

st.title("India Fire Risk Predictor")

st.write("Enter conditions to estimate whether the location is high fire risk.")

# User inputs
lat = st.number_input("Latitude", value=20.0, format="%.4f")
lon = st.number_input("Longitude", value=78.0, format="%.4f")
brightness = st.number_input("Brightness", value=330.0)
temp = st.number_input("Temperature (°C)", value=30.0)
humidity = st.number_input("Humidity (%)", value=40.0)
month = st.slider("Month", 1, 12, 5)

if st.button("Predict Fire Risk"):
    x = np.array([[lat, lon, brightness, temp, humidity, month]])
    prob = model.predict_proba(x)[0][1]
    pred = model.predict(x)[0]

    label = "High Fire Risk" if pred == 1 else "Low Fire Risk"
    st.subheader(label)
    st.write(f"Predicted probability of high fire risk: {prob:.2f}")


2025-12-11 23:36:57.188 
  command:

    streamlit run C:\Users\LENOVO\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-12-11 23:36:57.190 Session state does not function when running a script without `streamlit run`
