In [170]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

In [None]:
df = pd.read_csv('/content/Dataset.csv')

In [None]:
df.shape

(3161300, 14)

In [None]:
df.head()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,62.3992,68.51383,300.68,0.73,0.76,2024-11-01,20,N,VIIRS,n,2.0NRT,268.14,1.38,N
1,61.98473,68.57809,329.34,0.77,0.77,2024-11-01,20,N,VIIRS,n,2.0NRT,269.44,3.21,N
2,61.98024,68.56583,310.01,0.77,0.77,2024-11-01,20,N,VIIRS,n,2.0NRT,268.0,3.21,N
3,61.97791,68.58144,295.72,0.77,0.77,2024-11-01,20,N,VIIRS,n,2.0NRT,268.13,3.32,N
4,61.98361,68.57188,328.57,0.77,0.77,2024-11-01,20,N,VIIRS,n,2.0NRT,269.28,6.07,N


In [None]:
df.tail()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
3161295,-33.83881,18.52974,301.42,0.39,0.36,2025-01-12,2341,N,VIIRS,n,2.0NRT,290.92,1.14,N
3161296,-33.88737,19.10021,312.8,0.39,0.36,2025-01-12,2341,N,VIIRS,n,2.0NRT,288.78,1.13,N
3161297,-33.45296,18.60756,316.77,0.39,0.36,2025-01-12,2341,N,VIIRS,n,2.0NRT,288.82,1.08,N
3161298,-18.98866,-174.76753,336.02,0.74,0.76,2025-01-13,20,N,VIIRS,n,2.0NRT,293.23,7.71,D
3161299,-18.99028,-174.76622,337.32,0.75,0.77,2025-01-13,20,N,VIIRS,n,2.0NRT,294.67,9.48,D


In [None]:
df['confidence'].value_counts()

Unnamed: 0_level_0,count
confidence,Unnamed: 1_level_1
n,2617583
l,384570
h,159147


In [None]:
df = df[df["confidence"] != "l"]

In [None]:
df.shape

(2776730, 14)

In [None]:
df.isnull().sum()

Unnamed: 0,0
latitude,0
longitude,0
brightness,0
scan,0
track,0
acq_date,0
acq_time,0
satellite,0
instrument,0
confidence,0


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2776730 entries, 0 to 3161299
Data columns (total 14 columns):
 #   Column      Dtype  
---  ------      -----  
 0   latitude    float64
 1   longitude   float64
 2   brightness  float64
 3   scan        float64
 4   track       float64
 5   acq_date    object 
 6   acq_time    int64  
 7   satellite   object 
 8   instrument  object 
 9   confidence  object 
 10  version     object 
 11  bright_t31  float64
 12  frp         float64
 13  daynight    object 
dtypes: float64(7), int64(1), object(6)
memory usage: 317.8+ MB


In [None]:
df.describe()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_time,bright_t31,frp
count,2776730.0,2776730.0,2776730.0,2776730.0,2776730.0,2776730.0,2776730.0,2776730.0
mean,6.609819,23.53485,334.8215,0.4593245,0.4831317,1177.632,298.4343,8.054186
std,15.35623,52.71176,17.42298,0.08767663,0.1151641,553.1092,10.64026,15.19292
min,-53.80413,-179.8305,295.0,0.32,0.36,0.0,203.13,0.0
25%,3.6189,-0.48613,329.29,0.39,0.38,1021.0,291.52,2.51
50%,7.937405,23.27404,337.85,0.44,0.45,1155.0,300.04,4.84
75%,11.52069,35.64737,345.54,0.51,0.57,1409.0,305.86,8.73
max,78.7354,179.594,367.0,0.8,0.78,2358.0,384.84,6870.24


In [None]:
# q1 = df["frp"].quantile(0.50)
# q2 = df["frp"].quantile(0.75)

In [None]:
q1 = df["frp"].quantile(0.60)
q2 = df["frp"].quantile(0.90)

Severity Func

In [None]:
def frp_to_severity(frp):
  if frp <= q1:
    return "Low"
  elif frp <= q2:
    return "Medium"
  else:
    return "High"

In [None]:
df["severity"] = df["frp"].apply(frp_to_severity)

In [None]:
df["severity"].value_counts()

Unnamed: 0_level_0,count
severity,Unnamed: 1_level_1
Low,1666170
Medium,833122
High,277438


In [None]:
df.head()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,severity
0,62.3992,68.51383,300.68,0.73,0.76,2024-11-01,20,N,VIIRS,n,2.0NRT,268.14,1.38,N,Low
1,61.98473,68.57809,329.34,0.77,0.77,2024-11-01,20,N,VIIRS,n,2.0NRT,269.44,3.21,N,Low
2,61.98024,68.56583,310.01,0.77,0.77,2024-11-01,20,N,VIIRS,n,2.0NRT,268.0,3.21,N,Low
3,61.97791,68.58144,295.72,0.77,0.77,2024-11-01,20,N,VIIRS,n,2.0NRT,268.13,3.32,N,Low
4,61.98361,68.57188,328.57,0.77,0.77,2024-11-01,20,N,VIIRS,n,2.0NRT,269.28,6.07,N,Medium


In [None]:
df["daynight"] = df["daynight"].map({"D": 1, "N": 0})

In [None]:
df["brightness_diff"] = df["brightness"] - df["bright_t31"]

In [None]:
le = LabelEncoder()

df['severity'] = le.fit_transform(df['severity'])
df['confidence'] = le.fit_transform(df['confidence'])

In [None]:
df = df.sample(n=150000, random_state=42)

Model Training

In [None]:
x = df[[
    "brightness",
    "bright_t31",
    "brightness_diff",
    "scan",
    "track",
    "latitude",
    "longitude",
    "confidence"
]]
y = df["severity"]

In [None]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [None]:
model = RandomForestClassifier(
    n_estimators=200,
    max_depth = 25,
    class_weight="balanced",
    random_state=42,
    n_jobs = -1,
    min_samples_leaf=5,
)

In [None]:
model.fit(x_train,y_train)

In [None]:
pred = model.predict(x_test)

In [None]:
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.49      0.40      0.44      3063
           1       0.85      0.82      0.83     17890
           2       0.56      0.64      0.60      9047

    accuracy                           0.72     30000
   macro avg       0.63      0.62      0.62     30000
weighted avg       0.73      0.72      0.72     30000



In [None]:
import joblib
joblib.dump(model, "severity_model_firms.pkl")

['severity_model_firms.pkl']