In [135]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

latitude: Latitude of the fire pixel detected by the satellite (degrees)

longitude: Longitude of the fire pixel detected by the satellite (degrees)

brightness: Brightness temperature of the fire pixel (in K)
scan: Area of a MODIS pixel at the Earth’s surface (Along-scan: ΔS)

track: Area of a MODIS pixel at the Earth’s surface (Along-track: ΔT)

acq_time: Time at which the fire was detected

satellite: Satellite used to detect the fire

instrument: MODIS

confidence: Detection confidence (range 0-100)

bright_t31: Band 31 brightness temperature of the pixel (in K)
frp: Fire radiative power (in MW- megawatts)

daynight: Detected during the day or night. Either Day(D) or Night(N)

- 0= presumed vegetation fire 
   - 1= active volcano
   - 2= other static land source
   - 3= offshore

In [136]:
df=pd.read_csv("fire.csv")
df

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,-27.57028,30.72040,304.96,0.61,0.71,2025-03-01,0,N,VIIRS,n,2.0NRT,282.85,0.96,N
1,-26.66647,27.82798,297.39,0.40,0.60,2025-03-01,0,N,VIIRS,n,2.0NRT,274.08,3.24,N
2,-28.35803,23.50957,310.29,0.46,0.47,2025-03-01,0,N,VIIRS,n,2.0NRT,288.57,1.64,N
3,-33.97059,18.57978,301.72,0.45,0.39,2025-03-01,2,N,VIIRS,n,2.0NRT,289.70,0.68,N
4,-34.13718,18.95899,319.55,0.47,0.40,2025-03-01,2,N,VIIRS,n,2.0NRT,287.21,2.40,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5009281,-29.98458,29.10835,296.18,0.41,0.61,2025-07-01,2356,N,VIIRS,n,2.0NRT,274.56,1.07,N
5009282,-30.67487,30.34217,295.09,0.49,0.65,2025-07-01,2356,N,VIIRS,n,2.0NRT,283.05,0.32,N
5009283,-31.32767,29.87484,305.35,0.47,0.64,2025-07-01,2356,N,VIIRS,n,2.0NRT,282.48,0.62,N
5009284,-30.00682,29.09813,298.88,0.41,0.61,2025-07-01,2356,N,VIIRS,n,2.0NRT,274.19,1.28,N


In [137]:
df.isnull().sum()

latitude      0
longitude     0
brightness    0
scan          0
track         0
acq_date      0
acq_time      0
satellite     0
instrument    0
confidence    0
version       0
bright_t31    0
frp           0
daynight      0
dtype: int64

In [138]:
print(df['satellite'].value_counts())
print(df['daynight'].value_counts())
print(df['version'].value_counts())
print(df['acq_time'].value_counts())
print(df['instrument'].value_counts())


satellite
N    5009286
Name: count, dtype: int64
daynight
D    3603789
N    1405497
Name: count, dtype: int64
version
2.0NRT    5009286
Name: count, dtype: int64
acq_time
1142    42779
1225    40432
1147    37124
1219    36966
1200    36640
        ...  
832         2
2135        2
1530        1
1335        1
1354        1
Name: count, Length: 849, dtype: int64
instrument
VIIRS    5009286
Name: count, dtype: int64


In [139]:
df.head()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,-27.57028,30.7204,304.96,0.61,0.71,2025-03-01,0,N,VIIRS,n,2.0NRT,282.85,0.96,N
1,-26.66647,27.82798,297.39,0.4,0.6,2025-03-01,0,N,VIIRS,n,2.0NRT,274.08,3.24,N
2,-28.35803,23.50957,310.29,0.46,0.47,2025-03-01,0,N,VIIRS,n,2.0NRT,288.57,1.64,N
3,-33.97059,18.57978,301.72,0.45,0.39,2025-03-01,2,N,VIIRS,n,2.0NRT,289.7,0.68,N
4,-34.13718,18.95899,319.55,0.47,0.4,2025-03-01,2,N,VIIRS,n,2.0NRT,287.21,2.4,N


Assuming the op feature to be day and night

In [140]:
#get all numeric features
num_features=[features for features in df.columns if df[features].dtype != 'O']
print("Num of numeric features: ", len(num_features))

Num of numeric features:  8


In [141]:
##categorical features
cat_features=[features for features in df.columns if df[features].dtype == 'O']
cat_features

['acq_date', 'satellite', 'instrument', 'confidence', 'version', 'daynight']

In [142]:
df.drop(['version','instrument','satellite'],axis=1,inplace=True)

In [143]:
df['acq_datetime'] = pd.to_datetime(df['acq_date'].astype(str) + df['acq_time'].astype(str).str.zfill(4), format='%Y-%m-%d%H%M')


In [144]:
df.drop(['acq_date', 'acq_time'], axis=1, inplace=True)

In [145]:
df

Unnamed: 0,latitude,longitude,brightness,scan,track,confidence,bright_t31,frp,daynight,acq_datetime
0,-27.57028,30.72040,304.96,0.61,0.71,n,282.85,0.96,N,2025-03-01 00:00:00
1,-26.66647,27.82798,297.39,0.40,0.60,n,274.08,3.24,N,2025-03-01 00:00:00
2,-28.35803,23.50957,310.29,0.46,0.47,n,288.57,1.64,N,2025-03-01 00:00:00
3,-33.97059,18.57978,301.72,0.45,0.39,n,289.70,0.68,N,2025-03-01 00:02:00
4,-34.13718,18.95899,319.55,0.47,0.40,n,287.21,2.40,N,2025-03-01 00:02:00
...,...,...,...,...,...,...,...,...,...,...
5009281,-29.98458,29.10835,296.18,0.41,0.61,n,274.56,1.07,N,2025-07-01 23:56:00
5009282,-30.67487,30.34217,295.09,0.49,0.65,n,283.05,0.32,N,2025-07-01 23:56:00
5009283,-31.32767,29.87484,305.35,0.47,0.64,n,282.48,0.62,N,2025-07-01 23:56:00
5009284,-30.00682,29.09813,298.88,0.41,0.61,n,274.19,1.28,N,2025-07-01 23:56:00


In [146]:
# Create future df shifted by -2 hours (t+2h becomes aligned with t)
future_df = df[['acq_datetime', 'latitude', 'longitude']].copy()
future_df['acq_datetime'] = future_df['acq_datetime'] - pd.Timedelta(hours=2)
future_df.rename(columns={'latitude': 'lat_future', 'longitude': 'lon_future'}, inplace=True)




In [147]:
future_df

Unnamed: 0,acq_datetime,lat_future,lon_future
0,2025-02-28 22:00:00,-27.57028,30.72040
1,2025-02-28 22:00:00,-26.66647,27.82798
2,2025-02-28 22:00:00,-28.35803,23.50957
3,2025-02-28 22:02:00,-33.97059,18.57978
4,2025-02-28 22:02:00,-34.13718,18.95899
...,...,...,...
5009281,2025-07-01 21:56:00,-29.98458,29.10835
5009282,2025-07-01 21:56:00,-30.67487,30.34217
5009283,2025-07-01 21:56:00,-31.32767,29.87484
5009284,2025-07-01 21:56:00,-30.00682,29.09813


In [148]:
# STEP 1: Get common timestamps
valid_times = set(df['acq_datetime']).intersection(set(future_df['acq_datetime']))

# STEP 2: Filter using common timestamps
df_filtered = df[df['acq_datetime'].isin(valid_times)].copy()
future_filtered = future_df[future_df['acq_datetime'].isin(valid_times)].copy()

# STEP 3: Drop duplicate timestamps (keep first)
df_filtered = df_filtered.drop_duplicates(subset='acq_datetime', keep='first').copy()
future_filtered = future_filtered.drop_duplicates(subset='acq_datetime', keep='first').copy()

# STEP 4: Sort and reset index
df_filtered.sort_values(by='acq_datetime', inplace=True)
future_filtered.sort_values(by='acq_datetime', inplace=True)
df_filtered.reset_index(drop=True, inplace=True)
future_filtered.reset_index(drop=True, inplace=True)

# STEP 5: Confirm alignment
assert df_filtered.shape[0] == future_filtered.shape[0], "Mismatch in aligned rows!"

print("✅ Aligned rows:", df_filtered.shape[0])


✅ Aligned rows: 4427


In [149]:
# One-hot encode 'daynight'
# Define the mappings
confidence_map = {'low': 0, 'nominal': 1, 'high': 2}
daynight_map = {'D': 0, 'N': 1}  # Ensure you're using correct values from your data ('D'/'N')

# Apply the mappings to df_filtered
df_filtered['confidence'] = df_filtered['confidence'].map(confidence_map)
df_filtered['daynight'] = df_filtered['daynight'].map(daynight_map)




In [150]:
df_filtered

Unnamed: 0,latitude,longitude,brightness,scan,track,confidence,bright_t31,frp,daynight,acq_datetime
0,50.80745,20.53053,302.17,0.39,0.44,,270.18,0.95,1,2025-03-01 01:17:00
1,49.47588,0.53562,297.23,0.55,0.51,,273.44,0.91,1,2025-03-01 01:19:00
2,38.79449,15.21314,312.12,0.43,0.46,,271.55,1.93,1,2025-03-01 01:21:00
3,65.56101,22.22199,304.54,0.35,0.56,,266.69,1.35,1,2025-03-01 02:53:00
4,64.66219,21.28280,298.34,0.37,0.58,,270.18,1.41,1,2025-03-01 02:55:00
...,...,...,...,...,...,...,...,...,...,...
4422,63.55983,-116.22204,325.78,0.44,0.62,,292.51,8.30,0,2025-07-01 21:31:00
4423,65.12093,-148.19632,367.00,0.45,0.39,,338.19,124.44,0,2025-07-01 21:33:00
4424,67.64730,83.21359,338.12,0.42,0.38,,279.43,53.79,0,2025-07-01 21:46:00
4425,61.75395,66.95597,329.21,0.42,0.37,,289.12,4.50,1,2025-07-01 21:48:00


In [163]:
features = ['latitude', 'longitude', 'brightness', 'scan', 'track', 'confidence','bright_t31', 'frp','daynight','acq_datetime']
X = df_filtered[features]

y = future_filtered[['lat_future', 'lon_future']]


In [164]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [165]:
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


DTypePromotionError: The DType <class 'numpy.dtypes.DateTime64DType'> could not be promoted by <class 'numpy.dtypes.Float64DType'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Int64DType'>, <class 'numpy.dtypes.DateTime64DType'>)

In [None]:
from sklearn.metrics import mean_squared_error


In [None]:
# Predict
y_pred = model.predict(X_test)

# Evaluate
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("✅ RMSE:", rmse)

✅ RMSE: 21.659451524433347


In [None]:
# Set a threshold for "acceptable" prediction error (in degrees)
threshold = 0.5

# Calculate absolute error
errors = np.abs(y_test - y_pred)

# Check if both lat and lon errors are within the threshold
within_threshold = (errors['lat_future'] <= threshold) & (errors['lon_future'] <= threshold)

# Calculate accuracy
accuracy = np.mean(within_threshold) * 100
print(f"✅ Custom Accuracy (±{threshold}°): {accuracy:.2f}%")


✅ Custom Accuracy (±0.5°): 0.56%


In [None]:
def predict_future_location(model, latitude, longitude, brightness, scan, track, confidence, bright_t31, frp, daynight):
    # Encode daynight using one-hot
    daynight_encoded = {'day': 0, 'night': 1}
    if daynight.lower() not in daynight_encoded:
        raise ValueError("daynight must be 'day' or 'night'")
    daynight_val = daynight_encoded[daynight.lower()]

    # Encode confidence
    confidence_map = {'low': 0, 'nominal': 1, 'high': 2}
    if isinstance(confidence, str):
        confidence = confidence.lower()
        if confidence not in confidence_map:
            raise ValueError("confidence must be 'low', 'nominal', or 'high'")
        confidence_val = confidence_map[confidence]
    elif isinstance(confidence, (int, float)):
        # If already numerical, keep as is
        confidence_val = confidence
    else:
        raise ValueError("confidence must be a string or number")

    # Prepare input as DataFrame
    input_df = pd.DataFrame([{
        'latitude': latitude,
        'longitude': longitude,
        'brightness': brightness,
        'scan': scan,
        'track': track,
        'confidence': confidence_val,
        'bright_t31': bright_t31,
        'frp': frp,
        'daynight': daynight_val
    }])

    # Predict
    prediction = model.predict(input_df)
    lat_future, lon_future = prediction[0]
    print(f"📍 Predicted Location in 2 Hours:\nLatitude: {lat_future:.4f}, Longitude: {lon_future:.4f}")
    return lat_future, lon_future


In [None]:
predict_future_location(
    model=model,
    latitude=12.34,
    longitude=77.56,
    brightness=330.5,
    scan=1.2,
    track=1.1,
    confidence='high',
    bright_t31=310.2,
    frp=25.6,
    daynight='Day'
)


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- daynight
