## Raw Positive Class Data 

In [3]:
import pandas as pd
# Read the data from the CSV file
MODIS = pd.read_csv('Fire_archive_AlaskaOnly.csv')

# Renaming the column for detection time
MODIS = MODIS.rename(columns={'acq_date': 'DetectionTime'})

# Convert 'DetectionTime' to datetime
MODIS['DetectionTime'] = pd.to_datetime(MODIS['DetectionTime'])

# Filter data based on confidence
fireData = MODIS[MODIS['confidence'] >= 90]
fireData

Unnamed: 0,DetectionTime,latitude,longitude,brightness,scan,track,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,2000-12-28,62.9625,-162.8261,355.5,1.1,1.0,2251,Terra,MODIS,100,6.03,271.5,84.4,N,0
1,2000-12-28,62.9591,-162.8066,346.4,1.1,1.0,2251,Terra,MODIS,100,6.03,270.7,63.3,N,0
3,2000-12-28,62.9504,-162.8139,384.9,1.1,1.0,2251,Terra,MODIS,100,6.03,276.7,193.8,N,0
5,2000-12-28,62.9417,-162.8212,389.5,1.1,1.0,2251,Terra,MODIS,100,6.03,275.9,214.6,N,0
12,2001-05-19,64.7894,-147.1595,360.7,1.1,1.0,2121,Terra,MODIS,100,6.03,291.4,99.2,D,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
303316,2022-11-30,55.4176,-161.8975,324.6,1.5,1.2,742,Terra,MODIS,100,6.03,268.0,53.8,N,1
303317,2022-11-30,55.4192,-161.8815,316.6,2.8,1.6,1223,Aqua,MODIS,91,6.03,268.7,96.0,N,1
303319,2022-12-01,55.4154,-161.8838,333.5,1.1,1.0,1306,Aqua,MODIS,100,6.03,269.7,43.7,N,1
303320,2022-12-02,55.4188,-161.8941,323.0,2.2,1.4,726,Terra,MODIS,100,6.03,270.3,83.4,N,1


## Temperal clustering v1

In [2]:
# from datetime import timedelta
# # Temporal clustering
# timeWindow = timedelta(days=7)  # Time window for clustering

# # Sort data by acquisition time
# fireData = fireData.sort_values(by='DetectionTime')

# # Initialize temporal cluster ID
# temporalClusterId = 0

# # Initialize a column for temporal cluster IDs
# fireData['temporalCluster'] = 0

# # Iterate through each fire instance
# for i in fireData.index:
#     if fireData.at[i, 'temporalCluster'] == 0:
#         # Assign a new cluster ID
#         temporalClusterId += 1
#         fireData.at[i, 'temporalCluster'] = temporalClusterId

#         # Find the end time of this cluster
#         clusterEndTime = fireData.at[i, 'DetectionTime'] + timeWindow

#         # Assign the same cluster ID to all fires within the time window
#         withinWindow = fireData['DetectionTime'] <= clusterEndTime
#         fireData.loc[withinWindow & (fireData['temporalCluster'] == 0), 'temporalCluster'] = temporalClusterId

# # Save the modified DataFrame
# fireData.to_csv('Stage1_TemporalClusters.csv')

## Temporal clustering v2

In [4]:
from datetime import timedelta
import pandas as pd

# Assuming fireData is a DataFrame with a 'DetectionTime' column in datetime format
# Example: fireData = pd.read_csv('your_file.csv', parse_dates=['DetectionTime'])

# Temporal clustering
maxDuration = timedelta(days=6*30)  # Maximum duration of a cluster
interruption = timedelta(days=2)    # Maximum allowed interruption within a cluster

# Sort data by acquisition time
fireData = fireData.sort_values(by='DetectionTime')

# Initialize temporal cluster ID
temporalClusterId = 0

# Initialize a column for temporal cluster IDs
fireData['temporalCluster'] = 0

# Iterate through each fire instance
for i in fireData.index:
    if fireData.at[i, 'temporalCluster'] == 0:
        # Assign a new cluster ID
        temporalClusterId += 1
        fireData.at[i, 'temporalCluster'] = temporalClusterId

        # Find the end time of this cluster
        clusterStartTime = fireData.at[i, 'DetectionTime']
        clusterEndTime = clusterStartTime + maxDuration

        # Iterate through subsequent fire events
        for j in fireData[fireData.index > i].index:
            fireTime = fireData.at[j, 'DetectionTime']
            
            # Check if fire is within the max duration and interruption period
            if fireTime <= clusterEndTime and (fireData.at[j, 'temporalCluster'] == 0):
                # Update the cluster ID if the fire is within one week of the last fire
                if fireTime - clusterStartTime <= interruption:
                    fireData.at[j, 'temporalCluster'] = temporalClusterId
                    clusterStartTime = fireTime  # Reset the start time to the current fire's time
                else:
                    break  # Break the loop if the interruption is more than a week

# Save the modified DataFrame
fireData.to_csv('Stage1_TemporalClusters_v2.csv')


In [5]:
fireData = pd.read_csv('Stage1_TemporalClusters_v2.csv',index_col=0)

fireData = fireData.reset_index()

In [6]:
fireData[-30:]

Unnamed: 0,index,DetectionTime,latitude,longitude,brightness,scan,track,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,temporalCluster
94285,303120,2022-08-04,66.9082,-143.8032,351.3,1.0,1.0,627,Terra,MODIS,100,6.03,299.7,66.5,N,0,572
94286,303119,2022-08-04,66.9047,-143.8248,339.8,1.0,1.0,627,Terra,MODIS,100,6.03,296.2,44.3,N,0,572
94287,303118,2022-08-04,66.9356,-143.7564,328.8,1.0,1.0,627,Terra,MODIS,100,6.03,290.6,30.8,N,0,572
94288,303117,2022-08-04,66.9321,-143.778,347.1,1.0,1.0,627,Terra,MODIS,100,6.03,297.1,58.5,N,0,572
94289,303116,2022-08-04,66.9285,-143.7996,341.2,1.0,1.0,627,Terra,MODIS,100,6.03,296.0,46.0,N,0,572
94290,303111,2022-08-04,66.9101,-145.533,324.1,1.0,1.0,627,Terra,MODIS,100,6.03,287.3,25.1,N,0,572
94291,303110,2022-08-04,66.9062,-145.5551,316.6,1.0,1.0,627,Terra,MODIS,94,6.03,286.1,17.3,N,0,572
94292,303108,2022-08-04,66.9223,-143.7169,366.8,1.0,1.0,627,Terra,MODIS,100,6.03,294.8,110.3,N,0,572
94293,303126,2022-08-04,66.9354,-145.5623,385.5,1.0,1.0,627,Terra,MODIS,100,6.03,317.1,186.9,N,0,572
94294,303277,2022-08-05,70.4883,-148.6906,319.2,1.2,1.1,1236,Aqua,MODIS,98,6.03,280.8,27.1,N,3,572


## Spatial clustering

In [7]:
# !pip install hdbscan
import pandas as pd
import numpy as np
import hdbscan
from sklearn.cluster import DBSCAN
from sklearn.metrics.pairwise import haversine_distances

# Function to convert decimal degrees to radians
def deg2rad(degrees):
    return degrees * np.pi / 180

# Function to perform DBSCAN clustering with Haversine distance
def perform_dbscan(data, min_cluster_size=1, cluster_selection_epsilon=5/6371.0088):
    radian_coords = np.radians(data[['latitude', 'longitude']])
    clusterer = DBSCAN(min_samples=min_cluster_size,
                                metric='haversine',
                                eps=cluster_selection_epsilon,
                                algorithm='ball_tree')
    cluster_labels = clusterer.fit_predict(radian_coords)
    return cluster_labels


# Assuming 'temporalCluster' is the column with temporal cluster IDs
unique_temporal_clusters = fireData['temporalCluster'].unique()

# Assuming 'temporalCluster' is the column with temporal cluster IDs
unique_temporal_clusters = fireData['temporalCluster'].unique()
last_spatial_cluster_id = 0

# Iterate through each temporal cluster and perform HDBSCAN clustering
for cluster_id in unique_temporal_clusters:
    # Extract data for the current temporal cluster
    temp_cluster_data = fireData[fireData['temporalCluster'] == cluster_id]
    
    # If only one point in the cluster, assign it its own cluster ID
    if len(temp_cluster_data) == 1:
        fireData.loc[temp_cluster_data.index, 'spatialCluster'] = last_spatial_cluster_id
        last_spatial_cluster_id += 1
    else:
        # Perform spatial clustering using DBSCAN
        spatial_clusters = perform_dbscan(temp_cluster_data)#not hdbscan
        
        # Find the highest cluster ID assigned in this batch
        max_cluster_id_in_batch = max([x for x in spatial_clusters if x != -1], default=-1)

        # Adjust cluster IDs for noise points and increment IDs to be cumulative
        for i in range(len(spatial_clusters)):
            if spatial_clusters[i] == -1:
                # Assign a unique ID to noise points
                spatial_clusters[i] = last_spatial_cluster_id
                last_spatial_cluster_id += 1
            else:
                # Adjust cluster IDs to be cumulative for non-noise points
                spatial_clusters[i] += last_spatial_cluster_id

        # Update the last assigned cluster index only if there were non-noise clusters
        if max_cluster_id_in_batch != -1:
            last_spatial_cluster_id += max_cluster_id_in_batch + 1

        # Assign updated cluster IDs to the dataframe
        fireData.loc[temp_cluster_data.index, 'spatialCluster'] = spatial_clusters
    
# Save the DataFrame with the added spatial cluster information
fireData.to_csv('Stage2_SpatialClusters_DBSCAN_v2.csv')

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [10]:
fireData[-30:]

Unnamed: 0,index,DetectionTime,latitude,longitude,brightness,scan,track,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,temporalCluster,spatialCluster
94285,303120,2022-08-04,66.9082,-143.8032,351.3,1.0,1.0,627,Terra,MODIS,100,6.03,299.7,66.5,N,0,572,2118.0
94286,303119,2022-08-04,66.9047,-143.8248,339.8,1.0,1.0,627,Terra,MODIS,100,6.03,296.2,44.3,N,0,572,2118.0
94287,303118,2022-08-04,66.9356,-143.7564,328.8,1.0,1.0,627,Terra,MODIS,100,6.03,290.6,30.8,N,0,572,2118.0
94288,303117,2022-08-04,66.9321,-143.778,347.1,1.0,1.0,627,Terra,MODIS,100,6.03,297.1,58.5,N,0,572,2118.0
94289,303116,2022-08-04,66.9285,-143.7996,341.2,1.0,1.0,627,Terra,MODIS,100,6.03,296.0,46.0,N,0,572,2118.0
94290,303111,2022-08-04,66.9101,-145.533,324.1,1.0,1.0,627,Terra,MODIS,100,6.03,287.3,25.1,N,0,572,2119.0
94291,303110,2022-08-04,66.9062,-145.5551,316.6,1.0,1.0,627,Terra,MODIS,94,6.03,286.1,17.3,N,0,572,2119.0
94292,303108,2022-08-04,66.9223,-143.7169,366.8,1.0,1.0,627,Terra,MODIS,100,6.03,294.8,110.3,N,0,572,2118.0
94293,303126,2022-08-04,66.9354,-145.5623,385.5,1.0,1.0,627,Terra,MODIS,100,6.03,317.1,186.9,N,0,572,2119.0
94294,303277,2022-08-05,70.4883,-148.6906,319.2,1.2,1.1,1236,Aqua,MODIS,98,6.03,280.8,27.1,N,3,572,2120.0


In [20]:
# # !pip install hdbscan
# import pandas as pd
# import numpy as np
# import hdbscan
# from sklearn.cluster import DBSCAN
# from sklearn.metrics.pairwise import haversine_distances

# # Function to convert decimal degrees to radians
# def deg2rad(degrees):
#     return degrees * np.pi / 180

# # Function to perform HDBSCAN clustering with Haversine distance
# def perform_hdbscan(data, min_cluster_size=2, cluster_selection_epsilon=5/6371.0088):
#     radian_coords = np.radians(data[['latitude', 'longitude']])
#     clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size,
#                                 metric='haversine',
#                                 cluster_selection_epsilon=cluster_selection_epsilon,
#                                 algorithm='best')
#     cluster_labels = clusterer.fit_predict(radian_coords)
#     return cluster_labels

# # Function to perform DBSCAN clustering with Haversine distance
# def perform_dbscan(data, min_cluster_size=1, cluster_selection_epsilon=5/6371.0088):
#     radian_coords = np.radians(data[['latitude', 'longitude']])
#     clusterer = DBSCAN(min_samples=min_cluster_size,
#                                 metric='haversine',
#                                 eps=cluster_selection_epsilon,
#                                 algorithm='ball_tree')
#     cluster_labels = clusterer.fit_predict(radian_coords)
#     return cluster_labels


# # Assuming 'temporalCluster' is the column with temporal cluster IDs
# unique_temporal_clusters = fireData['temporalCluster'].unique()
# last_spatial_cluster_id = 0

# # Iterate through each temporal cluster and perform HDBSCAN clustering
# for cluster_id in unique_temporal_clusters:
#     # Extract data for the current temporal cluster
#     temp_cluster_data = fireData[fireData['temporalCluster'] == cluster_id]
    
#     # If only one point in the cluster, assign it its own cluster ID
#     if len(temp_cluster_data) == 1:
#         fireData.loc[temp_cluster_data.index, 'spatialCluster'] = last_spatial_cluster_id
#         last_spatial_cluster_id += 1
#     else:
#         # Perform spatial clustering using HDBSCAN
#         spatial_clusters = perform_hdbscan(temp_cluster_data)
#         # Adjust cluster IDs for noise points and increment IDs to be cumulative
#         for i in range(len(spatial_clusters)):
#             if spatial_clusters[i] == -1:
#                 # Assign a unique ID to noise points
#                 spatial_clusters[i] = last_spatial_cluster_id
#                 last_spatial_cluster_id += 1
#             else:
#                 # Adjust cluster IDs to be cumulative for non-noise points
#                 spatial_clusters[i] += last_spatial_cluster_id

#         # Update the last assigned cluster index
#         last_spatial_cluster_id = spatial_clusters.max() + 1

#         # Assign updated cluster IDs to the dataframe
#         fireData.loc[temp_cluster_data.index, 'spatialCluster'] = spatial_clusters

# # Save the DataFrame with the added spatial cluster information
# fireData.to_csv('Stage2_SpatialClusters_HDBSCAN.csv')

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dty

## Raw Negative Class Data 

In [3]:
import pandas as pd
# Read the data from the CSV file
MODIS = pd.read_csv('fire_archive_M-C61_402012.csv')

# Renaming the column for detection time
MODIS = MODIS.rename(columns={'acq_date': 'DetectionTime'})

# Convert 'DetectionTime' to datetime
MODIS['DetectionTime'] = pd.to_datetime(MODIS['DetectionTime'])

# Filter data based on confidence
fireData = MODIS[MODIS['confidence'] <= 10]
fireData

Unnamed: 0,latitude,longitude,brightness,scan,track,DetectionTime,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
7,67.1907,-147.2679,303.1,1.1,1.0,2001-07-04,2131,Terra,MODIS,0,6.03,288.2,3.6,D,0
28,67.0162,-158.2815,302.2,1.0,1.0,2002-05-17,2232,Terra,MODIS,0,6.03,290.5,8.1,D,0
102,65.0727,-146.1791,309.4,1.1,1.1,2002-05-25,641,Terra,MODIS,7,6.03,286.7,11.4,N,0
108,65.3732,-148.7687,319.0,1.3,1.1,2002-05-25,641,Terra,MODIS,8,6.03,290.1,22.9,N,0
199,65.3202,-148.7612,326.7,1.0,1.0,2002-05-26,724,Terra,MODIS,0,6.03,293.1,23.0,N,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210933,67.3191,-142.8660,319.1,1.0,1.0,2020-06-13,2129,Terra,MODIS,0,6.03,297.4,12.4,D,0
210966,66.6689,-144.6659,305.0,1.3,1.1,2020-06-14,2228,Aqua,MODIS,0,6.03,289.8,6.1,D,0
210981,66.9135,-145.0827,320.0,1.0,1.0,2020-06-18,2204,Aqua,MODIS,0,6.03,295.7,13.4,D,0
210997,64.3888,-147.3094,301.8,1.2,1.1,2020-07-03,2105,Terra,MODIS,10,6.03,289.5,4.0,D,0
