First I import the libraries and load the dataset.

In [1]:
import pandas as pd # pandas for reading the dataset and array manipulation
import os # os for paths; specifically current working directory
import plotly.graph_objects as go # plotly.graph_objects for creating graphs

# Build the dataset path from the current working directory.
# os.path.join picks the correct separator for the host OS; the previous
# hard-coded "\\" separators only resolved on Windows.
path = os.path.join(os.getcwd(), "data", "fire_archive_M-C61_626683.csv.xz")
# Load the dataset (pandas infers the xz compression from the ".xz" extension).
data = pd.read_csv(path)
# Show the dataset columns.
data.columns

Index(['latitude', 'longitude', 'brightness', 'scan', 'track', 'acq_date',
       'acq_time', 'satellite', 'instrument', 'confidence', 'version',
       'bright_t31', 'frp', 'daynight', 'type'],
      dtype='object')

Next, I combine `acq_date` (acquisition date) and `acq_time` (acquisition time) into a single datetime column, which makes time-based analysis easier.

In [3]:
from datetime import timedelta # import timedelta from datetime to convert hours and minutes to be addable to date

# acq_time encodes the time of day as an HHMM integer (e.g. 250 -> 02:50):
# integer-dividing by 100 gives the hours, the remainder gives the minutes.
# The conversion is vectorized with pd.to_timedelta instead of building one
# Python timedelta per row, and it operates on the column itself so the result
# keeps the frame's index (no positional re-indexing that could misalign rows).
data['acq_datetime'] = (
    pd.to_datetime(data['acq_date'])
    + pd.to_timedelta(data['acq_time'] // 100, unit='h')
    + pd.to_timedelta(data['acq_time'] % 100, unit='m')
)

# Remove redundant columns in a single, non-inplace drop:
#  - acq_date and acq_time are now merged into acq_datetime
#  - every value in the instrument column is the same, so it carries no information
data = data.drop(columns=['acq_time', 'acq_date', 'instrument'])
# Show the data after adding the datetime and removing the redundant columns.
data

AttributeError: 'Series' object has no attribute 'normalize'

Next, I bin the confidence into low (≤ 30), nominal (31–70), and high (> 70) categories. Turning the value into a categorical feature can make it easier to analyze.

In [29]:
# Bucket the confidence score into three labelled, right-closed intervals:
#   (-1, 30] -> 'l' (low), (30, 70] -> 'n' (nominal), (70, 101] -> 'h' (high)
data['confidence_binned'] = pd.cut(
    data['confidence'],
    bins=[-1, 30, 70, 101],
    labels=['l', 'n', 'h'],
    right=True,
)

Then I show the data.

In [30]:
# Display the frame; the rendered table is truncated to its first and last rows.
data

Unnamed: 0,latitude,longitude,brightness,scan,track,satellite,confidence,version,bright_t31,frp,daynight,type,acq_datetime,confidence_binned
0,38.5422,-78.3047,304.8,2.8,1.6,Terra,23,6.03,280.9,40.3,N,0,2000-11-01 02:50:00,l
1,38.5451,-78.3107,309.9,2.8,1.6,Terra,79,6.03,280.7,58.8,N,0,2000-11-01 02:50:00,h
2,38.5563,-78.3084,309.4,2.8,1.6,Terra,70,6.03,280.4,54.5,N,0,2000-11-01 02:50:00,n
3,38.5586,-78.3170,302.3,2.8,1.6,Terra,45,6.03,279.8,36.0,N,0,2000-11-01 02:50:00,n
4,31.3393,-89.9124,304.9,1.0,1.0,Terra,62,6.03,287.5,8.5,N,0,2000-11-01 04:27:00,n
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2960417,41.6966,-99.1437,319.8,1.1,1.0,Aqua,80,61.03,284.7,21.2,D,0,2025-01-31 20:28:00,h
2960418,42.4419,-94.3783,300.6,1.2,1.1,Aqua,40,61.03,284.3,6.3,D,0,2025-01-31 20:28:00,n
2960419,41.4014,-97.9485,319.9,1.0,1.0,Aqua,80,61.03,284.9,19.0,D,0,2025-01-31 20:28:00,h
2960420,41.4032,-97.9369,322.9,1.0,1.0,Aqua,82,61.03,285.0,22.1,D,0,2025-01-31 20:28:00,h
