In [107]:
import pandas as pd
import sqlite3

# Relative locations of the two input CSV files (resolved against the
# notebook's working directory)
path1 = "Resources/fire_archive_M-C61_423958.csv"
path2 = "Resources/fire_nrt_M-C61_423958.csv"

In [108]:
# Load each CSV file into its own dataframe
df1, df2 = (pd.read_csv(csv_path) for csv_path in (path1, path2))

# Stack both frames row-wise into one dataframe with a fresh 0..n-1 index
df = pd.concat(
    [df1, df2],
    ignore_index=True,
)
df.head()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,19.405,-155.2786,362.3,1.7,1.3,2014-01-01,1147,Aqua,MODIS,100,6.03,287.3,210.6,N,1.0
1,19.4384,-155.0778,352.9,1.7,1.3,2014-01-01,1147,Aqua,MODIS,100,6.03,294.7,147.6,N,1.0
2,19.4366,-155.0624,340.5,1.7,1.3,2014-01-01,1147,Aqua,MODIS,100,6.03,294.3,95.5,N,1.0
3,19.403,-155.2622,314.0,1.7,1.3,2014-01-01,1147,Aqua,MODIS,84,6.03,285.3,32.4,N,1.0
4,19.427,-155.0791,314.7,1.7,1.3,2014-01-01,1147,Aqua,MODIS,86,6.03,288.3,30.0,N,1.0


In [109]:
# Remove the columns that are not used later on, then inspect the
# dtypes of what remains
df = df.drop(['scan', 'track', 'instrument', 'version'], axis='columns')
df.dtypes

latitude      float64
longitude     float64
brightness    float64
acq_date       object
acq_time        int64
satellite      object
confidence      int64
bright_t31    float64
frp           float64
daynight       object
type          float64
dtype: object

In [110]:
# Convert Acquired Time from INT to STR and left-pad with zeros to a fixed
# width of 4 (e.g. "5" -> "0005", "947" -> "0947") so that every value can be
# split at the same index. Vectorized string methods replace the per-row
# Python loop; values already 4+ characters long are left unchanged by zfill.
acq_times = df['acq_time'].astype(str).str.zfill(4)

# Change format from HHMM to HH:MM
acq_times = acq_times.str[:2] + ':' + acq_times.str[2:]

# Assign the formatted values back to the column
df['acq_time'] = acq_times
df.tail()

Unnamed: 0,latitude,longitude,brightness,acq_date,acq_time,satellite,confidence,bright_t31,frp,daynight,type
1291315,40.09293,-94.5488,308.78,2024-01-01,19:57,Aqua,68,278.44,12.79,D,
1291316,37.62087,-121.23211,302.41,2024-01-01,21:33,Aqua,52,288.5,5.63,D,
1291317,37.59554,-120.78835,301.75,2024-01-01,21:33,Aqua,49,287.41,5.35,D,
1291318,41.52074,-120.35908,324.74,2024-01-01,21:35,Aqua,83,281.16,27.0,D,
1291319,43.82557,-119.12015,314.06,2024-01-01,21:35,Aqua,74,275.92,20.68,D,


In [100]:
# Parse the date and time string columns into datetime64 columns.
# NOTE(review): the '%H:%M' format contains no date component, so the parsed
# 'acq_time' timestamps carry the default date 1900-01-01 -- keep that in mind
# wherever the column is compared or exported.
df = df.assign(
    acq_date=pd.to_datetime(df['acq_date'], format='%Y-%m-%d'),
    acq_time=pd.to_datetime(df['acq_time'], format='%H:%M'),
)
df.dtypes

latitude             float64
longitude            float64
brightness           float64
acq_date      datetime64[ns]
acq_time      datetime64[ns]
satellite             object
confidence             int64
bright_t31           float64
frp                  float64
daynight              object
type                 float64
dtype: object

In [113]:
# Give the acquisition date/time columns the shorter names 'date' and 'time'
df = df.rename({'acq_date': 'date', 'acq_time': 'time'}, axis='columns')
df.head()

Unnamed: 0,latitude,longitude,brightness,date,time,satellite,confidence,bright_t31,frp,daynight,type
0,19.405,-155.2786,362.3,2014-01-01,11:47,Aqua,100,287.3,210.6,N,1.0
1,19.4384,-155.0778,352.9,2014-01-01,11:47,Aqua,100,294.7,147.6,N,1.0
2,19.4366,-155.0624,340.5,2014-01-01,11:47,Aqua,100,294.3,95.5,N,1.0
3,19.403,-155.2622,314.0,2014-01-01,11:47,Aqua,84,285.3,32.4,N,1.0
4,19.427,-155.0791,314.7,2014-01-01,11:47,Aqua,86,288.3,30.0,N,1.0


In [114]:
# Append a 'year' column holding the calendar year of each row's date
df = df.assign(year=pd.DatetimeIndex(df['date']).year)
df.head()

Unnamed: 0,latitude,longitude,brightness,date,time,satellite,confidence,bright_t31,frp,daynight,type,year
0,19.405,-155.2786,362.3,2014-01-01,11:47,Aqua,100,287.3,210.6,N,1.0,2014
1,19.4384,-155.0778,352.9,2014-01-01,11:47,Aqua,100,294.7,147.6,N,1.0,2014
2,19.4366,-155.0624,340.5,2014-01-01,11:47,Aqua,100,294.3,95.5,N,1.0,2014
3,19.403,-155.2622,314.0,2014-01-01,11:47,Aqua,84,285.3,32.4,N,1.0,2014
4,19.427,-155.0791,314.7,2014-01-01,11:47,Aqua,86,288.3,30.0,N,1.0,2014
