## Script to clean wildfire data fetched from the NASA FIRMS API 
This dataset contains the dates, times and coordinates of detections rated with a fire confidence level of 'high' by the Visible Infrared Imaging Radiometer Suite (VIIRS) sensors.
1. Latitude and longitude are rounded to 2 decimal places
2. Coordinates within a 0.2x0.2 degree tile on the same date are united into a single point (the lowest, bottom-left point of the tile)

In [37]:
import pandas as pd
import numpy as np

# Load the raw detections
df = pd.read_csv('fire_confidence.csv')

for coord in ('latitude', 'longitude'):
    # Round the coordinate to 2 decimal places
    df[coord] = df[coord].round(2)
    # Keep a separate "<coord>_clean" column that starts as a copy of the
    # rounded value
    df[f'{coord}_clean'] = df[coord]

# Use shorter names for the acquisition date and time columns
df = df.rename(columns={'acq_date': 'date', 'acq_time' : 'time'})


In [41]:
# Accumulates the rows that end up in the cleaned output
processed_rows = []
# One group per date, so coordinates are only compared within the same day
grouped_df = df.groupby(by='date')

def coordinates_part_of_same_tile(lat_corner, lon_corner, lat, lon, tile_size=0.2):
    """Return True if (lat, lon) lies in the tile anchored at the given corner.

    The tile spans ``tile_size`` degrees upwards and to the right of
    (lat_corner, lon_corner); both edges are inclusive.

    Args:
        lat_corner: Latitude of the tile's bottom-left corner.
        lon_corner: Longitude of the tile's bottom-left corner.
        lat: Latitude of the point to test.
        lon: Longitude of the point to test.
        tile_size: Edge length of the tile in degrees (default 0.2).

    Returns:
        bool: True when the point is inside the tile, otherwise False.
    """
    def _within(corner, value):
        # Compare the rounded offset instead of ``corner + tile_size``:
        # in binary floating point 0.7 + 0.2 == 0.8999999999999999, which
        # would wrongly exclude a point sitting exactly on the upper edge.
        offset = round(value - corner, 6)
        return 0 <= offset <= round(tile_size, 6)

    return bool(_within(lat_corner, lat) and _within(lon_corner, lon))

def coordinates_same(lat_corner, lon_corner, lat, lon):
    """Return True when (lat, lon) is exactly the point (lat_corner, lon_corner)."""
    lat_matches = lat == lat_corner
    lon_matches = lon == lon_corner
    return bool(lat_matches and lon_matches)



# Unite coordinates date by date and collect every row for the output file.
# NOTE: each date keeps a single running reference point (initially its first
# row); rows are compared against that reference only, not against each other.
for date, group in grouped_df:

    # Initialize the reference coordinates from the first row of the group
    lowest_lat = group.iloc[0]['latitude_clean']
    lowest_lon = group.iloc[0]['longitude_clean']

    # Iterate over each row in the group
    for index, row in group.iterrows():
        lat = row['latitude_clean']
        lon = row['longitude_clean']

        # A row that already sits on the reference point needs no uniting,
        # but it must still be written out; skipping it entirely would drop
        # the first detection of every date from the cleaned file.
        if not coordinates_same(lowest_lat, lowest_lon, lat, lon):
            if coordinates_part_of_same_tile(lowest_lat, lowest_lon, lat, lon):
                # Current point lies in the tile anchored at the reference
                print(f"uniting {lowest_lat} {lowest_lon} and {lat} {lon} from {row['date']}")
                df.loc[index, 'latitude_clean'] = lowest_lat
                df.loc[index, 'longitude_clean'] = lowest_lon
                # iterrows() yields a copy, so the update above does not reach
                # `row`; mirror it so the saved file holds the united values.
                row = row.copy()
                row['latitude_clean'] = lowest_lat
                row['longitude_clean'] = lowest_lon
            elif coordinates_part_of_same_tile(lat, lon, lowest_lat, lowest_lon):
                # Reference lies in the tile anchored at the current point, so
                # the current point becomes the new (lower) reference.
                print(f"uniting {lowest_lat} {lowest_lon} and {lat} {lon}  from {row['date']}")
                lowest_lat = lat
                lowest_lon = lon

        processed_rows.append(row)

# Create a new DataFrame from the processed rows; passing the columns keeps
# the header even when there are no rows at all.
processed_df = pd.DataFrame(processed_rows, columns=df.columns)

# Save the DataFrame to CSV
processed_df.to_csv('fire_confidence_clean.csv', index=False)


uniting 38.8 15.21 and 38.79 15.21  from 2022-12-09
uniting 37.75 15.02 and 37.75 15.01  from 2022-12-26
uniting 37.76 15.03 and 37.76 15.02  from 2023-01-06
uniting 37.75 15.02 and 37.75 15.01  from 2023-01-16
uniting 44.28 7.65 and 44.28 7.64  from 2023-04-01
uniting 37.31 14.45 and 37.27 14.42  from 2023-07-12
uniting 37.18 14.08 and 37.17 14.08  from 2023-07-18
uniting 41.64 15.67 and 41.64 15.66  from 2023-07-19
uniting 37.15 14.08 and 37.15 14.06  from 2023-07-20
uniting 37.49 14.35 and 37.49 14.34  from 2023-07-24
uniting 39.35 9.56 and 39.28 9.56  from 2023-08-06
uniting 40.06 18.25 and 40.06 18.24  from 2023-08-20
uniting 38.97 17.01 and 38.96 16.99  from 2023-09-04
uniting 40.45 18.18 and 40.45 18.17  from 2023-10-07
uniting 37.98 13.66 and 37.98 13.65  from 2023-10-20
uniting 37.74 14.99 and 37.73 14.99  from 2023-11-13
