# Data Wrangling USGS
## Sid Gurajala
## Last Updated: 11/18/2024

### Import Libraries/Read in Files

In [1]:
import json 
import os
import pandas as pd 
import re as re

# Anchor every data path at the current working directory; the data folder is
# addressed relative to it, two levels up.
cwd = os.getcwd()
relative_data_dir = "../../data/"
data_dir = os.path.join(cwd, relative_data_dir)

In [2]:
# Open files: load the USGS wildland fire JSON from the raw data folder.
# The directory and the relative path are passed to os.path.join as separate
# arguments so the result does not depend on data_dir ending with a separator.
fire_json_file_path = os.path.join(data_dir, "raw/USGS_Wildland_Fire_Combined_Dataset_filtered.json")
# Read as UTF-8 explicitly rather than relying on the platform default encoding.
with open(fire_json_file_path, "r", encoding="utf-8") as file:
    fire_json = json.load(file)

### Filter Fire JSON file

We need to restrict the fires by their distance from Dearborn, Michigan, and also check the month of each fire's discovery date. We also restrict the data to the years 1980 through 2021, since that is the period for which we have data from the GBD.

In [3]:
# Build the filtered fire table: one row per fire that passes every filter.

# Pattern to look for: a listed wildfire discovery date (YYYY-MM-DD) whose
# month is 05 through 12, followed by a whitespace character.
regex = r"Listed Wildfire Discovery Date\(s\): (\d{4})-(0[5-9]|10|11|12)-(0[1-9]|[12][0-9]|3[01])\s"
# Compile once up front instead of handing the raw pattern to re.search per fire.
discovery_date_pattern = re.compile(regex)

# Filter thresholds, named so the condition below reads on its own.
max_distance_to_dearborn = 650
first_year, last_year = 1980, 2021

# Output column order for the final table.
output_columns = ["FireType", "Year", "Acreage", "Distance_to_Dearborn"]

# Collect plain dict rows and build the DataFrame once at the end; creating a
# one-row DataFrame per fire and concatenating them is far slower.
fire_rows = []
for fire in fire_json:
    if (
        fire["Distance_to_DearbornMI"] <= max_distance_to_dearborn
        and discovery_date_pattern.search(fire["Listed_Fire_Dates"])
        and first_year <= fire["Fire_Year"] <= last_year
    ):
        fire_rows.append(
            {
                "FireType": fire["Assigned_Fire_Type"],
                "Year": fire["Fire_Year"],
                "Acreage": fire["GIS_Acres"],
                "Distance_to_Dearborn": fire["Distance_to_DearbornMI"],
            }
        )

# Passing the columns explicitly keeps the schema even when no fire matches
# (pd.concat on an empty list would raise ValueError), and the result gets a
# unique 0..n-1 row index instead of every row being labelled 0.
filtered_fire_df = pd.DataFrame(fire_rows, columns=output_columns)

### Write Out Data

In [6]:
# Save the filtered fire table as a CSV in the final data folder, leaving the
# DataFrame's row index out of the file.
output_csv_path = os.path.join(data_dir, "final/USGS_wildfire_dearborn_filtered.csv")
filtered_fire_df.to_csv(output_csv_path, index=False)