In [None]:
import json
import sys
import hvplot.pandas
import pandas as pd
from utils import load_config, fetch_api_data, write_to_csv
import numpy as np

In [None]:
# NOAA SWDI web service: https://www.ncdc.noaa.gov/swdiws
# Dataset codes that can be placed in the request URL:
# 'nx3tvs'       - (Point)   NEXRAD Level-3 Tornado Vortex Signatures
# 'nx3meso'      - (Point)   NEXRAD Level-3 Mesocyclone Signatures
# 'nx3hail'      - (Point)   NEXRAD Level-3 Hail Signatures
# 'nx3structure' - (Point)   NEXRAD Level-3 Storm Cell Structure Information
# 'warn'         - (Polygon) Severe Thunderstorm, Tornado, Flash Flood and Special Marine warnings
datasets = ["nx3tvs"]
outputFormat = "json"
daterange = "20240701:20240731"  # "periodOfRecord"

# Request each dataset in turn and save its "result" records to swdiws_<dataset>.csv.
# fetch_api_data / write_to_csv come from the project's utils module.
for dataset in datasets:
    filename = f"swdiws_{dataset}.csv"
    base_url = f"https://www.ncdc.noaa.gov/swdiws/{outputFormat}/{dataset}/{daterange}"
    data = fetch_api_data(base_url)

    # Nothing usable came back (falsy response or no "result" key): report and move on.
    if not data or "result" not in data:
        print(f"No data found or invalid response format for dataset: {dataset}")
        continue

    # "w" is passed straight through to write_to_csv (presumably a file mode — verify in utils).
    write_to_csv(data["result"], filename, "w")

In [None]:
# Dataset codes that can be placed in the request URL:
# 'nx3tvs'       - (Point)   NEXRAD Level-3 Tornado Vortex Signatures
# 'nx3meso'      - (Point)   NEXRAD Level-3 Mesocyclone Signatures
# 'nx3hail'      - (Point)   NEXRAD Level-3 Hail Signatures
# 'nx3structure' - (Point)   NEXRAD Level-3 Storm Cell Structure Information
# 'warn'         - (Polygon) Severe Thunderstorm, Tornado, Flash Flood and Special Marine warnings
datasets = ["nx3tvs"]
outputFormat = "geojson"
daterange = "20240701:20240731"  # "periodOfRecord"
# NOTE(review): appended as the last URL segment; presumably caps the number of
# returned records, so a busy month may be truncated — confirm against the SWDI docs.
numResults = 2500

# One flat dict per GeoJSON feature, accumulated across every requested dataset
merged_data_list = []

for dataset in datasets:
    base_url = f"https://www.ncdc.noaa.gov/swdiws/{outputFormat}/{dataset}/{daterange}/{numResults}"
    filename = f"swdiws_{dataset}_{outputFormat}.csv"

    data = fetch_api_data(base_url)

    # Skip datasets whose request failed or returned an unexpected payload instead
    # of crashing on data["features"].
    if not data or "features" not in data:
        print(f"No data found or invalid response format for dataset: {dataset}")
        continue

    # The payload is nested: the records live under the "features" key
    rows = data["features"]

    # Each feature is nested too: flatten its 'properties' and 'geometry' dicts into one record
    for record in rows:
        merged_data_list.append({**record["properties"], **record["geometry"]})

# Build the frame once, after all datasets have been collected
merged_df = pd.DataFrame(merged_data_list)

# Split the coordinates pair into longitude / latitude columns and drop the original.
# Guarded so that an empty download yields an empty frame rather than a KeyError.
if "coordinates" in merged_df.columns:
    merged_df[['longitude', 'latitude']] = pd.DataFrame(merged_df['coordinates'].tolist(), index=merged_df.index)
    merged_df = merged_df.drop(columns=['coordinates'])

merged_df.head(25)

In [None]:
# Normalise dtypes before exporting:
#   MXDV   -> numeric; values that cannot be parsed are coerced to NaN
#   WSR_ID -> plain strings
merged_df = merged_df.assign(
    MXDV=lambda frame: pd.to_numeric(frame['MXDV'], errors='coerce'),
    WSR_ID=lambda frame: frame['WSR_ID'].astype(str),
)

# Save the typed frame (without the index) and show the resulting column dtypes
merged_df.to_csv('swdiws_nx3tvs_geojson.csv', index=False)
print(merged_df.dtypes)

In [None]:
# OpenFEMA data sets: https://www.fema.gov/about/openfema/data-sets
base_url = "https://www.fema.gov/api/open/v2/"

# Query parameters: ask for a record count and keep only tornado incidents.
# NOTE(review): no paging parameters are sent, so the API's default page size
# applies and the result may be partial — confirm against the OpenFEMA docs.
params = {
    "$count": "true",
    "$filter": "incidentType eq 'Tornado'",
}

endpoint = "DisasterDeclarationsSummaries"
endpoint_url = f"{base_url}{endpoint}"
filename = f"{endpoint}.csv"

# fetch_api_data comes from the project's utils module; the records are read
# from the key named after the endpoint.
data = fetch_api_data(endpoint_url, params)
declaration_records = data["DisasterDeclarationsSummaries"]
tornado_summary_df = pd.DataFrame(declaration_records)
write_to_csv(declaration_records, filename, "w")

# Comma-separated list of the distinct disaster numbers, used to filter the
# follow-up housing-assistance request.
disaster_numbers = tornado_summary_df["disasterNumber"].unique()
formatted_disaster_numbers = ', '.join([f'{num}' for num in disaster_numbers])
formatted_disaster_numbers

In [None]:
# Housing-assistance records, restricted to the disaster numbers collected from
# the tornado declarations (formatted_disaster_numbers is a comma-separated string).
endpoint = "HousingAssistanceOwners"
endpoint_url = f"{base_url}{endpoint}"
filename = f"{endpoint}.csv"

params = {
    "$count": "true",
    "$filter": f"disasterNumber in ({formatted_disaster_numbers})",
}

# The records are read from the key named after the endpoint
data = fetch_api_data(endpoint_url, params)
assistance_records = data["HousingAssistanceOwners"]
housing_assistance_df = pd.DataFrame(assistance_records)
write_to_csv(assistance_records, filename, "w")

housing_assistance_df

In [None]:
%%capture --no-display
# Configure the map plot
tornadoes_plot = merged_df.hvplot.points(
    "longitude",
    "latitude",
    geo=True,
    tiles="OSM",
    frame_width=800,
    frame_height=600,
    size="MXDV",
    color="WSR_ID"
)

# Display the map
tornadoes_plot

In [None]:
# Data cleaning

In [None]:
# Clean the tornado-signature frame.
# Written as a non-mutating chain that rebinds `merged_df`, with a tolerant
# column drop, so the cell can be re-run without raising KeyError on columns
# that an earlier run already removed or renamed.
#
# Column renames applied below (old -> new):
#   max_shear -> wind_speed
#   wsr_id    -> radar_id
#   mxdv      -> velocity
#   ztime     -> event_time
#   azimuth   -> directional_movement
# NOTE(review): the original notes describe max_shear as wind shear, mxdv as a
# maximum velocity difference, wsr_id as the station id, ztime as the event time
# and azimuth as the direction of tornado movement. These meanings are not
# verifiable from this notebook — confirm against the SWDI field documentation
# (azimuth in particular may be a bearing from the radar site rather than a
# direction of motion).
merged_df = (
    merged_df
    # Standardize column names to lowercase
    .rename(columns=str.lower)
    # Drop the cell type / cell id columns; ignore them if already gone
    .drop(columns=['cell_type', 'cell_id'], errors='ignore')
    .rename(columns={
        'max_shear': 'wind_speed',
        'wsr_id': 'radar_id',
        'mxdv': 'velocity',
        'ztime': 'event_time',
        'azimuth': 'directional_movement',
    })
    # Parse the renamed 'event_time' column (formerly 'ztime') as datetimes
    .assign(event_time=lambda frame: pd.to_datetime(frame['event_time']))
)

# Report missing values per column (before duplicates are removed)
print(merged_df.isnull().sum())

# Remove duplicate rows
merged_df = merged_df.drop_duplicates()

# Display the cleaned dataframe
merged_df.head()

In [None]:
# Clean the housing-assistance frame.
#
# Fixes compared with the previous version of this cell:
#   * the zero-damage filter is now actually applied (its result used to be discarded);
#   * the "(County)" pattern is a raw string, avoiding invalid-escape warnings.
#
# Column renames applied below (old -> new):
#   approvedforfemaassistance -> approvedapplicants
#   totalapprovedihpamount    -> approvedbudget
#   repairreplaceamount       -> repairs
#   rentalamount              -> rentalbudget
#   otherneedsamount          -> misc.budget
#   totalmaxgrants            -> grants
# NOTE(review): the original notes read these as approved application counts and
# approved dollar amounts (total IHP, repair/replace, rental, other needs, grants).
# Confirm the exact meanings against the OpenFEMA data dictionary.

# Columns that are not used in the analysis
unused_housing_columns = [
    'id',
    'nofemainspecteddamage',
    'disasternumber',
    'validregistrations',
    'averagefemainspecteddamage',
    'totalinspected',
    'femainspecteddamagebetween1and10000',
    'femainspecteddamagebetween10001and20000',
    'femainspecteddamagebetween20001and30000',
    'femainspecteddamagegreaterthan30000',
    'approvedbetween1and10000',
    'approvedbetween10001and25000',
    'approvedbetween25001andmax',
]

housing_assistance_df = (
    housing_assistance_df
    # Lowercase the column names and replace spaces with underscores
    .rename(columns=lambda name: name.lower().replace(' ', '_'))
    .drop(columns=unused_housing_columns)
    .rename(columns={
        'approvedforfemaassistance': 'approvedapplicants',
        'totalapprovedihpamount': 'approvedbudget',
        'repairreplaceamount': 'repairs',
        'rentalamount': 'rentalbudget',
        'otherneedsamount': 'misc.budget',
        'totalmaxgrants': 'grants',
    })
    # Strip the " (County)" suffix from county names
    .assign(county=lambda frame: frame['county'].str.replace(r' \(County\)', '', regex=True))
    # Index rows by state and county
    .set_index(['state', 'county'])
    # Keep only rows whose totaldamage is not 0
    .loc[lambda frame: frame['totaldamage'] != 0]
    # Remove rows containing any NaN value
    .dropna()
)

housing_assistance_df