In [1]:
from geopandas import GeoDataFrame, GeoSeries
from shapely.geometry import Point, Polygon, MultiPolygon
from pyproj import CRS
import matplotlib.pyplot as plt
import contextily as ctx
import geopandas as gpd
import pandas as pd
import pyproj
import fiona
import boto3

In [3]:
# Root folder of the downloaded datasets; change this one value to run the notebook on another machine.
DATA_DIR = 'C:/Users/ryane/Documents/Github/Flood-Analysis/data'

# NOTE(review): each `.crs = ...` assignment below relabels the layer as EPSG:4326 without
# reprojecting its coordinates — confirm the source files are already lon/lat compatible with WGS84.

# US state boundaries:
# https://catalog.data.gov/dataset/tiger-line-shapefile-2017-nation-u-s-current-state-and-equivalent-national
states = gpd.read_file(DATA_DIR + '/Census-State/tl_2017_us_state.shp')
states.crs = fiona.crs.from_epsg(4326)
# FAA airports:
# https://ais-faa.opendata.arcgis.com/datasets/e747ab91a11045e8b3f8a3efd093d3b5_0
airports = gpd.read_file(DATA_DIR + '/FAA-Airports/Airports.shp')
airports.crs = fiona.crs.from_epsg(4326)
# Metropolitan / micropolitan statistical areas (CBSA):
# https://catalog.data.gov/dataset/tiger-line-shapefile-2019-nation-u-s-current-metropolitan-statistical-area-micropolitan-statist
us_cbsa = gpd.read_file(DATA_DIR + '/Census-CBSA/tl_2019_us_cbsa.shp')
us_cbsa.crs = fiona.crs.from_epsg(4326)
# American Housing Survey 2017; only the CBSA code and flood-insurance columns are read:
# http://www2.census.gov/programs-surveys/ahs/2017/AHS%202017%20National%20PUF%20v3.0%20Flat%20CSV.zip?#
ahs_data = pd.read_csv(DATA_DIR + '/Census-AHS/ahs2017n.csv', usecols=['OMB13CBSA', 'DPFLDINS'])

In [None]:
def meters_to_degrees(distance_meters):
    """Convert a distance in meters to degrees of arc.

    The input is divided by a fixed number of meters per degree, so the same
    factor is applied whatever the latitude or direction.

    NOTE(review): a single factor is only an approximation for east-west
    distances away from the equator — confirm that is acceptable here.
    """
    # Constant noted by the author as derived from MATLAB calculations; compare
    # https://sciencing.com/convert-distances-degrees-meters-7858322.html (111,139)
    meters_per_degree = 111194.926644559
    return distance_meters / meters_per_degree

In [None]:
def df_to_gdf(df, what_on, metadata=None):
    """Merge a label table with the image metadata and return a GeoDataFrame.

    Parameters
    ----------
    df : DataFrame
        Label rows that share the column `what_on` with the metadata.
    what_on : str
        Name of the column to merge on (the notebook uses 'url' and 'image_uuid').
    metadata : GeoDataFrame, optional
        Image metadata to merge with. When omitted, the notebook-level
        `image_metadata_gdf` is used, so it must exist before the call.

    Returns
    -------
    GeoDataFrame
        The merged rows with every row containing a missing value removed,
        tagged with EPSG:4326.
    """
    if metadata is None:
        # Looked up at call time, so the function may be defined before the metadata cell runs.
        metadata = image_metadata_gdf

    merged_df = df.merge(metadata, on=what_on)
    # dropna() with no arguments removes a row if ANY of its columns is missing.
    clean_df = merged_df.dropna()
    # presumably the geometry column comes from `metadata` — verify if another frame is passed in
    return GeoDataFrame(clean_df, crs=fiona.crs.from_epsg(4326))

In [None]:
def label_join(label):
    """Spatially join `label` to the notebook-level `states` layer.

    Keeps the features of `label` that lie within a state, adds that state's
    columns to them, and removes the helper `index_right` column that the
    join produces.

    NOTE(review): newer geopandas releases rename the `op` keyword to
    `predicate` — confirm against the installed version.
    """
    return gpd.sjoin(label, states, op='within').drop(columns=['index_right'])

In [None]:
def images_per_cbsa(label, cbsa):
    """Count how many features of `label` fall within each CBSA polygon.

    Parameters
    ----------
    label : GeoDataFrame
        Features to count (image locations in this notebook).
    cbsa : GeoDataFrame
        CBSA polygons; must provide a `NAMELSAD` column after the join.

    Returns
    -------
    DataFrame
        One row per distinct `NAMELSAD` value, indexed by that value, with a
        single column holding the number of joined rows. Rows appear in order
        of first occurrence.
    """
    # Left join: features of `label` that match no CBSA are kept as well
    # (presumably with a missing NAMELSAD, which then gets its own tally — verify).
    images_within_cbsa = gpd.sjoin(label, cbsa, how='left', op='within')

    # Single pass over the names instead of re-scanning the list once per distinct name.
    image_counter = {}
    for cbsa_name in images_within_cbsa.NAMELSAD.tolist():
        image_counter[cbsa_name] = image_counter.get(cbsa_name, 0) + 1

    return pd.DataFrame.from_dict(image_counter, orient='index')

In [None]:
def get_column_values(dataset_name, dataset_column):
    """Count how often each distinct value occurs in one column.

    Parameters
    ----------
    dataset_name : DataFrame
        Table to read from.
    dataset_column : str
        Name of the column whose values are tallied.

    Returns
    -------
    dict
        Maps each distinct value to its number of occurrences, in order of
        first appearance. Empty when the column has no rows.
    """
    column_dictionary_counter = {}
    # One pass over the values instead of a full list.count() scan per distinct value.
    for value in dataset_name[dataset_column].tolist():
        column_dictionary_counter[value] = column_dictionary_counter.get(value, 0) + 1

    return column_dictionary_counter

In [None]:
def months_with_images(state_abbreviation, label, year):
    """Print how many of a state's images were taken in each month.

    Parameters
    ----------
    state_abbreviation : str
        Value compared against the `STUSPS` column added by `label_join`.
    label : GeoDataFrame
        Image locations with a string `timestamp` column.
    year : str
        Pattern searched for inside each timestamp; the notebook passes a
        year such as '2018' or a year-month such as '2018-10'.

    Returns
    -------
    None
        The month-name -> count dictionary is printed, not returned.
    """
    images_by_state = label_join(label)

    state_images = images_by_state[images_by_state.STUSPS == state_abbreviation]
    state_images_timestamp = state_images[state_images['timestamp'].str.contains(year, na=False, case=True)]

    # Tally month names in one pass; keys appear in order of first occurrence.
    # This replaces a nested loop that re-parsed every timestamp for each new month.
    stamps = {}
    for raw_stamp in state_images_timestamp.timestamp:
        month = pd.Timestamp(raw_stamp).month_name()
        stamps[month] = stamps.get(month, 0) + 1

    print(stamps)

In [None]:
def flood_insurance_by_cbsa(cbsa_values, survey=None):
    """Collect flood-insurance responses per CBSA and the insured share of each.

    Parameters
    ----------
    cbsa_values : dict
        Maps a CBSA code (as found in `OMB13CBSA`) to the number of survey
        rows for that code, e.g. the output of `get_column_values`.
    survey : DataFrame, optional
        Table with `OMB13CBSA` and `DPFLDINS` columns. Defaults to the
        notebook-level `ahs_data`.

    Returns
    -------
    tuple(dict, dict)
        `cbsa_dict` maps each CBSA code to the list of its `DPFLDINS` values
        in row order; `percent_with_fldins` maps each code to the number of
        those values listed in `has_flood_insurance` divided by the count
        from `cbsa_values`. That is a fraction, not a percentage.

    Raises
    ------
    ZeroDivisionError
        If a count in `cbsa_values` is 0.
    """
    if survey is None:
        survey = ahs_data

    # NOTE(review): all three codes are counted as "has flood insurance" — confirm against the AHS codebook.
    has_flood_insurance = ["'2'", "'1'", "'3'"]
    cbsa_dict = {}
    percent_with_fldins = {}

    for cbsa_code, household_count in cbsa_values.items():
        # A boolean mask selects the rows for this CBSA in one vectorized step and
        # does not depend on the frame having a 0..n-1 index, unlike per-row .loc lookups.
        responses = survey.DPFLDINS[survey.OMB13CBSA == cbsa_code].tolist()
        cbsa_dict[cbsa_code] = responses

        insured = sum(responses.count(code) for code in has_flood_insurance)
        percent_with_fldins[cbsa_code] = insured / household_count

    return (cbsa_dict, percent_with_fldins)

## Now we can read in the LADI image metadata from its Amazon S3 bucket:

In [None]:
# S3 bucket and object key of the LADI image metadata; `bucket_name` and `client`
# are reused by the later download cells.
bucket_name = 'ladi'
file_1_path = 'Labels/ladi_images_metadata.csv'
# NOTE(review): presumably boto3 picks up credentials/region from the environment — confirm bucket access is configured
client = boto3.client('s3')

In [None]:
# Download the image metadata CSV from S3 and parse it straight from the response body.
obj_1 = client.get_object(Bucket = bucket_name, Key = file_1_path)

image_metadata = pd.read_csv(obj_1['Body'])
# Rename `uuid` to `image_uuid`, the column name the machine-label merge joins on.
image_metadata_renamed = image_metadata.rename(columns={"uuid": "image_uuid"})
# dropna() with no arguments drops a row if ANY column is missing, not only the coordinates.
image_metadata_clean = image_metadata_renamed.dropna()

latitude = image_metadata_clean['gps_lat'].tolist() 
longitude = image_metadata_clean['gps_lon'].tolist()

# points_from_xy takes x (longitude) first, then y (latitude).
image_metadata_gdf = GeoDataFrame(image_metadata_clean, crs=fiona.crs.from_epsg(4326), geometry=gpd.points_from_xy(longitude, latitude))


In [None]:
# Display the image-metadata GeoDataFrame built from the cleaned metadata.
image_metadata_gdf

## Now we can read in the LADI human labels, specifically those labeled 'damage' or 'flood':

In [None]:
#################### CLEAN AND VALIDATE LADI HUMAN LABELS #########################
human_label_filepath = "Labels/ladi_aggregated_responses_url.tsv"
obj_2 = client.get_object(Bucket = bucket_name, Key = human_label_filepath)
human_label_file = pd.read_csv(obj_2['Body'],sep = '\t' )

# Strip leading/trailing '[', ']' and '|' characters from each answer, then keep only
# the text before the first comma.
# .str.split(",").str[0] yields one value per row. Assigning the multi-column frame from
# split(..., expand=True) to the single "Answer" column depends on the pandas version:
# recent releases raise a ValueError, while older ones are understood to keep only the first column.
human_label_file["Answer"] = human_label_file["Answer"].str.strip('[|]')
human_label_file["Answer"] = human_label_file["Answer"].str.split(",").str[0]

# Keep answers mentioning 'damage' or 'infrastructure' (case-insensitive), drop those
# mentioning 'none', then split the remainder into flood and damage subsets.
label_damage_infra = human_label_file[human_label_file['Answer'].str.contains('damage|infrastructure',na=False,case=False)]
label_clean = label_damage_infra[~label_damage_infra['Answer'].str.contains('none',na=False,case=False)]
human_flood_label = label_clean[label_clean['Answer'].str.contains('flood',na=False,case=False)]
human_damage_label = label_clean[label_clean['Answer'].str.contains('damage',na=False,case=False)]

In [None]:
# Display the human labels whose answer mentions 'damage'.
human_damage_label

In [None]:
# Merge the damage labels with the image metadata on 'url' to obtain a GeoDataFrame.
human_labeled_damage = df_to_gdf(human_damage_label, 'url')

In [None]:
# Number of damage-labeled rows left after the merge and missing-value drop.
print(len(human_labeled_damage))

In [None]:
# Number of flood-labeled rows before they are merged with the image metadata.
print(len(human_flood_label))

In [None]:
# Merge the flood labels with the image metadata on 'url' to obtain a GeoDataFrame.
human_labeled_floods = df_to_gdf(human_flood_label, 'url')

In [None]:
# Column names of the merged flood-label GeoDataFrame.
human_labeled_floods.columns

In [None]:
# Number of flood-labeled rows left after the merge and missing-value drop.
print(len(human_labeled_floods))

## Below is the code to read in the LADI machine labels:

#################### CLEAN AND VALIDATE LADI MACHINE LABELS #########################

# Download the machine labels, reading only the image id and the label text.
machine_label_filepath = "Labels/ladi_machine_labels.csv"
obj_3 = client.get_object(Bucket = bucket_name, Key = machine_label_filepath)
machine_flood_label = pd.read_csv(obj_3['Body'], usecols=['image_uuid', 'label_text'])
# Keep the labels whose text mentions 'flood' (case-insensitive).
machine_flood_label_clean = machine_flood_label[machine_flood_label['label_text'].str.contains('flood', na=False,case=False)]

# Attach image metadata (and geometry) by joining on 'image_uuid'.
machine_labeled_floods = df_to_gdf(machine_flood_label_clean, 'image_uuid')

print(len(machine_labeled_floods))

# Count the machine-labeled flood images whose timestamp contains '2019'.
# The loop previously tested an undefined name (`timestamp`) instead of the loop
# value, which raises NameError; each timestamp string is now checked directly.
lst = [stamp for stamp in machine_labeled_floods.timestamp if '2019' in stamp]
print(len(lst))

## Now we can draw a circle of a given radius around each airport, changing the geometries from Points to Polygons:

In [None]:
# Display the airport geometries (this cell sits before the buffering cell).
airports.geometry

In [None]:
# Replace each airport geometry with a circle of roughly 5 miles (8046.72 m, converted to degrees).
# The buffer overwrites `airports` in place, so it is applied only while no geometry is a
# Polygon yet; re-running this cell then leaves the circles unchanged instead of growing them.
# NOTE(review): buffering in degrees gives circles that are not truly round on the ground — confirm that is acceptable.
already_buffered = (airports.geometry.geom_type == 'Polygon').any()
if not already_buffered:
    airports.geometry = airports.geometry.buffer(meters_to_degrees(8046.72)) #equal to 5 miles in meters
airports.geometry

## Now we filter the images by state, by the year they were taken, and by label dataset:

In [None]:
def airports_in_range(state_abbreviation, label, year):
    """Collect the per-state image, airport, CBSA and flood-insurance summaries.

    Parameters
    ----------
    state_abbreviation : str
        Value compared against the `STUSPS` column added by `label_join`.
    label : GeoDataFrame
        Image locations with a string `timestamp` column.
    year : str
        Pattern searched for inside each timestamp (e.g. '2018' or '2018-10').

    Returns
    -------
    tuple
        In order:
        0. airports_within_range - state airports joined to the images they contain
        1. images_within_range   - selected images lying within a state airport geometry
        2. state_images_timestamp - all selected images of the state
        3. state_cbsa            - CBSA rows whose STUSPS contains the abbreviation
        4. count                 - number of selected images
        5. num_images_per_cbsa   - DataFrame of image counts per CBSA
        6. percent_with_fldins   - second element of `flood_insurance_by_cbsa`'s result
        7. stamps                - dict of month name -> number of selected images
    """
    images_by_state = label_join(label)
    airports_by_state = label_join(airports)

    state_images = images_by_state[images_by_state.STUSPS == state_abbreviation]
    state_images_timestamp = state_images[state_images['timestamp'].str.contains(year, na=False, case=True)]
    state_airports = airports_by_state[airports_by_state.STUSPS == state_abbreviation]

    # These three results were returned without ever being computed (NameError on
    # every call); they are now derived from the state's images and airports.
    images_within_range = gpd.sjoin(state_images_timestamp, state_airports, op='within')
    images_within_range = images_within_range.drop(columns=['index_right'])
    airports_within_range = gpd.sjoin(state_airports, state_images_timestamp, op='contains')

    stamps = {}
    for raw_stamp in state_images_timestamp.timestamp:
        month = pd.Timestamp(raw_stamp).month_name()
        stamps[month] = stamps.get(month, 0) + 1

    cbsa_by_state = label_join(us_cbsa)
    state_cbsa = cbsa_by_state[cbsa_by_state['STUSPS'].str.contains(state_abbreviation, na=False, case=True)]

    num_images_per_cbsa = images_per_cbsa(state_images_timestamp, state_cbsa)
    count = len(state_images_timestamp)

    # Flood-insurance share for every CBSA code in the housing survey (not limited to this state).
    column_values = get_column_values(ahs_data, 'OMB13CBSA')
    percent_with_fldins = flood_insurance_by_cbsa(column_values)[1]

    return (airports_within_range, images_within_range, state_images_timestamp, state_cbsa, count, num_images_per_cbsa, percent_with_fldins, stamps)

In [None]:
def state_plotter(state_abbreviation, label, year, conditional):
    """Plot a state's images over its outline and CBSAs, then print image counts.

    Parameters
    ----------
    state_abbreviation : str
        Value compared against the `STUSPS` column (e.g. 'FL').
    label : GeoDataFrame
        Image locations with a string `timestamp` column.
    year : str
        Pattern searched for inside each timestamp (e.g. '2018' or '2018-10').
    conditional : bool
        When exactly `True`, only images lying within at least one of the
        state's airport geometries are plotted and counted; any other value
        plots and counts all of the state's selected images.

    Returns
    -------
    tuple
        The results of `plt.show()` and of the two `print` calls, evaluated in
        that order (in practice a tuple of three `None` values).

    Notes
    -----
    The airport geometries themselves are not drawn. Layers are stacked by
    zorder: CBSAs (1) below the image points (3).
    """
    images_by_state = label_join(label)
    cbsa_by_state = label_join(us_cbsa)

    ax = states[states.STUSPS == state_abbreviation].plot(figsize=(10,10), alpha = .3, edgecolor = 'k')

    state_images = images_by_state[images_by_state.STUSPS == state_abbreviation]
    state_images_timestamp = state_images[state_images['timestamp'].str.contains(year, na=False, case=True)]

    if conditional is True:
        # Airports are only joined when needed; the join is comparatively expensive.
        airports_by_state = label_join(airports)
        state_airports = airports_by_state[airports_by_state.STUSPS == state_abbreviation]
        images_to_plot = gpd.sjoin(state_images_timestamp, state_airports, op='within')
        images_to_plot = images_to_plot.drop(columns=['index_right'])
    else:
        images_to_plot = state_images_timestamp

    images_to_plot.plot(ax=ax, marker='.', markersize = 5, color='red', zorder=3)

    state_cbsa = cbsa_by_state[cbsa_by_state['STUSPS'].str.contains(state_abbreviation, na=False, case=True)]
    state_cbsa.plot(ax=ax, alpha= .5, edgecolor = 'black', zorder=1)

    ctx.add_basemap(ax, crs = fiona.crs.from_epsg(4326))

    num_images_per_cbsa = images_per_cbsa(images_to_plot, state_cbsa)
    count = len(images_to_plot)

    # The return expression is unchanged so the figure is shown before the counts are printed.
    return(plt.show(), print('Total images: ', count), print(num_images_per_cbsa))

# Hurricane Michael:
- Makes landfall in Florida on October 10th, 2018
- MICHAEL COMES UP THROUGH THE GULF COAST AND PUSHES NORTH THROUGH TALLAHASSEE INTO GEORGIA
- A MAJORITY OF THE IMAGES WERE TAKEN IN OCTOBER (THE SAME MONTH IT HIT) 
- ONLY ONE FOLLOW UP IMAGE TAKEN IN NOVEMBER, 2018

In [None]:
# Per-month counts of all LADI images located in FL whose timestamp matches '2018'.
months_with_images('FL', image_metadata_gdf, '2018')

In [None]:
# Per-month counts of all LADI images located in FL whose timestamp matches '2019'.
months_with_images('FL', image_metadata_gdf, '2019')

In [None]:
# Run the combined FL / 2019 summary and show element 4 of the returned tuple,
# which is `count` in airports_in_range's return statement.
temp = airports_in_range('FL', image_metadata_gdf, '2019')
temp[4]

In [None]:
# Map all LADI images in FL for 2018, for October 2018 only, and for 2019.
# Passing False plots every selected image, not only those near an airport.
state_plotter('FL', image_metadata_gdf, '2018', False)
state_plotter('FL', image_metadata_gdf, '2018-10', False)
state_plotter('FL', image_metadata_gdf, '2019', False)
# red dots are the disaster image locations
# NOTE: state_plotter does not draw the airport buffer circles, so no black circles appear on these maps

In [None]:
# Per-month counts of human-labeled flood images in FL whose timestamp matches '2018'.
months_with_images('FL', human_labeled_floods, '2018')

In [None]:
# Per-month counts of human-labeled flood images in FL whose timestamp matches '2019'.
months_with_images('FL', human_labeled_floods, '2019')

In [None]:
# Map the human-labeled flood images in FL for 2018, for October 2018 only, and for 2019.
# Passing False plots every selected image, not only those near an airport.
state_plotter('FL', human_labeled_floods, '2018', False)
state_plotter('FL', human_labeled_floods, '2018-10', False)
state_plotter('FL', human_labeled_floods, '2019', False)
# red dots are the disaster image locations
# NOTE: state_plotter does not draw the airport buffer circles, so no black circles appear on these maps

In [None]:
# Per-month counts of all LADI images located in GA whose timestamp matches '2018'.
months_with_images('GA', image_metadata_gdf, '2018')

In [None]:
# Per-month counts of all LADI images located in GA whose timestamp matches '2019'.
months_with_images('GA', image_metadata_gdf, '2019')

In [None]:
# Map all LADI images in GA for 2018, for October 2018 only, and for 2019.
# Passing False plots every selected image, not only those near an airport.
state_plotter('GA', image_metadata_gdf, '2018', False)
state_plotter('GA', image_metadata_gdf, '2018-10', False)
state_plotter('GA', image_metadata_gdf, '2019', False)
# red dots are the disaster image locations
# NOTE: state_plotter does not draw the airport buffer circles, so no black circles appear on these maps

In [None]:
# Per-month counts of human-labeled flood images in GA whose timestamp matches '2018'.
months_with_images('GA', human_labeled_floods, '2018')

In [None]:
# Per-month counts of human-labeled flood images in GA whose timestamp matches '2019'.
months_with_images('GA', human_labeled_floods, '2019')

In [None]:
# Map the human-labeled flood images in GA for 2018, for October 2018 only, and for 2019.
# Passing False plots every selected image, not only those near an airport.
state_plotter('GA', human_labeled_floods, '2018', False)
state_plotter('GA', human_labeled_floods, '2018-10', False)
state_plotter('GA', human_labeled_floods, '2019', False)
# red dots are the disaster image locations
# NOTE: state_plotter does not draw the airport buffer circles, so no black circles appear on these maps

# Hurricane Florence:
- HIT THE CAROLINAS DIRECTLY ALONG THE COASTLINE (AUGUST 31, 2018-SEPTEMBER 18TH, 2018)
- MAKES LANDFALL IN NC ON SEPTEMBER 14TH
- PUSHES WESTWARD THROUGH THE CAROLINAS AND INTO GEORGIA


In [None]:
# Per-month counts of all LADI images located in NC whose timestamp matches '2018'.
months_with_images('NC', image_metadata_gdf, '2018')

In [None]:
# Per-month counts of all LADI images located in NC whose timestamp matches '2019'.
months_with_images('NC', image_metadata_gdf, '2019')

In [None]:
# Map all LADI images in NC: the whole of 2018, each month from September to
# December 2018, and 2019. Passing False plots every selected image.
print('2018: ')
state_plotter('NC', image_metadata_gdf, '2018', False)
print('\nSEPTEMBER 2018: ')
state_plotter('NC', image_metadata_gdf, '2018-09', False)
print('\nOCTOBER 2018: ')
state_plotter('NC', image_metadata_gdf, '2018-10', False)
print('\nNOVEMBER 2018: ')
state_plotter('NC', image_metadata_gdf, '2018-11', False)
print('\nDECEMBER 2018: ')
state_plotter('NC', image_metadata_gdf, '2018-12', False)
print('\n2019: ')
state_plotter('NC', image_metadata_gdf, '2019', False)
# red dots are the disaster image locations
# NOTE: state_plotter does not draw the airport buffer circles, so no black circles appear on these maps

In [None]:
# Per-month counts of human-labeled flood images in NC whose timestamp matches '2018'.
months_with_images('NC', human_labeled_floods, '2018')

In [None]:
# Per-month counts of human-labeled flood images in NC whose timestamp matches '2019'.
months_with_images('NC', human_labeled_floods, '2019')

In [None]:
# Map the human-labeled flood images in NC: the whole of 2018, each month from
# September to December 2018, and 2019. Passing False plots every selected image.
print('2018: ')
state_plotter('NC', human_labeled_floods, '2018', False)
print('\nSEPTEMBER 2018: ')
state_plotter('NC', human_labeled_floods, '2018-09', False)
print('\nOCTOBER 2018: ')
state_plotter('NC', human_labeled_floods, '2018-10', False)
print('\nNOVEMBER 2018: ')
state_plotter('NC', human_labeled_floods, '2018-11', False)
print('\nDECEMBER 2018: ')
state_plotter('NC', human_labeled_floods, '2018-12', False)
print('\n2019: ')
state_plotter('NC', human_labeled_floods, '2019', False)
# red dots are the disaster image locations
# NOTE: state_plotter does not draw the airport buffer circles, so no black circles appear on these maps

In [None]:
# Per-month counts of all LADI images located in SC whose timestamp matches '2018'.
months_with_images('SC', image_metadata_gdf, '2018')

In [None]:
# Per-month counts of all LADI images located in SC whose timestamp matches '2019'.
months_with_images('SC', image_metadata_gdf, '2019')

In [None]:
# Map all LADI images in SC for 2018 and for 2019. Passing False plots every selected image.
print('2018: ')
state_plotter('SC', image_metadata_gdf, '2018', False)



print('\n2019: ')
state_plotter('SC', image_metadata_gdf, '2019', False)
# red dots are the disaster image locations
# NOTE: state_plotter does not draw the airport buffer circles, so no black circles appear on these maps

In [None]:
# Per-month counts of human-labeled flood images in SC whose timestamp matches '2018'.
months_with_images('SC', human_labeled_floods, '2018')

In [None]:
# Per-month counts of human-labeled flood images in SC whose timestamp matches '2019'.
months_with_images('SC', human_labeled_floods, '2019')

In [None]:
# Map the human-labeled flood images in SC for 2018, September 2018 and October 2018.
# Passing False plots every selected image, not only those near an airport.
print('2018: ')
state_plotter('SC', human_labeled_floods, '2018', False)
print('\nSEPTEMBER 2018: ')
state_plotter('SC', human_labeled_floods, '2018-09', False)
print('\nOCTOBER 2018: ')
state_plotter('SC', human_labeled_floods, '2018-10', False)
# red dots are the disaster image locations
# NOTE: state_plotter does not draw the airport buffer circles, so no black circles appear on these maps

In [None]:
# Per-month counts of all LADI images located in FL whose timestamp matches '2018'
# (same call as in the Hurricane Michael section).
months_with_images('FL', image_metadata_gdf, '2018')

In [None]:
# Per-month counts of all LADI images located in FL whose timestamp matches '2019'
# (same call as in the Hurricane Michael section).
months_with_images('FL', image_metadata_gdf, '2019')

In [None]:
# Map all LADI images in FL: the whole of 2018, each month from August to
# December 2018, and 2019. Passing False plots every selected image.
print('2018: ')
state_plotter('FL', image_metadata_gdf, '2018', False)

print('\nAUGUST 2018: ')
state_plotter('FL', image_metadata_gdf, '2018-08', False)
print('\nSEPTEMBER 2018: ')
state_plotter('FL', image_metadata_gdf, '2018-09', False)
print('\nOCTOBER 2018: ')
state_plotter('FL', image_metadata_gdf, '2018-10', False)
print('\nNOVEMBER 2018: ')
state_plotter('FL', image_metadata_gdf, '2018-11', False)
print('\nDECEMBER 2018: ')
state_plotter('FL', image_metadata_gdf, '2018-12', False)

print('\n2019: ')
state_plotter('FL', image_metadata_gdf, '2019', False)
# red dots are the disaster image locations
# NOTE: state_plotter does not draw the airport buffer circles, so no black circles appear on these maps

In [None]:
# Per-month counts of human-labeled flood images in FL whose timestamp matches '2018'
# (same call as in the Hurricane Michael section).
months_with_images('FL', human_labeled_floods, '2018')

In [None]:
# Per-month counts of human-labeled flood images in FL whose timestamp matches '2019'
# (same call as in the Hurricane Michael section).
months_with_images('FL', human_labeled_floods, '2019')

In [None]:
# Map the human-labeled flood images in FL: the whole of 2018, October 2018,
# November 2018, and 2019. Passing False plots every selected image.
print('2018: ')
state_plotter('FL', human_labeled_floods, '2018', False)

print('\nOCTOBER 2018: ')
state_plotter('FL', human_labeled_floods, '2018-10', False)
print('\nNOVEMBER 2018: ')
state_plotter('FL', human_labeled_floods, '2018-11', False)

print('\n2019: ')
state_plotter('FL', human_labeled_floods, '2019', False)
# red dots are the disaster image locations
# NOTE: state_plotter does not draw the airport buffer circles, so no black circles appear on these maps