In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np

%matplotlib inline


We will continue from the EDA in the previous notebook and try to produce a visualisation that captures the information about the natural disaster incidents. First, we need to do some preprocessing to get our data into the format we want.

In [2]:
# Folder, relative to this notebook, that holds all input files.
data_dir = '../data'

# State-boundary shapefile (presumably US state outlines -- confirm against the data folder).
US_shapely_filepath = os.path.join(data_dir, 'states_21basic/states.shp')
# Processed oil-spill incidents table, read into a DataFrame further down.
processed_oilspills_csvpath = os.path.join(data_dir, 'US_oilspills_p1.csv')

In [3]:
# Load the processed incidents table: rows are indexed by the `id` column and
# `open_date` is parsed into real datetimes rather than left as strings.
oilspills = pd.read_csv(
    processed_oilspills_csvpath,
    index_col='id',
    parse_dates=['open_date'],
)

# Quick sanity check on the first three records.
oilspills.head(3)

Unnamed: 0_level_0,open_date,name,location,lat,lon,threat,commodity,max_ptl_release_gallons,description,year,month,commodity_key_tokens,description_key_tokens
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
10049,2020-02-27,Partially Submerged Recreational Vessel Huron ...,"Huron, OH",41.391532,-82.554591,Oil,,,"On February 27, 2020, SSC received notificatio...",2020,2,,"[('February 27, 2020', 'DATE'), ('34', 'CARDIN..."
10050,2020-02-27,Whitney Tank Battery 160 Loomis Pass Spill,"Venice, LA, USA",29.116625,-89.180917,Oil,,,"On February 27, 2020, the USCG Sector New Orle...",2020,2,,"[('February 27, 2020', 'DATE'), ('the USCG Sec..."
10048,2020-02-24,Gray whale carcass,"Port Hueneme, CA",34.149744,-119.208226,Other,,,"On 24-FEB-2020, NMFS Stranding Coordinator in ...",2020,2,,"[('24-FEB-2020', 'CARDINAL'), ('NMFS Stranding..."


In [4]:
# Print per-column dtypes and non-null counts to see which columns contain missing values.
oilspills.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3709 entries, 10049 to 6200
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   open_date                3709 non-null   datetime64[ns]
 1   name                     3709 non-null   object        
 2   location                 3707 non-null   object        
 3   lat                      3709 non-null   float64       
 4   lon                      3709 non-null   float64       
 5   threat                   2413 non-null   object        
 6   commodity                3101 non-null   object        
 7   max_ptl_release_gallons  2074 non-null   float64       
 8   description              1776 non-null   object        
 9   year                     3709 non-null   int64         
 10  month                    3709 non-null   int64         
 11  commodity_key_tokens     3101 non-null   object        
 12  description_key_tokens   1776 

In [5]:
# Disabled draft: load the state shapefile and draw it as a base map.
# NOTE(review): `gpd` (presumably geopandas) is not imported in the imports cell,
# so this will raise NameError if uncommented as-is -- add the import first.
# US_gpd = gpd.read_file(US_shapely_filepath)
# US_gpd.head()

# fig, ax = plt.subplots(figsize = (10,10))
# US_gpd.plot(ax=ax)

First, we have to fill in our NaN values with placeholder defaults.

In [6]:
# Column -> replacement value used when filling missing entries.
# Grouped by placeholder so it is easy to see which columns share one.
nan_value_mapping = {
    # Label-like columns get an explicit 'unknown' marker.
    **dict.fromkeys(['location', 'threat'], 'unknown'),
    # Free-text columns fall back to an empty string.
    **dict.fromkeys(['commodity', 'description'], ''),
    # Numeric column: -1 acts as a "not reported" sentinel.
    'max_ptl_release_gallons': -1,
}

In [7]:
# Replace NaNs column by column using the mapping; columns that are not keys of
# the mapping are left as they are.
oilspills = oilspills.fillna(nan_value_mapping)

In [9]:
# Show the first five rows to confirm the placeholders were applied.
oilspills.head(n=5)

Unnamed: 0_level_0,open_date,name,location,lat,lon,threat,commodity,max_ptl_release_gallons,description,year,month,commodity_key_tokens,description_key_tokens
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
10049,2020-02-27,Partially Submerged Recreational Vessel Huron ...,"Huron, OH",41.391532,-82.554591,Oil,,-1.0,"On February 27, 2020, SSC received notificatio...",2020,2,,"[('February 27, 2020', 'DATE'), ('34', 'CARDIN..."
10050,2020-02-27,Whitney Tank Battery 160 Loomis Pass Spill,"Venice, LA, USA",29.116625,-89.180917,Oil,,-1.0,"On February 27, 2020, the USCG Sector New Orle...",2020,2,,"[('February 27, 2020', 'DATE'), ('the USCG Sec..."
10048,2020-02-24,Gray whale carcass,"Port Hueneme, CA",34.149744,-119.208226,Other,,-1.0,"On 24-FEB-2020, NMFS Stranding Coordinator in ...",2020,2,,"[('24-FEB-2020', 'CARDINAL'), ('NMFS Stranding..."
10047,2020-02-20,Recreational vessel sunk in Anacortes Skyline ...,"2400 Skyline Way, Anacortes, WA 98221, USA",48.492559,-122.680368,Oil,,-1.0,"On February 20, 2020, the SEA WOLF, a 44 foot ...",2020,2,,"[('February 20, 2020', 'DATE'), ('44 foot', 'Q..."
10046,2020-02-17,North Santiam River - Truck Spill,"N Santiam Hwy, Detroit, OR 97342, USA",44.697,-122.22689,Oil,,-1.0,On the morning of 16 FEB 2020 along the Santi...,2020,2,,"[('the morning', 'TIME'), ('Santiam River', 'L..."


In [12]:
# Disabled draft: geographic scatter of incidents placed by lat/lon, coloured by
# `threat`, with one animation frame per `year` and the incident name on hover.
# fig = px.scatter_geo(oilspills, 
#                      lat='lat',
#                      lon='lon',
#                      color="threat",
#                      hover_name="name", 
#                      animation_frame="year",
#                      projection="natural earth")
# fig.show()