In [0]:
import pandas as pd
import numpy as np
import os
import datetime as dt
import scipy.stats as st
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.ticker as ticker

import plotly.express as px
import chart_studio.plotly as py
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")
plt.style.use('ggplot')
pd.plotting.register_matplotlib_converters()
import plotly
plotly.__version__
import plotly.io as pio
pio.templates.default = "plotly_dark"


In [0]:
dff=pd.read_csv('/dbfs/mnt/AFcovid19/AFClimate/atlantic_hurricane.csv')

In [0]:
df=dff.copy()

In [0]:
# Status can be the following types:
# TD – Tropical cyclone of tropical depression intensity (< 34 knots)
# TS – Tropical cyclone of tropical storm intensity (34-63 knots)
# HU – Tropical cyclone of hurricane intensity (> 64 knots)
# EX – Extratropical cyclone (of any intensity)
# SD – Subtropical cyclone of subtropical depression intensity (< 34 knots)
# SS – Subtropical cyclone of subtropical storm intensity (> 34 knots)
# LO – A low that is neither a tropical cyclone, a subtropical cyclone, nor an extratropical cyclone (of any intensity)
# WV – Tropical Wave (of any intensity)
# DB – Disturbance (of any intensity)

In [0]:
bin_ranges = [1850,1870,1890,1910,1930,1950,1970,1990,2010,2020]
bin_names = ['1851-1870','1871-1890','1891-1910','1911-1930','1931-1950','1951-1970','1971-1990','1991-2010','2010-2020']

df['year_bin_range'] = pd.cut(
                                           np.array(
                                              df['year']), 
                                              bins=bin_ranges)
df['year_range'] = pd.cut(
                                           np.array(
                                            df['year']), 
                                              bins=bin_ranges,            
                                              labels=bin_names,ordered=False)
# view the binned features 
df

Unnamed: 0.1,Unnamed: 0,storm_id,name,landfall,hurricane,vmax,mslp,init_date,final_date,year,status,category,start_lat,start_lon,mon,month,year_bin_range,year_range
0,0,AL011851,UNNAMED,1,1,80.0,,1851-06-25,1851-06-28,1851,,1,28.0,-94.8,Jun,6,"(1850, 1870]",1851-1870
1,1,AL021851,UNNAMED,0,1,80.0,,1851-07-05,1851-07-05,1851,,1,22.2,-97.6,Jul,7,"(1850, 1870]",1851-1870
2,2,AL031851,UNNAMED,0,0,50.0,,1851-07-10,1851-07-10,1851,,0,12.0,-60.0,Jul,7,"(1850, 1870]",1851-1870
3,3,AL041851,UNNAMED,1,1,100.0,,1851-08-16,1851-08-27,1851,,3,13.4,-48.0,Aug,8,"(1850, 1870]",1851-1870
4,4,AL051851,UNNAMED,0,0,50.0,,1851-09-13,1851-09-16,1851,,0,32.5,-73.5,Sep,9,"(1850, 1870]",1851-1870
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1919,26,AL272020,EPSILON,0,1,100.0,952.0,2020-10-19,2020-10-26,2020,HU,3,25.6,-55.4,Oct,10,"(2010, 2020]",2010-2020
1920,27,AL282020,ZETA,0,1,90.0,973.0,2020-10-24,2020-10-29,2020,HU,2,18.4,-82.6,Oct,10,"(2010, 2020]",2010-2020
1921,28,AL292020,ETA,0,1,130.0,923.0,2020-10-31,2020-11-13,2020,HU,4,14.9,-72.4,Oct,10,"(2010, 2020]",2010-2020
1922,29,AL302020,THETA,0,0,60.0,989.0,2020-11-10,2020-11-15,2020,TS,0,28.8,-41.0,Nov,11,"(2010, 2020]",2010-2020


In [0]:
bin_ranges = [-2,-1,0,2,4,5]
bin_names = ['Tropical Depression','Tropical Storm','Cat 1-2','Cat 3-4','Cat 5']

df['cat_bin_range'] = pd.cut(
                                           np.array(
                                              df['category']), 
                                              bins=bin_ranges)
df['Hu_Category'] = pd.cut(
                                           np.array(
                                             df['category']), 
                                              bins=bin_ranges,            
                                              labels=bin_names,ordered=False)
# view the binned features 
df

Unnamed: 0.1,Unnamed: 0,storm_id,name,landfall,hurricane,vmax,mslp,init_date,final_date,year,status,category,start_lat,start_lon,mon,month,year_bin_range,year_range,cat_bin_range,Hu_Category
0,0,AL011851,UNNAMED,1,1,80.0,,1851-06-25,1851-06-28,1851,,1,28.0,-94.8,Jun,6,"(1850, 1870]",1851-1870,"(0, 2]",Cat 1-2
1,1,AL021851,UNNAMED,0,1,80.0,,1851-07-05,1851-07-05,1851,,1,22.2,-97.6,Jul,7,"(1850, 1870]",1851-1870,"(0, 2]",Cat 1-2
2,2,AL031851,UNNAMED,0,0,50.0,,1851-07-10,1851-07-10,1851,,0,12.0,-60.0,Jul,7,"(1850, 1870]",1851-1870,"(-1, 0]",Tropical Storm
3,3,AL041851,UNNAMED,1,1,100.0,,1851-08-16,1851-08-27,1851,,3,13.4,-48.0,Aug,8,"(1850, 1870]",1851-1870,"(2, 4]",Cat 3-4
4,4,AL051851,UNNAMED,0,0,50.0,,1851-09-13,1851-09-16,1851,,0,32.5,-73.5,Sep,9,"(1850, 1870]",1851-1870,"(-1, 0]",Tropical Storm
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1919,26,AL272020,EPSILON,0,1,100.0,952.0,2020-10-19,2020-10-26,2020,HU,3,25.6,-55.4,Oct,10,"(2010, 2020]",2010-2020,"(2, 4]",Cat 3-4
1920,27,AL282020,ZETA,0,1,90.0,973.0,2020-10-24,2020-10-29,2020,HU,2,18.4,-82.6,Oct,10,"(2010, 2020]",2010-2020,"(0, 2]",Cat 1-2
1921,28,AL292020,ETA,0,1,130.0,923.0,2020-10-31,2020-11-13,2020,HU,4,14.9,-72.4,Oct,10,"(2010, 2020]",2010-2020,"(2, 4]",Cat 3-4
1922,29,AL302020,THETA,0,0,60.0,989.0,2020-11-10,2020-11-15,2020,TS,0,28.8,-41.0,Nov,11,"(2010, 2020]",2010-2020,"(-1, 0]",Tropical Storm


In [0]:
bin1_ranges = [0,1850,1860,1870,1880,1890,1900,1910,1920,1930,1940,1950,1960,1970,1980,1990,2000,2010,2020]
bin1_names = ['1850s','1860s','1870s','1880s','1890s','1900s','1910s','1920s','1930s','1940s','1950s','1960s','1970s','1980s','1990s','2000s','2010s','2020s']

df['Year_bin_range'] = pd.cut(
                                           np.array(
                                              df['year']), 
                                              bins=bin1_ranges)
df['Year_range'] = pd.cut(
                                           np.array(
                                             df['year']), 
                                              bins=bin1_ranges,            
                                              labels=bin1_names,ordered=False)
# view the binned features 
df

Unnamed: 0.1,Unnamed: 0,storm_id,name,landfall,hurricane,vmax,mslp,init_date,final_date,year,status,category,start_lat,start_lon,mon,month,year_bin_range,year_range,cat_bin_range,Hu_Category,Year_bin_range,Year_range
0,0,AL011851,UNNAMED,1,1,80.0,,1851-06-25,1851-06-28,1851,,1,28.0,-94.8,Jun,6,"(1850, 1870]",1851-1870,"(0, 2]",Cat 1-2,"(1850, 1860]",1860s
1,1,AL021851,UNNAMED,0,1,80.0,,1851-07-05,1851-07-05,1851,,1,22.2,-97.6,Jul,7,"(1850, 1870]",1851-1870,"(0, 2]",Cat 1-2,"(1850, 1860]",1860s
2,2,AL031851,UNNAMED,0,0,50.0,,1851-07-10,1851-07-10,1851,,0,12.0,-60.0,Jul,7,"(1850, 1870]",1851-1870,"(-1, 0]",Tropical Storm,"(1850, 1860]",1860s
3,3,AL041851,UNNAMED,1,1,100.0,,1851-08-16,1851-08-27,1851,,3,13.4,-48.0,Aug,8,"(1850, 1870]",1851-1870,"(2, 4]",Cat 3-4,"(1850, 1860]",1860s
4,4,AL051851,UNNAMED,0,0,50.0,,1851-09-13,1851-09-16,1851,,0,32.5,-73.5,Sep,9,"(1850, 1870]",1851-1870,"(-1, 0]",Tropical Storm,"(1850, 1860]",1860s
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1919,26,AL272020,EPSILON,0,1,100.0,952.0,2020-10-19,2020-10-26,2020,HU,3,25.6,-55.4,Oct,10,"(2010, 2020]",2010-2020,"(2, 4]",Cat 3-4,"(2010, 2020]",2020s
1920,27,AL282020,ZETA,0,1,90.0,973.0,2020-10-24,2020-10-29,2020,HU,2,18.4,-82.6,Oct,10,"(2010, 2020]",2010-2020,"(0, 2]",Cat 1-2,"(2010, 2020]",2020s
1921,28,AL292020,ETA,0,1,130.0,923.0,2020-10-31,2020-11-13,2020,HU,4,14.9,-72.4,Oct,10,"(2010, 2020]",2010-2020,"(2, 4]",Cat 3-4,"(2010, 2020]",2020s
1922,29,AL302020,THETA,0,0,60.0,989.0,2020-11-10,2020-11-15,2020,TS,0,28.8,-41.0,Nov,11,"(2010, 2020]",2010-2020,"(-1, 0]",Tropical Storm,"(2010, 2020]",2020s


In [0]:
# dfw = df.groupby(['Year_range'])['vmax'].agg('mean').reset_index().rename(columns={'mean':'AWS'})
# dfw= dfw.dropna()
# dfw['vmax'] = dfw['vmax'].astype('int')
# dfw

# dfwm = df.groupby(['Year_range'])['vmax'].agg('max').reset_index().rename(columns={'mean':'AWS'})
# dfwm= dfwm.dropna()
# dfwm['vmax'] = dfwm['vmax'].astype('int')
# dfwm

In [0]:
# fig=go.Figure()


# fig.add_trace(go.Scatter(x=dfwm['Year_range'], y=dfwm['vmax'], name='max hur wind speed',line = dict(color='red', width=3)))
                         


# fig.update_layout(title='Trend of Max Wind speeds in US by decades',
#                    xaxis_title='Years',
#                    yaxis_title='Max Wind Speed')

# fig.show()
# # fig.write_html("/dbfs/mnt/AFcovid19/APS/Charts/TrendMaxWind.html")




In [0]:
# fig=go.Figure()



# fig.add_trace(go.Scatter(x=dfw['Year_range'], y=dfw['vmax'], name='avg hur wind speed',
#                          line = dict(color='royalblue', width=3)))



# fig.update_layout(title='Trend of Avg Wind speeds in US by decades',
#                    xaxis_title='Years',
#                    yaxis_title='Avg Wind Speed')

# fig.show()

In [0]:
dfl = df.groupby(['Year_range'])['landfall'].agg('sum').reset_index().rename(columns={'sum':'landfalls'})
dfl= dfl.dropna()
dfl['landfall'] = dfl['landfall'].astype('int')
dfl

Unnamed: 0,Year_range,landfall
0,1850s,0
1,1860s,22
2,1870s,18
3,1880s,37
4,1890s,35
5,1900s,40
6,1910s,36
7,1920s,32
8,1930s,20
9,1940s,36


In [0]:
df

Unnamed: 0.1,Unnamed: 0,storm_id,name,landfall,hurricane,vmax,mslp,init_date,final_date,year,status,category,start_lat,start_lon,mon,month,year_bin_range,year_range,cat_bin_range,Hu_Category,Year_bin_range,Year_range
0,0,AL011851,UNNAMED,1,1,80.0,,1851-06-25,1851-06-28,1851,,1,28.0,-94.8,Jun,6,"(1850, 1870]",1851-1870,"(0, 2]",Cat 1-2,"(1850, 1860]",1860s
1,1,AL021851,UNNAMED,0,1,80.0,,1851-07-05,1851-07-05,1851,,1,22.2,-97.6,Jul,7,"(1850, 1870]",1851-1870,"(0, 2]",Cat 1-2,"(1850, 1860]",1860s
2,2,AL031851,UNNAMED,0,0,50.0,,1851-07-10,1851-07-10,1851,,0,12.0,-60.0,Jul,7,"(1850, 1870]",1851-1870,"(-1, 0]",Tropical Storm,"(1850, 1860]",1860s
3,3,AL041851,UNNAMED,1,1,100.0,,1851-08-16,1851-08-27,1851,,3,13.4,-48.0,Aug,8,"(1850, 1870]",1851-1870,"(2, 4]",Cat 3-4,"(1850, 1860]",1860s
4,4,AL051851,UNNAMED,0,0,50.0,,1851-09-13,1851-09-16,1851,,0,32.5,-73.5,Sep,9,"(1850, 1870]",1851-1870,"(-1, 0]",Tropical Storm,"(1850, 1860]",1860s
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1919,26,AL272020,EPSILON,0,1,100.0,952.0,2020-10-19,2020-10-26,2020,HU,3,25.6,-55.4,Oct,10,"(2010, 2020]",2010-2020,"(2, 4]",Cat 3-4,"(2010, 2020]",2020s
1920,27,AL282020,ZETA,0,1,90.0,973.0,2020-10-24,2020-10-29,2020,HU,2,18.4,-82.6,Oct,10,"(2010, 2020]",2010-2020,"(0, 2]",Cat 1-2,"(2010, 2020]",2020s
1921,28,AL292020,ETA,0,1,130.0,923.0,2020-10-31,2020-11-13,2020,HU,4,14.9,-72.4,Oct,10,"(2010, 2020]",2010-2020,"(2, 4]",Cat 3-4,"(2010, 2020]",2020s
1922,29,AL302020,THETA,0,0,60.0,989.0,2020-11-10,2020-11-15,2020,TS,0,28.8,-41.0,Nov,11,"(2010, 2020]",2010-2020,"(-1, 0]",Tropical Storm,"(2010, 2020]",2020s


In [0]:
# dflc = df.groupby(['Year_range','category'])['landfall'].agg('sum').reset_index().rename(columns={'sum':'landfalls'})
# dflc= dflc.dropna()
# dflc['landfall'] = dflc['landfall'].astype('int')
# dflc

grouped_df=df.groupby(['Year_range','Hu_Category'])
function_dict= {'landfall':'sum','storm_id':'count'}
grouped_df = grouped_df.aggregate(function_dict).reset_index().rename(columns={'storm_id':'storms'})
grouped_df['landfall'] = grouped_df['landfall'].fillna(0)
grouped_df['landfall'] =grouped_df['landfall'].astype('int')
grouped_df=grouped_df[~grouped_df['Year_range'].isin(['1850s'])]
grouped_df=grouped_df[~grouped_df['Hu_Category'].isin(['Tropical Depression','Tropical Storm'])]
grouped_df 

Unnamed: 0,Year_range,Hu_Category,landfall,storms
7,1860s,Cat 1-2,12,35
8,1860s,Cat 3-4,7,10
9,1860s,Cat 5,0,0
12,1870s,Cat 1-2,11,48
13,1870s,Cat 3-4,1,5
14,1870s,Cat 5,0,0
17,1880s,Cat 1-2,14,40
18,1880s,Cat 3-4,10,14
19,1880s,Cat 5,0,0
22,1890s,Cat 1-2,17,42


In [0]:
dfly = grouped_df.groupby(['Year_range'])
function_dict= {'landfall':'sum','storms':'sum'}
dfly= dfly.aggregate(function_dict).reset_index()
dfly['landfall'] = dfly['landfall'].astype('int')
dfly['pct'] = dfly['landfall']/ dfly['storms']
dfly=dfly[~dfly['Year_range'].isin(['1850s'])]
format_dict ={'pct': '{:.2%}'}
dfly.style.format(format_dict)
dfly

Unnamed: 0,Year_range,landfall,storms,pct
1,1860s,19,45,0.422222
2,1870s,12,53,0.226415
3,1880s,24,54,0.444444
4,1890s,23,56,0.410714
5,1900s,23,51,0.45098
6,1910s,21,42,0.5
7,1920s,24,38,0.631579
8,1930s,14,39,0.358974
9,1940s,18,56,0.321429
10,1950s,31,58,0.534483


In [0]:

trace1 = (go.Bar(name='Total Hurricanes', y=dfly['Year_range'], x=dfly['storms'],text=dfly['storms'],textposition='auto',orientation='h'))
trace2 = (go.Bar(name='Total landfalls', y=dfly['Year_range'], x=dfly['landfall'],text=dfly['landfall'],textposition='auto',textfont_color="white",orientation='h'))  
data1 = [trace1,trace2]
fig1 = go.Figure(data=data1)
fig1.update_layout(barmode='group')
fig1.update_layout(title='Atlantic Hurricane Landfalls trending upwards over the years',
                   xaxis_title='Years',
                   yaxis_title='Number of Storms',yaxis=dict(autorange="reversed"))

fig1.update_layout(
    autosize=False,
    width=1700,
    height=1000
    
    
   
)

fig1.show()

In [0]:

fig = px.bar(grouped_df,x='Year_range', y="landfall", color="Hu_Category", title="Atlantic Hurricanes making landfall is trending upwards",category_orders={"Category":['0','1','2','3','4','5'],"year_range":['1860s','1870s','1880s','1890s','1900s','1910s','1920s','1930s','1940s','1950s','1960s','1970s','1980s','1990s','2000s','2010s','2020s']},color_discrete_sequence=["grey","chocolate","firebrick"],barmode='stack',text=grouped_df['landfall'])

fig.add_trace( go.Scatter(x=dfly['Year_range'],y=dfly['landfall'],name='Moving avg',
                         line = dict(color='blue', width=1.9)))
        
fig.show()

# fig.update_layout(yaxis={"ticksuffix":"%"})

In [0]:
df

Unnamed: 0.1,Unnamed: 0,storm_id,name,landfall,hurricane,vmax,mslp,init_date,final_date,year,status,category,start_lat,start_lon,mon,month,year_bin_range,year_range,cat_bin_range,Hu_Category,Year_bin_range,Year_range
0,0,AL011851,UNNAMED,1,1,80.0,,1851-06-25,1851-06-28,1851,,1,28.0,-94.8,Jun,6,"(1850, 1870]",1851-1870,"(0, 2]",Cat 1-2,"(1850, 1860]",1860s
1,1,AL021851,UNNAMED,0,1,80.0,,1851-07-05,1851-07-05,1851,,1,22.2,-97.6,Jul,7,"(1850, 1870]",1851-1870,"(0, 2]",Cat 1-2,"(1850, 1860]",1860s
2,2,AL031851,UNNAMED,0,0,50.0,,1851-07-10,1851-07-10,1851,,0,12.0,-60.0,Jul,7,"(1850, 1870]",1851-1870,"(-1, 0]",Tropical Storm,"(1850, 1860]",1860s
3,3,AL041851,UNNAMED,1,1,100.0,,1851-08-16,1851-08-27,1851,,3,13.4,-48.0,Aug,8,"(1850, 1870]",1851-1870,"(2, 4]",Cat 3-4,"(1850, 1860]",1860s
4,4,AL051851,UNNAMED,0,0,50.0,,1851-09-13,1851-09-16,1851,,0,32.5,-73.5,Sep,9,"(1850, 1870]",1851-1870,"(-1, 0]",Tropical Storm,"(1850, 1860]",1860s
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1919,26,AL272020,EPSILON,0,1,100.0,952.0,2020-10-19,2020-10-26,2020,HU,3,25.6,-55.4,Oct,10,"(2010, 2020]",2010-2020,"(2, 4]",Cat 3-4,"(2010, 2020]",2020s
1920,27,AL282020,ZETA,0,1,90.0,973.0,2020-10-24,2020-10-29,2020,HU,2,18.4,-82.6,Oct,10,"(2010, 2020]",2010-2020,"(0, 2]",Cat 1-2,"(2010, 2020]",2020s
1921,28,AL292020,ETA,0,1,130.0,923.0,2020-10-31,2020-11-13,2020,HU,4,14.9,-72.4,Oct,10,"(2010, 2020]",2010-2020,"(2, 4]",Cat 3-4,"(2010, 2020]",2020s
1922,29,AL302020,THETA,0,0,60.0,989.0,2020-11-10,2020-11-15,2020,TS,0,28.8,-41.0,Nov,11,"(2010, 2020]",2010-2020,"(-1, 0]",Tropical Storm,"(2010, 2020]",2020s


In [0]:
dfc = df.groupby(['category'])['storm_id'].agg('count').reset_index().rename(columns={'storm_id':'Storms'})
# dfc= dfc.dropna()
# dfc['vmax'] = dfw['vmax'].astype('int')
dfc=dfc[~dfc['category'].isin(['-1'])]
dfc=dfc.replace([0, 1, 2, 3,4,5], ['Tropical Storm', 'CAT 1','CAT 2','CAT 3','CAT 4','CAT 5'])

dfc

Unnamed: 0,category,Storms
1,Tropical Storm,735
2,CAT 1,364
3,CAT 2,250
4,CAT 3,162
5,CAT 4,125
6,CAT 5,37


In [0]:
dfc1=dfc[dfc['category']!='Tropical Storm']
dfc1

Unnamed: 0,category,Storms
2,CAT 1,364
3,CAT 2,250
4,CAT 3,162
5,CAT 4,125
6,CAT 5,37


In [0]:
fig = px.bar(dfc, x="Storms", y="category", orientation='h',
             hover_data=[ "category"],
             height=400,
             title='Atlantic Hurricanes By Category',category_orders={"category":['0','1','2','3','4','5']},text="Storms",
             color = "Storms",  color_continuous_scale='Bluered_r')
fig.update_layout(
    yaxis = dict(
        
        tick0 = 0,
        dtick = 1
    )
)
fig.show()

# color_continuous_scale='Bluered_r'

In [0]:
dfl=pd.read_csv('/dbfs/mnt/AFcovid19/AFClimate/atlantic_hurricane_USLF.csv')


In [0]:
dfl = dfl.groupby(['category'])['storm_id'].agg('count').reset_index().rename(columns={'storm_id':'Storms'})
# dfc= dfc.dropna()
# dfc['vmax'] = dfw['vmax'].astype('int')
dfcl=dfl[~dfl['category'].isin(['-1'])]
dfl=dfl.replace([0, 1, 2, 3,4,5], ['Tropical Storm', 'CAT 1','CAT 2','CAT 3','CAT 4','CAT 5'])

dfl

Unnamed: 0,category,Storms
0,CAT 1,69
1,CAT 2,26
2,CAT 3,29
3,CAT 4,38
4,CAT 5,20


In [0]:
fig = px.bar(dfl, x="Storms", y="category", orientation='h',
             hover_data=[ "category"],
             height=400,
             title='Atlantic Hurricanes making landfall By Category',category_orders={"category":['0','1','2','3','4','5']},text="Storms",
             color = "Storms",  color_continuous_scale='Bluered_r')
fig.update_layout(
    yaxis = dict(
        
        tick0 = 0,
        dtick = 1
    )
)
fig.show()

In [0]:
trace1 = (go.Bar(name='Total Storms', y=dfc1['category'], x=dfc1['Storms'],text=dfc1['Storms'],textposition='auto',orientation='h'))
trace2 = (go.Bar(name='US landfalls', y=dfl['category'], x=dfl['Storms'],text=dfl['Storms'],textposition='auto',textfont_color="white",orientation='h'))  
data1 = [trace1,trace2]
fig1 = go.Figure(data=data1)
fig1.update_layout(barmode='group')
fig1.update_layout(title='Atlantic Hurricane Total Vs Landfalls By category',
                   xaxis_title='Category',
                   yaxis_title='Number of Storms')
fig1.show()