# Rapidly intensifying hurricanes - Extraction

*By Aric Chokey and Yiran Zhu*

A look at hurricanes that have "rapidly intensified" as defined by the National Weather Service.

In [1]:
from datetime import datetime, timedelta

import pandas as pd
import numpy as np

### 1. Load in all the storm data. 

We'll use the raw download of the data from here. The spreadhseet file contains a subhead, the table hears and the unit measurement definitions. So we'll skip the first and third rows by setting the header to `header=1` and `skiprows=[2]`.

>NOTE: We can't open the file in Excel because the program will coerce the dates pre-1901 to the wrong numbers. 

In [2]:
all_storms_df = pd.read_csv('Allstorms.ibtracs_all.v03r10.csv',
                header=1,
                skiprows=[2],
                usecols=[
                   'Serial_Num',
                   'Season',
                   'Basin',
                   'Sub_basin',
                   'Name',
                   'ISO_time',
                   'Latitude',
                   'Longitude',
                   'hurdat_atl_wind'
                ],
                dtype={'hurdat_atl_wind': np.int32},
                low_memory = False)
all_storms_df.head()

Unnamed: 0,Serial_Num,Season,Basin,Sub_basin,Name,ISO_time,Latitude,Longitude,hurdat_atl_wind
0,1842298N11080,1842,NI,BB,NOT NAMED,1842-10-25 06:00:00,-999.0,-999.0,-999
1,1842298N11080,1842,NI,BB,NOT NAMED,1842-10-25 12:00:00,-999.0,-999.0,-999
2,1842298N11080,1842,NI,AS,NOT NAMED,1842-10-25 18:00:00,-999.0,-999.0,-999
3,1842298N11080,1842,NI,AS,NOT NAMED,1842-10-26 00:00:00,-999.0,-999.0,-999
4,1842298N11080,1842,NI,AS,NOT NAMED,1842-10-26 06:00:00,-999.0,-999.0,-999


### 2. Filter data to get storms with wind measurements and those in the North Atlantic

In [3]:
all_storms_df = all_storms_df[all_storms_df["hurdat_atl_wind"] > 0]
all_storms_df.head()


Unnamed: 0,Serial_Num,Season,Basin,Sub_basin,Name,ISO_time,Latitude,Longitude,hurdat_atl_wind
291,1851175N26270,1851,,GM,UNNAMED,1851-06-25 00:00:00,28.0,-94.8,80
292,1851175N26270,1851,,GM,UNNAMED,1851-06-25 06:00:00,28.0,-95.4,80
293,1851175N26270,1851,,GM,UNNAMED,1851-06-25 12:00:00,28.0,-96.0,80
294,1851175N26270,1851,,GM,UNNAMED,1851-06-25 18:00:00,28.1,-96.5,80
296,1851175N26270,1851,,,UNNAMED,1851-06-26 00:00:00,28.2,-97.0,70


How many storms are we looking at in this data now?

In [4]:
all_storms_df.to_csv("all_storms.csv")

print(all_storms_df.Serial_Num.nunique(), " storms incuded.")


1812  storms incuded.


### 3. Convert knots to miles per hour.

In [5]:
def knots_to_mph(speed):
    return speed * 1.15077945

In [6]:
all_storms_df['wind_speed_mph'] = all_storms_df["hurdat_atl_wind"].apply(knots_to_mph)
all_storms_df.describe()

Unnamed: 0,Season,Latitude,Longitude,hurdat_atl_wind,wind_speed_mph
count,48531.0,48531.0,48531.0,48531.0,48531.0
mean,1950.287981,26.773677,-65.737384,52.723249,60.672832
std,44.980657,20.137629,25.094726,24.61411,28.325412
min,1851.0,-999.0,-999.0,10.0,11.507794
25%,1912.0,19.1,-80.7,35.0,40.277281
50%,1957.0,26.4,-67.5,45.0,51.785075
75%,1990.0,33.2,-52.2,70.0,80.554561
max,2016.0,80.3,28.0,165.0,189.878609


### 4. Find wind speed difference for 24-hour periods.

Wind measures are set at every 6 hour mark, so we can look at the difference between a record and the record 4 steps behind it.

In [7]:
grouped_storms = all_storms_df.groupby(['Serial_Num', 'Season', 'Name', 'Sub_basin'])
len(grouped_storms)


# Disable copy warning.
pd.options.mode.chained_assignment = None


calculated_df = pd.DataFrame([])

for storm_id, records in grouped_storms:
    if len(records) == 1:
        continue
    else:
        records['wind_diff'] = records['wind_speed_mph'].diff(periods=4)
        calculated_df = calculated_df.append(records)


calculated_df.head()

Unnamed: 0,Serial_Num,Season,Basin,Sub_basin,Name,ISO_time,Latitude,Longitude,hurdat_atl_wind,wind_speed_mph,wind_diff
291,1851175N26270,1851,,GM,UNNAMED,1851-06-25 00:00:00,28.0,-94.8,80,92.062356,
292,1851175N26270,1851,,GM,UNNAMED,1851-06-25 06:00:00,28.0,-95.4,80,92.062356,
293,1851175N26270,1851,,GM,UNNAMED,1851-06-25 12:00:00,28.0,-96.0,80,92.062356,
294,1851175N26270,1851,,GM,UNNAMED,1851-06-25 18:00:00,28.1,-96.5,80,92.062356,
296,1851175N26270,1851,,,UNNAMED,1851-06-26 00:00:00,28.2,-97.0,70,80.554561,


### 5. Calculate if the wind speed changes qualify as "rapid intensification", which means it increased by at least 30 knots, or 34.5234 mph, over a 24-hour period.

In [8]:
def rapid_development(winddiff):
    if winddiff >= 34.5234:
        return 1
    else:
        return 0
    

In [9]:
calculated_df["rapid_increases"] = calculated_df["wind_diff"].apply(rapid_development)
calculated_df.to_csv("parsed_storms.csv")
calculated_df.head()

Unnamed: 0,Serial_Num,Season,Basin,Sub_basin,Name,ISO_time,Latitude,Longitude,hurdat_atl_wind,wind_speed_mph,wind_diff,rapid_increases
291,1851175N26270,1851,,GM,UNNAMED,1851-06-25 00:00:00,28.0,-94.8,80,92.062356,,0
292,1851175N26270,1851,,GM,UNNAMED,1851-06-25 06:00:00,28.0,-95.4,80,92.062356,,0
293,1851175N26270,1851,,GM,UNNAMED,1851-06-25 12:00:00,28.0,-96.0,80,92.062356,,0
294,1851175N26270,1851,,GM,UNNAMED,1851-06-25 18:00:00,28.1,-96.5,80,92.062356,,0
296,1851175N26270,1851,,,UNNAMED,1851-06-26 00:00:00,28.2,-97.0,70,80.554561,,0


### 6. Group the storms together, summing up the number of times they rapidly intensified.

In [10]:
intensified_storms = calculated_df.groupby(['Serial_Num', 'Season', 'Name', 'Sub_basin']).rapid_increases.max().reset_index().sort_values("rapid_increases", ascending=False)
intensified_storms = intensified_storms[intensified_storms["rapid_increases"] > 0]

In [11]:
intensified_storms.to_csv("intensified_storms.csv")