In [15]:
import numpy as np 
import pandas as pd 
import math
from scipy import stats
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_rows', 500)

ModuleNotFoundError: No module named 'cartopy'

In [16]:
# import
# Both basin files are read with the same subset of columns.
track_columns = ['ID', 'Date', 'Latitude', 'Longitude', 'Maximum Wind']
atlantic = pd.read_csv('./atlantic.csv', usecols=track_columns)
pacific = pd.read_csv('./pacific.csv', usecols=track_columns)

In [17]:
# types and trim
# Date becomes text so the year can be sliced off it later. Each coordinate
# loses its final character and the remainder is parsed as a float.
# NOTE(review): the dropped character is presumably the hemisphere letter
# (N/S/E/W), in which case the hemisphere information is discarded here --
# confirm that no rows lie in the other hemisphere.
atlantic = atlantic.astype({'Date': str})
pacific = pacific.astype({'Date': str})
for basin in (atlantic, pacific):
    for coord in ('Latitude', 'Longitude'):
        basin[coord] = basin[coord].str[:-1].astype(float)

In [18]:
# group by year and ID with means of Lat, Long and Max Wind for every unique ID
# The year is the first four characters of the Date string. The full Date
# column is still in the frame as text, so numeric_only=True excludes it from
# the mean explicitly: pandas >= 2.0 raises TypeError on non-numeric columns
# instead of silently dropping them.
atlantic = atlantic.groupby([atlantic.Date.str[:4], 'ID']).mean(numeric_only=True).round(2)
pacific = pacific.groupby([pacific.Date.str[:4], 'ID']).mean(numeric_only=True).round(2)
print(atlantic)
#print(pacific)

               Latitude  Longitude  Maximum Wind
Date ID                                         
1851 AL011851     28.84      97.91         60.71
     AL021851     22.20      97.60         80.00
     AL031851     12.00      60.00         50.00
     AL041851     26.44      72.63         65.10
     AL051851     32.50      73.50         50.00
...                 ...        ...           ...
2015 AL082015     31.87      61.12         36.92
     AL092015     15.37      44.78         24.47
     AL102015     17.90      41.69         31.76
     AL112015     33.25      47.81         59.14
     AL122015     32.99      63.96         53.00

[1816 rows x 3 columns]


In [None]:
# Initial summary stats

# number of distinct storm IDs per basin, and their sum
atlantic_count = atlantic.groupby('ID').size().count()
pacific_count = pacific.groupby('ID').size().count()
total_count = atlantic_count + pacific_count

# rows per year for each basin and for both basins stacked together
atlantic_groupby = atlantic.groupby('Date').size()
pacific_groupby = pacific.groupby('Date').size()
combined = pd.concat([atlantic, pacific])
combined_groupby = combined.groupby('Date').size()

# change = rows in the five latest years / rows in a five-year baseline;
# each basin uses its own first five years as the baseline
atlantic_latest = atlantic_groupby.tail(5).sum()
atlantic_baseline = atlantic_groupby.head(5).sum()
atlantic_change = (atlantic_latest / atlantic_baseline).round(2)

pacific_latest = pacific_groupby.tail(5).sum()
pacific_baseline = pacific_groupby.head(5).sum()
pacific_change = (pacific_latest / pacific_baseline).round(2)

# the combined baseline is positions 98-102 of the yearly series
# NOTE(review): presumably the first five years present in both basins -- confirm
combined_latest = combined_groupby.tail(5).sum()
combined_baseline = combined_groupby.iloc[98:103].sum()
total_change = (combined_latest / combined_baseline).round(2)

print(atlantic_change, pacific_change, total_change)

In [19]:
# plot: Hurricane frequency of occurrence by year

# values: one row per year holding the number of (year, ID) rows in it
atlantic_counts_by_year = atlantic.groupby('Date').size().to_frame('Count').reset_index()
pacific_counts_by_year = pacific.groupby('Date').size().to_frame('Count').reset_index()

# plot configuration
plt.rcParams.update({'font.size': 16})
fig, axes = plt.subplots(1, 2, figsize=(20, 6))

x_label = 'Date'
y_label = 'Count'

# One panel per basin; both panels are drawn the same way.
panels = [('Atlantic', atlantic_counts_by_year), ('Pacific', pacific_counts_by_year)]
for ax, (title, counts_by_year) in zip(axes, panels):
    ax.xaxis.set_major_locator(plt.MaxNLocator(10))
    ax.set_facecolor('#89cff0')
    ax.set_title(title)

    # Years are stored as text. The builtin int is used for the conversion
    # because the np.int alias was removed in NumPy 1.24.
    x_values = counts_by_year['Date'].astype(int).to_numpy()
    y_values = counts_by_year['Count'].tolist()

    ax.scatter(x_values, y_values, marker='o', alpha=1, s=80, c='blue', edgecolors='blue')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    # regression line: least-squares fit of the yearly count against the year.
    # Only the fitted line is drawn here; the points come from the scatter above.
    slope, intercept, r_value, p_value, std_err = stats.linregress(x_values, y_values)
    line = slope * x_values + intercept
    ax.plot(x_values, line, linewidth=5, color='C1')

plt.show()

NameError: name 'plt' is not defined

In [20]:
# plot:  Hurricane max wind by year

# values: yearly mean of every column, rounded to two decimals
# NOTE(review): the Atlantic frame drops the row labelled 116 after
# reset_index -- presumably an outlier year; confirm why it is excluded.
atlantic_max_wind_by_year = atlantic.groupby('Date').mean().round(2).reset_index().drop([116])
pacific_max_wind_by_year = pacific.groupby('Date').mean().round(2).reset_index()

# plot configuration
plt.rcParams.update({'font.size': 16})
fig, axes = plt.subplots(1, 2, figsize=(20, 6))

x_label = 'Date'
y_label = 'Max Wind(avg)'

# One panel per basin; both panels are drawn the same way.
panels = [('Atlantic', atlantic_max_wind_by_year), ('Pacific', pacific_max_wind_by_year)]
for ax, (title, wind_by_year) in zip(axes, panels):
    ax.xaxis.set_major_locator(plt.MaxNLocator(10))
    ax.set_facecolor('#89cff0')
    ax.set_title(title)

    # Years are stored as text. The builtin int is used for the conversion
    # because the np.int alias was removed in NumPy 1.24.
    x_values = wind_by_year['Date'].astype(int).to_numpy()
    y_values = wind_by_year['Maximum Wind'].tolist()

    ax.scatter(x_values, y_values, marker='o', alpha=1, s=80, c='blue', edgecolors='blue')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    # regression line: least-squares fit of the yearly mean wind against the year.
    # Only the fitted line is drawn here; the points come from the scatter above.
    slope, intercept, r_value, p_value, std_err = stats.linregress(x_values, y_values)
    line = slope * x_values + intercept
    ax.plot(x_values, line, linewidth=5, color='C1')

plt.show()

NameError: name 'plt' is not defined

In [21]:
# plot: Atlantic Hurricane location by year (Lat/Lng average)

# values: mean latitude/longitude of all rows in each year.
# The two columns are selected with a list; selecting them with a bare tuple
# (groupby(...)['Latitude', 'Longitude']) is rejected by pandas >= 2.0.
atlantic_location_avg = atlantic.groupby('Date')[['Latitude', 'Longitude']].mean().round(2).reset_index()

# plot configuration
# Atlantic
ax = plt.axes(projection=ccrs.PlateCarree())
ax.stock_img()

# average position in the first and in the last year of the table
atlantic_lon = atlantic_location_avg['Longitude'].iloc[0]
atlantic_lat = atlantic_location_avg['Latitude'].iloc[0]
atlantic_lon2 = atlantic_location_avg['Longitude'].iloc[-1]
atlantic_lat2 = atlantic_location_avg['Latitude'].iloc[-1]

# NOTE(review): longitudes are negated everywhere below, presumably because
# the stored values are degrees west without a sign -- confirm.
# Window of +/-40 deg longitude and +/-30 deg latitude around the first
# year's position, given in (x0, x1, y0, y1) order with min before max.
ax.set_extent([-atlantic_lon - 40, -atlantic_lon + 40, atlantic_lat - 30, atlantic_lat + 30])
ax.set_title('Atlantic')

# great-circle path between the two positions
plt.plot([-atlantic_lon, -atlantic_lon2], [atlantic_lat, atlantic_lat2],
         color='red', linewidth=2, marker='o',
         transform=ccrs.Geodetic(),
         )

# straight line between the same two positions in map coordinates
plt.plot([-atlantic_lon, -atlantic_lon2], [atlantic_lat, atlantic_lat2],
         color='gray', linestyle='--',
         transform=ccrs.PlateCarree(),
         )

# year labels, offset from the two end points
plt.text(-atlantic_lon + 35, atlantic_lat - 4, atlantic_location_avg['Date'].iloc[0],
         horizontalalignment='right',
         transform=ccrs.Geodetic(), fontsize=10)

plt.text(-atlantic_lon2 - 34, atlantic_lat2 - 1, atlantic_location_avg['Date'].iloc[-1],
         horizontalalignment='left', transform=ccrs.Geodetic(),
         fontsize=10)

plt.show()

NameError: name 'plt' is not defined

In [22]:
# plot: Pacific Hurricane location by year (Lat/Lng average)

# values: mean latitude/longitude of all rows in each year.
# The two columns are selected with a list; selecting them with a bare tuple
# (groupby(...)['Latitude', 'Longitude']) is rejected by pandas >= 2.0.
pacific_location_avg = pacific.groupby('Date')[['Latitude', 'Longitude']].mean().round(2).reset_index()

# plot configuration
# Pacific
ax = plt.axes(projection=ccrs.PlateCarree())
ax.stock_img()

# average position in the first and in the last year of the table
pacific_lon = pacific_location_avg['Longitude'].iloc[0]
pacific_lat = pacific_location_avg['Latitude'].iloc[0]
pacific_lon2 = pacific_location_avg['Longitude'].iloc[-1]
pacific_lat2 = pacific_location_avg['Latitude'].iloc[-1]

# NOTE(review): longitudes are negated everywhere below, presumably because
# the stored values are degrees west without a sign -- confirm.
# Window of +/-40 deg longitude and +/-30 deg latitude around the first
# year's position, given in (x0, x1, y0, y1) order with min before max.
ax.set_extent([-pacific_lon - 40, -pacific_lon + 40, pacific_lat - 30, pacific_lat + 30])
ax.set_title('Pacific')

# great-circle path between the two positions
plt.plot([-pacific_lon, -pacific_lon2], [pacific_lat, pacific_lat2],
         color='red', linewidth=2, marker='o',
         transform=ccrs.Geodetic(),
         )

# straight line between the same two positions in map coordinates
plt.plot([-pacific_lon, -pacific_lon2], [pacific_lat, pacific_lat2],
         color='gray', linestyle='--',
         transform=ccrs.PlateCarree(),
         )

# year labels, offset from the two end points
plt.text(-pacific_lon + 10, pacific_lat - 5, pacific_location_avg['Date'].iloc[0],
         horizontalalignment='right',
         transform=ccrs.Geodetic(), fontsize=10)

plt.text(-pacific_lon2 - 10, pacific_lat2 - 3, pacific_location_avg['Date'].iloc[-1],
         horizontalalignment='left', transform=ccrs.Geodetic(),
         fontsize=10)

plt.show()

NameError: name 'plt' is not defined

In [23]:
# Average bearing of location change by year - https://www.movable-type.co.uk/scripts/latlong.html

# Atlantic
# one (latitude, longitude) pair per year, in table order
readings = [
    (lat, lng)
    for lat, lng in zip(atlantic_location_avg['Latitude'], atlantic_location_avg['Longitude'])
]

def bearings(points=None):
    """Return the initial compass bearing between consecutive readings.

    Args:
        points: optional sequence of (latitude, longitude) pairs in degrees.
            When omitted, the notebook-level ``readings`` list is used.

    Returns:
        A list with one bearing in degrees per consecutive pair, i.e.
        ``len(points) - 1`` entries (empty for fewer than two points). Each
        value is rounded to two decimals and normalised into [0, 360).
    """
    if points is None:
        points = readings
    points = list(points)

    results = []
    # Pair every reading with the one before it. The first reading has no
    # predecessor, so it must not be paired with the last one.
    for (lat1, lng1), (lat2, lng2) in zip(points, points[1:]):
        # math.sin / math.cos work in radians; the readings are in degrees.
        phi1 = math.radians(lat1)
        phi2 = math.radians(lat2)
        # NOTE(review): the longitude difference is negated, presumably
        # because longitudes are stored as positive degrees west -- confirm
        # against how `readings` is built.
        lngDiff = math.radians(-(lng2 - lng1))
        y = math.sin(lngDiff) * math.cos(phi2)
        x = (math.cos(phi1) * math.sin(phi2)) - (math.sin(phi1) * math.cos(phi2) * math.cos(lngDiff))
        brng = round(math.degrees(math.atan2(y, x)), 2)
        # atan2 yields (-180, 180]; shift into the compass range [0, 360)
        results.append(round((brng + 360) % 360, 2))

    return results
        
#avg_location_differences()
# Evaluate the helper and keep its result under the same name: from here on
# `bearings` is the list of bearings in degrees, no longer the function.
bearings = bearings()

# plot: bearing changes

# Compute areas and colors
# (random demo values; none of them is passed to the scatter call below)
N = 150
r = 2 * np.random.rand(N)
theta = 2 * np.pi * np.random.rand(N)
area = 200 * r**2
colors = theta

fig = plt.figure()
ax = fig.add_subplot(111, projection='polar')
# Compass convention: 0 degrees at the top (north), angles growing clockwise.
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
ax.set_title('Atlantic yearly bearing change of hurricane onset', y=1.2, fontsize=14)
# A polar axes expects the angle in radians, so the degree values are
# converted for theta. The radius remains the bearing in degrees, and the same
# values drive the colour so that cmap actually takes effect.
c = ax.scatter(np.radians(bearings), bearings, c=bearings, s=80, cmap='hsv', alpha=0.75)


NameError: name 'plt' is not defined

In [24]:
# Pacific
# one (latitude, longitude) pair per year, in table order
readings = [
    (lat, lng)
    for lat, lng in zip(pacific_location_avg['Latitude'], pacific_location_avg['Longitude'])
]

def bearings(points=None):
    """Return the initial compass bearing between consecutive readings.

    Args:
        points: optional sequence of (latitude, longitude) pairs in degrees.
            When omitted, the notebook-level ``readings`` list is used.

    Returns:
        A list with one bearing in degrees per consecutive pair, i.e.
        ``len(points) - 1`` entries (empty for fewer than two points). Each
        value is rounded to two decimals and normalised into [0, 360).
    """
    if points is None:
        points = readings
    points = list(points)

    results = []
    # Pair every reading with the one before it. The first reading has no
    # predecessor, so it must not be paired with the last one.
    for (lat1, lng1), (lat2, lng2) in zip(points, points[1:]):
        # math.sin / math.cos work in radians; the readings are in degrees.
        phi1 = math.radians(lat1)
        phi2 = math.radians(lat2)
        # NOTE(review): the longitude difference is negated, presumably
        # because longitudes are stored as positive degrees west -- confirm
        # against how `readings` is built.
        lngDiff = math.radians(-(lng2 - lng1))
        y = math.sin(lngDiff) * math.cos(phi2)
        x = (math.cos(phi1) * math.sin(phi2)) - (math.sin(phi1) * math.cos(phi2) * math.cos(lngDiff))
        brng = round(math.degrees(math.atan2(y, x)), 2)
        # atan2 yields (-180, 180]; shift into the compass range [0, 360)
        results.append(round((brng + 360) % 360, 2))

    return results
        
#avg_location_differences()
# Evaluate the helper and keep its result under the same name: from here on
# `bearings` is the list of bearings in degrees, no longer the function.
bearings = bearings()

# plot: bearing changes

# Compute areas and colors
# (random demo values; none of them is passed to the scatter call below)
N = 150
r = 2 * np.random.rand(N)
theta = 2 * np.pi * np.random.rand(N)
area = 200 * r**2
colors = theta

fig = plt.figure()
ax = fig.add_subplot(111, projection='polar')
# Compass convention: 0 degrees at the top (north), angles growing clockwise.
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
ax.set_title('Pacific yearly bearing change of hurricane onset', y=1.2, fontsize=14)
# A polar axes expects the angle in radians, so the degree values are
# converted for theta. The radius remains the bearing in degrees, and the same
# values drive the colour so that cmap actually takes effect.
c = ax.scatter(np.radians(bearings), bearings, c=bearings, s=80, cmap='hsv', alpha=0.75)

NameError: name 'plt' is not defined

In [25]:
# Summary statistics and plots

# Total data: both basins stacked into a single frame
total_data = pd.concat([atlantic, pacific])

# Central tendency measures for max wind

# values: mean Maximum Wind per year, keeping only the non-negative means
max_wind_data = total_data.groupby('Date')['Maximum Wind'].mean().reset_index()
yearly_wind = max_wind_data['Maximum Wind']
aggregate_max_wind_data = yearly_wind[yearly_wind >= 0].tolist()
mean = np.mean(aggregate_max_wind_data)
std = np.std(aggregate_max_wind_data)
# bin centres from -2.5 to 2.5 in steps of 0.5
x_values = np.arange(-2.5, 3.0, .5)

def generate_values(data, center=None, spread=None, bins=None):
    """Count how many readings fall into each half-standard-deviation bin.

    Args:
        data: iterable of numeric readings.
        center: value subtracted from every reading; defaults to the
            notebook-level ``mean``.
        spread: value the difference is divided by; defaults to the
            notebook-level ``std``.
        bins: non-empty iterable of bin centres; defaults to the
            notebook-level ``x_values``.

    Returns:
        A dict mapping each bin centre (in the order given by ``bins``) to the
        number of readings assigned to it. Readings whose rounded score lies
        beyond the outermost centres are counted in the nearest edge bin, so
        the counts always add up to the number of readings.
    """
    center = mean if center is None else center
    spread = std if spread is None else spread
    bins = x_values if bins is None else bins

    values = {el: 0 for el in bins}
    lowest, highest = min(values), max(values)
    for reading in data:
        stds = (reading - center) / spread
        # snap to the nearest multiple of 0.5
        stds = round(stds / .5) * .5
        # an outlier would otherwise be a missing key and raise KeyError
        stds = min(max(stds, lowest), highest)
        values[stds] += 1
    return values

# counts per bin, keyed by bin centre in the same order as x_values
y_values = generate_values(aggregate_max_wind_data)

# plot: Central tendency measures of max wind 

# plot configuration
plt.figure(figsize=(20,6))
plt.rcParams.update({'font.size': 16})

ax = plt.subplot(1,2,1)
ax.set_title('Total Hurricane Maximum Wind Distribution', fontsize=18)
x_label = 'standard deviation (x̄={}, σ={})'.format(mean.round(2), std.round(2))
y_label = 'frequency'

plt.xlabel(x_label)
plt.ylabel(y_label)

# A dict view is not a sequence, so the counts are turned into a list to
# give matplotlib exactly one height per bar. The bar width matches the 0.5
# spacing of the bin centres so neighbouring bars do not overlap.
plt.bar(x_values, list(y_values.values()), width=.5, edgecolor='red', alpha=.9)
plt.show()

NameError: name 'plt' is not defined