In [1]:
import numpy
import random
from matplotlib import pyplot
from matplotlib import colors
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import matplotlib.patches as mpatches
import datetime
import cartopy.crs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from PIL import Image, ImageDraw
import pandas
from sklearn.linear_model import LinearRegression, LogisticRegression
from mpl_toolkits.mplot3d import Axes3D
%matplotlib notebook

In [2]:
# Load the ten-minute sustained windspeed table from CSV
# (the preview shows one row per typhoon and one column per windspeed station)
Windspeed_File = pandas.read_csv(filepath_or_buffer="Ten_Min_Windspeed_Data_Output.csv")
Windspeed_File.head(5)

Unnamed: 0,Typhoon Code,Typhoon Name,Bluff Head (Stanley),Central Pier,Cheung Chau,Cheung Chau Beach,Green Island,Hong Kong International Airport,Kai Tak,King's Park,...,Tai Mo Shan,Tai Po Kau,Tap Mun East,Tate's Cairn,Tseung Kwan O,Tsing Yi Shell Oil Depot,Tuen Mun Government Offices,Waglan Island,Wetland Park,Wong Chuk Hang
0,(2016,Nangka,45.0,48.0,60,70.0,74.0,48,37.0,32,...,,43.0,58.0,77.0,20.0,26.0,16.0,85.0,23.0,35.0
1,(2007,Higos,67.0,51.0,108,85.0,60.0,68,56.0,43,...,,54.0,76.0,89.0,28.0,39.0,,89.0,30.0,38.0
2,(2003,Sinlaku,50.0,47.0,77,66.0,54.0,57,45.0,38,...,87.0,45.0,62.0,67.0,23.0,33.0,30.0,73.0,22.0,33.0
3,(2002,Nuri,,40.0,57,58.0,52.0,46,40.0,31,...,83.0,39.0,61.0,66.0,19.0,30.0,30.0,60.0,19.0,29.0
4,(1914,Kajiki,38.0,41.0,63,60.0,59.0,40,32.0,30,...,74.0,42.0,53.0,62.0,18.0,26.0,15.0,68.0,17.0,28.0


In [3]:
# Load the typhoon closest-point-of-approach (CPA) table from CSV
# (signal issued, CPA distance, bearing, intensity, radius of maximum winds, date and time)
Typhoon_CPA_File = pandas.read_csv(filepath_or_buffer="Typhoon_CPA_Data.csv")
Typhoon_CPA_File.head(5)

Unnamed: 0,Typhoon Code,Typhoon Name,Highest Typhoon Signal Issued,Closest Point of Approach (km),CPA Bearing,CPA Intensity (km/h),CPA Radius of Maximum Winds (km),CPA Date,CPA Time
0,(2016,Nangka,8,530,SSW,85,130,13/10/2020,17:00
1,(2007,Higos,9,80,WSW,130,28,19/8/2020,05:00
2,(2003,Sinlaku,3,580,SW,55,250,1/8/2020,10:00
3,(2002,Nuri,3,190,SSW,75,60,14/6/2020,02:00
4,(1914,Kajiki,3,330,S,55,185,1/9/2019,21:00


In [4]:
# Load the table of each station's windspeed ratio to the Hong Kong average from CSV
# (the preview shows one row per quadrant and one column per windspeed station)
Windspeed_Ratio_File = pandas.read_csv(filepath_or_buffer="Windspeed_Ratio_To_HK_Average_Output.csv")
Windspeed_Ratio_File.head(5)

Unnamed: 0,Quadrant,Bluff Head (Stanley),Central Pier,Cheung Chau,Cheung Chau Beach,Green Island,Hong Kong International Airport,Kai Tak,King's Park,Lau Fau Shan,...,Tai Mo Shan,Tai Po Kau,Tate's Cairn,Tseung Kwan O,Tsing Yi Shell Oil Depot,Tuen Mun Government Offices,Waglan Island,Wetland Park,Wong Chuk Hang,Unnamed: 21
0,Eastern,0.982317,1.020018,1.331294,1.410381,1.337948,1.09286,0.879183,0.664276,0.981458,...,1.789653,0.927412,1.468669,0.482496,0.687209,0.616674,1.456591,0.566153,0.722664,
1,Southern,1.032203,1.015392,1.370514,1.320824,1.327907,1.124406,0.907685,0.682041,0.917341,...,1.703046,0.902764,1.467367,0.495218,0.680184,0.627353,1.520535,0.574582,0.749956,
2,Western,1.0314,0.92726,1.398986,1.312252,1.441225,1.123915,0.876423,0.674758,0.958148,...,1.741025,0.856989,1.650741,0.523875,0.715327,0.644059,1.600562,0.58821,0.667983,
3,Northern,0.995543,0.98557,1.349687,1.210644,1.278806,1.166646,0.935403,0.681322,1.071375,...,1.792995,0.931712,1.463281,0.511684,0.829104,0.74086,1.415606,0.606081,0.734325,


In [5]:
# Drop Typhoon Code and Typhoon Name Columns From Windspeed File
# Only the per-station windspeed columns remain afterwards.
# errors="ignore" keeps this cell re-runnable: it overwrites Windspeed_File, so on a
# second run the two columns are already gone and a plain drop would raise KeyError.
Windspeed_File = Windspeed_File.drop(columns=["Typhoon Name", "Typhoon Code"], errors="ignore")
Windspeed_File.head()

Unnamed: 0,Bluff Head (Stanley),Central Pier,Cheung Chau,Cheung Chau Beach,Green Island,Hong Kong International Airport,Kai Tak,King's Park,Lamma Island,Lau Fau Shan,...,Tai Mo Shan,Tai Po Kau,Tap Mun East,Tate's Cairn,Tseung Kwan O,Tsing Yi Shell Oil Depot,Tuen Mun Government Offices,Waglan Island,Wetland Park,Wong Chuk Hang
0,45.0,48.0,60,70.0,74.0,48,37.0,32,42.0,30,...,,43.0,58.0,77.0,20.0,26.0,16.0,85.0,23.0,35.0
1,67.0,51.0,108,85.0,60.0,68,56.0,43,58.0,38,...,,54.0,76.0,89.0,28.0,39.0,,89.0,30.0,38.0
2,50.0,47.0,77,66.0,54.0,57,45.0,38,48.0,42,...,87.0,45.0,62.0,67.0,23.0,33.0,30.0,73.0,22.0,33.0
3,,40.0,57,58.0,52.0,46,40.0,31,39.0,35,...,83.0,39.0,61.0,66.0,19.0,30.0,30.0,60.0,19.0,29.0
4,38.0,41.0,63,60.0,59.0,40,32.0,30,36.0,36,...,74.0,42.0,53.0,62.0,18.0,26.0,15.0,68.0,17.0,28.0


In [6]:
# Convert Windspeed Stations Column Names Into List
Windspeed_Stations_List = Windspeed_File.columns.tolist()
print (Windspeed_Stations_List)
print ()
#
# Create Array of Typhoons Windspeeds for All Typhoons at All Stations
# The table is flattened row by row (C order): all stations of the first typhoon,
# then all stations of the second typhoon, and so on.
All_Windspeed_Array = Windspeed_File.to_numpy().ravel(order='C')
print (All_Windspeed_Array)
#

['Bluff Head (Stanley)', 'Central Pier', 'Cheung Chau', 'Cheung Chau Beach', 'Green Island', 'Hong Kong International Airport', 'Kai Tak', "King's Park", 'Lamma Island', 'Lau Fau Shan', 'Ngong Ping', 'North Point', 'Peng Chau', 'Ping Chau', 'Sai Kung', 'Sha Chau', 'Sha Lo Wan', 'Sha Tin', 'Shek Kong', 'Star Ferry (Kowloon)', 'Ta Kwu Ling', 'Tai Mei Tuk', 'Tai Mo Shan', 'Tai Po Kau', 'Tap Mun East', "Tate's Cairn", 'Tseung Kwan O', 'Tsing Yi Shell Oil Depot', 'Tuen Mun Government Offices', 'Waglan Island', 'Wetland Park', 'Wong Chuk Hang']

[45. 48. 60. ... 41. nan 15.]


In [7]:
# Create DataFrame Containing Each Variable for All Typhoons at All Stations
#
# Every typhoon row of the CPA table is repeated once per windspeed station, and the
# station names are cycled once per typhoon. Row k of the result therefore pairs
# typhoon k // Number_Of_Stations with station k % Number_Of_Stations, which is the
# same row-major order in which All_Windspeed_Array was flattened.
Number_Of_Typhoons = len(Typhoon_CPA_File)
Number_Of_Stations = len(Windspeed_Stations_List)
#
# Fail early with a readable message if the windspeed table and the CPA table do not
# describe the same number of typhoons (the DataFrame constructor would otherwise
# complain about mismatched array lengths).
if len(All_Windspeed_Array) != Number_Of_Typhoons * Number_Of_Stations:
    raise ValueError("All_Windspeed_Array has " + str(len(All_Windspeed_Array)) + " values but " + \
    str(Number_Of_Typhoons) + " typhoons x " + str(Number_Of_Stations) + " stations were expected")
#
def _Repeat_For_Each_Station(CPA_Column_Name):
    """Return the named CPA column as a list with every value repeated once per station."""
    return list(numpy.repeat(Typhoon_CPA_File[CPA_Column_Name].to_numpy(), Number_Of_Stations))
#
All_Typhoon_Code_List = _Repeat_For_Each_Station("Typhoon Code")
All_Typhoon_Name_List = _Repeat_For_Each_Station("Typhoon Name")
# List repetition cycles through the full station list once per typhoon
All_Windspeed_Stations_List = list(Windspeed_Stations_List) * Number_Of_Typhoons
All_Intensity_List = _Repeat_For_Each_Station("CPA Intensity (km/h)")
All_Distance_List = _Repeat_For_Each_Station("Closest Point of Approach (km)")
All_R_Max_List = _Repeat_For_Each_Station("CPA Radius of Maximum Winds (km)")
All_Bearing_List = _Repeat_For_Each_Station("CPA Bearing")
Main_DataFrame = pandas.DataFrame({"Typhoon Code": All_Typhoon_Code_List, "Typhoon Name": All_Typhoon_Name_List, \
"Windspeed Station": All_Windspeed_Stations_List, "Bearing": All_Bearing_List, "Intensity": All_Intensity_List, \
"Distance": All_Distance_List, "Radius of Max Winds": All_R_Max_List, "Windspeed": All_Windspeed_Array})
print (len(Main_DataFrame))
Main_DataFrame.head()

3360


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Bearing,Intensity,Distance,Radius of Max Winds,Windspeed
0,(2016,Nangka,Bluff Head (Stanley),SSW,85,530,130,45.0
1,(2016,Nangka,Central Pier,SSW,85,530,130,48.0
2,(2016,Nangka,Cheung Chau,SSW,85,530,130,60.0
3,(2016,Nangka,Cheung Chau Beach,SSW,85,530,130,70.0
4,(2016,Nangka,Green Island,SSW,85,530,130,74.0


In [8]:
# Include Quadrant of CPA Bearing Into DataFrame
Directions_Circle = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"]
#
# Each quadrant covers four consecutive compass points of Directions_Circle:
#   Eastern  = NE, ENE, E, ESE      Southern = SE, SSE, S, SSW
#   Western  = SW, WSW, W, WNW      Northern = NW, NNW, N, NNE (wraps around the list end)
Quadrant_Bearings = {
    "Eastern": Directions_Circle[2:6],
    "Southern": Directions_Circle[6:10],
    "Western": Directions_Circle[10:14],
    "Northern": Directions_Circle[14:] + Directions_Circle[:2],
}
# (primary, secondary) plotting colours of each quadrant
Quadrant_Colours = {
    "Eastern": ("orange", "darkorange"),
    "Southern": ("limegreen", "darkgreen"),
    "Western": ("deepskyblue", "mediumblue"),
    "Northern": ("red", "firebrick"),
}
# Colours used when the bearing is not one of the sixteen compass points
Unknown_Quadrant_Colours = ("gray", "dimgray")
#
# Invert the table once so that each row needs a single dictionary lookup
Bearing_To_Quadrant = {Bearing: Quadrant_Name for Quadrant_Name, Bearings in Quadrant_Bearings.items() \
for Bearing in Bearings}
#
# Unrecognised (or missing) bearings get a NaN quadrant and the gray colours
All_Quadrant_List = [Bearing_To_Quadrant.get(Bearing, numpy.nan) for Bearing in Main_DataFrame["Bearing"]]
Colour_Code_Primary_List = [Quadrant_Colours.get(Quadrant_Name, Unknown_Quadrant_Colours)[0] \
for Quadrant_Name in All_Quadrant_List]
Colour_Code_Secondary_List = [Quadrant_Colours.get(Quadrant_Name, Unknown_Quadrant_Colours)[1] \
for Quadrant_Name in All_Quadrant_List]
Main_DataFrame["Quadrant"] = All_Quadrant_List
Main_DataFrame["Colour Code 1"] = Colour_Code_Primary_List
Main_DataFrame["Colour Code 2"] = Colour_Code_Secondary_List
Main_DataFrame.head()

Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Bearing,Intensity,Distance,Radius of Max Winds,Windspeed,Quadrant,Colour Code 1,Colour Code 2
0,(2016,Nangka,Bluff Head (Stanley),SSW,85,530,130,45.0,Southern,limegreen,darkgreen
1,(2016,Nangka,Central Pier,SSW,85,530,130,48.0,Southern,limegreen,darkgreen
2,(2016,Nangka,Cheung Chau,SSW,85,530,130,60.0,Southern,limegreen,darkgreen
3,(2016,Nangka,Cheung Chau Beach,SSW,85,530,130,70.0,Southern,limegreen,darkgreen
4,(2016,Nangka,Green Island,SSW,85,530,130,74.0,Southern,limegreen,darkgreen


In [9]:
# Include Windspeed Ratio to Average of HK Into DataFrame
#
# Each station column of Windspeed_Ratio_File holds one ratio per quadrant, read here
# from row 0 (Eastern), row 1 (Southern), row 2 (Western) and row 3 (Northern).
Quadrant_Row_Position = {"Eastern": 0, "Southern": 1, "Western": 2, "Northern": 3}
# Stations that are never given a ratio
Stations_Without_Ratio = {"Lamma Island", "Tap Mun East"}
# Extract every usable column once instead of re-indexing the DataFrame for every row
Station_Ratio_Columns = {Column_Name: Windspeed_Ratio_File[Column_Name].to_numpy() \
for Column_Name in Windspeed_Ratio_File.columns if Column_Name not in Stations_Without_Ratio}
#
def _Lookup_Windspeed_Ratio(Station_Name, Quadrant_Name):
    """Return the station's ratio for the quadrant, or NaN when either one is unknown."""
    Station_Ratios = Station_Ratio_Columns.get(Station_Name)
    Row_Position = Quadrant_Row_Position.get(Quadrant_Name)
    # A station without a ratio column, or a row without a recognised quadrant
    # (including a NaN quadrant), gets no ratio rather than raising KeyError.
    if Station_Ratios is None or Row_Position is None:
        return numpy.nan
    return Station_Ratios[Row_Position]
#
All_Windspeed_Ratio_List = [_Lookup_Windspeed_Ratio(Station_Name, Quadrant_Name) for Station_Name, Quadrant_Name in \
zip(Main_DataFrame["Windspeed Station"], Main_DataFrame["Quadrant"])]
Main_DataFrame["Windspeed Ratio to HK Average"] = All_Windspeed_Ratio_List
Main_DataFrame.head()

Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Bearing,Intensity,Distance,Radius of Max Winds,Windspeed,Quadrant,Colour Code 1,Colour Code 2,Windspeed Ratio to HK Average
0,(2016,Nangka,Bluff Head (Stanley),SSW,85,530,130,45.0,Southern,limegreen,darkgreen,1.032203
1,(2016,Nangka,Central Pier,SSW,85,530,130,48.0,Southern,limegreen,darkgreen,1.015392
2,(2016,Nangka,Cheung Chau,SSW,85,530,130,60.0,Southern,limegreen,darkgreen,1.370514
3,(2016,Nangka,Cheung Chau Beach,SSW,85,530,130,70.0,Southern,limegreen,darkgreen,1.320824
4,(2016,Nangka,Green Island,SSW,85,530,130,74.0,Southern,limegreen,darkgreen,1.327907


In [10]:
# Reorder DataFrame Columns Positions
# The target order is spelled out by name rather than built from positional slices of the
# current column list, so re-running this cell always yields the same order instead of
# shuffling the already reordered columns again.
Main_DataFrame_Columns = ["Typhoon Code", "Typhoon Name", "Windspeed Station", "Bearing", \
"Quadrant", "Colour Code 1", "Colour Code 2", \
"Intensity", "Distance", "Radius of Max Winds", "Windspeed", \
"Windspeed Ratio to HK Average"]
print (Main_DataFrame_Columns)
Main_DataFrame = Main_DataFrame[Main_DataFrame_Columns]
Main_DataFrame.head()

['Typhoon Code', 'Typhoon Name', 'Windspeed Station', 'Bearing', 'Quadrant', 'Colour Code 1', 'Colour Code 2', 'Intensity', 'Distance', 'Radius of Max Winds', 'Windspeed', 'Windspeed Ratio to HK Average']


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed,Windspeed Ratio to HK Average
0,(2016,Nangka,Bluff Head (Stanley),SSW,Southern,limegreen,darkgreen,85,530,130,45.0,1.032203
1,(2016,Nangka,Central Pier,SSW,Southern,limegreen,darkgreen,85,530,130,48.0,1.015392
2,(2016,Nangka,Cheung Chau,SSW,Southern,limegreen,darkgreen,85,530,130,60.0,1.370514
3,(2016,Nangka,Cheung Chau Beach,SSW,Southern,limegreen,darkgreen,85,530,130,70.0,1.320824
4,(2016,Nangka,Green Island,SSW,Southern,limegreen,darkgreen,85,530,130,74.0,1.327907


In [11]:
# Create DataFrame With No NaN Values
# A NaN windspeed compares False against 0.0, so this mask keeps only the rows
# that carry a (non-negative) windspeed reading.
Has_Windspeed = Main_DataFrame["Windspeed"].ge(0.0)
Main_DataFrame_No_NaN = Main_DataFrame.loc[Has_Windspeed]
print (len(Main_DataFrame_No_NaN))
Main_DataFrame_No_NaN.head()

2788


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed,Windspeed Ratio to HK Average
0,(2016,Nangka,Bluff Head (Stanley),SSW,Southern,limegreen,darkgreen,85,530,130,45.0,1.032203
1,(2016,Nangka,Central Pier,SSW,Southern,limegreen,darkgreen,85,530,130,48.0,1.015392
2,(2016,Nangka,Cheung Chau,SSW,Southern,limegreen,darkgreen,85,530,130,60.0,1.370514
3,(2016,Nangka,Cheung Chau Beach,SSW,Southern,limegreen,darkgreen,85,530,130,70.0,1.320824
4,(2016,Nangka,Green Island,SSW,Southern,limegreen,darkgreen,85,530,130,74.0,1.327907


In [12]:
# Create Colour Legend
#Orange_Patch = mpatches.Patch(color='darkorange', label='Eastern Quadrant')
#Green_Patch = mpatches.Patch(color='limegreen', label='Southern Quadrant')
#Blue_Patch = mpatches.Patch(color='deepskyblue', label='Western Quadrant')
#Red_Patch = mpatches.Patch(color='red', label='Northern Quadrant')
#pyplot.legend(handles=[Orange_Patch, Green_Patch, Blue_Patch, Red_Patch], loc=2, fontsize=14)

In [13]:
# Calculate Multivariable Linear Regression
# Windspeed is regressed on the three CPA variables; the model is then scored on,
# and used to predict, the same rows it was fitted to.
Linear_Reg_X_Variables = ["Intensity", "Distance", "Radius of Max Winds"]
Linear_Reg_Features = Main_DataFrame_No_NaN[Linear_Reg_X_Variables]
Linear_Reg_Target = Main_DataFrame_No_NaN["Windspeed"]
Windspeed_Linear_Reg_Model = LinearRegression()
Windspeed_Linear_Reg_Model.fit(Linear_Reg_Features, Linear_Reg_Target)
Windspeed_Linear_Reg_Slope = Windspeed_Linear_Reg_Model.coef_
Windspeed_Linear_Reg_Intercept = Windspeed_Linear_Reg_Model.intercept_
Windspeed_Linear_Reg_R_Squared = Windspeed_Linear_Reg_Model.score(Linear_Reg_Features, Linear_Reg_Target)
Windspeed_Linear_Reg_Predictions = Windspeed_Linear_Reg_Model.predict(Linear_Reg_Features)
# Each heading goes on its own line, followed by the value on the next line
print ("Slopes (Intensity, Distance, Radius of Max Winds):", Windspeed_Linear_Reg_Slope, sep="\n")
print ("y Intercept:", Windspeed_Linear_Reg_Intercept, sep="\n")
print ("R Squared Value:", Windspeed_Linear_Reg_R_Squared, sep="\n")
print ("Predictions:", Windspeed_Linear_Reg_Predictions, sep="\n")

Slopes (Intensity, Distance, Radius of Max Winds):
[ 0.3144331  -0.0586741   0.16356595]
y Intercept:
20.25251209434451
R Squared Value:
0.26748656113419966
Predictions:
[37.14562288 37.14562288 37.14562288 ... 38.66958938 38.66958938
 38.66958938]


In [14]:
# Draw Scatter Plot of Windspeeds VS Different Variables of All Typhoons With Predicted Windspeeds
# x Axis: Typhoon Intensity at CPA
# y Axis: Typhoon Distance from Hong Kong at CPA
# z Axis: 10-Minute Sustained Windspeed
# Point Size: Typhoon Radius of Maximum Winds at CPA (divided by 5; prediction markers are 2.5x larger)
# Colour: limegreen circles are actual windspeeds, darkgreen stars are regression predictions
Fig = pyplot.figure(figsize=(10,8))
# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and later removed;
# add_subplot is the supported way to request a 3D axes.
Axes = Fig.add_subplot(111, projection='3d')
R_Max_Point_Size = Main_DataFrame_No_NaN["Radius of Max Winds"] / 5
Axes.scatter(Main_DataFrame_No_NaN["Intensity"], Main_DataFrame_No_NaN["Distance"], Main_DataFrame_No_NaN["Windspeed"], \
s=R_Max_Point_Size, c='limegreen', marker='o', alpha=0.30, label='Actual Windspeed')
Axes.set_title('3D Scatter Plot of Actual and Predicted Windspeeds VS Different Variables', fontsize=18)
Axes.set_xlabel('Typhoon Intensity at CPA (km/h)', fontsize=10)
Axes.set_ylabel('Distance Between Typhoon and HK at CPA (km)', fontsize=10)
Axes.set_zlabel('10-Minute Sustained Windspeed (km/h)', fontsize=10)
# The first argument of grid() is a visibility flag, so pass a real boolean
Axes.grid(True, linestyle="-", linewidth=0.8, color='silver')
#
# Plot Multivariable Regression Predictions Onto Graph
Axes.scatter(Main_DataFrame_No_NaN["Intensity"], Main_DataFrame_No_NaN["Distance"], Windspeed_Linear_Reg_Predictions, \
s=R_Max_Point_Size*2.5, c='darkgreen', marker='*', label='Predicted Windspeed')
Axes.legend(loc=1, fontsize=10)
pyplot.show()

<IPython.core.display.Javascript object>

In [15]:
# Draw Scatter Plot of Actual Windspeeds VS Predicted Windspeeds By Linear Regression Model
# x Axis: windspeed predicted by the linear regression model
# y Axis: actual windspeed of the same row
Fig = pyplot.figure(figsize=(10,8))
Axes = Fig.gca()
Axes.scatter(Windspeed_Linear_Reg_Predictions, Main_DataFrame_No_NaN["Windspeed"], c='limegreen', marker='o', s=18,\
alpha=0.30)
Axes.set_title('Scatter Plot of Actual VS Predicted Windspeeds By Linear Regression', fontsize=20)
Axes.set_xlabel('Predicted Windspeed By Linear Regression Model (km/h)', fontsize=15)
Axes.set_ylabel('Actual Windspeed (km/h)', fontsize=15)
pyplot.rc('xtick', labelsize=10)
pyplot.rc('ytick', labelsize=10)
Axes.set_xlim(0, 100)
Axes.set_ylim(0, 200)
# The first argument of grid() is a visibility flag, so pass a real boolean
Axes.grid(True, linestyle="-", linewidth=0.8, color='silver')
pyplot.show()

<IPython.core.display.Javascript object>

In [16]:
# Use Windspeed Ratio to Average of Hong Kong to Calculate Windspeed Modified For Windspeed Station Geography
# Each windspeed is divided by its station's ratio; a row with a NaN windspeed
# or a NaN ratio yields NaN.
Windspeed_Modified_With_Ratio = Main_DataFrame["Windspeed"].div(Main_DataFrame["Windspeed Ratio to HK Average"])
print (Windspeed_Modified_With_Ratio)
Main_DataFrame["Windspeed Modified With Ratio"] = Windspeed_Modified_With_Ratio
print (len(Main_DataFrame))
Main_DataFrame.head()

0       43.596063
1       47.272374
2       43.779197
3       52.997229
4       55.726786
          ...    
3355          NaN
3356    32.432026
3357    28.147911
3358          NaN
3359    20.756533
Length: 3360, dtype: float64
3360


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed,Windspeed Ratio to HK Average,Windspeed Modified With Ratio
0,(2016,Nangka,Bluff Head (Stanley),SSW,Southern,limegreen,darkgreen,85,530,130,45.0,1.032203,43.596063
1,(2016,Nangka,Central Pier,SSW,Southern,limegreen,darkgreen,85,530,130,48.0,1.015392,47.272374
2,(2016,Nangka,Cheung Chau,SSW,Southern,limegreen,darkgreen,85,530,130,60.0,1.370514,43.779197
3,(2016,Nangka,Cheung Chau Beach,SSW,Southern,limegreen,darkgreen,85,530,130,70.0,1.320824,52.997229
4,(2016,Nangka,Green Island,SSW,Southern,limegreen,darkgreen,85,530,130,74.0,1.327907,55.726786


In [17]:
# Update DataFrame With No NaN Values
# Rebuilt from the current Main_DataFrame, so the result reflects whatever columns
# Main_DataFrame has when this cell runs. A NaN windspeed compares False against 0.0
# and is filtered out.
Main_DataFrame_No_NaN = Main_DataFrame.loc[lambda Frame: Frame["Windspeed"] >= 0.0]
print (len(Main_DataFrame_No_NaN))
Main_DataFrame_No_NaN.head()

2788


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed,Windspeed Ratio to HK Average,Windspeed Modified With Ratio
0,(2016,Nangka,Bluff Head (Stanley),SSW,Southern,limegreen,darkgreen,85,530,130,45.0,1.032203,43.596063
1,(2016,Nangka,Central Pier,SSW,Southern,limegreen,darkgreen,85,530,130,48.0,1.015392,47.272374
2,(2016,Nangka,Cheung Chau,SSW,Southern,limegreen,darkgreen,85,530,130,60.0,1.370514,43.779197
3,(2016,Nangka,Cheung Chau Beach,SSW,Southern,limegreen,darkgreen,85,530,130,70.0,1.320824,52.997229
4,(2016,Nangka,Green Island,SSW,Southern,limegreen,darkgreen,85,530,130,74.0,1.327907,55.726786


In [18]:
# Create DataFrame With No Windspeed Stations < 30 Data Points
# The mask keeps rows whose "Windspeed Ratio to HK Average" is >= 0.0; a NaN ratio
# compares False, so every row without a ratio is dropped.
# NOTE(review): presumably the stations without a ratio are the ones with fewer
# than 30 data points -- confirm against how the ratio file was produced.
Has_Windspeed_Ratio = Main_DataFrame_No_NaN["Windspeed Ratio to HK Average"].ge(0.0)
Main_DataFrame_No_Few = Main_DataFrame_No_NaN.loc[Has_Windspeed_Ratio]
print (len(Main_DataFrame_No_Few))
Main_DataFrame_No_Few.head()

2679


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed,Windspeed Ratio to HK Average,Windspeed Modified With Ratio
0,(2016,Nangka,Bluff Head (Stanley),SSW,Southern,limegreen,darkgreen,85,530,130,45.0,1.032203,43.596063
1,(2016,Nangka,Central Pier,SSW,Southern,limegreen,darkgreen,85,530,130,48.0,1.015392,47.272374
2,(2016,Nangka,Cheung Chau,SSW,Southern,limegreen,darkgreen,85,530,130,60.0,1.370514,43.779197
3,(2016,Nangka,Cheung Chau Beach,SSW,Southern,limegreen,darkgreen,85,530,130,70.0,1.320824,52.997229
4,(2016,Nangka,Green Island,SSW,Southern,limegreen,darkgreen,85,530,130,74.0,1.327907,55.726786


In [19]:
# Calculate Multivariable Linear Regression Using Windspeeds Modified With Windspeed Ratio
# The model is fitted to the ratio-adjusted windspeeds; its predictions are multiplied
# by each row's station ratio again before being stored.
Linear_Reg_X_Variables = ["Intensity", "Distance", "Radius of Max Winds"]
With_Ratio_Features = Main_DataFrame_No_Few[Linear_Reg_X_Variables]
With_Ratio_Target = Main_DataFrame_No_Few["Windspeed Modified With Ratio"]
Windspeed_With_Ratio_Linear_Reg_Model = LinearRegression()
Windspeed_With_Ratio_Linear_Reg_Model.fit(With_Ratio_Features, With_Ratio_Target)
Windspeed_With_Ratio_Linear_Reg_Slope = Windspeed_With_Ratio_Linear_Reg_Model.coef_
Windspeed_With_Ratio_Linear_Reg_Intercept = Windspeed_With_Ratio_Linear_Reg_Model.intercept_
Windspeed_With_Ratio_Linear_Reg_R_Squared = Windspeed_With_Ratio_Linear_Reg_Model.score(With_Ratio_Features, \
With_Ratio_Target)
# Element-wise product of the predictions and the per-row station ratios, as a plain array
Windspeed_With_Ratio_Linear_Reg_Predictions = Windspeed_With_Ratio_Linear_Reg_Model.predict(With_Ratio_Features) * \
Main_DataFrame_No_Few["Windspeed Ratio to HK Average"].to_numpy()
print ("Slopes:", Windspeed_With_Ratio_Linear_Reg_Slope, sep="\n")
print ("y Intercept:", Windspeed_With_Ratio_Linear_Reg_Intercept, sep="\n")
print ("R Squared Value:", Windspeed_With_Ratio_Linear_Reg_R_Squared, sep="\n")
print ("Predictions:", Windspeed_With_Ratio_Linear_Reg_Predictions, sep="\n")

Slopes:
[ 0.31358236 -0.0609879   0.16480835]
y Intercept:
21.6857452067664
R Squared Value:
0.48953653975210154
Predictions:
[38.64749008 38.01805297 51.31442991 ... 23.93795352 56.54169146
 28.05224052]


In [20]:
# Draw Scatter Plot of Windspeeds VS Different Variables of All Typhoons With Predictions Modified With Windspeed Ratios
# x Axis: Typhoon Intensity at CPA
# y Axis: Typhoon Distance from Hong Kong at CPA
# z Axis: 10-Minute Sustained Windspeed
# Point Size: Typhoon Radius of Maximum Winds at CPA (divided by 5; prediction markers are 2.5x larger)
# Colour: limegreen circles are actual windspeeds, darkgreen stars are ratio-modified predictions
Fig = pyplot.figure(figsize=(10,8))
# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and later removed;
# add_subplot is the supported way to request a 3D axes.
Axes = Fig.add_subplot(111, projection='3d')
R_Max_Point_Size = Main_DataFrame_No_Few["Radius of Max Winds"] / 5
Axes.scatter(Main_DataFrame_No_Few["Intensity"], Main_DataFrame_No_Few["Distance"], Main_DataFrame_No_Few["Windspeed"], \
s=R_Max_Point_Size, c='limegreen', marker='o', alpha=0.30, label='Actual Windspeed')
Axes.set_title('3D Scatter Plot of Actual and Predicted Windspeeds VS Different Variables With Windspeed Ratios', fontsize=14)
Axes.set_xlabel('Typhoon Intensity at CPA (km/h)', fontsize=10)
Axes.set_ylabel('Distance Between Typhoon and HK at CPA (km)', fontsize=10)
Axes.set_zlabel('10-Minute Sustained Windspeed (km/h)', fontsize=10)
# The first argument of grid() is a visibility flag, so pass a real boolean
Axes.grid(True, linestyle="-", linewidth=0.8, color='silver')
#
# Plot Multivariable Regression Predictions Onto Graph
Axes.scatter(Main_DataFrame_No_Few["Intensity"], Main_DataFrame_No_Few["Distance"], Windspeed_With_Ratio_Linear_Reg_Predictions, \
s=R_Max_Point_Size*2.5, c='darkgreen', marker='*', label='Predicted Windspeed Modified With Windspeed Ratios')
Axes.legend(loc=1, fontsize=10)
pyplot.show()

<IPython.core.display.Javascript object>

In [21]:
# Draw Scatter Plot of Actual Windspeeds VS Predicted Windspeeds By Linear Regression With Windspeed Ratios to HK Average
# x Axis: windspeed predicted by the ratio-modified linear regression model
# y Axis: actual windspeed of the same row
Fig = pyplot.figure(figsize=(10,8))
Axes = Fig.gca()
Axes.scatter(Windspeed_With_Ratio_Linear_Reg_Predictions, Main_DataFrame_No_Few["Windspeed"], c='limegreen', \
marker='o', s=18, alpha=0.30)
Axes.set_title('Scatter Plot of Actual VS Predicted Windspeeds By Linear Regression With Windspeed Ratios', fontsize=14.5)
Axes.set_xlabel('Predicted Windspeed By Linear Regression Model With Windspeed Ratios to HK Average (km/h)', fontsize=13)
Axes.set_ylabel('Actual Windspeed (km/h)', fontsize=13)
pyplot.rc('xtick', labelsize=10)
pyplot.rc('ytick', labelsize=10)
Axes.set_xlim(0, 200)
Axes.set_ylim(0, 200)
# The first argument of grid() is a visibility flag, so pass a real boolean
Axes.grid(True, linestyle="-", linewidth=0.8, color='silver')
pyplot.show()

<IPython.core.display.Javascript object>

In [22]:
# Separate Data Set Rows According to Quadrant of Typhoon Bearing
# One filtered view of Main_DataFrame_No_Few per quadrant, in the order listed in Quadrants
Quadrants = ["Eastern", "Southern", "Western", "Northern"]
Quadrant_Of_Each_Row = Main_DataFrame_No_Few["Quadrant"]
Main_DataFrame_E_Quadrant = Main_DataFrame_No_Few.loc[Quadrant_Of_Each_Row.eq(Quadrants[0])]
Main_DataFrame_S_Quadrant = Main_DataFrame_No_Few.loc[Quadrant_Of_Each_Row.eq(Quadrants[1])]
Main_DataFrame_W_Quadrant = Main_DataFrame_No_Few.loc[Quadrant_Of_Each_Row.eq(Quadrants[2])]
Main_DataFrame_N_Quadrant = Main_DataFrame_No_Few.loc[Quadrant_Of_Each_Row.eq(Quadrants[3])]
# Row count of each quadrant, one per line
print (len(Main_DataFrame_E_Quadrant), len(Main_DataFrame_S_Quadrant), len(Main_DataFrame_W_Quadrant), \
len(Main_DataFrame_N_Quadrant), sep="\n")
Main_DataFrame_E_Quadrant.head()

520
1007
769
383


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed,Windspeed Ratio to HK Average,Windspeed Modified With Ratio
288,(1826,Yutu,Bluff Head (Stanley),ESE,Eastern,orange,darkorange,90,400,90,15.0,0.982317,15.270017
289,(1826,Yutu,Central Pier,ESE,Eastern,orange,darkorange,90,400,90,28.0,1.020018,27.450508
290,(1826,Yutu,Cheung Chau,ESE,Eastern,orange,darkorange,90,400,90,41.0,1.331294,30.797111
291,(1826,Yutu,Cheung Chau Beach,ESE,Eastern,orange,darkorange,90,400,90,36.0,1.410381,25.52502
292,(1826,Yutu,Green Island,ESE,Eastern,orange,darkorange,90,400,90,50.0,1.337948,37.370653


In [23]:
# Calculate Multivariable Linear Regression Using Windspeeds Modified With Windspeed Ratio Separated By Quadrants
#
# One model is fitted per quadrant to the ratio-adjusted windspeeds. Each model is scored
# on its own quadrant's rows, and its predictions are multiplied by each row's station
# ratio again before being stored. All result lists are ordered like Quadrants.
Linear_Reg_X_Variables = ["Intensity", "Distance", "Radius of Max Winds"]
# Same order as Quadrants: Eastern, Southern, Western, Northern
Quadrant_DataFrames = [Main_DataFrame_E_Quadrant, Main_DataFrame_S_Quadrant, \
Main_DataFrame_W_Quadrant, Main_DataFrame_N_Quadrant]
Windspeed_Quadrant_Ratio_Linear_Reg_Model = []
Windspeed_Quadrant_Ratio_Linear_Reg_Slope = []
Windspeed_Quadrant_Ratio_Linear_Reg_Intercept = []
Windspeed_Quadrant_Ratio_Linear_Reg_R_Squared = []
Windspeed_Quadrant_Ratio_Linear_Reg_Predictions = []
for i, Quadrant_DataFrame in enumerate(Quadrant_DataFrames):
    Quadrant_Features = Quadrant_DataFrame[Linear_Reg_X_Variables]
    Quadrant_Target = Quadrant_DataFrame["Windspeed Modified With Ratio"]
    Quadrant_Model = LinearRegression().fit(Quadrant_Features, Quadrant_Target)
    Windspeed_Quadrant_Ratio_Linear_Reg_Model.append(Quadrant_Model)
    Windspeed_Quadrant_Ratio_Linear_Reg_Slope.append(Quadrant_Model.coef_)
    Windspeed_Quadrant_Ratio_Linear_Reg_Intercept.append(Quadrant_Model.intercept_)
    Windspeed_Quadrant_Ratio_Linear_Reg_R_Squared.append(Quadrant_Model.score(Quadrant_Features, Quadrant_Target))
    # Element-wise product of the predictions and the per-row station ratios, as a plain array
    Windspeed_Quadrant_Ratio_Linear_Reg_Predictions.append(numpy.array(Quadrant_Model.predict(Quadrant_Features) * \
    Quadrant_DataFrame["Windspeed Ratio to HK Average"]))
    print (Quadrants[i], "Quadrant:")
    print ("Slopes:")
    print (Windspeed_Quadrant_Ratio_Linear_Reg_Slope[i])
    print ("y Intercept:")
    print (Windspeed_Quadrant_Ratio_Linear_Reg_Intercept[i])
    print ("R Squared Value:")
    print (Windspeed_Quadrant_Ratio_Linear_Reg_R_Squared[i])
    print ("Predictions:")
    print (Windspeed_Quadrant_Ratio_Linear_Reg_Predictions[i])
    print (len(Windspeed_Quadrant_Ratio_Linear_Reg_Predictions[i]))
    print ("")

Eastern Quadrant:
Slopes:
[ 0.2242439  -0.08448614  0.03776574]
y Intercept:
33.88650220514145
R Squared Value:
0.6148614201619829
Predictions:
[ 23.2543103   24.14678714  31.51560123  33.38782388  31.67313348
  25.87117181  20.81282049  15.7253577   23.23396293  23.09935482
  28.58346926  12.2298908   22.00227905  27.73764502  21.8302295
  13.64975639  15.087334    23.69470853  13.74902953  29.70015148
  42.36629927  21.95454349  34.76766903  11.42209782  16.26824214
  14.59847966  34.48175851  13.40248752  17.10756361  51.32058334
  53.29021525  69.55265577  73.68451593  69.90031807  57.09580768
  45.93239166  34.70472872  51.27567817  94.53032719  50.97860778
  63.08165226  26.99048571  48.55744079  61.21497927  30.12402652
  52.29251042  30.34311518  65.54608928  93.49936265  48.45209185
  76.72973451  25.20774496  35.90283545  32.21779026  76.09875066
  29.57832198  37.75515733  55.02025287  57.13187435  74.56666428
  78.99638772  74.93938934  61.21180962  49.24362975  37.20657143

In [24]:
# Draw Scatter Plot of Windspeeds VS Different Variables of Typhoons By Quadrant With Predictions Modified With Windspeed Ratios
# x Axis: Typhoon Intensity at CPA
# y Axis: Typhoon Distance from Hong Kong at CPA
# z Axis: 10-Minute Sustained Windspeed
# Point Size: Typhoon Radius of Maximum Winds at CPA (divided by 5)
# Colour: Quadrant of Typhoon With Respect to Hong Kong
#         ("Colour Code 1" circles are actual windspeeds, "Colour Code 2" stars are predictions)
#
# Quadrant names and frames are listed in the same order as the per-quadrant prediction list
Quadrant_Plot_Names = ['Eastern', 'Southern', 'Western', 'Northern']
Quadrant_Plot_DataFrames = [Main_DataFrame_E_Quadrant, Main_DataFrame_S_Quadrant, \
Main_DataFrame_W_Quadrant, Main_DataFrame_N_Quadrant]
R_Max_Point_Size = [Quadrant_Plot_DataFrame["Radius of Max Winds"] / 5 for Quadrant_Plot_DataFrame in Quadrant_Plot_DataFrames]
Fig = pyplot.figure(figsize=(10,8))
# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and later removed;
# add_subplot is the supported way to request a 3D axes.
Axes = Fig.add_subplot(111, projection='3d')
# Actual windspeeds first, so the legend lists the four "Actual" entries before the "Predicted" ones
for Quadrant_Plot_Name, Quadrant_Plot_DataFrame, Quadrant_Point_Size in \
zip(Quadrant_Plot_Names, Quadrant_Plot_DataFrames, R_Max_Point_Size):
    Axes.scatter(Quadrant_Plot_DataFrame["Intensity"], Quadrant_Plot_DataFrame["Distance"], \
    Quadrant_Plot_DataFrame["Windspeed"], s=Quadrant_Point_Size, \
    c=Quadrant_Plot_DataFrame["Colour Code 1"], marker='o', alpha=0.30, label=Quadrant_Plot_Name + ' Quadrant Actual')
Axes.set_title('3D Scatter Plot of Actual and Predicted Windspeeds VS Different Variables With Windspeed Ratios By Quadrant', fontsize=12)
Axes.set_xlabel('Typhoon Intensity at CPA (km/h)', fontsize=10)
Axes.set_ylabel('Distance Between Typhoon and HK at CPA (km)', fontsize=10)
Axes.set_zlabel('10-Minute Sustained Windspeed (km/h)', fontsize=10)
# The first argument of grid() is a visibility flag, so pass a real boolean
Axes.grid(True, linestyle="-", linewidth=0.8, color='silver')
#
# Plot Multivariable Regression Predictions Onto Graph
for Quadrant_Plot_Name, Quadrant_Plot_DataFrame, Quadrant_Point_Size, Quadrant_Predictions in \
zip(Quadrant_Plot_Names, Quadrant_Plot_DataFrames, R_Max_Point_Size, Windspeed_Quadrant_Ratio_Linear_Reg_Predictions):
    Axes.scatter(Quadrant_Plot_DataFrame["Intensity"], Quadrant_Plot_DataFrame["Distance"], \
    Quadrant_Predictions, s=Quadrant_Point_Size, \
    c=Quadrant_Plot_DataFrame["Colour Code 2"], marker='*', alpha=0.30, label=Quadrant_Plot_Name + ' Quadrant Predicted')
Axes.legend(loc=2, fontsize=10)
pyplot.show()

<IPython.core.display.Javascript object>

In [25]:
# Draw Scatter Plot of Actual VS Predicted Windspeeds (Linear Regression With Windspeed Ratios) By Quadrant
# x Axis: Predicted Windspeed; y Axis: Actual Windspeed; Colour: Quadrant of Typhoon With Respect to Hong Kong
#
# Quadrants Are Listed in E, S, W, N Order, Matching Indices 0-3 of Windspeed_Quadrant_Ratio_Linear_Reg_Predictions
Plot_Quadrant_Names = ["Eastern", "Southern", "Western", "Northern"]
Plot_Quadrant_DataFrames = [Main_DataFrame_E_Quadrant, Main_DataFrame_S_Quadrant,
                            Main_DataFrame_W_Quadrant, Main_DataFrame_N_Quadrant]
#
Fig = pyplot.figure(figsize=(10,8))
Axes = Fig.gca()
for Quadrant_Index, (Quadrant_Name, Quadrant_DataFrame) in enumerate(zip(Plot_Quadrant_Names, Plot_Quadrant_DataFrames)):
    # Building the Label From the Quadrant Name Also Fixes the Former 'Nortern Quadrant' Legend Typo
    pyplot.scatter(Windspeed_Quadrant_Ratio_Linear_Reg_Predictions[Quadrant_Index], Quadrant_DataFrame["Windspeed"],
                   c=Quadrant_DataFrame["Colour Code 1"], marker='o', s=18, alpha=0.30,
                   label=Quadrant_Name + ' Quadrant')
pyplot.title('Scatter Plot of Actual VS Predicted Windspeeds By Linear Regression With Windspeed Ratios By Quadrant', fontsize=13)
Axes.set_xlabel('Predicted Windspeed By Linear Regression Model With Windspeed Ratios to HK Average By Quadrant (km/h)', fontsize=12)
Axes.set_ylabel('Actual Windspeed (km/h)', fontsize=12)
pyplot.rc('xtick', labelsize=10)
pyplot.rc('ytick', labelsize=10)
# Same Range on Both Axes So Predicted and Actual Values Share One Scale
pyplot.xlim(0, 200)
pyplot.ylim(0, 200)
pyplot.grid(True, linestyle="-", linewidth=0.8, color='silver')
pyplot.legend(loc=2, fontsize=14)
pyplot.show()

<IPython.core.display.Javascript object>

In [28]:
# Separate Variables Into Categories Boolean For Logistic Regression
# Each Boolean Is a Series Aligned With the Rows of Main_DataFrame_No_NaN
# Bands Are Half-Open Intervals [Lower, Upper) So That Every Value Between Two Thresholds Lands in Exactly One Band
#
# Windspeed: Strong Wind: >=41km/h; Gale Wind: >=63km/h
# (Gale Wind Rows Are Also Strong Wind Rows Because Both Are Lower-Bound Tests Only)
Strong_Wind_Boolean = (Main_DataFrame_No_NaN["Windspeed"] >= 41)
Gale_Wind_Boolean = (Main_DataFrame_No_NaN["Windspeed"] >= 63)
#
# Intensity: Tropical Depression: 41-62km/h; Tropical Storm: 63-87km/h; Severe Tropical Storm: 88-117km/h
# Typhoon: 118-149km/h; Severe or Super Typhoon: >=150
# Intensities Below 41km/h Fall Into None of These Categories
TD_Boolean = (Main_DataFrame_No_NaN["Intensity"] >= 41) & (Main_DataFrame_No_NaN["Intensity"] < 63)
TS_Boolean = (Main_DataFrame_No_NaN["Intensity"] >= 63) & (Main_DataFrame_No_NaN["Intensity"] < 88)
STS_Boolean = (Main_DataFrame_No_NaN["Intensity"] >= 88) & (Main_DataFrame_No_NaN["Intensity"] < 118)
# Upper Bound Must Be < 150 (Not < 149); Otherwise an Intensity of Exactly 149km/h Is Dropped From Every Category
TY_Boolean = (Main_DataFrame_No_NaN["Intensity"] >= 118) & (Main_DataFrame_No_NaN["Intensity"] < 150)
STY_SuperTY_Boolean = (Main_DataFrame_No_NaN["Intensity"] >= 150)
#
# Distance: Direct Impact: 0-100km; Near Impact: 101-250km; Moderate Impact: 251-400km; Distant Impact: 401-800km
# Distances Above 800km Fall Into None of These Categories
Direct_Impact_Boolean = (Main_DataFrame_No_NaN["Distance"] <= 100)
Near_Impact_Boolean = (Main_DataFrame_No_NaN["Distance"] > 100) & (Main_DataFrame_No_NaN["Distance"] <= 250)
Moderate_Impact_Boolean = (Main_DataFrame_No_NaN["Distance"] > 250) & (Main_DataFrame_No_NaN["Distance"] <= 400)
Distant_Impact_Boolean = (Main_DataFrame_No_NaN["Distance"] > 400) & (Main_DataFrame_No_NaN["Distance"] <= 800)
#
# Radius of Max Winds: Small Size: 0-30km; Medium Size: 31-60km; Large Size: 61-100km; Huge Size: >=101km
Small_Size_Boolean = (Main_DataFrame_No_NaN["Radius of Max Winds"] <= 30)
Medium_Size_Boolean = (Main_DataFrame_No_NaN["Radius of Max Winds"] > 30) & \
(Main_DataFrame_No_NaN["Radius of Max Winds"] <= 60)
Large_Size_Boolean = (Main_DataFrame_No_NaN["Radius of Max Winds"] > 60) & \
(Main_DataFrame_No_NaN["Radius of Max Winds"] <= 100)
Huge_Size_Boolean = (Main_DataFrame_No_NaN["Radius of Max Winds"] > 100)

In [29]:
# Create Logistic Regression DataFrames
# Targets: Strong Wind and Gale Wind Booleans (Kept as True/False)
Logistic_Reg_DataFrame = pandas.DataFrame({"Strong Wind": Strong_Wind_Boolean, "Gale Wind": Gale_Wind_Boolean})
# Predictors: One Boolean Column Per Intensity, Distance and Size Category
Logistic_Reg_Variables = pandas.DataFrame({
    "Tropical Depression": TD_Boolean, "Tropical Storm": TS_Boolean,
    "Severe Tropical Storm": STS_Boolean, "Typhoon": TY_Boolean, "Severe Typhoon or Super Typhoon": STY_SuperTY_Boolean,
    "Direct Impact": Direct_Impact_Boolean, "Near Impact": Near_Impact_Boolean,
    "Moderate Impact": Moderate_Impact_Boolean, "Distant Impact": Distant_Impact_Boolean,
    "Small Size": Small_Size_Boolean, "Medium Size": Medium_Size_Boolean,
    "Large Size": Large_Size_Boolean, "Huge Size": Huge_Size_Boolean})
#
# Replace True With 1 and False With 0
# An Explicit Cast Is Used Instead of replace({True: 1, False: 0}), Which Relies on the Implicit Downcasting
# That pandas 2.2 Deprecates; the Resulting 0/1 Integer Columns Are the Same
Logistic_Reg_Variables = Logistic_Reg_Variables.astype("int64")
# Join Targets and Predictors on Their Shared Row Index
Logistic_Reg_DataFrame = pandas.merge(Logistic_Reg_DataFrame, Logistic_Reg_Variables, left_index=True, right_index=True)
Logistic_Reg_DataFrame.head()

Unnamed: 0,Strong Wind,Gale Wind,Tropical Depression,Tropical Storm,Severe Tropical Storm,Typhoon,Severe Typhoon or Super Typhoon,Direct Impact,Near Impact,Moderate Impact,Distant Impact,Small Size,Medium Size,Large Size,Huge Size
0,True,False,0,1,0,0,0,0,0,0,1,0,0,0,1
1,True,False,0,1,0,0,0,0,0,0,1,0,0,0,1
2,True,False,0,1,0,0,0,0,0,0,1,0,0,0,1
3,True,True,0,1,0,0,0,0,0,0,1,0,0,0,1
4,True,True,0,1,0,0,0,0,0,0,1,0,0,0,1


In [30]:
# One-Hot Encode the Two Non-Numeric Variables: Windspeed Station and Quadrant
Windspeed_Station_Dummies = pandas.get_dummies(Main_DataFrame_No_NaN["Windspeed Station"])
# Quadrant Columns Are Put in Eastern, Southern, Western, Northern Order
Quadrant_Dummies = pandas.get_dummies(Main_DataFrame_No_NaN["Quadrant"])[["Eastern", "Southern", "Western", "Northern"]]
#
# Append Both Dummy Tables to the Logistic Regression DataFrame, Matching Rows on the Index
# NOTE: This Cell Extends Logistic_Reg_DataFrame in Place of Itself, So Re-Running It Without First
# Re-Running the Cell That Builds Logistic_Reg_DataFrame Merges the Dummy Columns a Second Time
for Dummy_Columns in (Windspeed_Station_Dummies, Quadrant_Dummies):
    Logistic_Reg_DataFrame = Logistic_Reg_DataFrame.merge(Dummy_Columns, left_index=True, right_index=True)
Logistic_Reg_DataFrame.head()

Unnamed: 0,Strong Wind,Gale Wind,Tropical Depression,Tropical Storm,Severe Tropical Storm,Typhoon,Severe Typhoon or Super Typhoon,Direct Impact,Near Impact,Moderate Impact,...,Tseung Kwan O,Tsing Yi Shell Oil Depot,Tuen Mun Government Offices,Waglan Island,Wetland Park,Wong Chuk Hang,Eastern,Southern,Western,Northern
0,True,False,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,True,False,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,True,False,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,True,True,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,True,True,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [31]:
# Show How Many Rows Take Each Value in Every Column, With a Blank Line After Each Column
for Column_Name in Logistic_Reg_DataFrame:
    Column_Counts = Logistic_Reg_DataFrame[Column_Name].value_counts()
    print(Column_Counts, end="\n\n")

False    1477
True     1311
Name: Strong Wind, dtype: int64

False    2219
True      569
Name: Gale Wind, dtype: int64

0    2323
1     465
Name: Tropical Depression, dtype: int64

0    1860
1     928
Name: Tropical Storm, dtype: int64

0    2254
1     534
Name: Severe Tropical Storm, dtype: int64

0    2299
1     489
Name: Typhoon, dtype: int64

0    2416
1     372
Name: Severe Typhoon or Super Typhoon, dtype: int64

0    2270
1     518
Name: Direct Impact, dtype: int64

0    2096
1     692
Name: Near Impact, dtype: int64

0    1913
1     875
Name: Moderate Impact, dtype: int64

0    2085
1     703
Name: Distant Impact, dtype: int64

0    2214
1     574
Name: Small Size, dtype: int64

0    1705
1    1083
Name: Medium Size, dtype: int64

0    2016
1     772
Name: Large Size, dtype: int64

0    2429
1     359
Name: Huge Size, dtype: int64

0    2718
1      70
Name: Bluff Head (Stanley), dtype: int64

0    2702
1      86
Name: Central Pier, dtype: int64

0    2683
1     105
Name: Cheung 

In [32]:
# Logistic Regression Predictors: Every Column Except the Two Target Columns
Logistic_Reg_Predictors = list(Logistic_Reg_DataFrame.columns)
for Target_Column in ("Strong Wind", "Gale Wind"):
    Logistic_Reg_Predictors.remove(Target_Column)
# Show the Predictor Names Followed By How Many There Are
print(Logistic_Reg_Predictors, len(Logistic_Reg_Predictors), sep="\n")

['Tropical Depression', 'Tropical Storm', 'Severe Tropical Storm', 'Typhoon', 'Severe Typhoon or Super Typhoon', 'Direct Impact', 'Near Impact', 'Moderate Impact', 'Distant Impact', 'Small Size', 'Medium Size', 'Large Size', 'Huge Size', 'Bluff Head (Stanley)', 'Central Pier', 'Cheung Chau', 'Cheung Chau Beach', 'Green Island', 'Hong Kong International Airport', 'Kai Tak', "King's Park", 'Lamma Island', 'Lau Fau Shan', 'Ngong Ping', 'North Point', 'Peng Chau', 'Ping Chau', 'Sai Kung', 'Sha Chau', 'Sha Lo Wan', 'Sha Tin', 'Shek Kong', 'Star Ferry (Kowloon)', 'Ta Kwu Ling', 'Tai Mei Tuk', 'Tai Mo Shan', 'Tai Po Kau', 'Tap Mun East', "Tate's Cairn", 'Tseung Kwan O', 'Tsing Yi Shell Oil Depot', 'Tuen Mun Government Offices', 'Waglan Island', 'Wetland Park', 'Wong Chuk Hang', 'Eastern', 'Southern', 'Western', 'Northern']
49


In [33]:
# Fit a Logistic Regression of the Strong Wind Target on All Predictor Columns
Strong_Wind_Logistic_Reg_Model = LogisticRegression()
Strong_Wind_Logistic_Reg_Model.fit(
    X=Logistic_Reg_DataFrame[Logistic_Reg_Predictors],
    y=Logistic_Reg_DataFrame["Strong Wind"],
)
# coef_ and intercept_ Are Indexed With [0] to Take the Single Row / Value of the Fitted Model
Strong_Wind_Logistic_Reg_Slope = Strong_Wind_Logistic_Reg_Model.coef_[0]
Strong_Wind_Logistic_Reg_Intercept = Strong_Wind_Logistic_Reg_Model.intercept_[0]
print("Slopes:", Strong_Wind_Logistic_Reg_Slope, "y Intercept:", Strong_Wind_Logistic_Reg_Intercept, sep="\n")

Slopes:
[-2.43437217 -1.56623809  0.35894211  1.01469504  2.62743411  2.38617327
  0.32242332 -0.84329536 -1.86484022 -1.68389806 -0.45541371  0.28024332
  1.85952945  0.49728837  0.25265994  2.21757877  1.95504299  2.2000265
  1.01531693 -0.22872716 -1.94930267  0.14916604 -0.37545276  2.59736686
 -0.06651336  1.18438956 -2.97800355  0.7206102   0.9775896  -0.57110592
 -2.95422277 -1.96044589 -0.08304421 -2.60989817  1.26790698  3.60104998
 -0.1264828   1.68822649  2.88678158 -3.65253657 -1.56601836 -2.24477084
  3.00131042 -2.90388438 -1.94144078 -0.67184689  1.89623176  1.50414751
 -0.19279676]
y Intercept:
-0.30249447698142395


In [34]:
# List Each Predictor Next to Its Strong Wind Coefficient
# A Blank Line Is Printed After Positions 4, 8, 12 and 44 to Separate the Groups of Predictors
Group_End_Positions = (4, 8, 12, 44)
for Position, (Predictor_Name, Predictor_Slope) in enumerate(zip(Logistic_Reg_Predictors, Strong_Wind_Logistic_Reg_Slope)):
    print(Predictor_Name, ":", Predictor_Slope)
    if Position in Group_End_Positions:
        print()

Tropical Depression : -2.4343721698466156
Tropical Storm : -1.5662380856996143
Severe Tropical Storm : 0.3589421118079239
Typhoon : 1.0146950355468798
Severe Typhoon or Super Typhoon : 2.6274341116331024

Direct Impact : 2.386173267468161
Near Impact : 0.3224233169032694
Moderate Impact : -0.8432953568551271
Distant Impact : -1.864840224074616

Small Size : -1.6838980642176256
Medium Size : -0.45541370844242945
Large Size : 0.28024332423676324
Huge Size : 1.8595294518649823

Bluff Head (Stanley) : 0.4972883683068559
Central Pier : 0.2526599378449979
Cheung Chau : 2.2175787714534554
Cheung Chau Beach : 1.9550429860830174
Green Island : 2.2000265048233
Hong Kong International Airport : 1.0153169290585538
Kai Tak : -0.2287271648020769
King's Park : -1.949302671935019
Lamma Island : 0.1491660350701754
Lau Fau Shan : -0.3754527636087407
Ngong Ping : 2.5973668596472312
North Point : -0.06651336161304762
Peng Chau : 1.1843895630601051
Ping Chau : -2.9780035530930253
Sai Kung : 0.7206102042413

In [35]:
# Fit a Logistic Regression of the Gale Wind Target on All Predictor Columns
Gale_Wind_Logistic_Reg_Model = LogisticRegression()
Gale_Wind_Logistic_Reg_Model.fit(
    X=Logistic_Reg_DataFrame[Logistic_Reg_Predictors],
    y=Logistic_Reg_DataFrame["Gale Wind"],
)
# coef_ and intercept_ Are Indexed With [0] to Take the Single Row / Value of the Fitted Model
Gale_Wind_Logistic_Reg_Slope = Gale_Wind_Logistic_Reg_Model.coef_[0]
Gale_Wind_Logistic_Reg_Intercept = Gale_Wind_Logistic_Reg_Model.intercept_[0]
print("Slope:", Gale_Wind_Logistic_Reg_Slope, "y Intercept:", Gale_Wind_Logistic_Reg_Intercept, sep="\n")

Slope:
[-2.52016145 -1.69718545  0.48887251  1.23624697  2.49499047  2.24898506
  0.29208081 -0.56120986 -1.97709297 -1.54504898 -0.35045666  0.43888436
  1.45938432  0.75161418 -0.16128983  2.44429558  1.87572541  2.03452182
  0.6644313  -0.79684496 -1.95993419 -0.62118272 -0.44098274  3.64117344
 -0.20273934  0.74758516 -2.3883617  -0.20404898  1.08821666  0.26680716
 -2.80077874 -1.83148144  0.11512871 -2.80077874  1.66138707  4.16412607
 -0.54072521  0.23883186  2.8672165  -2.47479662 -2.06469115 -2.17641635
  3.00358567 -2.36529702 -1.73153381 -0.51450489  1.55072066  1.08025072
 -0.30057635]
y Intercept:
-2.755121203167774


In [36]:
# List Each Predictor Next to Its Gale Wind Coefficient
# A Blank Line Is Printed After Positions 4, 8, 12 and 44 to Separate the Groups of Predictors
Group_End_Positions = (4, 8, 12, 44)
for Position, (Predictor_Name, Predictor_Slope) in enumerate(zip(Logistic_Reg_Predictors, Gale_Wind_Logistic_Reg_Slope)):
    print(Predictor_Name, ":", Predictor_Slope)
    if Position in Group_End_Positions:
        print()

Tropical Depression : -2.5201614506419383
Tropical Storm : -1.6971854500054382
Severe Tropical Storm : 0.48887250609077576
Typhoon : 1.2362469677230883
Severe Typhoon or Super Typhoon : 2.4949904701272936

Direct Impact : 2.2489850598845815
Near Impact : 0.29208081336453373
Moderate Impact : -0.5612098591222835
Distant Impact : -1.9770929708330487

Small Size : -1.545048980258444
Medium Size : -0.35045665899707845
Large Size : 0.4388843623205692
Huge Size : 1.4593843202287426

Bluff Head (Stanley) : 0.7516141774043603
Central Pier : -0.16128983112509723
Cheung Chau : 2.444295581736029
Cheung Chau Beach : 1.875725411055245
Green Island : 2.0345218213412344
Hong Kong International Airport : 0.6644313029731768
Kai Tak : -0.7968449626560062
King's Park : -1.9599341867199893
Lamma Island : -0.6211827238958768
Lau Fau Shan : -0.44098273531629756
Ngong Ping : 3.6411734375170752
North Point : -0.20273934105757388
Peng Chau : 0.7475851630593261
Ping Chau : -2.3883617038728633
Sai Kung : -0.2040

In [40]:
# Testing Example: Tropical Storm Nangka at Green Island
# Intensity: Tropical Storm (Intensity = 85km/h); Distance: Distant Impact (Distance = 530km)
# Size: Huge Size (R Max = 130km); Windspeed Station: Green Island; Quadrant: Southern Quadrant (Bearing = SSW)
#
# Predictors Equal to 1 For This Example; All Other Predictors Are 0 and Contribute Nothing to the Sum
# Positions Are Looked Up By Name So the Example Stays Correct If the Predictor Columns Change Order
# (list.index Raises ValueError If a Name Is Missing, Instead of Silently Using the Wrong Coefficient)
Example_Active_Predictors = ["Tropical Storm", "Distant Impact", "Huge Size", "Green Island", "Southern"]
Example_Active_Indices = [Logistic_Reg_Predictors.index(Predictor_Name) for Predictor_Name in Example_Active_Predictors]
#
# Strong Wind:
# Log-Odds = Intercept + Sum of the Coefficients of the Active Predictors
Strong_Wind_Logistic_Reg_Value = sum((Strong_Wind_Logistic_Reg_Slope[Index] for Index in Example_Active_Indices),
                                     Strong_Wind_Logistic_Reg_Intercept)
print (Strong_Wind_Logistic_Reg_Value)
# Logistic Function Converts the Log-Odds Into a Probability
Strong_Wind_Logistic_Reg_Prob = numpy.exp(Strong_Wind_Logistic_Reg_Value) / (numpy.exp(Strong_Wind_Logistic_Reg_Value)+1)
print ("Strong Wind Probability:", Strong_Wind_Logistic_Reg_Prob)
#
# Gale Wind:
Gale_Wind_Logistic_Reg_Value = sum((Gale_Wind_Logistic_Reg_Slope[Index] for Index in Example_Active_Indices),
                                   Gale_Wind_Logistic_Reg_Intercept)
print (Gale_Wind_Logistic_Reg_Value)
Gale_Wind_Logistic_Reg_Prob = numpy.exp(Gale_Wind_Logistic_Reg_Value) / (numpy.exp(Gale_Wind_Logistic_Reg_Value)+1)
print ("Gale Wind Probability:", Gale_Wind_Logistic_Reg_Prob)
#
# Actual Windspeed: 74km/h (Gale Wind)
# Strong Wind prediction correct; Gale wind prediction incorrect

2.222214925872808
Strong Wind Probability: 0.9022267565135758
-1.384772821680032
Gale Wind Probability: 0.20024355743806382
