In [2]:
import numpy
import random
from matplotlib import pyplot
from matplotlib import colors
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import datetime
import cartopy.crs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from PIL import Image, ImageDraw
import pandas
from sklearn.linear_model import LinearRegression, LogisticRegression
from mpl_toolkits.mplot3d import Axes3D
import seaborn
import scipy
from scipy import stats
from scipy.stats import poisson, ttest_ind
%matplotlib notebook

In [3]:
# Load the Ten Minute Sustained Windspeed Table From CSV and Preview Its First Rows
Windspeed_File = pandas.read_csv(filepath_or_buffer="Ten_Min_Windspeed_Data_Output.csv")
Windspeed_File.head(n=5)

Unnamed: 0,Typhoon Code,Typhoon Name,Bluff Head (Stanley),Central Pier,Cheung Chau,Cheung Chau Beach,Green Island,Hong Kong International Airport,Kai Tak,King's Park,...,Tai Mo Shan,Tai Po Kau,Tap Mun East,Tate's Cairn,Tseung Kwan O,Tsing Yi Shell Oil Depot,Tuen Mun Government Offices,Waglan Island,Wetland Park,Wong Chuk Hang
0,(2016,Nangka,45.0,48.0,60,70.0,74.0,48,37.0,32,...,,43.0,58.0,77.0,20.0,26.0,16.0,85.0,23.0,35.0
1,(2007,Higos,67.0,51.0,108,85.0,60.0,68,56.0,43,...,,54.0,76.0,89.0,28.0,39.0,,89.0,30.0,38.0
2,(2003,Sinlaku,50.0,47.0,77,66.0,54.0,57,45.0,38,...,87.0,45.0,62.0,67.0,23.0,33.0,30.0,73.0,22.0,33.0
3,(2002,Nuri,,40.0,57,58.0,52.0,46,40.0,31,...,83.0,39.0,61.0,66.0,19.0,30.0,30.0,60.0,19.0,29.0
4,(1914,Kajiki,38.0,41.0,63,60.0,59.0,40,32.0,30,...,74.0,42.0,53.0,62.0,18.0,26.0,15.0,68.0,17.0,28.0


In [5]:
# Load the Typhoon Closest Point of Approach (CPA) Table From CSV and Preview Its First Rows
Typhoon_CPA_File = pandas.read_csv(filepath_or_buffer="Typhoon_CPA_Data.csv")
Typhoon_CPA_File.head(n=5)

Unnamed: 0,Typhoon Code,Typhoon Name,Highest Typhoon Signal Issued,Closest Point of Approach (km),CPA Bearing,CPA Intensity (km/h),CPA Radius of Maximum Winds (km),CPA Date,CPA Time
0,(2016,Nangka,8,530,SSW,85,130,13/10/2020,17:00
1,(2007,Higos,9,80,WSW,130,28,19/8/2020,05:00
2,(2003,Sinlaku,3,580,SW,55,250,1/8/2020,10:00
3,(2002,Nuri,3,190,SSW,75,60,14/6/2020,02:00
4,(1914,Kajiki,3,330,S,55,185,1/9/2019,21:00


In [6]:
# Load the Table of Station Windspeed Ratios to the HK Average (One Row per CPA Quadrant)
# From CSV and Preview Its First Rows
Windspeed_Ratio_File = pandas.read_csv(filepath_or_buffer="Windspeed_Ratio_To_HK_Average_Output.csv")
Windspeed_Ratio_File.head(n=5)

Unnamed: 0,Quadrant,Bluff Head (Stanley),Central Pier,Cheung Chau,Cheung Chau Beach,Green Island,Hong Kong International Airport,Kai Tak,King's Park,Lau Fau Shan,...,Tai Mo Shan,Tai Po Kau,Tate's Cairn,Tseung Kwan O,Tsing Yi Shell Oil Depot,Tuen Mun Government Offices,Waglan Island,Wetland Park,Wong Chuk Hang,Unnamed: 21
0,Eastern,0.982317,1.020018,1.331294,1.410381,1.337948,1.09286,0.879183,0.664276,0.981458,...,1.789653,0.927412,1.468669,0.482496,0.687209,0.616674,1.456591,0.566153,0.722664,
1,Southern,1.032203,1.015392,1.370514,1.320824,1.327907,1.124406,0.907685,0.682041,0.917341,...,1.703046,0.902764,1.467367,0.495218,0.680184,0.627353,1.520535,0.574582,0.749956,
2,Western,1.0314,0.92726,1.398986,1.312252,1.441225,1.123915,0.876423,0.674758,0.958148,...,1.741025,0.856989,1.650741,0.523875,0.715327,0.644059,1.600562,0.58821,0.667983,
3,Northern,0.995543,0.98557,1.349687,1.210644,1.278806,1.166646,0.935403,0.681322,1.071375,...,1.792995,0.931712,1.463281,0.511684,0.829104,0.74086,1.415606,0.606081,0.734325,


In [7]:
# Drop Typhoon Code and Typhoon Name Columns From Windspeed File So Only Station Columns Remain
# errors="ignore" keeps this cell re-runnable: Windspeed_File is overwritten by its own result,
# so on a second run the two columns are already gone and a plain drop would raise KeyError.
Windspeed_File = Windspeed_File.drop(columns=["Typhoon Name", "Typhoon Code"], errors="ignore")
Windspeed_File.head()

Unnamed: 0,Bluff Head (Stanley),Central Pier,Cheung Chau,Cheung Chau Beach,Green Island,Hong Kong International Airport,Kai Tak,King's Park,Lamma Island,Lau Fau Shan,...,Tai Mo Shan,Tai Po Kau,Tap Mun East,Tate's Cairn,Tseung Kwan O,Tsing Yi Shell Oil Depot,Tuen Mun Government Offices,Waglan Island,Wetland Park,Wong Chuk Hang
0,45.0,48.0,60,70.0,74.0,48,37.0,32,42.0,30,...,,43.0,58.0,77.0,20.0,26.0,16.0,85.0,23.0,35.0
1,67.0,51.0,108,85.0,60.0,68,56.0,43,58.0,38,...,,54.0,76.0,89.0,28.0,39.0,,89.0,30.0,38.0
2,50.0,47.0,77,66.0,54.0,57,45.0,38,48.0,42,...,87.0,45.0,62.0,67.0,23.0,33.0,30.0,73.0,22.0,33.0
3,,40.0,57,58.0,52.0,46,40.0,31,39.0,35,...,83.0,39.0,61.0,66.0,19.0,30.0,30.0,60.0,19.0,29.0
4,38.0,41.0,63,60.0,59.0,40,32.0,30,36.0,36,...,74.0,42.0,53.0,62.0,18.0,26.0,15.0,68.0,17.0,28.0


In [8]:
# Collect the Windspeed Station Column Names Into a List
Windspeed_Stations_List = Windspeed_File.columns.tolist()
#
# Flatten the Typhoon-by-Station Windspeed Table Row by Row (C Order) Into One 1-D Array:
# All Stations of the First Typhoon, Then All Stations of the Second Typhoon, and So On
All_Windspeed_Array = Windspeed_File.to_numpy().ravel(order="C")

In [9]:
# Build the Main DataFrame in Long Format: One Row per (Typhoon, Windspeed Station) Pair
def _repeat_per_station(cpa_column):
    """Return the values of one Typhoon CPA column, each repeated once per windspeed station."""
    return [
        Typhoon_CPA_File[cpa_column][typhoon_row]
        for typhoon_row in range(len(Typhoon_CPA_File))
        for station_position in range(len(Windspeed_Stations_List))
    ]
#
# Typhoon-Level Variables: Constant Across All Station Rows of the Same Typhoon
All_Typhoon_Code_List = _repeat_per_station("Typhoon Code")
All_Typhoon_Name_List = _repeat_per_station("Typhoon Name")
All_Typhoon_Signal_List = _repeat_per_station("Highest Typhoon Signal Issued")
All_Bearing_List = _repeat_per_station("CPA Bearing")
All_Intensity_List = _repeat_per_station("CPA Intensity (km/h)")
All_Distance_List = _repeat_per_station("Closest Point of Approach (km)")
All_R_Max_List = _repeat_per_station("CPA Radius of Maximum Winds (km)")
#
# Station Names: The Full Station List Repeated Once per Typhoon
All_Windspeed_Stations_List = [
    station
    for typhoon_row in range(len(Typhoon_CPA_File))
    for station in Windspeed_Stations_List
]
#
# All_Windspeed_Array is already flattened typhoon by typhoon, station by station, which is the
# same ordering as the lists built above, so it lines up row for row.
Main_DF = pandas.DataFrame({
    "Typhoon Code": All_Typhoon_Code_List,
    "Typhoon Name": All_Typhoon_Name_List,
    "Windspeed Station": All_Windspeed_Stations_List,
    "Typhoon Signal": All_Typhoon_Signal_List,
    "Bearing": All_Bearing_List,
    "Intensity": All_Intensity_List,
    "Distance": All_Distance_List,
    "Radius of Max Winds": All_R_Max_List,
    "Actual Windspeed": All_Windspeed_Array,
})
Main_DF.head()

Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Typhoon Signal,Bearing,Intensity,Distance,Radius of Max Winds,Actual Windspeed
0,(2016,Nangka,Bluff Head (Stanley),8,SSW,85,530,130,45.0
1,(2016,Nangka,Central Pier,8,SSW,85,530,130,48.0
2,(2016,Nangka,Cheung Chau,8,SSW,85,530,130,60.0
3,(2016,Nangka,Cheung Chau Beach,8,SSW,85,530,130,70.0
4,(2016,Nangka,Green Island,8,SSW,85,530,130,74.0


In [10]:
# Add the Quadrant of Each CPA Bearing and Its Two Plot Colour Codes to the Main DataFrame
Directions_Circle = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"]
#
# Each quadrant covers four consecutive compass points of Directions_Circle, given here by position:
# (quadrant name, primary colour, secondary colour, positions in Directions_Circle)
_Quadrant_Lookup = {
    Directions_Circle[position]: (quadrant_name, primary_colour, secondary_colour)
    for quadrant_name, primary_colour, secondary_colour, positions in (
        ("Eastern", "orange", "darkorange", (2, 3, 4, 5)),
        ("Southern", "limegreen", "darkgreen", (6, 7, 8, 9)),
        ("Western", "deepskyblue", "mediumblue", (10, 11, 12, 13)),
        ("Northern", "red", "firebrick", (14, 15, 0, 1)),
    )
    for position in positions
}
# Fallback for a bearing that is not one of the 16 compass points (e.g. missing): no quadrant, gray colours
_Unknown_Bearing = (numpy.nan, "gray", "dimgray")
#
All_Quadrant_List = []
Colour_Code_Primary_List = []
Colour_Code_Secondary_List = []
for bearing in Main_DF["Bearing"]:
    quadrant_name, primary_colour, secondary_colour = _Quadrant_Lookup.get(bearing, _Unknown_Bearing)
    All_Quadrant_List.append(quadrant_name)
    Colour_Code_Primary_List.append(primary_colour)
    Colour_Code_Secondary_List.append(secondary_colour)
Main_DF["Quadrant"] = All_Quadrant_List
Main_DF["Colour Code 1"] = Colour_Code_Primary_List
Main_DF["Colour Code 2"] = Colour_Code_Secondary_List

In [11]:
# Look Up, for Every Row, the Station's Windspeed Ratio to the HK Average in the Row's CPA Quadrant
#
# Row position of each quadrant inside Windspeed_Ratio_File
_Ratio_Row_Of_Quadrant = {"Eastern": 0, "Southern": 1, "Western": 2, "Northern": 3}
# Stations that always receive NaN instead of a ratio
# NOTE(review): presumably these two stations have no column in the ratio file -- confirm
_Stations_Without_Ratio = ("Lamma Island", "Tap Mun East")
#
All_Windspeed_Ratio_List = []
for station, quadrant in zip(Main_DF["Windspeed Station"], Main_DF["Quadrant"]):
    ratio_row = _Ratio_Row_Of_Quadrant.get(quadrant)
    if station in _Stations_Without_Ratio or ratio_row is None:
        # Excluded station, or a row whose quadrant is not one of the four named quadrants
        All_Windspeed_Ratio_List.append(numpy.nan)
    else:
        All_Windspeed_Ratio_List.append(Windspeed_Ratio_File[station][ratio_row])
Main_DF["Windspeed Ratio to HK Average"] = All_Windspeed_Ratio_List
Main_DF.head()

Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Typhoon Signal,Bearing,Intensity,Distance,Radius of Max Winds,Actual Windspeed,Quadrant,Colour Code 1,Colour Code 2,Windspeed Ratio to HK Average
0,(2016,Nangka,Bluff Head (Stanley),8,SSW,85,530,130,45.0,Southern,limegreen,darkgreen,1.032203
1,(2016,Nangka,Central Pier,8,SSW,85,530,130,48.0,Southern,limegreen,darkgreen,1.015392
2,(2016,Nangka,Cheung Chau,8,SSW,85,530,130,60.0,Southern,limegreen,darkgreen,1.370514
3,(2016,Nangka,Cheung Chau Beach,8,SSW,85,530,130,70.0,Southern,limegreen,darkgreen,1.320824
4,(2016,Nangka,Green Island,8,SSW,85,530,130,74.0,Southern,limegreen,darkgreen,1.327907


In [12]:
# Reorder DataFrame Columns Positions: Identification Columns First, Then the Quadrant and Its
# Colour Codes, Then the Typhoon Predictors, and Finally the Observed Windspeed.
# The order is spelled out by column name rather than by slicing list(Main_DF.columns) with
# positional indices: positional slicing silently produces a different order if this cell is
# run a second time (Main_DF is overwritten by its own reordered version), whereas selecting by
# name gives the same result on every run.
Main_DF_Columns = [
    "Typhoon Code", "Typhoon Name", "Windspeed Station", "Typhoon Signal", "Bearing",
    "Quadrant", "Colour Code 1", "Colour Code 2",
    "Intensity", "Distance", "Radius of Max Winds",
    "Windspeed Ratio to HK Average",
    "Actual Windspeed",
]
Main_DF = Main_DF[Main_DF_Columns]
print ("DataFrame Size:", len(Main_DF))
Main_DF.head()

DataFrame Size: 3360


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Typhoon Signal,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed Ratio to HK Average,Actual Windspeed
0,(2016,Nangka,Bluff Head (Stanley),8,SSW,Southern,limegreen,darkgreen,85,530,130,1.032203,45.0
1,(2016,Nangka,Central Pier,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.015392,48.0
2,(2016,Nangka,Cheung Chau,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.370514,60.0
3,(2016,Nangka,Cheung Chau Beach,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.320824,70.0
4,(2016,Nangka,Green Island,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.327907,74.0


In [13]:
# Keep Only the Rows That Have a Recorded Windspeed
# A NaN windspeed fails the ">= 0.0" comparison, so those rows are the ones filtered out;
# .copy() makes the result an independent DataFrame.
Main_DF_No_NaN = Main_DF.loc[Main_DF["Actual Windspeed"].ge(0.0)].copy()
print("DataFrame Size:", len(Main_DF_No_NaN))
Main_DF_No_NaN.head()

DataFrame Size: 2788


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Typhoon Signal,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed Ratio to HK Average,Actual Windspeed
0,(2016,Nangka,Bluff Head (Stanley),8,SSW,Southern,limegreen,darkgreen,85,530,130,1.032203,45.0
1,(2016,Nangka,Central Pier,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.015392,48.0
2,(2016,Nangka,Cheung Chau,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.370514,60.0
3,(2016,Nangka,Cheung Chau Beach,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.320824,70.0
4,(2016,Nangka,Green Island,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.327907,74.0


In [14]:
# Keep Only the Rows That Have a Windspeed Ratio to the HK Average
# Rows whose ratio is NaN fail the ">= 0.0" comparison and are dropped.
# NOTE(review): presumably the NaN-ratio stations are the ones with fewer than 30 data points -- confirm
Main_DF_No_Few = Main_DF_No_NaN.loc[Main_DF_No_NaN["Windspeed Ratio to HK Average"].ge(0.0)].copy()
print("DataFrame Size:", len(Main_DF_No_Few))
Main_DF_No_Few.head()

DataFrame Size: 2679


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Typhoon Signal,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed Ratio to HK Average,Actual Windspeed
0,(2016,Nangka,Bluff Head (Stanley),8,SSW,Southern,limegreen,darkgreen,85,530,130,1.032203,45.0
1,(2016,Nangka,Central Pier,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.015392,48.0
2,(2016,Nangka,Cheung Chau,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.370514,60.0
3,(2016,Nangka,Cheung Chau Beach,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.320824,70.0
4,(2016,Nangka,Green Island,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.327907,74.0


In [15]:
# Separate Variables Into Categories Boolean For Multivariable Logistic Regression (MLogR) Model
# Every Boolean Series below has one entry per row of Main_DF_No_Few.
#
# Windspeed: Strong Wind: >=41km/h; Gale Wind: >=63km/h
# (a gale-force row is therefore also flagged as strong wind)
Strong_Wind_Boolean = (Main_DF_No_Few["Actual Windspeed"] >= 41)
Gale_Wind_Boolean = (Main_DF_No_Few["Actual Windspeed"] >= 63)
#
# Intensity: Tropical Depression: 41-62km/h; Tropical Storm: 63-87km/h; Severe Tropical Storm: 88-117km/h
# Typhoon: 118-149km/h; Severe or Super Typhoon: >=150
# Each class is a half-open interval [lower bound, lower bound of the next class), so every
# intensity >= 41 falls in exactly one class. The Typhoon class must end at "< 150" (not "< 149"),
# otherwise an intensity of exactly 149 km/h would belong to no class at all.
TD_Boolean = (Main_DF_No_Few["Intensity"] >= 41) & (Main_DF_No_Few["Intensity"] < 63)
TS_Boolean = (Main_DF_No_Few["Intensity"] >= 63) & (Main_DF_No_Few["Intensity"] < 88)
STS_Boolean = (Main_DF_No_Few["Intensity"] >= 88) & (Main_DF_No_Few["Intensity"] < 118)
TY_Boolean = (Main_DF_No_Few["Intensity"] >= 118) & (Main_DF_No_Few["Intensity"] < 150)
STY_SuperTY_Boolean = (Main_DF_No_Few["Intensity"] >= 150)
#
# Distance: Direct Impact: 0-100km; Near Impact: 101-250km; Moderate Impact: 251-400km; Distant Impact: 401-800km
# NOTE: a distance above 800 km falls in none of the four classes.
Direct_Impact_Boolean = (Main_DF_No_Few["Distance"] <= 100)
Near_Impact_Boolean = (Main_DF_No_Few["Distance"] > 100) & (Main_DF_No_Few["Distance"] <= 250)
Moderate_Impact_Boolean = (Main_DF_No_Few["Distance"] > 250) & (Main_DF_No_Few["Distance"] <= 400)
Distant_Impact_Boolean = (Main_DF_No_Few["Distance"] > 400) & (Main_DF_No_Few["Distance"] <= 800)
#
# Radius of Max Winds: Small Size: 0-30km; Medium Size: 31-60km; Large Size: 61-100km; Huge Size: >=101km
Small_Size_Boolean = (Main_DF_No_Few["Radius of Max Winds"] <= 30)
Medium_Size_Boolean = (Main_DF_No_Few["Radius of Max Winds"] > 30) & (Main_DF_No_Few["Radius of Max Winds"] <= 60)
Large_Size_Boolean = (Main_DF_No_Few["Radius of Max Winds"] > 60) & (Main_DF_No_Few["Radius of Max Winds"] <= 100)
Huge_Size_Boolean = (Main_DF_No_Few["Radius of Max Winds"] > 100)

In [16]:
# Assemble the Multivariable Logistic Regression (MLogR) DataFrame:
# the Two Boolean Target Columns Followed by the Category Predictors Encoded as 1/0
MLogR_DF = pandas.DataFrame({
    "Strong Wind": Strong_Wind_Boolean,
    "Gale Wind": Gale_Wind_Boolean,
})
MLogR_Variables = pandas.DataFrame({
    # Intensity classes
    "Tropical Depression": TD_Boolean,
    "Tropical Storm": TS_Boolean,
    "Severe Tropical Storm": STS_Boolean,
    "Typhoon": TY_Boolean,
    "Severe Typhoon or Super Typhoon": STY_SuperTY_Boolean,
    # Distance classes
    "Direct Impact": Direct_Impact_Boolean,
    "Near Impact": Near_Impact_Boolean,
    "Moderate Impact": Moderate_Impact_Boolean,
    "Distant Impact": Distant_Impact_Boolean,
    # Radius of maximum winds classes
    "Small Size": Small_Size_Boolean,
    "Medium Size": Medium_Size_Boolean,
    "Large Size": Large_Size_Boolean,
    "Huge Size": Huge_Size_Boolean,
})
#
# Replace True With 1 and False With 0 in the Predictors (the Targets Stay Boolean)
MLogR_Variables = MLogR_Variables.replace({True: 1, False: 0})
# Join targets and predictors on their shared row index
MLogR_DF = MLogR_DF.merge(MLogR_Variables, left_index=True, right_index=True)
MLogR_DF.head()

Unnamed: 0,Strong Wind,Gale Wind,Tropical Depression,Tropical Storm,Severe Tropical Storm,Typhoon,Severe Typhoon or Super Typhoon,Direct Impact,Near Impact,Moderate Impact,Distant Impact,Small Size,Medium Size,Large Size,Huge Size
0,True,False,0,1,0,0,0,0,0,0,1,0,0,0,1
1,True,False,0,1,0,0,0,0,0,0,1,0,0,0,1
2,True,False,0,1,0,0,0,0,0,0,1,0,0,0,1
3,True,True,0,1,0,0,0,0,0,0,1,0,0,0,1
4,True,True,0,1,0,0,0,0,0,0,1,0,0,0,1


In [17]:
# One-Hot Encode the Two Non-Numeric Variables: Windspeed Station and Quadrant
Windspeed_Station_Dummies = pandas.get_dummies(Main_DF_No_Few["Windspeed Station"])
# Fix the quadrant column order explicitly rather than relying on the order get_dummies returns
Quadrant_Dummies = pandas.get_dummies(Main_DF_No_Few["Quadrant"]).loc[
    :, ["Eastern", "Southern", "Western", "Northern"]
]
#
# Append the Dummy Columns to the MLogR DataFrame, Matching Rows on the Index
# NOTE: MLogR_DF is overwritten by its own merge result, so running this cell twice without
# rebuilding MLogR_DF first merges the dummy columns in a second time.
MLogR_DF = (
    MLogR_DF
    .merge(Windspeed_Station_Dummies, left_index=True, right_index=True)
    .merge(Quadrant_Dummies, left_index=True, right_index=True)
)
MLogR_DF.head()

Unnamed: 0,Strong Wind,Gale Wind,Tropical Depression,Tropical Storm,Severe Tropical Storm,Typhoon,Severe Typhoon or Super Typhoon,Direct Impact,Near Impact,Moderate Impact,...,Tseung Kwan O,Tsing Yi Shell Oil Depot,Tuen Mun Government Offices,Waglan Island,Wetland Park,Wong Chuk Hang,Eastern,Southern,Western,Northern
0,True,False,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,True,False,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,True,False,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,True,True,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,True,True,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [18]:
# Print the Value Counts of Every MLogR DataFrame Column, With a Blank Line After Each
for Column in MLogR_DF.columns:
    Column_Counts = MLogR_DF[Column].value_counts()
    print(Column_Counts, end="\n\n")

False    1436
True     1243
Name: Strong Wind, dtype: int64

False    2135
True      544
Name: Gale Wind, dtype: int64

0    2224
1     455
Name: Tropical Depression, dtype: int64

0    1814
1     865
Name: Tropical Storm, dtype: int64

0    2177
1     502
Name: Severe Tropical Storm, dtype: int64

0    2193
1     486
Name: Typhoon, dtype: int64

0    2308
1     371
Name: Severe Typhoon or Super Typhoon, dtype: int64

0    2245
1     434
Name: Direct Impact, dtype: int64

0    1993
1     686
Name: Near Impact, dtype: int64

0    1811
1     868
Name: Moderate Impact, dtype: int64

0    1988
1     691
Name: Distant Impact, dtype: int64

0    2108
1     571
Name: Small Size, dtype: int64

0    1659
1    1020
Name: Medium Size, dtype: int64

0    1935
1     744
Name: Large Size, dtype: int64

0    2335
1     344
Name: Huge Size, dtype: int64

0    2611
1      68
Name: Bluff Head (Stanley), dtype: int64

0    2595
1      84
Name: Central Pier, dtype: int64

0    2577
1     102
Name: Cheung 

In [19]:
# Define Multivariable Logistic Regression Predictors:
# Every MLogR DataFrame Column Except the Two Targets, in Column Order
MLogR_Predictors = [
    column_name
    for column_name in MLogR_DF.columns
    if column_name not in ("Strong Wind", "Gale Wind")
]
print(MLogR_Predictors)
print(len(MLogR_Predictors))

['Tropical Depression', 'Tropical Storm', 'Severe Tropical Storm', 'Typhoon', 'Severe Typhoon or Super Typhoon', 'Direct Impact', 'Near Impact', 'Moderate Impact', 'Distant Impact', 'Small Size', 'Medium Size', 'Large Size', 'Huge Size', 'Bluff Head (Stanley)', 'Central Pier', 'Cheung Chau', 'Cheung Chau Beach', 'Green Island', 'Hong Kong International Airport', 'Kai Tak', "King's Park", 'Lau Fau Shan', 'Ngong Ping', 'North Point', 'Peng Chau', 'Ping Chau', 'Sai Kung', 'Sha Chau', 'Sha Lo Wan', 'Sha Tin', 'Shek Kong', 'Star Ferry (Kowloon)', 'Ta Kwu Ling', 'Tai Mei Tuk', 'Tai Mo Shan', 'Tai Po Kau', "Tate's Cairn", 'Tseung Kwan O', 'Tsing Yi Shell Oil Depot', 'Tuen Mun Government Offices', 'Waglan Island', 'Wetland Park', 'Wong Chuk Hang', 'Eastern', 'Southern', 'Western', 'Northern']
47


In [20]:
# Fit the Multivariable Logistic Regression (MLogR) Model For Predicting Strong Wind
# (LogisticRegression With Its Default Settings, All Predictors Against the "Strong Wind" Target)
MLogR_Strong_Wind_Model = LogisticRegression()
MLogR_Strong_Wind_Model.fit(MLogR_DF[MLogR_Predictors], MLogR_DF["Strong Wind"])
#
# coef_ and intercept_ are indexed with [0] to take the single row / single value of this fit;
# the slopes are in the same order as MLogR_Predictors
MLogR_Strong_Wind_Slope = MLogR_Strong_Wind_Model.coef_[0]
MLogR_Strong_Wind_Intercept = MLogR_Strong_Wind_Model.intercept_[0]
print("Slopes:", MLogR_Strong_Wind_Slope, sep="\n")
print("y Intercept:", MLogR_Strong_Wind_Intercept, sep="\n")

Slopes:
[-2.38610455e+00 -1.45570790e+00  2.55864938e-01  9.82372396e-01
  2.60343089e+00  2.43102630e+00  3.08031210e-01 -8.29481033e-01
 -1.90972071e+00 -1.63837385e+00 -3.77267511e-01  2.24176527e-01
  1.79132061e+00  6.13493442e-01  3.48522424e-01  2.34354753e+00
  2.00115163e+00  2.24991103e+00  1.11867092e+00 -1.69528327e-01
 -1.89716612e+00 -3.18568175e-01  2.61308466e+00  1.86394179e-03
  1.30827448e+00 -3.01012063e+00  8.16284735e-01  1.09432089e+00
 -5.29239665e-01 -2.96866899e+00 -1.91885208e+00 -1.42413911e-02
 -2.59825761e+00  1.37746685e+00  3.63801448e+00 -4.37055568e-03
  2.93222087e+00 -3.55533954e+00 -1.53783884e+00 -2.20909387e+00
  3.04157581e+00 -2.96596304e+00 -1.80129909e+00 -1.27838225e+00
  1.25657678e+00  8.72654169e-01 -8.50992917e-01]
y Intercept:
0.24285438376200022


In [21]:
# Print Each Predictor Next to Its Fitted Slope For Predicting Strong Wind
# A blank line is printed after positions 4, 8, 12 and 42, i.e. after the last intensity, distance,
# size and windspeed station predictor, so that the printout is grouped by variable family.
for position, (predictor, slope) in enumerate(zip(MLogR_Predictors, MLogR_Strong_Wind_Slope)):
    print(predictor, ":", slope)
    if position in (4, 8, 12, 42):
        print("")

Tropical Depression : -2.3861045505313814
Tropical Storm : -1.4557079040044063
Severe Tropical Storm : 0.2558649382915876
Typhoon : 0.9823723964119127
Severe Typhoon or Super Typhoon : 2.6034308940076527

Direct Impact : 2.4310263021891902
Near Impact : 0.3080312104367692
Moderate Impact : -0.8294810333046266
Distant Impact : -1.9097207051459661

Small Size : -1.6383738466268154
Medium Size : -0.37726751112028173
Large Size : 0.2241765266986156
Huge Size : 1.791320605223842

Bluff Head (Stanley) : 0.6134934418828928
Central Pier : 0.34852242434547637
Cheung Chau : 2.3435475301073074
Cheung Chau Beach : 2.001151634922388
Green Island : 2.2499110261572928
Hong Kong International Airport : 1.1186709219683397
Kai Tak : -0.16952832742147525
King's Park : -1.897166115597844
Lau Fau Shan : -0.3185681753414935
Ngong Ping : 2.613084660386601
North Point : 0.001863941788672085
Peng Chau : 1.3082744759520986
Ping Chau : -3.01012062698501
Sai Kung : 0.8162847350426276
Sha Chau : 1.0943208931975137

In [22]:
# Fit the Multivariable Logistic Regression (MLogR) Model For Predicting Gale Wind
# (LogisticRegression With Its Default Settings, All Predictors Against the "Gale Wind" Target)
MLogR_Gale_Wind_Model = LogisticRegression()
MLogR_Gale_Wind_Model.fit(MLogR_DF[MLogR_Predictors], MLogR_DF["Gale Wind"])
#
# coef_ and intercept_ are indexed with [0] to take the single row / single value of this fit;
# the slopes are in the same order as MLogR_Predictors
MLogR_Gale_Wind_Slope = MLogR_Gale_Wind_Model.coef_[0]
MLogR_Gale_Wind_Intercept = MLogR_Gale_Wind_Model.intercept_[0]
print("Slopes:", MLogR_Gale_Wind_Slope, sep="\n")
print("y Intercept:", MLogR_Gale_Wind_Intercept, sep="\n")

Slopes:
[-2.45133687 -1.57452241  0.39167061  1.20038512  2.43403855  2.26205441
  0.28575799 -0.56585789 -1.9817195  -1.49554103 -0.26858006  0.37220622
  1.39214987  0.69355114 -0.11903447  2.41535518  1.87925132  1.99499044
  0.64303211 -0.75610389 -1.93350784 -0.39313168  3.67086461 -0.26875695
  0.6868323  -2.36450921 -0.27004053  1.04193554  0.21942885 -2.78617317
 -1.79942839  0.17160372 -2.78617317  1.598834    4.17180879 -0.5385923
  2.84779852 -2.45609382 -2.35028895 -2.15063587  2.98963497 -2.35028895
 -1.70192731 -0.93912406  1.07151668  0.63611601 -0.76827363]
y Intercept:
-2.2889634639849876


In [23]:
# Print Each Predictor Next to Its Fitted Slope For Predicting Gale Wind
# A blank line is printed after positions 4, 8, 12 and 42, i.e. after the last intensity, distance,
# size and windspeed station predictor, so that the printout is grouped by variable family.
for position, (predictor, slope) in enumerate(zip(MLogR_Predictors, MLogR_Gale_Wind_Slope)):
    print(predictor, ":", slope)
    if position in (4, 8, 12, 42):
        print("")

Tropical Depression : -2.451336867756968
Tropical Storm : -1.5745224106052085
Severe Tropical Storm : 0.39167061074921483
Typhoon : 1.2003851193031165
Severe Typhoon or Super Typhoon : 2.434038550950749

Direct Impact : 2.262054408448504
Near Impact : 0.28575798942663494
Moderate Impact : -0.5658578947956978
Distant Impact : -1.9817195004385422

Small Size : -1.495541030278751
Medium Size : -0.26858005712326033
Large Size : 0.37220621818833755
Huge Size : 1.3921498718545737

Bluff Head (Stanley) : 0.6935511448215033
Central Pier : -0.11903446668155347
Cheung Chau : 2.4153551774473314
Cheung Chau Beach : 1.8792513199670795
Green Island : 1.994990438284187
Hong Kong International Airport : 0.6430321093575647
Kai Tak : -0.756103889096922
King's Park : -1.9335078420501748
Lau Fau Shan : -0.39313168254367137
Ngong Ping : 3.6708646126668456
North Point : -0.2687569453996198
Peng Chau : 0.6868322982278147
Ping Chau : -2.364509207544351
Sai Kung : -0.2700405323726678
Sha Chau : 1.0419355423895

In [24]:
# Calculate Logit Values for Predicting Strong Wind
# logit(row) = intercept + sum over predictors of (predictor value * fitted slope)
#
# Computed as a single matrix-vector product instead of a nested Python loop of scalar .iloc
# reads (rows x predictors lookups), which is far slower for the same result. The predictor
# matrix is selected with MLogR_Predictors -- the exact columns, in the exact order, the model was
# fit on -- so the slopes stay aligned with their columns without assuming that the predictors
# start at column position 2. dtype=float turns 0/1 (or boolean) dummy columns into numbers.
MLogR_Strong_Wind_Logit = MLogR_Strong_Wind_Intercept + \
MLogR_DF[MLogR_Predictors].to_numpy(dtype=float).dot(MLogR_Strong_Wind_Slope)
print ("Strong Wind Logit Values:")
print (MLogR_Strong_Wind_Logit)

Strong Wind Logit Values:
[ 0.5388166   0.27384558  2.26887069 ... -5.8103439  -0.55967421
 -5.40254911]


In [25]:
# Convert the Strong Wind Logits Into Probabilities With the Logistic Function
# probability = odds / (odds + 1), where odds = exp(logit)
# NOTE: a logit large enough for exp() to overflow to infinity would give inf / inf = NaN here.
MLogR_Strong_Wind_Odds = numpy.exp(MLogR_Strong_Wind_Logit)
MLogR_Strong_Wind_Prob = MLogR_Strong_Wind_Odds / (MLogR_Strong_Wind_Odds + 1)
print("Strong Wind Probabilities:", MLogR_Strong_Wind_Prob, sep="\n")

Strong Wind Probabilities:
[0.63153709 0.56803674 0.9062659  ... 0.00298745 0.36362284 0.00448488]


In [26]:
# Attach the MLogR Model Strong Wind Probabilities (One Value per Row, in Row Order)
# as a Column of the Main DataFrame With No Few
Main_DF_No_Few.loc[:, "MLogR Model Strong Wind Probability"] = MLogR_Strong_Wind_Prob
Main_DF_No_Few.head(n=5)

Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Typhoon Signal,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed Ratio to HK Average,Actual Windspeed,MLogR Model Strong Wind Probability
0,(2016,Nangka,Bluff Head (Stanley),8,SSW,Southern,limegreen,darkgreen,85,530,130,1.032203,45.0,0.631537
1,(2016,Nangka,Central Pier,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.015392,48.0,0.568037
2,(2016,Nangka,Cheung Chau,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.370514,60.0,0.906266
3,(2016,Nangka,Cheung Chau Beach,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.320824,70.0,0.872859
4,(2016,Nangka,Green Island,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.327907,74.0,0.898003


In [27]:
# Calculate Logit Values for Predicting Gale Wind
# logit(row) = intercept + sum over predictors of (predictor value * fitted slope)
#
# Computed as a single matrix-vector product instead of a nested Python loop of scalar .iloc
# reads (rows x predictors lookups), which is far slower for the same result. The predictor
# matrix is selected with MLogR_Predictors -- the exact columns, in the exact order, the model was
# fit on -- so the slopes stay aligned with their columns without assuming that the predictors
# start at column position 2. dtype=float turns 0/1 (or boolean) dummy columns into numbers.
MLogR_Gale_Wind_Logit = MLogR_Gale_Wind_Intercept + \
MLogR_DF[MLogR_Predictors].to_numpy(dtype=float).dot(MLogR_Gale_Wind_Slope)
print ("Gale Wind Logit Values:")
print (MLogR_Gale_Wind_Logit)

Gale Wind Logit Values:
[-2.68798768 -3.50057329 -0.96618365 ... -7.65559881 -2.51532797
 -7.20689025]


In [28]:
# Convert the Gale Wind Logits Into Probabilities With the Logistic Function
# probability = odds / (odds + 1), where odds = exp(logit)
# NOTE: a logit large enough for exp() to overflow to infinity would give inf / inf = NaN here.
MLogR_Gale_Wind_Odds = numpy.exp(MLogR_Gale_Wind_Logit)
MLogR_Gale_Wind_Prob = MLogR_Gale_Wind_Odds / (MLogR_Gale_Wind_Odds + 1)
print("Gale Wind Probabilities:", MLogR_Gale_Wind_Prob, sep="\n")

Gale Wind Probabilities:
[0.06368591 0.02929592 0.27564184 ... 0.00047316 0.07479059 0.00074091]


In [29]:
# Attach the MLogR Model Gale Wind Probabilities (One Value per Row, in Row Order)
# as a Column of the Main DataFrame With No Few
Main_DF_No_Few.loc[:, "MLogR Model Gale Wind Probability"] = MLogR_Gale_Wind_Prob
Main_DF_No_Few.head(n=5)

Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Typhoon Signal,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed Ratio to HK Average,Actual Windspeed,MLogR Model Strong Wind Probability,MLogR Model Gale Wind Probability
0,(2016,Nangka,Bluff Head (Stanley),8,SSW,Southern,limegreen,darkgreen,85,530,130,1.032203,45.0,0.631537,0.063686
1,(2016,Nangka,Central Pier,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.015392,48.0,0.568037,0.029296
2,(2016,Nangka,Cheung Chau,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.370514,60.0,0.906266,0.275642
3,(2016,Nangka,Cheung Chau Beach,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.320824,70.0,0.872859,0.182085
4,(2016,Nangka,Green Island,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.327907,74.0,0.898003,0.199959


### Model 6.1: Probabilities of Strong Wind Signal No.3 and Gale/Storm Wind Signal No.8 Using Multivariable Logistic Regression (MLogR) Model

The probabilities of strong wind and gale wind calculated through the MLogR Model can further be used to calculate the probabilities of Hong Kong Observatory (HKO) issuing Strong Wind Signal No.3 and Gale/Storm Wind Signal No.8.

According to HKO, since 2007, "the HKO makes reference to the wind data recorded from a network of eight near-sea level reference anemometers covering the whole of Hong Kong when considering the issuance of Tropical Cyclone Warning Signals, No.3 and No.8". Therefore, these eight reference windspeed stations -- Cheung Chau, Hong Kong International Airport, Sai Kung, Kai Tak, Lau Fau Shan, Tsing Yi Shell Oil Depot, Sha Tin, Ta Kwu Ling -- will be the focus of this next section. Officially, the criterion for issuing TC Signals No.3 and No.8 is that at least half -- 4 out of 8 -- of the reference windspeed stations are expected to record strong wind or gale wind respectively. However, according to previous observations, in most instances TC Signals No.3 and No.8 are issued even if fewer than 4 of the 8 reference windspeed stations record strong wind or gale wind. Therefore, instead of using the official criterion, the probability of TC Signals No.3 and No.8 being issued will be calculated for each possible number of reference windspeed stations, out of the 8 of them, reaching strong wind or gale wind -- 0 stations reaching strong wind or gale wind, 1 station reaching strong wind or gale wind, 2 stations reaching strong wind or gale wind, etc.

After that, using the probabilities of strong wind and gale wind at those 8 reference windspeed stations calculated by the MLogR Model in the previous part, the probability that each possible number of reference windspeed stations, out of the 8 of them, reaches strong wind or gale wind -- 0 stations reaching strong wind or gale wind, 1 station reaching strong wind or gale wind, 2 stations reaching strong wind or gale wind, etc. -- will be calculated for each TC.

In [30]:
# Restrict the Main DataFrame to the 8 Reference Windspeed Stations
Reference_Stations = [
    "Cheung Chau",
    "Hong Kong International Airport",
    "Sai Kung",
    "Kai Tak",
    "Lau Fau Shan",
    "Tsing Yi Shell Oil Depot",
    "Sha Tin",
    "Ta Kwu Ling",
]
# Keep the rows whose station is any one of the reference stations, as an independent copy
Main_DF_Ref_Stations = Main_DF_No_Few.loc[
    Main_DF_No_Few["Windspeed Station"].isin(Reference_Stations)
].copy()
# Then keep only rows that have a strong wind probability (NaN fails the ">= 0" comparison)
Main_DF_Ref_Stations = Main_DF_Ref_Stations.loc[
    Main_DF_Ref_Stations["MLogR Model Strong Wind Probability"].ge(0)
]
print("DataFrame Size:", len(Main_DF_Ref_Stations))
Main_DF_Ref_Stations.head()

DataFrame Size: 785


Unnamed: 0,Typhoon Code,Typhoon Name,Windspeed Station,Typhoon Signal,Bearing,Quadrant,Colour Code 1,Colour Code 2,Intensity,Distance,Radius of Max Winds,Windspeed Ratio to HK Average,Actual Windspeed,MLogR Model Strong Wind Probability,MLogR Model Gale Wind Probability
2,(2016,Nangka,Cheung Chau,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.370514,60.0,0.906266,0.275642
5,(2016,Nangka,Hong Kong International Airport,8,SSW,Southern,limegreen,darkgreen,85,530,130,1.124406,48.0,0.73962,0.060739
6,(2016,Nangka,Kai Tak,8,SSW,Southern,limegreen,darkgreen,85,530,130,0.907685,37.0,0.43925,0.01571
9,(2016,Nangka,Lau Fau Shan,8,SSW,Southern,limegreen,darkgreen,85,530,130,0.917341,30.0,0.402936,0.02243
14,(2016,Nangka,Sai Kung,8,SSW,Southern,limegreen,darkgreen,85,530,130,0.994101,53.0,0.677347,0.025294


In [31]:
# Create Arrays of Actual Windspeeds, Strong and Gale Wind Probabilities For 8 Reference Windspeed Stations
Typhoon_Code_List = list(Typhoon_CPA_File["Typhoon Code"])
Typhoon_Name_List = list(Typhoon_CPA_File["Typhoon Name"])
Typhoon_Signal_List = list(Typhoon_CPA_File["Highest Typhoon Signal Issued"])


def Build_Ref_Station_Array(Column_Name):
    """Return a (station, typhoon) float array holding ``Column_Name`` values.

    Row j corresponds to Reference_Stations[j] and column i to Typhoon_Code_List[i].
    An entry is filled only when Main_DF_Ref_Stations contains exactly one row for
    that (typhoon code, station) pair; every other entry (no row, or more than one
    row) is left as NaN.
    """
    Values = numpy.full((len(Reference_Stations), len(Typhoon_Code_List)), numpy.nan)
    for j, Station in enumerate(Reference_Stations):
        # Filter by station once instead of re-filtering the whole frame for every typhoon.
        Station_Rows = Main_DF_Ref_Stations[Main_DF_Ref_Stations["Windspeed Station"] == Station]
        for i, Code in enumerate(Typhoon_Code_List):
            Matches = Station_Rows.loc[Station_Rows["Typhoon Code"] == Code, Column_Name]
            if len(Matches) == 1:
                # Extract the scalar explicitly: calling float() on a one-element
                # Series is deprecated in recent pandas releases.
                Values[j, i] = float(Matches.iloc[0])
    return Values


Ref_Stations_Actual_Windspeeds = Build_Ref_Station_Array("Actual Windspeed")
Ref_Stations_Strong_Wind_Probs = Build_Ref_Station_Array("MLogR Model Strong Wind Probability")
Ref_Stations_Gale_Wind_Probs = Build_Ref_Station_Array("MLogR Model Gale Wind Probability")

In [32]:
# Calculate Number of Reference Stations Out of 8 Recording Strong Wind and Gale Wind For Each Typhoon
# Windspeed thresholds compared against the "Actual Windspeed" readings
# (presumably km/h, matching the other speed columns -- TODO confirm units).
Strong_Wind_Threshold = 41
Gale_Wind_Threshold = 63
# Comparisons against NaN evaluate to False; silence the warning older numpy versions emit for them.
with numpy.errstate(invalid="ignore"):
    Is_Strong_Wind = Ref_Stations_Actual_Windspeeds >= Strong_Wind_Threshold
    Is_Gale_Wind = Ref_Stations_Actual_Windspeeds >= Gale_Wind_Threshold
    # A reading that is not >= 0 (i.e. NaN) invalidates the counts for that whole typhoon.
    Has_Invalid_Reading = (~(Ref_Stations_Actual_Windspeeds >= 0)).any(axis=0)
# Count stations per typhoon (axis 0 runs over stations); float dtype so NaN can be stored.
Ref_Station_Strong_Wind_Count = Is_Strong_Wind.sum(axis=0).astype(float)
Ref_Station_Gale_Wind_Count = Is_Gale_Wind.sum(axis=0).astype(float)
Ref_Station_Strong_Wind_Count[Has_Invalid_Reading] = numpy.nan
Ref_Station_Gale_Wind_Count[Has_Invalid_Reading] = numpy.nan
print (Ref_Station_Strong_Wind_Count)
print (Ref_Station_Gale_Wind_Count)

[ 3.  4.  5.  3.  2.  2.  1.  6.  0.  2.  8.  1.  1.  2.  2.  3.  4.  2.
  3.  7.  8. nan  5.  6.  4.  0.  2.  0.  2.  7.  0.  4.  4.  3.  1.  6.
  2.  4.  0.  1.  5.  6.  5.  0.  3.  3.  0.  4.  8.  5.  1.  1.  5.  4.
  4.  0.  3.  3.  0.  0.  3.  2.  0.  8.  3.  4.  8.  3.  0.  0.  1.  8.
 nan  6.  6.  6.  3.  5.  6. nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]
[ 0.  2.  1.  0.  1.  0.  0.  1.  0.  0.  6.  0.  0.  0.  1.  1.  1.  0.
  0.  4.  5. nan  1.  3.  1.  0.  0.  0.  0.  3.  0.  0.  1.  0.  0.  4.
  0.  1.  0.  0.  3.  2.  1.  0.  0.  1.  0.  1.  5.  1.  0.  0.  3.  0.
  1.  0.  0.  0.  0.  0.  0.  0.  0.  4.  0.  1.  3.  0.  0.  0.  0.  5.
 nan  2.  4.  2.  1.  1.  1. nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]


In [33]:
# Create MLogR Probabilities DataFrame For 8 Reference Windspeed Stations
# Column-name prefixes for the stations, in the same order as the rows of the per-station arrays.
Ref_Station_Labels = [
    "Cheung Chau",
    "HK International Airport",
    "Sai Kung",
    "Kai Tak",
    "Lau Fau Shan",
    "Tsing Yi Shell Oil Depot",
    "Sha Tin",
    "Ta Kwu Ling",
]
# Typhoon-level columns come first.
Ref_Station_Columns = {
    "Typhoon Code": Typhoon_Code_List,
    "Typhoon Name": Typhoon_Name_List,
    "Typhoon Signal": Typhoon_Signal_List,
    "Number of Reference Stations Strong Wind": Ref_Station_Strong_Wind_Count,
    "Number of Reference Stations Gale Wind": Ref_Station_Gale_Wind_Count,
}
# Then three groups of eight per-station columns, appended in this order:
# actual windspeeds, strong wind probabilities, gale wind probabilities.
Ref_Station_Column_Groups = (
    ("Actual Windspeed", Ref_Stations_Actual_Windspeeds),
    ("Strong Wind Probability", Ref_Stations_Strong_Wind_Probs),
    ("Gale Wind Probability", Ref_Stations_Gale_Wind_Probs),
)
for Column_Suffix, Station_Array in Ref_Station_Column_Groups:
    for Station_Row, Station_Label in enumerate(Ref_Station_Labels):
        Ref_Station_Columns[Station_Label + " " + Column_Suffix] = Station_Array[Station_Row]
MLogR_DF_Probs_Ref_Stations = pandas.DataFrame(Ref_Station_Columns)
print ("DataFrame Size:", len(MLogR_DF_Probs_Ref_Stations))
MLogR_DF_Probs_Ref_Stations.head()

DataFrame Size: 105


Unnamed: 0,Typhoon Code,Typhoon Name,Typhoon Signal,Number of Reference Stations Strong Wind,Number of Reference Stations Gale Wind,Cheung Chau Actual Windspeed,HK International Airport Actual Windspeed,Sai Kung Actual Windspeed,Kai Tak Actual Windspeed,Lau Fau Shan Actual Windspeed,...,Sha Tin Strong Wind Probability,Ta Kwu Ling Strong Wind Probability,Cheung Chau Gale Wind Probability,HK International Airport Gale Wind Probability,Sai Kung Gale Wind Probability,Kai Tak Gale Wind Probability,Lau Fau Shan Gale Wind Probability,Tsing Yi Shell Oil Depot Gale Wind Probability,Sha Tin Gale Wind Probability,Ta Kwu Ling Gale Wind Probability
0,(2016,Nangka,8,3.0,0.0,60.0,48.0,53.0,37.0,30.0,...,0.045506,0.064589,0.275642,0.060739,0.025294,0.01571,0.02243,0.003231,0.002092,0.002092
1,(2007,Higos,9,4.0,2.0,108.0,68.0,54.0,56.0,38.0,...,0.480476,0.572553,0.93874,0.722537,0.511001,0.391252,0.48024,0.11545,0.077836,0.077836
2,(2003,Sinlaku,3,5.0,1.0,77.0,57.0,51.0,45.0,42.0,...,0.012646,0.018213,0.092928,0.017112,0.006938,0.004279,0.006139,0.000872,0.000564,0.000564
3,(2002,Nuri,3,3.0,0.0,57.0,46.0,49.0,40.0,35.0,...,0.047689,0.067624,0.411096,0.106048,0.045441,0.028446,0.040391,0.005911,0.00383,0.00383
4,(1914,Kajiki,3,2.0,1.0,63.0,40.0,42.0,32.0,36.0,...,0.052476,0.074255,0.39481,0.099799,0.042593,0.026633,0.037847,0.005526,0.003581,0.003581


In [34]:
# Create Arrays with Probabilities of Strong Wind and Not Strong Wind, Gale Wind and Not Gale Wind
# Each result has shape (station, typhoon, 2): index 0 on the last axis holds 1 - p
# (threshold not reached) and index 1 holds p (threshold reached).
# The values are taken straight from the per-station probability arrays rather than
# read back from MLogR_DF_Probs_Ref_Stations by column position, so this cell does not
# depend on that DataFrame's column order or on how many rows it currently has.
Ref_Stations_Strong_Wind_TF_Probs = numpy.stack(
    [1 - Ref_Stations_Strong_Wind_Probs, Ref_Stations_Strong_Wind_Probs], axis=-1)
Ref_Stations_Gale_Wind_TF_Probs = numpy.stack(
    [1 - Ref_Stations_Gale_Wind_Probs, Ref_Stations_Gale_Wind_Probs], axis=-1)

In [35]:
# Calculate Probability For Each Number Out Of The 8 Reference Windspeed Stations Recording Strong Wind
# Row k accumulates, for every typhoon, the summed probability of all outcomes in which
# exactly k of the 8 stations take the "strong wind" branch (last-axis index 1).
Ref_Stations_Strong_Wind_Count_Prob = numpy.zeros((9,len(Ref_Stations_Strong_Wind_TF_Probs[0])))
# Walk through all 2**8 yes/no outcomes; Outcome[Station] is 0 (no) or 1 (yes).
for Outcome in numpy.ndindex(2, 2, 2, 2, 2, 2, 2, 2):
    # Multiply the per-station probabilities of this outcome, for all typhoons at once.
    Outcome_Prob = Ref_Stations_Strong_Wind_TF_Probs[0][:, Outcome[0]]
    for Station in range(1, 8):
        Outcome_Prob = Outcome_Prob * Ref_Stations_Strong_Wind_TF_Probs[Station][:, Outcome[Station]]
    # The number of "yes" entries selects the count bucket.
    Ref_Stations_Strong_Wind_Count_Prob[sum(Outcome)] += Outcome_Prob
print (Ref_Stations_Strong_Wind_Count_Prob)

[[1.96266927e-03 4.52879854e-10 6.48632630e-02 1.65469105e-03
  1.15582762e-03 1.96266927e-03 2.56086157e-02 2.34257198e-05
  2.86519945e-02 6.43617726e-02 1.02789398e-20 1.65469105e-03
  6.48632630e-02 6.48632630e-02 1.02175750e-01 7.50385092e-04
  9.93040986e-09 2.86519945e-02 3.00474880e-01 1.46550181e-13
  2.01402213e-16            nan 1.44465177e-03 2.30973771e-03
  2.88674841e-03 6.67522557e-01 5.50170029e-01 1.70707195e-01
  1.47521685e-02 6.54993719e-07 2.37667305e-01 4.24800346e-03
  7.05707802e-06 1.56744717e-03 1.81510387e-01 6.25388658e-06
  7.86543906e-02 7.05707802e-06 4.97580699e-01 1.15791658e-05
  8.93603955e-10 1.15303449e-08 1.96266927e-03 4.97580699e-01
  8.31581521e-04 9.07690999e-03 3.18219955e-01 6.25388658e-06
  2.13209405e-19 6.12684474e-07 1.02175750e-01 8.33091340e-03
  6.25388658e-06 8.33091340e-03 7.50385092e-04 4.97580699e-01
  1.40154032e-02 2.08321350e-01 4.87380313e-01 6.67522557e-01
  5.14164681e-03 7.28335857e-02 8.33091340e-03 9.93040986e-09
  9.0769

In [36]:
# Add one column per station count (0 to 8), each taken from the matching row of
# Ref_Stations_Strong_Wind_Count_Prob.
for Station_Count in range(9):
    Column_Name = str(Station_Count) + " Reference Stations Strong Wind Probability"
    MLogR_DF_Probs_Ref_Stations[Column_Name] = Ref_Stations_Strong_Wind_Count_Prob[Station_Count]
MLogR_DF_Probs_Ref_Stations.head()

Unnamed: 0,Typhoon Code,Typhoon Name,Typhoon Signal,Number of Reference Stations Strong Wind,Number of Reference Stations Gale Wind,Cheung Chau Actual Windspeed,HK International Airport Actual Windspeed,Sai Kung Actual Windspeed,Kai Tak Actual Windspeed,Lau Fau Shan Actual Windspeed,...,Ta Kwu Ling Gale Wind Probability,0 Reference Stations Strong Wind Probability,1 Reference Stations Strong Wind Probability,2 Reference Stations Strong Wind Probability,3 Reference Stations Strong Wind Probability,4 Reference Stations Strong Wind Probability,5 Reference Stations Strong Wind Probability,6 Reference Stations Strong Wind Probability,7 Reference Stations Strong Wind Probability,8 Reference Stations Strong Wind Probability
0,(2016,Nangka,8,3.0,0.0,60.0,48.0,53.0,37.0,30.0,...,0.002092,0.001962669,0.03215367,0.158308,0.329783,0.31387,0.13719,0.024959,0.001734,3.926318e-05
1,(2007,Higos,9,4.0,2.0,108.0,68.0,54.0,56.0,38.0,...,0.077836,4.528799e-10,1.439265e-07,1.4e-05,0.000556,0.010256,0.086962,0.306906,0.413623,0.1816841
2,(2003,Sinlaku,3,5.0,1.0,77.0,57.0,51.0,45.0,42.0,...,0.000564,0.06486326,0.2854833,0.377617,0.211337,0.054038,0.006345,0.00031,6e-06,3.521473e-08
3,(2002,Nuri,3,3.0,0.0,57.0,46.0,49.0,40.0,35.0,...,0.00383,0.001654691,0.02847423,0.147257,0.322221,0.322127,0.147893,0.028262,0.002062,4.905325e-05
4,(1914,Kajiki,3,2.0,1.0,63.0,40.0,42.0,32.0,36.0,...,0.003581,0.001155828,0.02199645,0.125806,0.304441,0.336589,0.170902,0.036118,0.002915,7.667246e-05


In [38]:
# Calculate Probability For Each Number Out Of The 8 Reference Windspeed Stations Recording Gale Wind
# Row k accumulates, for every typhoon, the summed probability of all outcomes in which
# exactly k of the 8 stations take the "gale wind" branch (last-axis index 1).
Ref_Stations_Gale_Wind_Count_Prob = numpy.zeros((9,len(Ref_Stations_Gale_Wind_TF_Probs[0])))
# Walk through all 2**8 yes/no outcomes; Outcome[Station] is 0 (no) or 1 (yes).
for Outcome in numpy.ndindex(2, 2, 2, 2, 2, 2, 2, 2):
    # Multiply the per-station probabilities of this outcome, for all typhoons at once.
    Outcome_Prob = Ref_Stations_Gale_Wind_TF_Probs[0][:, Outcome[0]]
    for Station in range(1, 8):
        Outcome_Prob = Outcome_Prob * Ref_Stations_Gale_Wind_TF_Probs[Station][:, Outcome[Station]]
    # The number of "yes" entries selects the count bucket.
    Ref_Stations_Gale_Wind_Count_Prob[sum(Outcome)] += Outcome_Prob
print (Ref_Stations_Gale_Wind_Count_Prob)

[[6.33374309e-01 1.97820238e-03 8.74413624e-01 4.62183864e-01
  4.82309732e-01 6.33374309e-01 4.87261407e-01 2.31007048e-01
  8.15874166e-01 5.37140307e-01 7.78560173e-10 4.62183864e-01
  8.74413624e-01 8.74413624e-01 8.37263119e-01 4.00793805e-01
  5.27180180e-03 8.15874166e-01 8.91079993e-01 2.91274538e-05
  1.35487534e-06            nan 2.58047808e-01 2.42484683e-01
  4.40669903e-01 9.47965438e-01 9.38736548e-01 8.17902121e-01
  6.49841593e-01 2.92189282e-02 9.08711887e-01 6.06111133e-01
  9.20438164e-02 4.21378272e-01 8.89705379e-01 4.25174935e-02
  7.44613072e-01 9.20438164e-02 9.04735970e-01 8.43605223e-02
  3.42181965e-03 1.66507740e-02 6.33374309e-01 9.04735970e-01
  2.64909112e-01 5.24308081e-01 7.87643902e-01 4.25174935e-02
  1.31819812e-08 8.53496761e-02 8.37263119e-01 5.50293325e-01
  4.25174935e-02 5.50293325e-01 4.00793805e-01 9.04735970e-01
  5.41237537e-01 8.65167423e-01 9.40189475e-01 9.47965438e-01
  3.84254884e-01 7.58022770e-01 5.50293325e-01 5.27180180e-03
  5.2430

In [39]:
# Add one column per station count (0 to 8), each taken from the matching row of
# Ref_Stations_Gale_Wind_Count_Prob.
for Station_Count in range(9):
    Column_Name = str(Station_Count) + " Reference Stations Gale Wind Probability"
    MLogR_DF_Probs_Ref_Stations[Column_Name] = Ref_Stations_Gale_Wind_Count_Prob[Station_Count]
MLogR_DF_Probs_Ref_Stations.head()

Unnamed: 0,Typhoon Code,Typhoon Name,Typhoon Signal,Number of Reference Stations Strong Wind,Number of Reference Stations Gale Wind,Cheung Chau Actual Windspeed,HK International Airport Actual Windspeed,Sai Kung Actual Windspeed,Kai Tak Actual Windspeed,Lau Fau Shan Actual Windspeed,...,8 Reference Stations Strong Wind Probability,0 Reference Stations Gale Wind Probability,1 Reference Stations Gale Wind Probability,2 Reference Stations Gale Wind Probability,3 Reference Stations Gale Wind Probability,4 Reference Stations Gale Wind Probability,5 Reference Stations Gale Wind Probability,6 Reference Stations Gale Wind Probability,7 Reference Stations Gale Wind Probability,8 Reference Stations Gale Wind Probability
0,(2016,Nangka,8,3.0,0.0,60.0,48.0,53.0,37.0,30.0,...,3.926318e-05,0.633374,0.327763,0.037158,0.001669,3.519426e-05,3.531782e-07,1.55471e-09,3.00666e-12,2.109072e-15
1,(2007,Higos,9,4.0,2.0,108.0,68.0,54.0,56.0,38.0,...,0.1816841,0.001978,0.041224,0.188195,0.340375,0.2890542,0.1168088,0.02070643,0.001612555,4.55508e-05
2,(2003,Sinlaku,3,5.0,1.0,77.0,57.0,51.0,45.0,42.0,...,3.521473e-08,0.874414,0.121823,0.003718,4.5e-05,2.552567e-07,6.896225e-10,8.172953e-13,4.255262e-16,8.036104e-20
3,(2002,Nuri,3,3.0,0.0,57.0,46.0,49.0,40.0,35.0,...,4.905325e-05,0.462184,0.438755,0.091247,0.007518,0.0002908403,5.354067e-06,4.323608e-08,1.53387e-10,1.973798e-13
4,(1914,Kajiki,3,2.0,1.0,63.0,40.0,42.0,32.0,36.0,...,7.667246e-05,0.48231,0.427889,0.083162,0.006403,0.0002315038,3.982772e-06,3.005703e-08,9.965208e-11,1.198391e-13


In [40]:
# Find Probability of Typhoon Signal Number 3 For Every Number Out Of The 8 Reference Stations Recording Strong Wind
# Entry j is the fraction of typhoons with exactly j reference stations recording strong wind
# whose issued signal was 3 or higher.
Ref_Station_Strong_Wind_Count_T3_Prob = numpy.zeros(9)
for j in range(len(Ref_Station_Strong_Wind_Count_T3_Prob)):
    Ref_Station_T_Signal = numpy.array(MLogR_DF_Probs_Ref_Stations[\
    (MLogR_DF_Probs_Ref_Stations["Number of Reference Stations Strong Wind"] == j)]["Typhoon Signal"])
    if len(Ref_Station_T_Signal) > 0:
        T3_True = numpy.count_nonzero(Ref_Station_T_Signal >= 3)
        Ref_Station_Strong_Wind_Count_T3_Prob[j] = T3_True / len(Ref_Station_T_Signal)
    else:
        # No typhoon has this station count, so the fraction is undefined. Store NaN
        # instead of dividing by zero; NaN propagates into anything computed from it.
        Ref_Station_Strong_Wind_Count_T3_Prob[j] = numpy.nan
print (Ref_Station_Strong_Wind_Count_T3_Prob)

[0.07692308 0.5        0.5        0.92307692 1.         1.
 1.         1.         1.        ]


In [41]:
# Find Probability of Typhoon Signal Number 8 For Every Number Out Of The 8 Reference Stations Recording Gale Wind
# Station counts with no matching typhoon keep the initial value of 1 (assume 100% chance of T8);
# the original note says this applies to 7 and 8 stations, which had no precedent.
Ref_Station_Gale_Wind_Count_T8_Prob = numpy.ones(9)
for Station_Count in range(9):
    Has_Station_Count = MLogR_DF_Probs_Ref_Stations["Number of Reference Stations Gale Wind"] == Station_Count
    Issued_Signals = numpy.array(MLogR_DF_Probs_Ref_Stations[Has_Station_Count]["Typhoon Signal"])
    if len(Issued_Signals) > 0:
        # Fraction of these typhoons whose issued signal was 8 or higher.
        Ref_Station_Gale_Wind_Count_T8_Prob[Station_Count] = \
        numpy.count_nonzero(Issued_Signals >= 8) / len(Issued_Signals)
print (Ref_Station_Gale_Wind_Count_T8_Prob)

[0.04878049 0.36842105 0.75       1.         1.         1.
 1.         1.         1.        ]


In [42]:
# Calculate Probability of Typhoon Signal Number 3 Based On Probabilities of Number of Reference Stations Strong Wind
# For each typhoon, sum over station counts: (count probability) * (T3 probability for that count).
T3_Probability = numpy.zeros(len(Ref_Stations_Strong_Wind_Count_Prob[0]))
for Station_Count, Signal_Chance in enumerate(Ref_Station_Strong_Wind_Count_T3_Prob):
    T3_Probability += Ref_Stations_Strong_Wind_Count_Prob[Station_Count] * Signal_Chance
print (T3_Probability)

[0.87758939 0.99995032 0.59231917 0.88582069 0.90161332 0.87758939
 0.68953102 0.98348043 0.67852691 0.59318762 1.         0.88582069
 0.59231917 0.59231917 0.53968892 0.91810739 0.99971772 0.67852691
 0.39122132 0.99999956 0.99999999        nan 0.89203087 0.86928457
 0.8571689  0.21851668 0.270043   0.47484431 0.74036553 0.99730089
 0.42811752 0.83406593 0.99087303 0.88833383 0.46654761 0.99140987
 0.57043284 0.99087303 0.29380205 0.98831794 0.99992685 0.99969341
 0.87758939 0.29380205 0.91441984 0.78045993 0.38155276 0.99140987
 1.         0.99739359 0.53968892 0.78707044 0.99140987 0.78707044
 0.91810739 0.29380205 0.74480617 0.44734899 0.2984785  0.21851668
 0.82162456 0.57923541 0.78707044 0.99971772 0.78045993 0.99198312
 0.99973239 0.95871964 0.73389613 0.47370595 0.31399794 0.99999957
        nan 0.99717814 0.89203087 0.91810739 0.78045993 0.88833383
 0.87425957        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan       

In [43]:
# Calculate Probability of Typhoon Signal Number 8 Based On Probabilities of Number of Reference Stations Gale Wind
# For each typhoon, sum over station counts: (count probability) * (T8 probability for that count).
T8_Probability = numpy.zeros(len(Ref_Stations_Gale_Wind_Count_Prob[0]))
for Station_Count, Signal_Chance in enumerate(Ref_Station_Gale_Wind_Count_T8_Prob):
    T8_Probability += Ref_Stations_Gale_Wind_Count_Prob[Station_Count] * Signal_Chance
print (T8_Probability)

[0.18122405 0.92503353 0.09037033 0.26044158 0.25018122 0.18122405
 0.24770285 0.41168289 0.11089558 0.2236583  0.99999375 0.26044158
 0.09037033 0.09037033 0.10330102 0.29383716 0.87737533 0.11089558
 0.08467041 0.99310655 0.99899646        nan 0.38929803 0.40195141
 0.27176373 0.06564783 0.06869073 0.11017059 0.17439582 0.73206818
 0.07870465 0.19278613 0.5761908  0.28225543 0.08513826 0.68682475
 0.13708009 0.5761908  0.08004426 0.58989228 0.90093682 0.79020623
 0.18122405 0.08004426 0.38390215 0.22969256 0.12109941 0.68682475
 0.99995495 0.58807814 0.10330102 0.21757398 0.68682475 0.21757398
 0.29383716 0.08004426 0.22175229 0.09355892 0.0682106  0.06564783
 0.30345817 0.13204184 0.21757398 0.87737533 0.22969256 0.58548047
 0.89990333 0.37594004 0.22164395 0.15288799 0.06443437 0.98989625
        nan 0.69777012 0.38929803 0.29383716 0.22969256 0.28225543
 0.24142647        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan       

In [44]:
# Attach the two signal probability arrays to the DataFrame, one column each.
Signal_Probability_Columns = (
    ("Strong Wind Signal No.3 Probability", T3_Probability),
    ("Gale/Storm Wind Signal No.8 Probability", T8_Probability),
)
for Column_Name, Column_Values in Signal_Probability_Columns:
    MLogR_DF_Probs_Ref_Stations[Column_Name] = Column_Values
print ("DataFrame Size:", len(MLogR_DF_Probs_Ref_Stations))
MLogR_DF_Probs_Ref_Stations.head()

DataFrame Size: 105


Unnamed: 0,Typhoon Code,Typhoon Name,Typhoon Signal,Number of Reference Stations Strong Wind,Number of Reference Stations Gale Wind,Cheung Chau Actual Windspeed,HK International Airport Actual Windspeed,Sai Kung Actual Windspeed,Kai Tak Actual Windspeed,Lau Fau Shan Actual Windspeed,...,1 Reference Stations Gale Wind Probability,2 Reference Stations Gale Wind Probability,3 Reference Stations Gale Wind Probability,4 Reference Stations Gale Wind Probability,5 Reference Stations Gale Wind Probability,6 Reference Stations Gale Wind Probability,7 Reference Stations Gale Wind Probability,8 Reference Stations Gale Wind Probability,Strong Wind Signal No.3 Probability,Gale/Storm Wind Signal No.8 Probability
0,(2016,Nangka,8,3.0,0.0,60.0,48.0,53.0,37.0,30.0,...,0.327763,0.037158,0.001669,3.519426e-05,3.531782e-07,1.55471e-09,3.00666e-12,2.109072e-15,0.877589,0.181224
1,(2007,Higos,9,4.0,2.0,108.0,68.0,54.0,56.0,38.0,...,0.041224,0.188195,0.340375,0.2890542,0.1168088,0.02070643,0.001612555,4.55508e-05,0.99995,0.925034
2,(2003,Sinlaku,3,5.0,1.0,77.0,57.0,51.0,45.0,42.0,...,0.121823,0.003718,4.5e-05,2.552567e-07,6.896225e-10,8.172953e-13,4.255262e-16,8.036104e-20,0.592319,0.09037
3,(2002,Nuri,3,3.0,0.0,57.0,46.0,49.0,40.0,35.0,...,0.438755,0.091247,0.007518,0.0002908403,5.354067e-06,4.323608e-08,1.53387e-10,1.973798e-13,0.885821,0.260442
4,(1914,Kajiki,3,2.0,1.0,63.0,40.0,42.0,32.0,36.0,...,0.427889,0.083162,0.006403,0.0002315038,3.982772e-06,3.005703e-08,9.965208e-11,1.198391e-13,0.901613,0.250181


In [45]:
# Remove Typhoons With NaN Datapoints
# A NaN station count fails the >= 0 comparison, so those rows are filtered out.
Has_Valid_Count = MLogR_DF_Probs_Ref_Stations["Number of Reference Stations Strong Wind"].ge(0)
MLogR_DF_Probs_Ref_Stations = MLogR_DF_Probs_Ref_Stations[Has_Valid_Count]
print ("DataFrame Size:", len(MLogR_DF_Probs_Ref_Stations))
MLogR_DF_Probs_Ref_Stations.head()

DataFrame Size: 77


Unnamed: 0,Typhoon Code,Typhoon Name,Typhoon Signal,Number of Reference Stations Strong Wind,Number of Reference Stations Gale Wind,Cheung Chau Actual Windspeed,HK International Airport Actual Windspeed,Sai Kung Actual Windspeed,Kai Tak Actual Windspeed,Lau Fau Shan Actual Windspeed,...,1 Reference Stations Gale Wind Probability,2 Reference Stations Gale Wind Probability,3 Reference Stations Gale Wind Probability,4 Reference Stations Gale Wind Probability,5 Reference Stations Gale Wind Probability,6 Reference Stations Gale Wind Probability,7 Reference Stations Gale Wind Probability,8 Reference Stations Gale Wind Probability,Strong Wind Signal No.3 Probability,Gale/Storm Wind Signal No.8 Probability
0,(2016,Nangka,8,3.0,0.0,60.0,48.0,53.0,37.0,30.0,...,0.327763,0.037158,0.001669,3.519426e-05,3.531782e-07,1.55471e-09,3.00666e-12,2.109072e-15,0.877589,0.181224
1,(2007,Higos,9,4.0,2.0,108.0,68.0,54.0,56.0,38.0,...,0.041224,0.188195,0.340375,0.2890542,0.1168088,0.02070643,0.001612555,4.55508e-05,0.99995,0.925034
2,(2003,Sinlaku,3,5.0,1.0,77.0,57.0,51.0,45.0,42.0,...,0.121823,0.003718,4.5e-05,2.552567e-07,6.896225e-10,8.172953e-13,4.255262e-16,8.036104e-20,0.592319,0.09037
3,(2002,Nuri,3,3.0,0.0,57.0,46.0,49.0,40.0,35.0,...,0.438755,0.091247,0.007518,0.0002908403,5.354067e-06,4.323608e-08,1.53387e-10,1.973798e-13,0.885821,0.260442
4,(1914,Kajiki,3,2.0,1.0,63.0,40.0,42.0,32.0,36.0,...,0.427889,0.083162,0.006403,0.0002315038,3.982772e-06,3.005703e-08,9.965208e-11,1.198391e-13,0.901613,0.250181


In [46]:
# Show typhoons where the T3 probability is at least 0.50 but the issued signal was below 3.
T3_Predicted = MLogR_DF_Probs_Ref_Stations["Strong Wind Signal No.3 Probability"] >= 0.50
Below_T3_Issued = MLogR_DF_Probs_Ref_Stations["Typhoon Signal"] < 3
MLogR_DF_Probs_Ref_Stations[T3_Predicted & Below_T3_Issued]

Unnamed: 0,Typhoon Code,Typhoon Name,Typhoon Signal,Number of Reference Stations Strong Wind,Number of Reference Stations Gale Wind,Cheung Chau Actual Windspeed,HK International Airport Actual Windspeed,Sai Kung Actual Windspeed,Kai Tak Actual Windspeed,Lau Fau Shan Actual Windspeed,...,1 Reference Stations Gale Wind Probability,2 Reference Stations Gale Wind Probability,3 Reference Stations Gale Wind Probability,4 Reference Stations Gale Wind Probability,5 Reference Stations Gale Wind Probability,6 Reference Stations Gale Wind Probability,7 Reference Stations Gale Wind Probability,8 Reference Stations Gale Wind Probability,Strong Wind Signal No.3 Probability,Gale/Storm Wind Signal No.8 Probability
5,(1912,Podul,1,2.0,0.0,46.0,41.0,37.0,31.0,36.0,...,0.327763,0.037158,0.001669,3.519426e-05,3.531782e-07,1.55471e-09,3.00666e-12,2.109072e-15,0.877589,0.181224
6,(1911,Bailu,1,1.0,0.0,36.0,43.0,28.0,25.0,38.0,...,0.425112,0.081252,0.006152,0.0002187446,3.70084e-06,2.746608e-08,8.955149e-11,1.059061e-13,0.689531,0.247703
8,(1904,Mun,1,0.0,0.0,39.0,37.0,16.0,27.0,30.0,...,0.175682,0.008287,0.000155,1.359101e-06,5.675161e-09,1.039532e-11,8.36522e-15,2.44168e-18,0.678527,0.110896
13,(1809_2,Son-Tinh_2,1,2.0,0.0,50.0,36.0,41.0,25.0,30.0,...,0.121823,0.003718,4.5e-05,2.552567e-07,6.896225e-10,8.172953e-13,4.255262e-16,8.036104e-20,0.592319,0.09037
17,(1722W,TD1722W,1,2.0,0.0,55.0,41.0,36.0,37.0,27.0,...,0.175682,0.008287,0.000155,1.359101e-06,5.675161e-09,1.039532e-11,8.36522e-15,2.44168e-18,0.678527,0.110896
36,(1414W,TD1414W,1,2.0,0.0,42.0,41.0,30.0,28.0,36.0,...,0.238221,0.016696,0.000464,6.044205e-06,3.749821e-08,1.020505e-10,1.220112e-13,5.2912220000000007e-17,0.570433,0.13708
39,(1329,Krosa,1,1.0,0.0,44.0,37.0,37.0,25.0,38.0,...,0.37654,0.36819,0.142633,0.02594404,0.002245595,8.526262e-05,1.422216e-06,8.60487e-09,0.988318,0.589892
61,(1002,Conson,1,2.0,0.0,46.0,46.0,33.0,33.0,28.0,...,0.22673,0.014857,0.000386,4.70111e-06,2.726774e-08,6.937951e-11,7.755201e-14,3.14432e-17,0.579235,0.132042
62,(0916,Ketsana,1,0.0,0.0,36.0,36.0,24.0,25.0,27.0,...,0.386505,0.059471,0.003625,0.0001037646,1.413293e-06,8.444009e-09,2.216383e-11,2.110147e-14,0.78707,0.217574
76,(0713,Francisco,1,3.0,1.0,63.0,43.0,46.0,37.0,38.0,...,0.403117,0.067899,0.004531,0.0001419639,2.11663e-06,1.384348e-08,3.977639e-11,4.145504e-14,0.78046,0.229693


In [47]:
# Calculate MLogR Model Prediction Accuracy For Strong Wind Signal No.3
T3_Signal_Probs = numpy.array(MLogR_DF_Probs_Ref_Stations["Strong Wind Signal No.3 Probability"])
Issued_Signals = numpy.array(MLogR_DF_Probs_Ref_Stations["Typhoon Signal"])
# A prediction is correct when the 0.500 probability cut-off and the issued signal agree.
# Both branches are spelled out so that a NaN probability matches neither and scores 0.
T3_Correct_Negative = (T3_Signal_Probs < 0.500) & (Issued_Signals < 3)
T3_Correct_Positive = (T3_Signal_Probs >= 0.500) & (Issued_Signals >= 3)
# 1.0 where the prediction is correct, 0.0 otherwise.
MLogR_T3_True = (T3_Correct_Negative | T3_Correct_Positive).astype(float)
MLogR_T3_Accuracy = numpy.sum(MLogR_T3_True) / len(MLogR_T3_True)
print ("MLogR Model Accuracy In Forecasting For Strong Wind Signal No.3:", MLogR_T3_Accuracy*100, "%")

MLogR Model Accuracy In Forecasting For Strong Wind Signal No.3: 84.4155844155844 %


In [48]:
# Calculate MLogR Model Prediction Accuracy For Gale/Storm Wind Signal No.8
T8_Signal_Probs = numpy.array(MLogR_DF_Probs_Ref_Stations["Gale/Storm Wind Signal No.8 Probability"])
Issued_Signals = numpy.array(MLogR_DF_Probs_Ref_Stations["Typhoon Signal"])
# A prediction is correct when the 0.500 probability cut-off and the issued signal agree.
# Both branches are spelled out so that a NaN probability matches neither and scores 0.
T8_Correct_Negative = (T8_Signal_Probs < 0.500) & (Issued_Signals < 8)
T8_Correct_Positive = (T8_Signal_Probs >= 0.500) & (Issued_Signals >= 8)
# 1.0 where the prediction is correct, 0.0 otherwise.
MLogR_T8_True = (T8_Correct_Negative | T8_Correct_Positive).astype(float)
MLogR_T8_Accuracy = numpy.sum(MLogR_T8_True) / len(MLogR_T8_True)
print ("MLogR Model Accuracy In Forecasting For Gale/Storm Wind Signal No.8:", MLogR_T8_Accuracy*100, "%")

MLogR Model Accuracy In Forecasting For Gale/Storm Wind Signal No.8: 87.01298701298701 %
