In [1]:
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LATITUDE_FORMATTER, LONGITUDE_FORMATTER
import cftime
import datetime
from datetime import date
from matplotlib import pyplot
from matplotlib import colors
from matplotlib import font_manager
from matplotlib.cm import ScalarMappable
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
import matplotlib.ticker as mticker
import numpy
import pandas
from PIL import Image
import random
from scipy import stats
import xarray as xr

In [2]:
Diri = '/glade/u/home/whimkao//ExtraTrack/ExtraTrack_Data/Output_Files_V6/'
Output_Diri = '/glade/u/home/whimkao//ExtraTrack/ExtraTrack_Github/RCP_Figs/Output_Files/'

In [3]:
# Open File
def Open_File(File):
    DF = pandas.read_csv(File)
    DF = DF.drop("Unnamed: 0", axis=1)
    return (DF)

In [4]:
# Open Each File
def Files_Open(Model, Diri, Dataset):
    Data_DF = Open_File(Diri+Model+'_Data_'+Dataset+'_Output_V6.csv')
    ET_DF = Open_File(Diri+Model+'_ET_'+Dataset+'_Output_V6.csv')
    Codes_DF = Open_File(Diri+Model+'_Codes_Output_V6.csv')
# Edit Time Format
    Time, Begin_Time, Compl_Time, Trop_Peak_Time, Peak_Time, Birth_Time, Death_Time, Year = [], [], [], [], [], [], [], []
    for i in range(len(Data_DF)):
        Time.append(Datetime(Data_DF["Time(Z)"][i]))
    for j in range(len(ET_DF)):
        Begin_Time.append(Datetime(ET_DF["ET Begin Time"][j]))
        Compl_Time.append(Datetime(ET_DF["ET Complete Time"][j]))
        Trop_Peak_Time.append(Datetime(ET_DF["Trop Peak Time"][j]))
        Peak_Time.append(Datetime(ET_DF["Peak Time"][j]))
        Birth_Time.append(Datetime(ET_DF["Genesis Time"][j]))
        Death_Time.append(Datetime(ET_DF["Dissipation Time"][j]))
        Year.append(Find_Year(ET_DF["ET Begin Time"][j]))
    Data_DF["Time(Z)"] = Time
    ET_DF["ET Begin Time"] = Begin_Time
    ET_DF["ET Complete Time"] = Compl_Time
    ET_DF["Trop Peak Time"] = Trop_Peak_Time
    ET_DF["Peak Time"] = Peak_Time
    ET_DF["Genesis Time"] = Birth_Time
    ET_DF["Dissipation Time"] = Death_Time
    ET_DF["Year"] = Year
    return (Data_DF, ET_DF, Codes_DF)

In [5]:
def Datetime(Time):
    try:
        New_Time = datetime.datetime.strptime(Time, '%Y-%m-%d %H:%M:%S')
    except:
        New_Time = numpy.nan
    return (New_Time)

In [6]:
def Find_Year(Time):
    try:
        New_Time = datetime.datetime.strptime(Time, '%Y-%m-%d %H:%M:%S')
        Year = New_Time.year
    except:
        Year = numpy.nan
    return (Year)

In [7]:
# Create Phase Distribution Bins
def Create_Bins(Min, Max, Bin_Width):
    Bins = numpy.arange(Min, Max+Bin_Width, Bin_Width)
    return (Bins)
Lat_Bins = Create_Bins(10,60,5)
Lon_Bins = Create_Bins(-100,20,5)

In [8]:
Num_Years = numpy.array([90,93,93])

In [9]:
Control_Data, Control_ET, Control_Codes = Files_Open("Control", Diri, "SubsetB")
RCP45_Data, RCP45_ET, RCP45_Codes = Files_Open("RCP45", Diri, "SubsetB")
RCP85_Data, RCP85_ET, RCP85_Codes = Files_Open("RCP85", Diri, "SubsetB")

In [10]:
Control_Data_A, Control_ET_A, Control_Codes = Files_Open("Control", Diri, "SubsetA")
RCP45_Data_A, RCP45_ET_A, RCP45_Codes = Files_Open("RCP45", Diri, "SubsetA")
RCP85_Data_A, RCP85_ET_A, RCP85_Codes = Files_Open("RCP85", Diri, "SubsetA")

In [13]:
# Function For Applying SLP Bounds
def ET_SLP_Bounds(Control_ET, RCP45_ET, RCP85_ET, Begin):
# Define SLP Bound For ET Begin and ET Complete
    Begin_SLP_Bound = 1008
    Compl_SLP_Bound = 1008
# Apply Bounds
    if Begin == True:
        Control_ET_Begin = Control_ET[Control_ET["ET Begin SLP"] <= Begin_SLP_Bound].reset_index()
        RCP45_ET_Begin = RCP45_ET[RCP45_ET["ET Begin SLP"] <= Begin_SLP_Bound].reset_index()
        RCP85_ET_Begin = RCP85_ET[RCP85_ET["ET Begin SLP"] <= Begin_SLP_Bound].reset_index()
        return (Control_ET_Begin, RCP45_ET_Begin, RCP85_ET_Begin)
    else:
        Control_ET_Compl = Control_ET[Control_ET["ET Complete SLP"] <= Compl_SLP_Bound].reset_index()
        RCP45_ET_Compl = RCP45_ET[RCP45_ET["ET Complete SLP"] <= Compl_SLP_Bound].reset_index()
        RCP85_ET_Compl = RCP85_ET[RCP85_ET["ET Complete SLP"] <= Compl_SLP_Bound].reset_index()
        return (Control_ET_Compl, RCP45_ET_Compl, RCP85_ET_Compl)

In [110]:
# Create Subsets For Storm Phase Cumulative Distributions
def Phase_Subsets(Data):
    Subset_0 = Data[Data["SLP(hPa)"] <= 1008].reset_index()
    Subset_Trop = Subset_0[Subset_0["Storm Phase"] == "Tropical"]
    Subset_Trans = Subset_0[Subset_0["Storm Phase"] == "Transition"]
    Subset_Extra = Subset_0[Subset_0["Storm Phase"] == "Extratropical"]
    Subsets = [Subset_0, Subset_Trop, Subset_Trans, Subset_Extra]
    Subset_Labels = ["All", "Tropical", "Transitioning", "Extratropical"]
    return (Subsets, Subset_Labels)

In [60]:
# Create Subsets For Phase Space Parameter Cumulative Distributions
def Param_Subsets(Data, Subset_Var):
    Subset_0 = Data[Data["SLP(hPa)"] <= 1008].reset_index()
    if Subset_Var == "SLP(hPa)":
        Subset_1 = Subset_0
        Subset_2 = Subset_0[(Subset_0[Subset_Var] > 0) & (Subset_0[Subset_Var] <= 1000)].reset_index()
        Subset_3 = Subset_0[(Subset_0[Subset_Var] > 0) & (Subset_0[Subset_Var] <= 990)].reset_index()
        Subset_4 = Subset_0[Subset_0[Subset_Var] <= 980].reset_index()
        Subset_Labels = ["SLP ≤ 1008hPa", "SLP ≤ 1000hPa", "SLP ≤ 990hPa", "SLP ≤ 980hPa"]
    elif Subset_Var == "Lat":
        Subset_1 = Subset_0[Subset_0[Subset_Var] >= 40].reset_index()
        Subset_2 = Subset_0[(Subset_0[Subset_Var] >= 35) & (Subset_0[Subset_Var] < 40)].reset_index()
        Subset_3 = Subset_0[(Subset_0[Subset_Var] >= 30) & (Subset_0[Subset_Var] < 35)].reset_index()
        Subset_4 = Subset_0[Subset_0[Subset_Var] < 30].reset_index()
        Subset_Labels = ["Lat >= 40N", "Lat 35~40N", "Lat 30~35N", "Lat < 30N"]
    elif Subset_Var == "Month":
        Subset_1 = Subset_0[(Subset_0[Subset_Var] >= 1) & (Subset_0[Subset_Var] <= 5)].reset_index()
        Subset_2 = Subset_0[(Subset_0[Subset_Var] >= 6) & (Subset_0[Subset_Var] <= 7)].reset_index()
        Subset_3 = Subset_0[(Subset_0[Subset_Var] >= 8) & (Subset_0[Subset_Var] <= 9)].reset_index()
        Subset_4 = Subset_0[(Subset_0[Subset_Var] >= 10) & (Subset_0[Subset_Var] <= 12)].reset_index()
        Subset_Labels = ["Jan-May", "Jun-Jul", "Aug-Sep", "Oct-Dec"]
    Subsets = [Subset_1, Subset_2, Subset_3, Subset_4]
    return (Subsets, Subset_Labels)

In [93]:
# Function For Counting Annual Number of Storms
def Num_Storms(Control_ET, RCP45_ET, RCP85_ET):
    Control_Num, RCP45_Num, RCP85_Num = numpy.zeros(90), numpy.zeros(93), numpy.zeros(93)
    for i in range(90):
        Control_Num[i] = len(Control_ET[Control_ET["Year"] == i+1900])
    for j in range(93):
        RCP45_Num[j] = len(RCP45_ET[RCP45_ET["Year"] == j+2000])
        RCP85_Num[j] = len(RCP85_ET[RCP85_ET["Year"] == j+2100])
    return (Control_Num, RCP45_Num, RCP85_Num)

In [91]:
# Function For Calculating Duration
def Find_Duration(Control_ET, RCP45_ET, RCP85_ET, Type):
    Control_Durations = Duration(Control_ET, Type)
    RCP45_Durations = Duration(RCP45_ET, Type)
    RCP85_Durations = Duration(RCP85_ET, Type)
    return (Control_Durations, RCP45_Durations, RCP85_Durations)

In [92]:
# Function For Calculating Duration
def Duration(ET_DF, Type):
    Durations = numpy.zeros(len(ET_DF))
    for i in range(len(ET_DF)):
        if Type == "Trop Dur":
            Duration_hr = (ET_DF["ET Begin Time"][i] - ET_DF["Genesis Time"][i]).total_seconds() / 3600
        elif Type == "Storm Dur":
            Duration_hr = (ET_DF["ET Complete Time"][i] - ET_DF["Genesis Time"][i]).total_seconds() / 3600
        elif Type == "ET Dur":
            Duration_hr = (ET_DF["ET Complete Time"][i] - ET_DF["ET Begin Time"][i]).total_seconds() / 3600
        Durations[i] = Duration_hr
    return (Durations)

In [None]:
# Function For Calculating Storm Path Distance

In [16]:
# Calculate 25%, Median, 75% Percentiles
def Percentile(Array):
    Percent_25 = round(numpy.nanpercentile(Array, 25), 1)
    Median = round(numpy.nanmedian(Array), 1)
    Percent_75 = round(numpy.nanpercentile(Array, 75), 1)
    return ([Percent_25, Median, Percent_75])

In [17]:
# Calculate Statistical Significance Using KS Test
def KS_Test(Control_Array, RCP45_Array, RCP85_Array):
    P_Val_RCP45 = round(stats.kstest(Control_Array, RCP45_Array)[1], 3)
    P_Val_RCP85 = round(stats.kstest(Control_Array, RCP85_Array)[1], 3)
    return (P_Val_RCP45, P_Val_RCP85)

In [116]:
# Create DataFrame to Store Percentiles Data
def Percentile_DF(Var, Control_Array, RCP45_Array, RCP85_Array):
    Control_Percentiles = Percentile(Control_Array)
    RCP45_Percentiles = Percentile(RCP45_Array)
    RCP85_Percentiles = Percentile(RCP85_Array)
    P_Vals = KS_Test(Control_Array, RCP45_Array, RCP85_Array)
    Control_Percentiles.append(1.000)
    RCP45_Percentiles.append(P_Vals[0])
    RCP85_Percentiles.append(P_Vals[1])
    DF = pandas.DataFrame({"Var": [Var, Var, Var, Var], "Percentile": ["25%", "Median", "75%", "P Val"], \
    "Control": Control_Percentiles, "RCP4.5": RCP45_Percentiles, "RCP8.5": RCP85_Percentiles})
    return (DF)

In [117]:
# Create DataFrame For Output
def Create_Output_DF(Control_Data, RCP45_Data, RCP85_Data, Vars, Subset):
# Create Subsets
    if Subset == "Phase":
        Control_Subsets, Subset_Labels = Phase_Subsets(Control_Data)
        RCP45_Subsets, Subset_Labels = Phase_Subsets(RCP45_Data)
        RCP85_Subsets, Subset_Labels = Phase_Subsets(RCP85_Data)
    else:
        Control_Subsets, Subset_Labels = Param_Subsets(Control_Data, Subset)
        RCP45_Subsets, Subset_Labels = Param_Subsets(RCP45_Data, Subset)
        RCP85_Subsets, Subset_Labels = Param_Subsets(RCP85_Data, Subset)
#
# Calculate Percentiles Based on Phase
    for i in range(len(Vars)):
        for j in range(4):
            Var = Vars[i]
            Label = Subset_Labels[j]
            Var_Label = Var+" ("+Label+")"
            DF = Percentile_DF(Var_Label, Control_Subsets[j][Var], RCP45_Subsets[j][Var], RCP85_Subsets[j][Var])
            if i == 0 and j == 0:
                Output_DF = DF.copy()
            else:
                Output_DF = pandas.concat([Output_DF, DF])
    return (Output_DF)

In [118]:
Phase_Output_DF = Create_Output_DF(Control_Data, RCP45_Data, RCP85_Data, ["SLP(hPa)", "Lon", "Lat"], "Phase")
Phase_Output_DF

Unnamed: 0,Var,Percentile,Control,RCP4.5,RCP8.5
0,SLP(hPa) (All),25%,978.9,977.9,977.5
1,SLP(hPa) (All),Median,995.0,993.4,993.4
2,SLP(hPa) (All),75%,1002.4,1001.4,1001.6
3,SLP(hPa) (All),P Val,1.0,0.0,0.0
0,SLP(hPa) (Tropical),25%,977.8,977.3,976.9
1,SLP(hPa) (Tropical),Median,995.6,993.8,993.6
2,SLP(hPa) (Tropical),75%,1002.8,1001.6,1002.0
3,SLP(hPa) (Tropical),P Val,1.0,0.0,0.0
0,SLP(hPa) (Transitioning),25%,967.2,970.1,966.5
1,SLP(hPa) (Transitioning),Median,987.0,987.4,986.3


In [119]:
Param_Output_DF = Create_Output_DF(Control_Data, RCP45_Data, RCP85_Data, ["B", "VLT", "VUT"], "SLP(hPa)")
Param_Output_DF

Unnamed: 0,Var,Percentile,Control,RCP4.5,RCP8.5
0,B (SLP ≤ 1008hPa),25%,-2.8,-1.9,-1.6
1,B (SLP ≤ 1008hPa),Median,4.4,6.0,6.7
2,B (SLP ≤ 1008hPa),75%,16.2,16.9,17.6
3,B (SLP ≤ 1008hPa),P Val,1.0,0.0,0.0
0,B (SLP ≤ 1000hPa),25%,-2.3,-1.5,-0.7
1,B (SLP ≤ 1000hPa),Median,6.2,6.8,7.7
2,B (SLP ≤ 1000hPa),75%,18.4,17.9,18.9
3,B (SLP ≤ 1000hPa),P Val,1.0,0.01,0.0
0,B (SLP ≤ 990hPa),25%,-2.6,-1.5,-0.5
1,B (SLP ≤ 990hPa),Median,5.8,6.9,7.8


In [120]:
# Output DF to csv File
def Output_File(DF, File_Name):
    DF.to_csv(Output_Diri+File_Name)

In [121]:
Output_File(Phase_Output_DF, 'Phase_Subset_Table.csv')

OSError: Cannot save file into a non-existent directory: '/glade/u/home/whimkao/ExtraTrack/ExtraTrack_Github/RCP_Figs/Analysis_Figs_V6.5.2'

In [None]:
Output_File(Param_Output_DF, 'Param_Subset_Table.csv')