In [1]:
from pygeodesy import ellipsoidalVincenty as ev
import datetime

# The Main IMPORTANT collector of all organized data
def start_End_TrackingTime(data , lineData, cycloneNumber, previous_cycloneNumber, tracked_date):
    if not previous_cycloneNumber == cycloneNumber:
        data[previous_cycloneNumber]["Tracked_End_Date"] = tracked_date
        previous_cycloneNumber = cycloneNumber
        data[cycloneNumber]["Tracked_Start_Date"] = lineData[0] # save the starting date
    else:
        tracked_date = lineData[0]
    return previous_cycloneNumber, tracked_date # variables that would constantly be updated to track the starting and ending time of each storm


def year_storm_tracked(cycloneNumber, lineData):
    # setting the "Years" key to a list containing 'only' the years the storm was tracked, yet a very few storms may be tracked across 2 years, ex. 1995/12/31~1996/01/03, so set it to list and append the other year value
    data[cycloneNumber]["Years"] = data[cycloneNumber].get("Years", []) + [lineData[0][:4]] if lineData[0][:4] not in data[cycloneNumber].get("Years", []) else data[cycloneNumber].get("Years", [])


def get_maximun_sustained_wind(cycloneNumber, lineData):
    if data[cycloneNumber].get("Maximun_Sustained_Wind(in_knots)", 0) < int(lineData[6].strip()):
        data[cycloneNumber]["Maximun_Sustained_Wind(in_knots)"] = int(lineData[6].strip())
        # This makes will make some storm data unable to have the key "Tracked_Start_Date" since some storms have no data about the maxWind in knots at all, remember to check whether key exists or not first when dealing with the "When_Max_Wind_Occurred" key later on.
        data[cycloneNumber]["When_Max_Wind_Occurred"] = [lineData[0], lineData[1]]
    else:
        data[cycloneNumber]["Maximun_Sustained_Wind(in_knots)"] = data[cycloneNumber].get("Maximun_Sustained_Wind(in_knots)", 0)

        
def get_landfall(cycloneNumber, lineData):
    if lineData[2].strip() == "L":
        data[cycloneNumber]["Landfall_Numbers"] = data[cycloneNumber].get("Landfall_Numbers", 0) + 1
    else:
        # some do not cause a landfall, thus need to initialize the landfall key to 0 for futher use and to keep the data dictionary robust
        data[cycloneNumber]["Landfall_Numbers"] = data[cycloneNumber].get("Landfall_Numbers", 0)

        
def meanSpeed(data):
    for cycloneNumber in data:
        if data[cycloneNumber]["distance"]==0 or data[cycloneNumber]["Total_time(in Seconds)"]==0:
            data[cycloneNumber]["Mean_Speed(meters/seconds)"] = 0
        else:
            data[cycloneNumber]["Mean_Speed(meters/seconds)"] = data[cycloneNumber]["distance"] / data[cycloneNumber]["Total_time(in Seconds)"]
            
        
# A function to catergorize the data for easier use in the future
def dataProcess(data, file, number_of_storms):
    """Main processing function that retrives and saves the data we need:
        1. Strom Name
        2. Tracked Number
        3. Number of Landfalls
        4. Maximun Sustained Wind (In Knots)
        5. Years of the Storm Tracked
        6. Date and Time when the Maximun Sustained Wind was tracked
        7. Starting Date when each individual storm was tracked
        8. Ending Date when each individual storm was tracked
        
    :param data: The data dictionary saving all the necessary data from the source file
    :param file: The file object that is opened for reading, the file containing the original data
    :param number_of_storms: A global variable to save the total number of storms tracked in the data from the opened file
    :returns number_of_storms: An integer number representing total numbers of storms tracked from the government data
    
    """
    # create variables to keep track of whether the loop has begun searching through a new strom in order to track information such as the starting and ending date for each storm
    # used for the functions: start_End_TrackingTime, distance_run
    tracked_date = ''
    previous_cycloneNumber = 'dummyNode' # initialize or else it'll get a key error
    data[previous_cycloneNumber] = {}
    
    # tracking distance
    previous_longlat = tuple()
    distance = 0
    
    number_of_storms = 0
    
    for line in file:
        lineData = line.split(",")
        
#         if number_of_storms <= 5 and len(lineData) == 21:
#             print(lineData[5].strip()) # longtitude
#             print(lineData[4].strip()) # latittude
        
        if len(lineData) != 21:
            dataLine = 0 # tracking the number of data looped in each storm for usage of tracking its first data
            speed = 0 # speed of storm between each data
            
            number_of_storms += 1
            cycloneNumber = lineData[0]
            data[cycloneNumber] = {} # Create another dictionary within the main 'data' dictionary with each storms number as key and all other imformation we need as values
            data[cycloneNumber]["Name"] = lineData[1].strip() # clean out the spaces in the string
            data[cycloneNumber]["Tracked_Numbers"] = int(lineData[2].strip()) #clean out the spaces in the string
            
            data[cycloneNumber]["distance"] = 0 # initialize distance to 0 everytime we loop to a new storm
            data[cycloneNumber]["Total_time(in Seconds)"] = 0 # initialize time to 0 everytime we loop to a new storm
        else:
            
            dataLine += 1
            if dataLine == 1:
                previous_longlat = (lineData[4].strip(), lineData[5].strip()) # setting the first line of data as the initialize value to build on in the later loops
                previous_timeString = lineData[0][:4] + ":" + lineData[0][4:6] + ":" + lineData[0][6:] + ":" + lineData[1].strip()[:2] + ":" + lineData[1].strip()[:2] # time string to work with python datetime module # tracking the time of each strom data
            else:
                current_timeString = lineData[0][:4] + ":" + lineData[0][4:6] + ":" + lineData[0][6:] + ":" + lineData[1].strip()[:2] + ":" + lineData[1].strip()[:2] # time string to work with python datetime module
                current_longlat = (lineData[4].strip(), lineData[5].strip()) # tracking each latitude and longtitude
                
                if current_longlat == previous_longlat or previous_timeString == current_timeString:
                    data[cycloneNumber]["Max_Speed(meters/seconds)"] = 0
                else:
                    # do calculations
                    #a = ev.LatLon('0.0N', '0.0W')
                    #b = ev.LatLon('1.0N', '0.0W')
                    #a.distanceTo3(b)  This is moving straight NORTH, 0 degrees --> (110574.38855804392, 0.0, 0.0)
                    pre = ev.LatLon(previous_longlat[0], previous_longlat[1])
                    cur = ev.LatLon(current_longlat[0], current_longlat[1])
                    distance = pre.distanceTo3(cur)[0] # distance between the current storm and the previous storm
                    data[cycloneNumber]["distance"] += distance # Total distance

                    #current_time = lineData[1].strip() # the time string
                    # using datetime.datetime objects to count the time difference
                    curtime = datetime.datetime.strptime(current_timeString, "%Y:%m:%d:%H:%M")
                    pretime = datetime.datetime.strptime(previous_timeString, "%Y:%m:%d:%H:%M")
                    timeDiff = (curtime - pretime).total_seconds() # time difference in seconds

                    data[cycloneNumber]["Total_time(in Seconds)"] += timeDiff# the Total of the time

                    speed = distance / timeDiff # the Speed.

                    data[cycloneNumber]["Max_Speed(meters/seconds)"] = data[cycloneNumber].get("Max_Speed(meters/seconds)", 0) if data[cycloneNumber].get("Max_Speed(meters/seconds)", 0) > speed else speed

                    previous_longlat = current_longlat # hold onto the current longlat value for the calculations of next line of data
                    previous_timeString = current_timeString # time strings update for next use
                
            # Saving the values of the previous storm number in the file object and its tracked date
            previous_cycloneNumber, tracked_date =\
            start_End_TrackingTime(data, lineData, cycloneNumber, previous_cycloneNumber, tracked_date)
            

                
            # Function to save the "year" or "years" the storm was tracked
            year_storm_tracked(cycloneNumber, lineData)
            
            # Function to save the maximun sustained wind (in Knots) for the strom
            get_maximun_sustained_wind(cycloneNumber, lineData)
            
            # Function to save the Landfall numbers for the storm
            get_landfall(cycloneNumber, lineData)
            
        
    
    
    
    del data["dummyNode"]  # delete the dummyKey after all data have been looped through in order to keep the robustness of the data ditionary and prevent anyfuture errors that it may cause
    data[previous_cycloneNumber]["Tracked_End_Date"] = tracked_date # gives the very last storm its max wind tracked date and time so it wouldn't cause an error later in the printAllNeededData() function.
    
    meanSpeed(data) # calculate the mean speed and put save those information to the data dictionary
    
    return number_of_storms


def printAllNeededData(data):
    """print out the info data we need:
    
        1. Storm Name
        2. Date Range Recorded for the Storm
        3. Maximun Sustained Wind (in Knots)
        4. How many Times the Strom had a Landfall
    
        after the needed information is written into the "data" dictionary
        
    :param data: The data dictionary that holds necessary values
    :returns: None
    """
    for cycloneNumber in data:
        print("Storm System Name: " + data[cycloneNumber]["Name"])
        print("Date Range Recorded for the Storm: " + data[cycloneNumber]["Tracked_Start_Date"][0:4] + '/' + data[cycloneNumber]["Tracked_Start_Date"][4:6] + '/' + data[cycloneNumber]["Tracked_Start_Date"][6:] +" ~ " + data[cycloneNumber]["Tracked_End_Date"][0:4] + '/' + data[cycloneNumber]["Tracked_End_Date"][4:6] + '/' + data[cycloneNumber]["Tracked_End_Date"][6:])
        print("Maximun_Sustained_Wind(in_knots): " + str(data[cycloneNumber]["Maximun_Sustained_Wind(in_knots)"]))
        if "When_Max_Wind_Occurred" not in data[cycloneNumber]:
            print("Storm Missing the Maximum_Sustained_Wind data. No Time and Date of such data!")
            #print("    Date & Time of Occurence ---> Date: %s , Time: %s" % (data[cycloneNumber]["When_Max_Wind_Occurred"][0], data[cycloneNumber]["When_Max_Wind_Occurred"][1] ))
        else:
            print("    Date & Time of Occurence ---> Date: %s , Time: %s" % (data[cycloneNumber]["When_Max_Wind_Occurred"][0], data[cycloneNumber]["When_Max_Wind_Occurred"][1] ))
        print("How many times it had a 'Landfall': " + str(data[cycloneNumber]["Landfall_Numbers"]))
        print("==============================================")

        
def printTotalStromsTracked(number_of_storms):
    """Prints Number of Total Storms Tracked
    
    :param number_of_storms: An integer number of the total storms tracked, an output from the dataProcess function
    :returns: None
    """
    print(" ===== Number of Total Storm Tracked: %d =====" % number_of_storms) 

    
def countStorms_HurricanesPerYear(hurricanes_per_year, storms_per_year):
    """Loops through the data dictionary created previously and determine the type of strom, 
    based on whether the "Maximun_Sustained_Wind value is larger than 64 or not. Then update
    the number of the storm type happenings within the two dictionary passed in as arguments.
    
    :param hurricanes_per_year: The dictionary saving number of hurricanes documented in different years
    :param storms_per_year: The dictionary saving number of storms documented in different years
    :returns: None
    """
    for hurnum in data:
        if int(data[hurnum]["Maximun_Sustained_Wind(in_knots)"]) >= 64:
            for years in set(data[hurnum]["Years"]): 
                # just in case some storms span across a year, using set makes each year unique with just one value of itself
                hurricanes_per_year[years] = hurricanes_per_year.get(years, 0) + 1
        else:
            for years in set(data[hurnum]["Years"]):
                storms_per_year[years] = storms_per_year.get(years, 0) + 1

                
def printNumbersInYears(storm_or_hurricane):
    yearList = sorted(list(storm_or_hurricane.keys()))
    for years in yearList:
        print(years + " : " + str(storm_or_hurricane[years]))
    

    
# reading in the file data and perform dataProcess on the file object to extract the data we need.
with open("hurdat2-1851-2016-041117.txt", "r", encoding = "UTF-8") as hur1data:
    data = {}

    #call the data processing funtion to sort out the information we need into the dictionary data structure
    number_of_storms = dataProcess(data, hur1data, 0)
    
    # print out the informations for each storm needed
    #printAllNeededData(data)

# reading in the second file data
with open("hurdat2-nepac-1949-2016-041317.txt", "r") as hur2data:
    
    dataProcess(data, hur2data, number_of_storms)
    
    #printAllNeededData(data)
    #file2 closed
    
printTotalStromsTracked(number_of_storms)
 
hurricanes_per_year = {}
storms_per_year = {}

countStorms_HurricanesPerYear(hurricanes_per_year, storms_per_year)

# print("Hurricnaes per year:\n=====================")
# printNumbersInYears(hurricanes_per_year)
# print("=====================\n\n\n\n\n\n")
# print("Storms per year:\n=====================")
# printNumbersInYears(storms_per_year)


 ===== Number of Total Storm Tracked: 1830 =====


In [2]:
print(data)

{'AL151949': {'Total_time(in Seconds)': 388080.0, 'Maximun_Sustained_Wind(in_knots)': 45, 'Landfall_Numbers': 0, 'Tracked_Numbers': 19, 'Mean_Speed(meters/seconds)': 11.447037099687023, 'Tracked_End_Date': '19491106', 'Tracked_Start_Date': '19491101', 'Max_Speed(meters/seconds)': 29.754385622368265, 'Years': ['1949'], 'Name': 'UNNAMED', 'When_Max_Wind_Occurred': ['19491102', ' 1200'], 'distance': 4442366.15764654}, 'AL051916': {'Total_time(in Seconds)': 194760.0, 'Maximun_Sustained_Wind(in_knots)': 50, 'Landfall_Numbers': 0, 'Tracked_Numbers': 10, 'Mean_Speed(meters/seconds)': 2.9537365923329406, 'Tracked_End_Date': '19160806', 'Tracked_Start_Date': '19160804', 'Max_Speed(meters/seconds)': 4.702747396365529, 'Years': ['1916'], 'Name': 'UNNAMED', 'When_Max_Wind_Occurred': ['19160806', ' 0000'], 'distance': 575269.7387227635}, 'AL061978': {'Total_time(in Seconds)': 388080.0, 'Maximun_Sustained_Wind(in_knots)': 80, 'Landfall_Numbers': 0, 'Tracked_Numbers': 19, 'Mean_Speed(meters/seconds)'