# In [1]:
# Central store for all organized storm data, keyed by cyclone number.
data = dict()

# A function to categorize the data for easier use in the future
def dataProcess(data, file, number_of_storms):
    """Parse storm records from ``file`` and store one entry per storm in ``data``.

    Lines that split into exactly 21 comma-separated pieces are treated as
    observation rows; every other non-blank line is treated as a storm header.
    For each storm (keyed by the first header field) the following keys are saved:

        1. "Name"                             - storm name
        2. "Tracked_Numbers"                  - number of observations announced by the header
        3. "Landfall_Numbers"                 - number of rows flagged "L"
        4. "Maximun_Sustained_Wind(in_knots)" - largest wind value seen (never below 0)
        5. "Years"                            - list of distinct years (as strings) the storm was tracked
        6. "When_Max_Wind_Occurred"           - [date, time] of the maximum wind; absent when no
                                                row has a wind value >= 0, so check for the key
        7. "Tracked_Start_Date"               - date of the first observation
        8. "Tracked_End_Date"                 - date of the last observation

    :param data: The dictionary that receives the per-storm entries (mutated in place)
    :param file: An iterable of text lines, e.g. a file object opened for reading
    :param number_of_storms: Running total of storms counted so far; pass 0 to count only this file
    :returns number_of_storms: The running total passed in plus the number of storm headers read
    :raises ValueError: If an observation row appears before any storm header
    """
    storm = None  # entry of the storm whose header was read most recently

    for line in file:
        if not line.strip():
            # tolerate blank lines (e.g. a trailing newline at the end of the file)
            continue

        lineData = line.split(",")

        if len(lineData) != 21:
            # header row: start a fresh entry for this storm
            number_of_storms += 1
            cycloneNumber = lineData[0]
            storm = data[cycloneNumber] = {
                "Name": lineData[1].strip(),
                "Tracked_Numbers": int(lineData[2].strip()),
            }
            continue

        if storm is None:
            raise ValueError("observation row found before any storm header: %r" % line)

        observed_date = lineData[0]

        # The first observation fixes the start date; every observation moves the
        # end date forward, so single-observation storms get a correct range too.
        storm.setdefault("Tracked_Start_Date", observed_date)
        storm["Tracked_End_Date"] = observed_date

        # A few storms span two calendar years (ex. 1995/12/31~1996/01/03),
        # so the years are kept as a list of distinct values.
        years = storm.setdefault("Years", [])
        if observed_date[:4] not in years:
            years.append(observed_date[:4])

        wind = int(lineData[6].strip())
        strongest = storm.get("Maximun_Sustained_Wind(in_knots)", 0)
        if strongest > wind:
            # Also taken for negative (missing) wind values: the maximum stays at
            # its default of 0 and "When_Max_Wind_Occurred" is not created.
            storm["Maximun_Sustained_Wind(in_knots)"] = strongest
        else:
            storm["Maximun_Sustained_Wind(in_knots)"] = wind
            storm["When_Max_Wind_Occurred"] = [observed_date, lineData[1]]

        # Always create the landfall counter, even for storms that never made landfall.
        landfalls = storm.get("Landfall_Numbers", 0)
        if lineData[2].strip() == "L":
            landfalls += 1
        storm["Landfall_Numbers"] = landfalls

    return number_of_storms


# Read both source files and let dataProcess extract the data we need.
# The dictionary `data` uses O(n) space: it is keyed by the storm system's
# cyclone number, and each value is another dictionary holding that storm's
# key-value details, which makes later data queries easier.
with open("hurdat2-1851-2016-041117.txt", "r", encoding="UTF-8") as hur1data:
    number_of_storms = dataProcess(data, hur1data, 0)
    # file1 closed on leaving the with-block

# Second file: open it with the same explicit encoding as the first, count its
# storms on their own, and add them to the running total so that
# number_of_storms covers both files.
with open("hurdat2-nepac-1949-2016-041317.txt", "r", encoding="UTF-8") as hur2data:
    number_of_storms += dataProcess(data, hur2data, 0)
    # file2 closed on leaving the with-block


# Per-year tallies, keyed by year string.
hurricanes_per_year = {}
storms_per_year = {}

def printAllNeededData(data):
    """Print a summary for every storm stored in the "data" dictionary:

        1. Storm Name
        2. Date Range Recorded for the Storm
        3. Maximum Sustained Wind (in Knots), with its date and time when available
        4. How many Times the Storm had a Landfall

    :param data: The data dictionary that holds necessary values
    :returns: None
    """
    def as_slashed(stamp):
        # "YYYYMMDD" -> "YYYY/MM/DD"
        return stamp[0:4] + '/' + stamp[4:6] + '/' + stamp[6:]

    for record in data.values():
        print("Storm System Name: " + record["Name"])

        first_seen = as_slashed(record["Tracked_Start_Date"])
        last_seen = as_slashed(record["Tracked_End_Date"])
        print("Date Range Recorded for the Storm: " + first_seen + " ~ " + last_seen)

        print("Maximun_Sustained_Wind(in_knots): " + str(record["Maximun_Sustained_Wind(in_knots)"]))

        # Storms without any maximum-wind data do not carry this key at all.
        if "When_Max_Wind_Occurred" in record:
            peak = record["When_Max_Wind_Occurred"]
            print("    Date & Time of Occurence ---> Date: %s , Time: %s" % (peak[0], peak[1]))
        else:
            print("Storm Missing the Maximum_Sustained_Wind data. No Time and Date of such data!")

        print("How many times it had a 'Landfall': " + str(record["Landfall_Numbers"]))
        print("==============================================")

# printAllNeededData(data)

def printTotalStromsTracked(number_of_storms):
    """Print the total number of storms tracked on a single line.

    :param number_of_storms: An integer number of the total storms tracked, an output from the dataProcess function
    :returns: None
    """
    summary = "Number of Total Storm Tracked: %d " % number_of_storms
    print(summary)

# printTotalStromsTracked(number_of_storms)


def countStorms_HurricanesPerYear(hurricanes_per_year, storms_per_year, storm_data=None):
    """Tally, per year, how many tracked systems were hurricanes and how many were not.

    A system counts as a hurricane when its "Maximun_Sustained_Wind(in_knots)"
    value is 64 or more; otherwise it counts as a storm. A system that spans
    several years is counted once in each of those years. The two dictionaries
    passed in are updated in place.

    :param hurricanes_per_year: The dictionary saving number of hurricanes documented in different years
    :param storms_per_year: The dictionary saving number of storms documented in different years
    :param storm_data: Optional dictionary of storm entries to read; when omitted, the
        module-level ``data`` dictionary is used, so two-argument calls behave as before
    :returns: None
    """
    # Only touch the module-level dictionary when the caller did not supply one.
    records = data if storm_data is None else storm_data

    for record in records.values():
        is_hurricane = int(record["Maximun_Sustained_Wind(in_knots)"]) >= 64
        tally = hurricanes_per_year if is_hurricane else storms_per_year

        # set() guarantees each year is counted once per system
        for year in set(record["Years"]):
            tally[year] = tally.get(year, 0) + 1



def printNumbersInYears(storm_or_hurricane):
    """Print one "<year> : <count>" line per entry, in ascending year order.

    :param storm_or_hurricane: A dictionary mapping year strings to counts
    :returns: None
    """
    for year in sorted(storm_or_hurricane):
        count = storm_or_hurricane[year]
        print(year + " : " + str(count))

# Fill the two per-year tallies from the storms collected in the module-level `data` dictionary.
countStorms_HurricanesPerYear(hurricanes_per_year, storms_per_year)

# print("Hurricnaes per year:\n=====================")
# printNumbersInYears(hurricanes_per_year)
# print("=====================\n\n\n\n\n\n")
# print("Storms per year:\n=====================")
# printNumbersInYears(storms_per_year)