# This notebook gives a filtered data set for every tropical cyclone in the area. This does not count the number of cyclones/storms, but rather the overall data. 

This is good for an overall picture. It says this is how many times our area has been influenced by a tropical storm. It could be misleading because one cyclone could have many rows showing TS or HU, when it is actually one storm being recorded every six hours. 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
class DataReader:
    def __init__(self, file_path):
        self.file_path = file_path
        self.all_data = None

    def read_data(self):
        """Reads the cyclone data from a file and initializes the all_data attribute."""
        cyclone_data = []
        current_cyclone = None
        """Taking the headers out and storing them. Always starts with AL if using Atlantic Basin"""
        with open(self.file_path, 'r') as file:
            for line in file:
                if line.startswith('AL'):
                    if current_cyclone is not None:
                        cyclone_data.append(current_cyclone)
                    current_cyclone = {'header': line.strip(), 'data': []}
                else:
                    if current_cyclone is not None:
                        current_cyclone['data'].append(line.strip().split(','))
        if current_cyclone is not None:
            cyclone_data.append(current_cyclone)

        all_cyclone_dfs = []
        """I need to turn this into a dataframe and then add in columns. Columns are based on NHC data ref sheet"""
        for cyclone in cyclone_data:
            df = pd.DataFrame(cyclone['data'], columns=['Date', 'Time', 'Record', 'Status', 'Latitude', 'Longitude', 'WindSpeed', 'Pressure',
                                                        'Rad_34_NE', 'Rad_34_SE', 'Rad_34_SW', 'Rad_34_NW', 'Rad_50_NE', 'Rad_50_SE',
                                                        'Rad_50_SW', 'Rad_50_NW', 'Rad_64_NE', 'Rad_64_SE', 'Rad_64_SW', 'Rad_64_NW', 'Rad_maxwnd'])
            
            """I might not have needed to do all of this next part. I am defining each column type. 
                Time block is necessary to make plotting easier later.
                Lat/lon function below referenced here. It takes the letter out allowing for filter later"""
            
            df['Date'] = df['Date'].astype(str)
            df['Time'] = df['Time'].astype(str)
            df['Latitude'] = df['Latitude'].apply(self.convert_lat_lon)
            df['Longitude'] = df['Longitude'].apply(self.convert_lat_lon)
            df['WindSpeed'] = df['WindSpeed'].astype(int)
            df['Pressure'] = df['Pressure'].astype(int)
            df['Datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%Y%m%d %H%M')
            df.set_index('Datetime', inplace=True)
            df['Cyclone'] = cyclone['header']
            all_cyclone_dfs.append(df)
        self.all_data = pd.concat(all_cyclone_dfs)

    """The static method means we dont need to input a value to use it. In this case it is used above with 
        the given data."""
    @staticmethod
    def convert_lat_lon(value):
        """Converts latitude and longitude to float values."""
        if 'N' in value or 'E' in value:
            return float(value[:-1])
        elif 'S' in value or 'W' in value:
            return -float(value[:-1])

In [21]:
class DataFilter:
    def __init__(self, all_data):
        self.all_data = all_data
        self.filtered_data = None

    def filter_data(self, min_lat, max_lat, min_lon, max_lon):
        """Applies latitude and longitude filters and initializes the filtered_data attribute."""
        self.filtered_data = self.all_data[(self.all_data['Latitude'] >= min_lat) & (self.all_data['Latitude'] <= max_lat) &
                                           (self.all_data['Longitude'] >= min_lon) & (self.all_data['Longitude'] <= max_lon)]
        self.filtered_data['Month'] = self.filtered_data.index.month

In [22]:
class CycloneCalculator:
    def __init__(self, filtered_data):
        self.filtered_data = filtered_data
        self.cyclones_per_year = None

    def calculate_cyclones_per_year(self):
        """Groups data by year and counts unique cyclones, initializes the cyclones_per_year attribute."""
        self.cyclones_per_year = self.filtered_data.groupby('Year')['Cyclone'].nunique()
        all_years = pd.Series(0, index=np.arange(self.filtered_data['Year'].min(), self.filtered_data['Year'].max() + 1))
        self.cyclones_per_year = self.cyclones_per_year.reindex(all_years.index, fill_value=0)
        
    def get_cyclones_per_year(self):
        return self.cyclones_per_year


In [20]:
def main():
    file_path = '../data/hurdat2_1851_2023.txt'
    min_lat = 27.5
    max_lat = 29.4
    min_lon = -81.5
    max_lon = -78.8

    # Reading and converting data
    reader = DataReader(file_path)
    reader.read_data()
    
    # Filtering data
    filterer = DataFilter(reader.all_data)
    filterer.filter_data(min_lat, max_lat, min_lon, max_lon)
    
    # Calculating cyclones per year
    calculator_filtered = CycloneCalculator(filterer.filtered_data)
    calculator_filtered.calculate_cyclones_per_year()
    
        # Calculate cyclones per year
    calculator_filtered.calculate_cyclones_per_year()

    # View the cyclones_per_year attribute
    cyclones_per_year = calculator_filtered.get_cyclones_per_year()
    print(cyclones_per_year)
    # Calculating trendline and plotting data for filtered data
    #plotter_filtered = FilteredTrendlinePlot(calculator_filtered.cyclones_per_year)
    #plotter_filtered.calculate_trendline()
    #plotter_filtered.plot_cyclone_data()

    # Calculating cyclones per year and plotting for unfiltered data
    #plotter_unfiltered = NoFilterTrendlinePlot(reader.all_data)
    #plotter_unfiltered.calculate_cyclones_per_year()
    #plotter_unfiltered.calculate_trendline()
    #plotter_unfiltered.plot_cyclone_data()

    # Status counts for storms
    #status_counts = calculator_filtered.filtered_data['Status'].value_counts()

    # Plot the bar chart with labels
    #BarChartStatusCountNoFilter.plot_with_labels(status_counts)

# Run the main function
main()

1852    1
1853    1
1854    1
1855    0
1856    0
       ..
2018    0
2019    2
2020    1
2021    0
2022    3
Name: Cyclone, Length: 171, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.filtered_data['Year'] = self.filtered_data.index.year


In [16]:
CycloneCalculator(filtered_data)

NameError: name 'filtered_data' is not defined