# Added a filter to group data with associated cyclones.

We can retrieve the number of cyclones recorded in the specified area: 132 cyclones since 1851.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
import pandas as pd
import matplotlib.pyplot as plt

# Location of the HURDAT2 text file (relative to the notebook's working directory)
file_path = '../data/hurdat2_1851_2023.txt'

# Parser state: the cyclones collected so far, and the one currently being read
cyclone_data, current_cyclone = [], None

# Function to convert latitude and longitude to float
def convert_lat_lon(value):
    """Convert a hemisphere-suffixed coordinate string to a signed float.

    Surrounding whitespace is ignored, so fields taken straight from a
    comma-split line (for example ``' 81.5W'`` or ``'28.0N '``) parse
    correctly.

    Args:
        value: Coordinate text ending in ``N`` or ``E`` (positive) or in
            ``S`` or ``W`` (negative), e.g. ``'28.0N'``.

    Returns:
        The coordinate as a float, negated for ``S`` and ``W``. ``None`` when
        the text does not end in one of the four hemisphere letters.

    Raises:
        ValueError: If the part before the hemisphere letter is not a number.
    """
    text = value.strip()
    # Slice (not index) so an empty string yields '' instead of raising.
    hemisphere = text[-1:]
    if hemisphere in ('N', 'E'):
        return float(text[:-1])
    if hemisphere in ('S', 'W'):
        return -float(text[:-1])
    # No recognised hemisphere suffix: keep the "no value" result explicit.
    return None

# Read the file line by line, grouping data rows under the header that precedes them
with open(file_path, 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Blank lines carry no record; storing them would add a one-field
            # row that breaks the integer conversions done later.
            continue
        # A line starting with 'AL' is treated as the header of a new cyclone
        if line.startswith('AL'):
            if current_cyclone is not None:
                cyclone_data.append(current_cyclone)
            current_cyclone = {'header': line, 'data': []}
        elif current_cyclone is not None:
            # Strip every field: values are padded after each comma, and keeping
            # the padding leaves codes such as ' TS' that never equal 'TS'.
            current_cyclone['data'].append([field.strip() for field in line.split(',')])

# Append the last cyclone, which has no following header to trigger the append above
if current_cyclone is not None:
    cyclone_data.append(current_cyclone)

# Column labels for one data row, in the order the fields appear on a line
record_columns = [
    'Date', 'Time', 'Record', 'Status', 'Latitude', 'Longitude', 'WindSpeed', 'Pressure',
    'Rad_34_NE', 'Rad_34_SE', 'Rad_34_SW', 'Rad_34_NW',
    'Rad_50_NE', 'Rad_50_SE', 'Rad_50_SW', 'Rad_50_NW',
    'Rad_64_NE', 'Rad_64_SE', 'Rad_64_SW', 'Rad_64_NW',
    'maxwnd',
]

# Build one typed, datetime-indexed frame per cyclone
# NOTE(review): -999 appears in Pressure and the radius columns of the sample
# output and looks like a missing-value marker; confirm before aggregating.
all_cyclone_dfs = []
for cyclone in cyclone_data:
    df = (
        pd.DataFrame(cyclone['data'], columns=record_columns)
        .astype({'Date': str, 'Time': str})
        .assign(
            Latitude=lambda frame: frame['Latitude'].apply(convert_lat_lon),
            Longitude=lambda frame: frame['Longitude'].apply(convert_lat_lon),
            WindSpeed=lambda frame: frame['WindSpeed'].astype(int),
            Pressure=lambda frame: frame['Pressure'].astype(int),
            Datetime=lambda frame: pd.to_datetime(frame['Date'] + ' ' + frame['Time'], format='%Y%m%d %H%M'),
            # Tag every row with the header line of the cyclone it belongs to
            Cyclone=cyclone['header'],
        )
        .set_index('Datetime')
    )
    all_cyclone_dfs.append(df)

# Stack the per-cyclone frames into a single table
all_data = pd.concat(all_cyclone_dfs)

# Put the Cyclone column first, keeping the remaining columns in their existing order
columns = ['Cyclone', *(name for name in all_data.columns if name != 'Cyclone')]
all_data = all_data[columns]

# Row counts per Status value across the whole dataset
new_thing = all_data['Status'].value_counts()
print(new_thing)

# Bounding box in signed degrees (south and west are negative) used to select rows
min_lat, max_lat = 27.5, 29.4
min_lon, max_lon = -81.5, -78.8

# Keep only the rows whose position lies inside the box, edges included
lat_in_box = all_data['Latitude'].between(min_lat, max_lat)
lon_in_box = all_data['Longitude'].between(min_lon, max_lon)
filtered_data = all_data[lat_in_box & lon_in_box]

# Row counts per Status value inside the box
status_counts = filtered_data['Status'].value_counts()
print(status_counts)




Status
 TS    20089
 HU    15517
 TD     9872
 EX     6129
 LO     1686
 SS      715
 SD      326
 DB      277
 WV      138
Name: count, dtype: int64
Status
 TS    127
 HU     67
 TD     57
 SD      7
 LO      6
 EX      4
 SS      3
Name: count, dtype: int64


In [None]:
# Bar chart of the per-status row counts inside the bounding box
fig, ax = plt.subplots(figsize=(10, 6))
status_counts.plot(kind='bar', ax=ax)
ax.set_title('Counts of Different Storm Statuses within Specified Lat/Lon Range')
ax.set_xlabel('Storm Status')
ax.set_ylabel('Count')
plt.show()

# Number of distinct cyclone headers with at least one row in the filtered data
unique_cyclones = filtered_data['Cyclone'].nunique()
print(f"Number of unique cyclones in the filtered data: {unique_cyclones}")

In [18]:
# Preview the first five filtered rows (head() defaults to n=5)
filtered_data.head()

Unnamed: 0_level_0,Cyclone,Date,Time,Record,Status,Latitude,Longitude,WindSpeed,Pressure,Rad_34_NE,...,Rad_34_NW,Rad_50_NE,Rad_50_SE,Rad_50_SW,Rad_50_NW,Rad_64_NE,Rad_64_SE,Rad_64_SW,Rad_64_NW,maxwnd
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1852-09-12 12:00:00,"AL031852, UNNAMED, 20,",18520912,1200,,TS,28.2,-81.5,50,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1852-09-12 18:00:00,"AL031852, UNNAMED, 20,",18520912,1800,,TS,28.5,-80.5,50,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1852-09-13 00:00:00,"AL031852, UNNAMED, 20,",18520913,0,,TS,29.2,-79.0,60,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1853-10-19 06:00:00,"AL081853, UNNAMED, 14,",18531019,600,,HU,28.0,-78.9,70,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1853-10-19 12:00:00,"AL081853, UNNAMED, 14,",18531019,1200,,HU,28.5,-79.3,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
