# In [None]:
# main_script.py

# Import necessary functions and libraries
from initial_setup import load_data, initial_preview
from data_cleaning import initial_cleaning
from column_cleaning import (clean_fatal_column, clean_sex_column, clean_type_column, clean_time_column, clean_age_column)
from feature_engineering import (apply_date_reformatting, apply_seasonality, handle_activity_location_na)
from visualizations import (plot_seasonality_pie_chart, plot_type_distribution_bar)

# ---------------------------------------------------------------------------
# Section 1: Initial Setup
# ---------------------------------------------------------------------------
# Location of the GSAF5 spreadsheet that is handed to load_data.
url = 'https://www.sharkattackfile.net/spreadsheets/GSAF5.xls'
df = load_data(url)

# Uncomment to get an initial preview of the raw DataFrame
# initial_preview(df)

# ---------------------------------------------------------------------------
# Section 2: DataFrame Cleaning Preparation
# ---------------------------------------------------------------------------
df = initial_cleaning(df)
cleaned_columns = df.columns.tolist()
print("Columns after initial cleaning:", cleaned_columns)

# ---------------------------------------------------------------------------
# Section 3: Cleaning Specific Columns
# ---------------------------------------------------------------------------
# Work on a copy so `df` keeps the result of initial_cleaning untouched by
# the reassignments below.
df_copy = df.copy()

# Each cleaner takes the frame and returns the frame to use from then on.
# The tuple order is the order in which the cleaners are applied.
for clean_column in (
    clean_fatal_column,
    clean_sex_column,
    clean_type_column,
    clean_time_column,
    clean_age_column,
):
    df_copy = clean_column(df_copy)

# ---------------------------------------------------------------------------
# Section 4: Reformatting Dates and Adding Seasonality Information
# ---------------------------------------------------------------------------
df_copy = apply_date_reformatting(df_copy)
date_counts = df_copy['date'].value_counts(dropna=False)
print("Date reformatting - value counts:\n", date_counts)

df_copy = apply_seasonality(df_copy)

# Keep only the rows whose 'seasonality' value is not the literal "Unknown".
has_known_season = df_copy['seasonality'] != "Unknown"
valid_seasons_df = df_copy[has_known_season]

# Share of each season, expressed as a percentage of the retained rows.
# NOTE(review): the denominator is the row count of valid_seasons_df, while
# the numerator comes from value_counts() with default arguments. If
# 'seasonality' can hold missing values the percentages may not sum to 100;
# confirm against apply_seasonality.
season_counts = valid_seasons_df['seasonality'].value_counts()
total_count = len(valid_seasons_df)
season_share = season_counts / total_count
season_percentage = season_share * 100
print("Seasonality percentage:\n", season_percentage)

# ---------------------------------------------------------------------------
# Section 5: Handling NaNs in 'activity' and 'location'
# ---------------------------------------------------------------------------
df_copy = handle_activity_location_na(df_copy)
activity_counts = df_copy['activity'].value_counts(dropna=False)
print("Activity column - value counts:\n", activity_counts)

# Final Output: Updated DataFrame
print("Updated DataFrame:\n", df_copy.head())

# ---------------------------------------------------------------------------
# Section 6: Visualizations
# ---------------------------------------------------------------------------
# Number of rows per 'year' among the rows where 'fatal' equals 'y'.
# NOTE(review): deaths_by_year is not consumed by the plotting calls visible
# in this section; kept because later code may read it.
fatal_attacks = df_copy[df_copy['fatal'] == 'y']
deaths_by_year = fatal_attacks.groupby('year').size()

# Call visualization functions
plot_seasonality_pie_chart(season_counts)
plot_type_distribution_bar(df_copy)