In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Cargar el DataFrame desde la URL
url = 'https://www.sharkattackfile.net/spreadsheets/GSAF5.xls'
df = pd.read_excel(url)

# Función para eliminar columnas innecesarias
def delete_columns(df):
    df_dropped_multiple = df.drop(['Year', 'Type', 'Location', 'Name', 'Sex', 'Age', 'Injury',
                                   'Unnamed: 11', 'Time', 'Species ', 'Source', 'pdf', 'href formula',
                                   'href', 'Case Number', 'Case Number.1', 'original order',
                                   'Unnamed: 21', 'Unnamed: 22'], axis=1)
    return df_dropped_multiple

# Limpiar el DataFrame
df = delete_columns(df)

# Función para limpiar los datos
def clean_data(df):
    df.columns = df.columns.str.lower().str.replace(' ', '_')
    df.drop_duplicates(inplace=True)
    df = df.dropna()
    return df

df= clean_data(df)

# Función para remover el prefijo "Reported " de las fechas
def remove_prefix(date):
    if isinstance(date, str) and date.startswith("Reported "):
        return date[len("Reported "):]
    return date

# Función para corregir el formato de las fechas
def fix_format_date(date):
    if isinstance(date, str):
        try:
            correct_date_format = pd.to_datetime(date, format='%d-%b-%Y', errors='coerce')
            return correct_date_format.strftime('%d-%m-%Y') if correct_date_format else None
        except ValueError:
            return None
    return None

# Aplicar las funciones de limpieza de fechas
df['date'] = df['date'].apply(remove_prefix)
df['date'] = df['date'].apply(fix_format_date)

def filter_date(df):
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    df_filtered = df[df['date'] >= pd.to_datetime('2020-01-01')]
    return df_filtered

df = filter_date(df)
print(df)





          date       country                  state  \
8   2024-09-01     AUSTRALIA        South Australia   
9   2024-05-01  SOUTH AFRICA  Eastern Cape Province   
74  2023-07-06       BAHAMAS               Freeport   
84  2023-11-05           USA                Florida   
85  2023-11-05           USA                 Hawaii   
..         ...           ...                    ...   
415 2020-05-02           USA                   Maui   
421 2020-12-01      THAILAND     Phang Nga Province   
422 2020-07-01     AUSTRALIA             Queensland   
423 2020-05-01     AUSTRALIA      Western Australia   
425 2020-02-01     AUSTRALIA      Western Australia   

                                 activity  
8                                 Surfing  
9                                 Fishing  
74                           Scuba diving  
84                                Sitting  
85                          Kayak fishing  
..                                    ...  
415              Stand-Up Paddl