In [1]:
import pandas as pd

## Question:
How does human coastal development and fishing activity influence the behavior of sharks, and is there a correlation between increased human presence in shark habitats and the frequency of shark attacks?

## Hypothesis:
We hypothesize that as human invasion and fishing activities intensify in shark habitats, there will be observable changes in shark behavior, potentially leading to an increased frequency of shark attacks. This hypothesis is based on the assumption that alterations in the sharks' natural environment and food sources due to human activities may trigger defensive or predatory behaviors, contributing to a higher incidence of interactions between sharks and humans.

In [2]:
url="Data_sharks.xlsx"
shark=pd.read_excel(url,usecols=['Date','Year','Type','Country','State','Activity']).squeeze('columns')

In [3]:
#General cleaning data base
shark.columns=shark.columns.str.lower()
shark.dropna(thresh=6,inplace=True)

In [4]:
#for the year depending on what we would like to select it could be only >=1800
shark=shark[(shark['year']!=0) | (shark['year']>=1800)]


In [5]:
#normalizing by activity
activity_cleaning_dict={'bathing':'water activity','swim':'water activity','surf':'water activity',
                        'boarding':'water activity','fishing':'fishing','catching':'fishing','collecting':'work activity',
                        'crabbing':'fishing','dived':'underwater activity','diving':'underwater activity','dragging':'work activity',
                        'feeding':'work activity','fell':'accident','filming':'work activity','floating':'water activity','freediving':'underwater activity',
                        'beach':'water activity','hunting':'fishing','jumped':'water activity','kayaking':'water activity',
                        'kite':'water activity','paddling':'water activity','playing':'water activity','pulling':'work activity',
                        'removing':'work activity','rescuing':'work activity','watching':'underwater activity','siking':'work activity',
                        'sitting':'water activity','skindiving':'underwater activity','smorkeling':'underwater activity','spearfishing':'fishing',
                        'splashing':'water activity','surfing':'water activity','swimming':'water activity','treading':'work activity',
                        'attempting':'work activity','boat':'work activity','cleaning':'work activity','holding':'work activity',
                        'jumping':'water activity','killing':'work activity','lifeguard':'work activity','lifesaving':'work activity',
                        'lying':'water activity','sailing':'water activity','spearing':'underwater activity','standing':'water activity','thouching':'underwater activity',
                        'washing':'water activity','working':'work activity','wreck':'work activity','canoeing':'water activity','clamming':'work activity','dangling feet in the water':'water activity',
                        'paddleskiing':'water activity','sculling':'water activity','snorkeling':'underwater activity','wading':'fishing','watercraft':'water activity','walking':'water activity','air disaster':'accident',
                        'knocked overboard':'accident','sea disaster':'accident','rowing':'water activity','seine netting':'fishing','tagging sharks':'work activity','sup':'water activity'            
}

def cleaning_activity(activity,my_dict):
    for key, value in my_dict.items():
        if key in activity:
            return value
    return activity 

shark['activity']= shark['activity'].str.lower().str.strip().apply(cleaning_activity,my_dict=activity_cleaning_dict)

#filtering by activity>2 - there is something I'm missing

activity_counts = shark['activity'].value_counts()  # Calculate the counts of each activity

# Filter 'shark' DataFrame based on the counts of each activity being greater than 3 to simplify activity normalization
shark = shark[shark['activity'].isin(activity_counts[activity_counts > 3].index)]


In [11]:
#Normalizacion Counstries
shark['country'] = (shark['country'].str.upper().str.replace(' ','_').str.replace('?','').str.replace('&','AND').str.replace('/','AND'))
country_dict={'MEXICO_':'MEXICO','ST_KITTS_AND_NEVIS':'ST_KITTS', 'ST._MARTIN':'ST_MAARTIN',
               'ST_MARTIN': 'ST_MAARTIN','ST._MAARTIN':'ST_MAARTIN'}
shark['country'] = shark['country'].replace(country_dict)
#Filter Countries
countries_to_keep = ['USA','AUSTRALIA','SOUTH_AFRICA']
shark= shark[shark['country'].isin(countries_to_keep)]

In [24]:
pd.pivot_table(shark,index=['country'],columns='activity',values='type',aggfunc='count')

activity,accident,fishing,underwater activity,water activity,work activity
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUSTRALIA,23.0,297.0,184.0,709.0,43.0
SOUTH_AFRICA,,142.0,38.0,310.0,31.0
USA,15.0,460.0,163.0,1563.0,65.0


In [25]:
pd.pivot_table(shark,index=['state'],columns='activity',values='year',aggfunc='count')

activity,accident,fishing,underwater activity,water activity,work activity
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alabama,1.0,4.0,,10.0,1.0
BAHAMAS,,1.0,,,
Bahamas,,1.0,,,
California,,62.0,40.0,184.0,14.0
Cayman Islands,,1.0,,,
...,...,...,...,...,...
Westerm Australia,,3.0,,1.0,1.0
Western Australia,,1.0,,,
Western Australia,1.0,58.0,33.0,99.0,6.0
Western Cape Province,,73.0,15.0,78.0,9.0


In [17]:
shark[shark['country']=="USA"].state.value_counts()


state
Florida                     1063
Hawaii                       308
California                   300
South Carolina               136
North Carolina               106
Texas                         70
New Jersey                    48
New York                      42
Oregon                        31
Massachusetts                 17
Virginia                      17
Louisiana                     16
Alabama                       16
Georgia                       15
Puerto Rico                   11
Maryland                       9
Rhode Island                   8
Delaware                       7
Mississippi                    7
US Virgin Islands              5
Connecticut                    4
New York                       3
Guam                           2
Maine                          2
Washington                     2
Palmyra Atoll                  2
Maui                           2
Missouri                       1
East coast                     1
Wake Island                    1
Illi