In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

In [2]:
eat = pd.read_excel("SharkData.xlsx", sheet_name = 0)
targets = pd.read_excel("SharkData.xlsx", sheet_name = 1)
drops = pd.read_excel("SharkData.xlsx", sheet_name = 2)
other_factors = pd.read_excel("SharkData.xlsx", sheet_name = 3)

Cleaning for eat Dataframe

In [3]:
eat = eat.iloc[1:,:-1]
eat.rename(columns = {"Unnamed: 14":'Etc. Comments', "Pieces Eaten": "Date"}, inplace = True)

#get rid of null values at end of dataset
eat = eat.head(582)

# Change date to just date format, not datetime
eat['Date'] = pd.to_datetime(eat['Date']).dt.date

eat.fillna(0, inplace = True)

eat.tail()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total:,Etc. Comments
578,2020-09-22,6.0,0.0,1.0,2.0,6.0,0.0,4.0,1.0,0.0,0.0,5.0,1.0,26.0,0
579,2020-09-24,0.0,9.0,0.0,7.0,5.0,5.0,7.0,0.0,1.0,2.0,2.0,6.0,44.0,0
580,2020-09-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,2.0,3.0,3.0,12.0,0
581,2020-09-27,5.0,6.0,3.0,6.0,4.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,33.0,0
582,2020-09-29,10.0,3.0,1.0,1.0,4.0,2.0,7.0,4.0,4.0,0.0,2.0,0.0,38.0,0


# 
Look at means of Individual sharks, then look at means of individual species

In [4]:
# Individual mean number of pieces eaten
All_Sharks = eat[['Ross', 'Chandler','BT1','BT2','BT3','BT4','BT5','GR1','GR2','GR3','GR4', 'GR5']]
All_mean = round(All_Sharks.mean(), 3)
All_mean

Ross        2.686
Chandler    2.174
BT1         1.538
BT2         2.151
BT3         1.768
BT4         2.100
BT5         2.304
GR1         1.010
GR2         1.015
GR3         1.395
GR4         1.203
GR5         1.302
dtype: float64

In [45]:
# Graph Mean Number of Fish Eaten For Each Shark

Eaten_Means = {'Shark': All_Sharks.columns,
        'Mean': All_mean}

Eaten_Means_df = pd.DataFrame(Eaten_Means, columns = ['Shark', 'Mean'])

Eaten_Means_Sorted = Eaten_Means_df.sort_values(by=['Mean'], ascending = False)

fig = px.histogram(Eaten_Means_Sorted, x="Shark", y="Mean", 
                   title = "Mean Number of Fish Eaten by Shark",
                  labels={
                     "Shark": "Shark",
                     "Mean": " Mean Number of Fish",
                 })
fig.show()

In [6]:
# Mean of Ross and Chandler
Sandbar = eat['Ross'] + eat['Chandler']
Sandbar.mean()

4.859106529209622

In [7]:
# Mean of BT
BT = eat['BT1'] + eat['BT2'] + eat['BT3'] + eat['BT4'] + eat['BT5']
BT.mean()

9.860824742268042

In [8]:
# Mean of GR
GR = eat['GR1'] + eat['GR2'] + eat['GR3'] + eat['GR4'] + eat['GR5']
GR.mean()

5.926116838487973

In [9]:
# Graph Mean Number of Fish Eaten For Each Species

Species_Eaten_Means = {'Species': ['Blacktip', 'Gray Reef', 'Sandbar'],
        'Mean': [round(BT.mean(),3),round(GR.mean(),3), round(Sandbar.mean(),3)]}

Species_Eaten_Means_df = pd.DataFrame(Species_Eaten_Means, columns = ['Species', 'Mean'])

Species_Eaten_Means_Sorted = Species_Eaten_Means_df.sort_values(by=['Mean'], ascending = False)

fig = px.histogram(Species_Eaten_Means_Sorted, x="Species", y="Mean", 
                   title = "Mean Number of Fish Eaten by Species",
                  labels={
                     "Species": "Species",
                     "Mean": " Mean Number of Fish",
                 })
fig.show()

In [10]:
# Look at how many days the sharks ate in comparison to did not eat
columns = list(All_Sharks) 
  
for i in columns: 
    Days_Eaten = All_Sharks[i] > 0
    print(i, All_Sharks[i][Days_Eaten].count())

Ross 377
Chandler 337
BT1 324
BT2 355
BT3 331
BT4 367
BT5 370
GR1 282
GR2 286
GR3 307
GR4 318
GR5 313


In [11]:
# Proportion of Days Each Shark Ate Out of Total Days
columns = list(All_Sharks) 
  
for i in columns: 
    Days_Eaten = All_Sharks[i] > 0
    prop = round((All_Sharks[i][Days_Eaten].count())/All_Sharks[i].count(), 3)
    print(i, prop)

Ross 0.648
Chandler 0.579
BT1 0.557
BT2 0.61
BT3 0.569
BT4 0.631
BT5 0.636
GR1 0.485
GR2 0.491
GR3 0.527
GR4 0.546
GR5 0.538


In [12]:
# Graph Proportions for Each Shark
Eaten_Props = {'Shark': ['Ross', 'Chandler','BT1','BT2','BT3','BT4','BT5','GR1','GR2','GR3','GR4', 'GR5'],
        'Proportion': [0.857,0.768,0.74,0.811,0.756,0.838,0.849,0.645,0.654,0.703,0.728,0.716]}

Eaten_Props_df = pd.DataFrame(Eaten_Props, columns = ['Shark', 'Proportion'])

Eaten_Props_Sorted = Eaten_Props_df.sort_values(by=['Proportion'], ascending = False)

fig = px.histogram(Eaten_Props_Sorted, x="Shark", y="Proportion", 
                   title = "Proportion of Days Where The Shark Ate",
                  labels={
                     "Shark": "Shark",
                     "Proportion": "Proportion of Days",
                 })
fig.show()

# 
Out of all the sharks, Ross had the highest proportion of days eaten. He ate at least 1 fish for .857 of the days. The shark with the lowest proportion of days eaten was GR1, who ate at least 1 fish for .645 of the days.

In [13]:
# Look at the proportion of days eaten by each species
# Sandbars
SB_Days_Eaten = Sandbar > 0
SB_Eaten_Count = Sandbar[SB_Days_Eaten].count()
SB_Eaten_Prop = round(Sandbar[SB_Days_Eaten].count() / Sandbar.count(),3)
print("Sandbar Number of Days Eaten:", SB_Eaten_Count)
print("Sandbar Proportion of Days Eaten:", SB_Eaten_Prop)


Sandbar Number of Days Eaten: 425
Sandbar Proportion of Days Eaten: 0.73


In [14]:
# Blacktips
BT_Days_Eaten = BT > 0
BT_Eaten_Count = BT[BT_Days_Eaten].count()
BT_Eaten_Prop = round(BT[BT_Days_Eaten].count() / BT.count(),3)
print("Blacktip Number of Days Eaten:", BT_Eaten_Count)
print("Blacktip Proportion of Days Eaten:", BT_Eaten_Prop)

Blacktip Number of Days Eaten: 437
Blacktip Proportion of Days Eaten: 0.751


In [15]:
# Gray Reefs
GR_Days_Eaten = GR > 0
GR_Eaten_Count = GR[GR_Days_Eaten].count()
GR_Eaten_Prop = round(GR[GR_Days_Eaten].count() / GR.count(),3)
print("Gray Reef Number of Days Eaten:", GR_Eaten_Count)
print("Gray Reef Proportion of Days Eaten:", GR_Eaten_Prop)

Gray Reef Number of Days Eaten: 418
Gray Reef Proportion of Days Eaten: 0.718


# 
Blacktip sharks had the highest number of days where at least one shark from the group ate at least 1 fish. At least 1 Blacktip shark ate for .998 of the days. Gray Reef sharks only had at least one shark eat for .957 of the days, which was the lowest of the three groups

In [16]:
# Graph Proportions Based on Species of Shark
Species_Eaten_Props = {'Species': ['Blacktip','Sandbar','Gray Reef'],
        'Proportion': [BT_Eaten_Prop,SB_Eaten_Prop,GR_Eaten_Prop]
        }
Species_Eaten_Props_df = pd.DataFrame(Species_Eaten_Props, columns = ['Species', 'Proportion'])
fig = px.histogram(Species_Eaten_Props_df, x="Species", y="Proportion", 
                   title = "Proportion of Days Where A Species Ate",
                  labels={
                     "Species": "Shark Species",
                     "Proportion": "Proportion of Days",
                 })
fig.show()

## Drop Data (Brady's Section)

In [17]:
drops = drops.iloc[1:,:-1]
drops['Drops'] = pd.to_datetime(drops['Drops']).dt.date
drops.fillna(0, inplace = True)

In [29]:
drops

Unnamed: 0,Drops,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total:,All_GR,All_BT,All_SS
1,2017-12-05,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,3.0,2.0,1.0,0.0
2,2017-12-07,0.0,1.0,1.0,2.0,1.0,1.0,2.0,0.0,0.0,2.0,0.0,0.0,10.0,2.0,7.0,1.0
3,2017-12-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
4,2017-12-10,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
5,2017-12-12,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,1.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578,2020-09-22,0.0,0.0,1.0,4.0,0.0,2.0,2.0,0.0,0.0,0.0,1.0,0.0,10.0,1.0,9.0,0.0
579,2020-09-24,0.0,0.0,0.0,3.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,7.0,1.0,6.0,0.0
580,2020-09-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0
581,2020-09-27,0.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0


In [35]:
print(round(drops["Total: "].mean(),2),"drops on average per day when feeding took place")
print(round((drops["Total: "].mean()/12),2), "drops on average per day per shark when feeding took place")

3.46 drops on average per day when feeding took place
0.29 drops on average per day per shark when feeding took place


In [28]:
drops["All_GR"] = drops["GR1"] + drops["GR2"] + drops["GR3"]+ drops["GR4"] + drops["GR5"]
drops["All_BT"] = drops["BT1"] + drops["BT2"] + drops["BT3"]+ drops["BT4"] + drops["BT5"]
drops["All_SS"] = drops["Ross"] + drops["Chandler"]

In [36]:
print(round(drops["All_GR"].mean(),2),"drops on average per day for all Grey Reef Sharks when feeding took place")
print(round(drops["All_BT"].mean(),2),"drops on average per day for all Black Tip Sharks when feeding took place")
print(round(drops["All_SS"].mean(),2),"drops on average per day for all Sand Sharks when feeding took place")

0.7 drops on average per day for all Grey Reef Sharks when feeding took place
2.49 drops on average per day for all Black Tip Sharks when feeding took place
0.29 drops on average per day for all Sand Sharks when feeding took place


In [38]:
print(round(drops["All_GR"].mean()/5,2),"drops on average per day for each Grey Reef Sharks when feeding took place")
print(round(drops["All_BT"].mean()/5,2),"drops on average per day for each Black Tip Sharks when feeding took place")
print(round(drops["All_SS"].mean()/2,2),"drops on average per day for each Sand Sharks when feeding took place")

0.14 drops on average per day for each Grey Reef Sharks when feeding took place
0.5 drops on average per day for each Black Tip Sharks when feeding took place
0.15 drops on average per day for each Sand Sharks when feeding took place


In [39]:
drops["male"] = drops["BT1"] + drops["BT5"] + drops["GR1"] + drops["Ross"] + drops["Chandler"]
drops["female"] = drops["BT2"] + drops["BT3"] + drops["BT4"] + drops["GR2"] + drops["GR3"] + drops["GR4"] + drops["GR5"]

In [40]:
print(round(drops["male"].mean(),2),"drops on average per day for all male sharks when feeding took place")
print(round(drops["female"].mean(),2),"drops on average per day for all female sharks when feeding took place")

1.41 drops on average per day for all male sharks when feeding took place
2.08 drops on average per day for all female sharks when feeding took place


In [41]:
print(round(drops["male"].mean()/5,2),"drops on average per day for all male sharks when feeding took place")
print(round(drops["female"].mean()/7,2),"drops on average per day for all female sharks when feeding took place")

0.28 drops on average per day for all male sharks when feeding took place
0.3 drops on average per day for all female sharks when feeding took place


In [54]:
All_Sharks_drops = drops[['Ross', 'Chandler','BT1','BT2','BT3','BT4','BT5','GR1','GR2','GR3','GR4', 'GR5']]
drop_means = pd.DataFrame(round(All_Sharks_drops.mean(), 3)).reset_index()
drop_means.columns = ["Shark","Mean"]
drop_means = drop_means.sort_values(by = "Mean", ascending = False)
drop_means
px.bar(drop_means, x = "Shark", y = "Mean", title = "Drop Means for Each Shark")

In [106]:
All_Sharks_drops = drops[["female","male"]]
All_Sharks_drops["female"] = All_Sharks_drops["female"] / 7
All_Sharks_drops["male"] = All_Sharks_drops["male"] / 5
drop_means = pd.DataFrame(round(All_Sharks_drops.mean(), 3)).reset_index()
drop_means.columns = ["Gender","Mean"]
drop_means = drop_means.sort_values(by = "Mean", ascending = False)
drop_means
px.bar(drop_means, x = "Gender", y = "Mean", title = "How often is the average shark dropping for each gender?")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [107]:
All_Sharks_drops = drops[["All_GR","All_BT","All_SS"]]
All_Sharks_drops["All_GR"] = All_Sharks_drops["All_GR"] / 5
All_Sharks_drops["All_BT"] = All_Sharks_drops["All_BT"] / 5
All_Sharks_drops["All_SS"] = All_Sharks_drops["All_SS"] / 2
All_Sharks_drops.columns = ["Grey Reef","Black Tip","Sand Shark"]
drop_means = pd.DataFrame(round(All_Sharks_drops.mean(), 3)).reset_index()
drop_means.columns = ["Species","Mean"]
drop_means = drop_means.sort_values(by = "Mean", ascending = False)
drop_means
px.bar(drop_means, x = "Species", y = "Mean", title = "Drop Means for Each species on average")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [112]:
All_Sharks_drops = drops.iloc[:,1:]
All_Sharks_drops["All_GR"] = All_Sharks_drops["All_GR"] / 5
All_Sharks_drops["All_BT"] = All_Sharks_drops["All_BT"] / 5
All_Sharks_drops["All_SS"] = All_Sharks_drops["All_SS"] / 2
All_Sharks_drops["female"] = All_Sharks_drops["female"] / 7
All_Sharks_drops["male"] = All_Sharks_drops["male"] / 5
All_Sharks_drops["Total: "] = All_Sharks_drops["Total: "] / 12
drop_means = pd.DataFrame(round(All_Sharks_drops.mean(), 3)).reset_index()
drop_means.columns = ["Subset","Mean"]
drop_means = drop_means.sort_values(by = "Mean", ascending = False)
drop_means
px.bar(drop_means, x = "Subset", y = "Mean", title = "Drop Means for each subset")

In [66]:
drop_prop = drops.iloc[:,1:]

In [87]:
props = {}
for i in drop_prop.columns:
    data = drop_prop[i]
    dropped_days = data > 0
    props.update({i:(round(len(data[dropped_days])/len(data),2))})
props
props = pd.DataFrame(list(props.items()),columns = ['Subset of Data','Proportion of Days Dropped'])

In [102]:
drop_prop_species = props.iloc[13:16,:].rename(columns = {"Subset of Data":"Species"}).sort_values(by = "Proportion of Days Dropped", ascending = False)
drop_prop_gender = props.iloc[-2:,:].rename(columns = {"Subset of Data":"Gender"}).sort_values(by = "Proportion of Days Dropped", ascending = False)
drop_prop_sharks = props.iloc[0:12,:].rename(columns = {"Subset of Data":"Shark"}).sort_values(by = "Proportion of Days Dropped", ascending = False)

In [103]:
px.bar(drop_prop_species, x = "Species", y = "Proportion of Days Dropped")

In [104]:
px.bar(drop_prop_gender, x = "Gender", y = "Proportion of Days Dropped")

In [105]:
px.bar(drop_prop_sharks, x = "Shark", y = "Proportion of Days Dropped")