In [84]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import datetime
import plotly.graph_objects as go

In [85]:
import numpy as np
import scipy.stats


def mean_confidence_interval(data, confidence=0.95):
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
    return [m, m-h, m+h]

In [86]:
def mean_confidence_error(data, confidence=0.95):
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
    return h

In [87]:
eat = pd.read_excel("SharkData.xlsx", sheet_name = 0)
targets = pd.read_excel("SharkData.xlsx", sheet_name = 1)
drops = pd.read_excel("SharkData.xlsx", sheet_name = 2)
other_factors = pd.read_excel("SharkData.xlsx", sheet_name = 3)

Cleaning for eat Dataframe

In [88]:
eat = eat.iloc[1:,:-1]
eat.rename(columns = {"Unnamed: 14":'Etc. Comments', "Pieces Eaten": "Date"}, inplace = True)

#get rid of null values at end of dataset
eat = eat.head(582)

# Change date to just date format, not datetime
eat['Date'] = pd.to_datetime(eat['Date']).dt.date

eat.fillna(0, inplace = True)

eat.tail()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total:,Etc. Comments
578,2020-09-22,6.0,0.0,1.0,2.0,6.0,0.0,4.0,1.0,0.0,0.0,5.0,1.0,26.0,0
579,2020-09-24,0.0,9.0,0.0,7.0,5.0,5.0,7.0,0.0,1.0,2.0,2.0,6.0,44.0,0
580,2020-09-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,2.0,3.0,3.0,12.0,0
581,2020-09-27,5.0,6.0,3.0,6.0,4.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,33.0,0
582,2020-09-29,10.0,3.0,1.0,1.0,4.0,2.0,7.0,4.0,4.0,0.0,2.0,0.0,38.0,0


# 
Look at means of Individual sharks, then look at means of individual species

In [89]:
# Individual mean number of pieces eaten
All_Sharks = eat[['Ross', 'Chandler','BT1','BT2','BT3','BT4','BT5','GR1','GR2','GR3','GR4', 'GR5']]
All_mean = round(All_Sharks.mean(), 3)
All_mean

Ross        2.686
Chandler    2.174
BT1         1.538
BT2         2.151
BT3         1.768
BT4         2.100
BT5         2.304
GR1         1.010
GR2         1.015
GR3         1.395
GR4         1.203
GR5         1.302
dtype: float64

In [90]:
# Graph Mean Number of Fish Eaten For Each Shark

Eaten_Means = {'Shark': All_Sharks.columns,
        'Mean': All_mean}

Eaten_Means_df = pd.DataFrame(Eaten_Means, columns = ['Shark', 'Mean'])

Eaten_Means_Sorted = Eaten_Means_df.sort_values(by=['Mean'], ascending = False)

fig = px.histogram(Eaten_Means_Sorted, x="Shark", y="Mean", 
                   title = "Mean Number of Fish Eaten by Shark",
                  labels={
                     "Shark": "Shark",
                     "Mean": " Mean Number of Fish",
                 })
fig.show()

In [91]:
# Mean number of fish eaten for Sandbar species
Sandbar = eat['Ross'] + eat['Chandler']
Sandbar.mean() / 2

2.429553264604811

In [92]:
# Mean of BT
BT = eat['BT1'] + eat['BT2'] + eat['BT3'] + eat['BT4'] + eat['BT5']
BT.mean() / 5

1.9721649484536083

In [93]:
# Mean of GR
GR = eat['GR1'] + eat['GR2'] + eat['GR3'] + eat['GR4'] + eat['GR5']
GR.mean() / 5

1.1852233676975945

In [94]:
# Graph Mean Number of Fish Eaten For Each Species

Species_Eaten_Means = {'Species': ['Blacktip', 'Gray Reef', 'Sandbar'],
        'Mean': [round(Sandbar.mean()/2,3),round(BT.mean()/5,3), round(GR.mean()/5,3)]}

Species_Eaten_Means_df = pd.DataFrame(Species_Eaten_Means, columns = ['Species', 'Mean'])

Species_Eaten_Means_Sorted = Species_Eaten_Means_df.sort_values(by=['Mean'], ascending = False)

fig = px.histogram(Species_Eaten_Means_Sorted, x="Species", y="Mean", 
                   title = "Mean Number of Fish Eaten Per Shark by Species",
                  labels={
                     "Species": "Species",
                     "Mean": " Mean Number of Fish",
                 })
fig.show()

In [95]:
eat["male"] = eat["BT1"] + eat["BT5"] + eat["GR1"] + eat["Ross"] + eat["Chandler"]
eat["female"] = eat["BT2"] + eat["BT3"] + eat["BT4"] + eat["GR2"] + eat["GR3"] + eat["GR4"] + eat["GR5"]

In [96]:
print(round(eat["male"].mean()/5,2),"Pieces eaten on average per day for all male sharks when feeding took place")
print(round(eat["female"].mean()/7,2),"Pieces eaten on average per day for all female sharks when feeding took place")

1.94 Pieces eaten on average per day for all male sharks when feeding took place
1.56 Pieces eaten on average per day for all female sharks when feeding took place


In [97]:
eat["All_GR"] = eat['GR1'] + eat['GR2'] + eat['GR3'] + eat['GR4'] + eat['GR5']
eat["All_BT"] = eat['BT1'] + eat['BT2'] + eat['BT3'] + eat['BT4'] + eat['BT5']
eat["All_SS"] = eat['Ross'] + eat['Chandler']
eat.head()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total:,Etc. Comments,male,female,All_GR,All_BT,All_SS
1,2017-12-05,0.0,5.0,3.0,6.0,5.0,4.0,4.0,0.0,0.0,2.0,3.0,1.0,33.0,0,12.0,21.0,6.0,22.0,5.0
2,2017-12-07,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0,1.0,2.0,0.0,2.0,1.0
3,2017-12-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,3.0,1.0,0.0,6.0,0,2.0,4.0,6.0,0.0,0.0
4,2017-12-10,5.0,1.0,1.0,3.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0,7.0,7.0,0.0,8.0,6.0
5,2017-12-12,2.0,2.0,0.0,5.0,2.0,4.0,5.0,0.0,2.0,3.0,2.0,0.0,27.0,0,9.0,18.0,7.0,16.0,4.0


In [98]:
eat["All_GR"] = eat["All_GR"] / 5
eat["All_BT"] = eat["All_BT"] / 5
eat["All_SS"] = eat["All_SS"] / 2
eat["female"] = eat["female"] / 7
eat["male"] = eat["male"] / 5
eat["Total Adjusted"] = eat["Total: "] / 12
eaten_means = pd.DataFrame(round(eat.mean(), 3)).reset_index()
eaten_means.columns = ["Subset","Mean"]
eaten_means = eaten_means.sort_values(by = "Mean", ascending = False)
prop_average = eaten_means[eaten_means["Subset"]=="Total Adjusted"]["Mean"].values[0]
eaten_means = pd.merge(eaten_means,pd.melt(eat.describe().loc[["std"]], var_name = "Subset",value_name = "Standard Deviation"), how = "outer", on = "Subset")
#drop_means["color"] = "blue"
Average = []
for i in eaten_means.Mean:
    if i > prop_average:
        Average.append("Above Average")
    elif i == prop_average:
        Average.append("Average")
    else:
        Average.append("Below Average")
eaten_means["Average"] = Average
max_y = (eaten_means[1:]["Mean"] + eaten_means["Standard Deviation"]).max()   
px.bar(eaten_means.iloc[1:,:], x = "Subset", y = "Mean", color = "Average", error_y = "Standard Deviation",range_y = [0,max_y],title = "Eaten Means for each subset with 1 standard deviation")

In [100]:
eaten_means = pd.DataFrame(round(eat.mean(), 3)).reset_index()
eaten_means.columns = ["Subset","Mean"]
eaten_means = eaten_means.sort_values(by = "Mean", ascending = False)
prop_average = eaten_means[eaten_means["Subset"]=="Total Adjusted"]["Mean"].values[0]
ci = {}
for column in eat.drop(["Total: ","Etc. Comments"], axis = 1).iloc[:,1:].columns:
    ci.update({column:mean_confidence_error(eat[column])})
ci = pd.DataFrame(list(ci.items()),columns = ['Subset','SE'])
eaten_means = pd.merge(eaten_means, ci, how = 'outer', on = 'Subset')
Average = []
for i in eaten_means.Mean:
    if i > prop_average:
        Average.append("Above Average")
    elif i == prop_average:
        Average.append("Average")
    else:
        Average.append("Below Average")
eaten_means["Average"] = Average  
px.bar(eaten_means.iloc[1:,:], x = "Subset", y = "Mean", color = "Average", error_y = "SE",title = "Eaten Means for each subset with Confidence Intervals")

In [17]:
# Look at how many days the sharks ate in comparison to did not eat
columns = list(All_Sharks) 
  
for i in columns: 
    Days_Eaten = All_Sharks[i] > 0
    print(i, All_Sharks[i][Days_Eaten].count())

Ross 377
Chandler 337
BT1 324
BT2 355
BT3 331
BT4 367
BT5 370
GR1 282
GR2 286
GR3 307
GR4 318
GR5 313


In [18]:
# Proportion of Days Each Shark Ate Out of Total Days
columns = list(All_Sharks) 
  
for i in columns: 
    Days_Eaten = All_Sharks[i] > 0
    prop = round((All_Sharks[i][Days_Eaten].count())/All_Sharks[i].count(), 3)
    print(i, prop)

Ross 0.648
Chandler 0.579
BT1 0.557
BT2 0.61
BT3 0.569
BT4 0.631
BT5 0.636
GR1 0.485
GR2 0.491
GR3 0.527
GR4 0.546
GR5 0.538


In [19]:
# Graph Proportions for Each Shark
Eaten_Props = {'Shark': ['Ross', 'Chandler','BT1','BT2','BT3','BT4','BT5','GR1','GR2','GR3','GR4', 'GR5'],
        'Proportion': [0.857,0.768,0.74,0.811,0.756,0.838,0.849,0.645,0.654,0.703,0.728,0.716]}

Eaten_Props_df = pd.DataFrame(Eaten_Props, columns = ['Shark', 'Proportion'])

Eaten_Props_Sorted = Eaten_Props_df.sort_values(by=['Proportion'], ascending = False)

fig = px.histogram(Eaten_Props_Sorted, x="Shark", y="Proportion", 
                   title = "Proportion of Days Where The Shark Ate",
                  labels={
                     "Shark": "Shark",
                     "Proportion": "Proportion of Days",
                 })
fig.show()

# 
Out of all the sharks, Ross had the highest proportion of days eaten. He ate at least 1 fish for .857 of the days. The shark with the lowest proportion of days eaten was GR1, who ate at least 1 fish for .645 of the days.

In [20]:
# Look at the proportion of days eaten by each species
# Sandbars
SB_Days_Eaten = Sandbar > 0
SB_Eaten_Count = Sandbar[SB_Days_Eaten].count()
SB_Eaten_Prop = round(Sandbar[SB_Days_Eaten].count() / Sandbar.count(),3)
print("Sandbar Number of Days Eaten:", SB_Eaten_Count)
print("Sandbar Proportion of Days Eaten:", SB_Eaten_Prop)


Sandbar Number of Days Eaten: 425
Sandbar Proportion of Days Eaten: 0.73


In [21]:
# Blacktips
BT_Days_Eaten = BT > 0
BT_Eaten_Count = BT[BT_Days_Eaten].count()
BT_Eaten_Prop = round(BT[BT_Days_Eaten].count() / BT.count(),3)
print("Blacktip Number of Days Eaten:", BT_Eaten_Count)
print("Blacktip Proportion of Days Eaten:", BT_Eaten_Prop)

Blacktip Number of Days Eaten: 437
Blacktip Proportion of Days Eaten: 0.751


In [22]:
# Gray Reefs
GR_Days_Eaten = GR > 0
GR_Eaten_Count = GR[GR_Days_Eaten].count()
GR_Eaten_Prop = round(GR[GR_Days_Eaten].count() / GR.count(),3)
print("Gray Reef Number of Days Eaten:", GR_Eaten_Count)
print("Gray Reef Proportion of Days Eaten:", GR_Eaten_Prop)

Gray Reef Number of Days Eaten: 418
Gray Reef Proportion of Days Eaten: 0.718


# 
Blacktip sharks had the highest number of days where at least one shark from the group ate at least 1 fish. At least 1 Blacktip shark ate for .998 of the days. Gray Reef sharks only had at least one shark eat for .957 of the days, which was the lowest of the three groups

In [23]:
# Graph Proportions Based on Species of Shark
Species_Eaten_Props = {'Species': ['Blacktip','Sandbar','Gray Reef'],
        'Proportion': [BT_Eaten_Prop,SB_Eaten_Prop,GR_Eaten_Prop]
        }
Species_Eaten_Props_df = pd.DataFrame(Species_Eaten_Props, columns = ['Species', 'Proportion'])
fig = px.histogram(Species_Eaten_Props_df, x="Species", y="Proportion", 
                   title = "Proportion of Days Where A Species Ate",
                  labels={
                     "Species": "Shark Species",
                     "Proportion": "Proportion of Days",
                 })
fig.show()

## Drop Data (Brady's Section)

In [102]:
drops = drops.iloc[1:,:-1]
drops['Drops'] = pd.to_datetime(drops['Drops']).dt.date
drops.fillna(0, inplace = True)

In [103]:
#print(round(drops["Total: "].mean(),2),"drops on average per day when feeding took place")
print(round((drops["Total: "].mean()/12),2), "drops on average per day per shark when feeding took place")

0.29 drops on average per day per shark when feeding took place


In [104]:
drops["All_GR"] = drops["GR1"] + drops["GR2"] + drops["GR3"]+ drops["GR4"] + drops["GR5"]
drops["All_BT"] = drops["BT1"] + drops["BT2"] + drops["BT3"]+ drops["BT4"] + drops["BT5"]
drops["All_SS"] = drops["Ross"] + drops["Chandler"]

In [105]:
print(round(drops["All_GR"].mean()/5,2),"drops on average per day for each Grey Reef Sharks when feeding took place")
print(round(drops["All_BT"].mean()/5,2),"drops on average per day for each Black Tip Sharks when feeding took place")
print(round(drops["All_SS"].mean()/2,2),"drops on average per day for each Sand Sharks when feeding took place")

0.14 drops on average per day for each Grey Reef Sharks when feeding took place
0.5 drops on average per day for each Black Tip Sharks when feeding took place
0.15 drops on average per day for each Sand Sharks when feeding took place


In [106]:
drops["male"] = drops["BT1"] + drops["BT5"] + drops["GR1"] + drops["Ross"] + drops["Chandler"]
drops["female"] = drops["BT2"] + drops["BT3"] + drops["BT4"] + drops["GR2"] + drops["GR3"] + drops["GR4"] + drops["GR5"]

In [107]:
print(round(drops["male"].mean()/5,2),"drops on average per day for all male sharks when feeding took place")
print(round(drops["female"].mean()/7,2),"drops on average per day for all female sharks when feeding took place")

0.28 drops on average per day for all male sharks when feeding took place
0.3 drops on average per day for all female sharks when feeding took place


In [108]:
All_Sharks_drops = drops[['Ross', 'Chandler','BT1','BT2','BT3','BT4','BT5','GR1','GR2','GR3','GR4', 'GR5']]
drop_means = pd.DataFrame(round(All_Sharks_drops.mean(), 3)).reset_index()
drop_means.columns = ["Shark","Mean"]
drop_means = drop_means.sort_values(by = "Mean", ascending = False)
drop_means
px.bar(drop_means, x = "Shark", y = "Mean", title = "Drop Means for Each Shark")

In [109]:
All_Sharks_drops = drops[["female","male"]]
All_Sharks_drops["female"] = All_Sharks_drops["female"] / 7
All_Sharks_drops["male"] = All_Sharks_drops["male"] / 5
drop_means = pd.DataFrame(round(All_Sharks_drops.mean(), 3)).reset_index()
drop_means.columns = ["Gender","Mean"]
drop_means = drop_means.sort_values(by = "Mean", ascending = False)
print(drop_means) 
px.bar(drop_means, x = "Gender", y = "Mean", title = "How often is the average shark dropping for each gender?")

   Gender   Mean
0  female  0.297
1    male  0.281




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [110]:
All_Sharks_drops = drops[["All_GR","All_BT","All_SS"]]
All_Sharks_drops["All_GR"] = All_Sharks_drops["All_GR"] / 5
All_Sharks_drops["All_BT"] = All_Sharks_drops["All_BT"] / 5
All_Sharks_drops["All_SS"] = All_Sharks_drops["All_SS"] / 2
All_Sharks_drops.columns = ["Grey Reef","Black Tip","Sand Shark"]
drop_means = pd.DataFrame(round(All_Sharks_drops.mean(), 3)).reset_index()
drop_means.columns = ["Species","Mean"]
drop_means = drop_means.sort_values(by = "Mean", ascending = False)
print(drop_means)
px.bar(drop_means, x = "Species", y = "Mean", title = "Drop Means for Each species on average")

      Species   Mean
1   Black Tip  0.498
2  Sand Shark  0.146
0   Grey Reef  0.141




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [112]:
All_Sharks_drops = drops.iloc[:,1:]
All_Sharks_drops["All_GR"] = All_Sharks_drops["All_GR"] / 5
All_Sharks_drops["All_BT"] = All_Sharks_drops["All_BT"] / 5
All_Sharks_drops["All_SS"] = All_Sharks_drops["All_SS"] / 2
All_Sharks_drops["female"] = All_Sharks_drops["female"] / 7
All_Sharks_drops["male"] = All_Sharks_drops["male"] / 5
All_Sharks_drops["Total: "] = All_Sharks_drops["Total: "] / 12
drop_means = pd.DataFrame(round(All_Sharks_drops.mean(), 3)).reset_index()
drop_means.columns = ["Subset","Mean"]
drop_means = drop_means.sort_values(by = "Mean", ascending = False)
ci = {}
for column in drops.iloc[:,1:].columns:
    ci.update({column:mean_confidence_error(drops[column])})
ci = pd.DataFrame(list(ci.items()),columns = ['Subset','SE'])
drop_means = pd.merge(drop_means, ci, how = 'outer', on = "Subset")
#drop_means = pd.merge(drop_means,pd.melt(drops.describe().loc[["std"]], var_name = "Subset",value_name = "Standard Deviation"), how = "outer", on = "Subset")
prop_average = drop_means[drop_means["Subset"]=="Total: "]["Mean"].values[0]
#drop_means["color"] = "blue"
Average = []
for i in drop_means.Mean:
    if i > prop_average:
        Average.append("Above Average")
    elif i == prop_average:
        Average.append("Average")
    else:
        Average.append("Below Average")
drop_means["Average"] = Average
max_y = (drop_means["Mean"] + drop_means["SE"]).max() 
#max_y = (drop_means["Mean"] + drop_means["Standard Deviation"]).max() 
px.bar(drop_means, x = "Subset", y = "Mean", color = "Average", error_y = "SE",title = "Drop Means for each subset with Confidence Intervals")

In [113]:
All_Sharks_drops = drops.iloc[:,1:]
All_Sharks_drops["All_GR"] = All_Sharks_drops["All_GR"] / 5
All_Sharks_drops["All_BT"] = All_Sharks_drops["All_BT"] / 5
All_Sharks_drops["All_SS"] = All_Sharks_drops["All_SS"] / 2
All_Sharks_drops["female"] = All_Sharks_drops["female"] / 7
All_Sharks_drops["male"] = All_Sharks_drops["male"] / 5
All_Sharks_drops["Total: "] = All_Sharks_drops["Total: "] / 12
drop_means = pd.DataFrame(round(All_Sharks_drops.mean(), 3)).reset_index()
drop_means.columns = ["Subset","Mean"]
drop_means = drop_means.sort_values(by = "Mean", ascending = False)

drop_means = pd.merge(drop_means,pd.melt(drops.describe().loc[["std"]], var_name = "Subset",value_name = "Standard Deviation"), how = "outer", on = "Subset")
prop_average = drop_means[drop_means["Subset"]=="Total: "]["Mean"].values[0]
Average = []
for i in drop_means.Mean:
    if i > prop_average:
        Average.append("Above Average")
    elif i == prop_average:
        Average.append("Average")
    else:
        Average.append("Below Average")
drop_means["Average"] = Average
max_y = (drop_means["Mean"] + drop_means["Standard Deviation"]).max() 
px.bar(drop_means, x = "Subset", y = "Mean", color = "Average", error_y = "Standard Deviation",range_y = [0,max_y],title = "Drop Means for each subset with 1 standard deviation")

In [35]:
drop_prop = drops.iloc[:,1:]

In [36]:
props = {}
for i in drop_prop.columns:
    data = drop_prop[i]
    dropped_days = data > 0
    props.update({i:(round(len(data[dropped_days])/len(data),2))})
props = pd.DataFrame(list(props.items()),columns = ['Subset of Data','Proportion of Days Dropped'])

In [37]:
print(props.iloc[12].values[1],"is the overall proportion of days with a drop by a shark")

0.84 is the overall proportion of days with a drop by a shark


In [38]:
drop_prop_species = props.iloc[13:16,:].rename(columns = {"Subset of Data":"Species"}).sort_values(by = "Proportion of Days Dropped", ascending = False)
drop_prop_gender = props.iloc[-2:,:].rename(columns = {"Subset of Data":"Gender"}).sort_values(by = "Proportion of Days Dropped", ascending = False)
drop_prop_sharks = props.iloc[0:12,:].rename(columns = {"Subset of Data":"Shark"}).sort_values(by = "Proportion of Days Dropped", ascending = False)

In [39]:
px.bar(drop_prop_species, x = "Species", y = "Proportion of Days Dropped", title = "What proportion of days does each species drop?")

In [40]:
px.bar(drop_prop_gender, x = "Gender", y = "Proportion of Days Dropped", title = "What proportion of days does each gender drop?")

In [41]:
px.bar(drop_prop_sharks, x = "Shark", y = "Proportion of Days Dropped",title = "What proportion of days does each shark drop?")

# Targeting Data -Michael's Section

In [42]:
targets

Unnamed: 0,Targets,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total:,Unnamed: 14
0,Date:,,,,,,,,,,,,,,
1,2017-12-05 00:00:00,3.0,0.0,4.0,3.0,0.0,2.0,2.0,1.0,1.0,3.0,3.0,0.0,22.0,
2,2017-12-07 00:00:00,4.0,5.0,2.0,2.0,5.0,3.0,5.0,0.0,0.0,2.0,2.0,0.0,30.0,
3,2017-12-09 00:00:00,,,,,,,,1.0,3.0,1.0,0.0,3.0,8.0,
4,2017-12-10 00:00:00,4.0,7.0,0.0,2.0,2.0,0.0,0.0,,,,,,15.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578,2020-09-22 00:00:00,6.0,1.0,4.0,6.0,5.0,4.0,7.0,2.0,1.0,5.0,4.0,1.0,46.0,
579,2020-09-24 00:00:00,8.0,18.0,2.0,9.0,10.0,10.0,11.0,5.0,6.0,3.0,7.0,9.0,98.0,
580,2020-09-26 00:00:00,,,,,,,,6.0,2.0,3.0,5.0,8.0,24.0,
581,2020-09-27 00:00:00,10.0,19.0,9.0,10.0,8.0,16.0,3.0,,,,,,75.0,


In [43]:
targets = targets.iloc[1:,:-1]
targets.rename(columns = {"Unnamed: 14":'Etc. Comments', "Targets": "Date"}, inplace = True)

#get rid of null values at end of dataset
targets = targets.head(582)

# Change date to just date format, not datetime
targets['Date'] = pd.to_datetime(targets['Date']).dt.date

targets.fillna(0, inplace = True)

targets.tail()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total:
578,2020-09-22,6.0,1.0,4.0,6.0,5.0,4.0,7.0,2.0,1.0,5.0,4.0,1.0,46.0
579,2020-09-24,8.0,18.0,2.0,9.0,10.0,10.0,11.0,5.0,6.0,3.0,7.0,9.0,98.0
580,2020-09-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,2.0,3.0,5.0,8.0,24.0
581,2020-09-27,10.0,19.0,9.0,10.0,8.0,16.0,3.0,0.0,0.0,0.0,0.0,0.0,75.0
582,2020-09-29,15.0,5.0,6.0,6.0,4.0,4.0,7.0,4.0,1.0,4.0,4.0,1.0,61.0


In [44]:
# Individual mean number of pieces eaten
All_Sharks = targets[['Ross', 'Chandler','BT1','BT2','BT3','BT4','BT5','GR1','GR2','GR3','GR4', 'GR5']]
All_mean = round(All_Sharks.mean(), 3)
All_mean

Ross        7.168
Chandler    6.790
BT1         6.905
BT2         5.285
BT3         5.361
BT4         5.809
BT5         4.983
GR1         1.878
GR2         2.405
GR3         3.662
GR4         2.098
GR5         2.802
dtype: float64

In [45]:
target_means = {'Shark': All_Sharks.columns,
                'Mean': All_mean}

target_mean_df = pd.DataFrame(target_means, columns = ['Shark', 'Mean'])
target_mean_sorted = target_mean_df.sort_values(by=['Mean'], ascending = False)
px.histogram(target_mean_sorted, x="Shark", y="Mean", 
             title = "Mean Number of Fish Eaten by Shark",
             labels={
                     "Shark": "Shark",
                     "Mean": " Mean Number of Fish",
                     })


In [46]:
# The mean for 
round(targets['Total: '].mean()/12, 3)

4.596

In [47]:
targets["All_GR"] = targets["GR1"] + targets["GR2"] + targets["GR3"]+ targets["GR4"] + targets["GR5"]
targets["All_BT"] = targets["BT1"] + targets["BT2"] + targets["BT3"]+ targets["BT4"] + targets["BT5"]
targets["All_SS"] = targets["Ross"] + targets["Chandler"]

In [48]:
print(round(targets["All_GR"].mean()/5,2),"targets on average per day for each Grey Reef Sharks when feeding took place")
print(round(targets["All_BT"].mean()/5,2),"targets on average per day for each Black Tip Sharks when feeding took place")
print(round(targets["All_SS"].mean()/2,2),"targets on average per day for each Sand Sharks when feeding took place")

2.57 targets on average per day for each Grey Reef Sharks when feeding took place
5.67 targets on average per day for each Black Tip Sharks when feeding took place
6.98 targets on average per day for each Sand Sharks when feeding took place


In [49]:
targets["male"] = targets["BT1"] + targets["BT5"] + targets["GR1"] + targets["Ross"] + targets["Chandler"]
targets["female"] = targets["BT2"] + targets["BT3"] + targets["BT4"] + targets["GR2"] + targets["GR3"] + targets["GR4"] + targets["GR5"]

In [50]:
print(round(targets["male"].mean()/5,2),"targets on average per day for all male sharks when feeding took place")
print(round(targets["female"].mean()/7,2),"targets on average per day for all female sharks when feeding took place")

5.55 targets on average per day for all male sharks when feeding took place
3.92 targets on average per day for all female sharks when feeding took place


In [51]:
targets

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total:,All_GR,All_BT,All_SS,male,female
1,2017-12-05,3.0,0.0,4.0,3.0,0.0,2.0,2.0,1.0,1.0,3.0,3.0,0.0,22.0,8.0,11.0,3.0,10.0,12.0
2,2017-12-07,4.0,5.0,2.0,2.0,5.0,3.0,5.0,0.0,0.0,2.0,2.0,0.0,30.0,4.0,17.0,9.0,16.0,14.0
3,2017-12-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,0.0,3.0,8.0,8.0,0.0,0.0,1.0,7.0
4,2017-12-10,4.0,7.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,4.0,11.0,11.0,4.0
5,2017-12-12,3.0,1.0,3.0,2.0,4.0,1.0,1.0,1.0,1.0,5.0,1.0,3.0,26.0,11.0,11.0,4.0,9.0,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578,2020-09-22,6.0,1.0,4.0,6.0,5.0,4.0,7.0,2.0,1.0,5.0,4.0,1.0,46.0,13.0,26.0,7.0,20.0,26.0
579,2020-09-24,8.0,18.0,2.0,9.0,10.0,10.0,11.0,5.0,6.0,3.0,7.0,9.0,98.0,30.0,42.0,26.0,44.0,54.0
580,2020-09-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,2.0,3.0,5.0,8.0,24.0,24.0,0.0,0.0,6.0,18.0
581,2020-09-27,10.0,19.0,9.0,10.0,8.0,16.0,3.0,0.0,0.0,0.0,0.0,0.0,75.0,0.0,46.0,29.0,41.0,34.0


In [52]:
all_sharks_targets = targets[["All_GR","All_BT","All_SS"]]
all_sharks_targets["All_GR"] = all_sharks_targets["All_GR"] / 5
all_sharks_targets["All_BT"] = all_sharks_targets["All_BT"] / 5
all_sharks_targets["All_SS"] = all_sharks_targets["All_SS"] / 2
all_sharks_targets.columns = ["Grey Reef","Black Tip","Sand Shark"]
target_means = pd.DataFrame(round(all_sharks_targets.mean(), 3)).reset_index()
target_means.columns = ["Species","Mean"]
target_means = target_means.sort_values(by = "Mean", ascending = False)
print(target_means)
px.bar(target_means, x = "Species", y = "Mean", title = "Target Means for Each species on average")

      Species   Mean
2  Sand Shark  6.979
1   Black Tip  5.669
0   Grey Reef  2.569




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [53]:
targets

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total:,All_GR,All_BT,All_SS,male,female
1,2017-12-05,3.0,0.0,4.0,3.0,0.0,2.0,2.0,1.0,1.0,3.0,3.0,0.0,22.0,8.0,11.0,3.0,10.0,12.0
2,2017-12-07,4.0,5.0,2.0,2.0,5.0,3.0,5.0,0.0,0.0,2.0,2.0,0.0,30.0,4.0,17.0,9.0,16.0,14.0
3,2017-12-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,0.0,3.0,8.0,8.0,0.0,0.0,1.0,7.0
4,2017-12-10,4.0,7.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,4.0,11.0,11.0,4.0
5,2017-12-12,3.0,1.0,3.0,2.0,4.0,1.0,1.0,1.0,1.0,5.0,1.0,3.0,26.0,11.0,11.0,4.0,9.0,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578,2020-09-22,6.0,1.0,4.0,6.0,5.0,4.0,7.0,2.0,1.0,5.0,4.0,1.0,46.0,13.0,26.0,7.0,20.0,26.0
579,2020-09-24,8.0,18.0,2.0,9.0,10.0,10.0,11.0,5.0,6.0,3.0,7.0,9.0,98.0,30.0,42.0,26.0,44.0,54.0
580,2020-09-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,2.0,3.0,5.0,8.0,24.0,24.0,0.0,0.0,6.0,18.0
581,2020-09-27,10.0,19.0,9.0,10.0,8.0,16.0,3.0,0.0,0.0,0.0,0.0,0.0,75.0,0.0,46.0,29.0,41.0,34.0


In [54]:
all_sharks_targets = targets.iloc[:,1:]
all_sharks_targets["All_GR"] = all_sharks_targets["All_GR"] / 5
all_sharks_targets["All_BT"] = all_sharks_targets["All_BT"] / 5
all_sharks_targets["All_SS"] = all_sharks_targets["All_SS"] / 2
all_sharks_targets["female"] = all_sharks_targets["female"] / 7
all_sharks_targets["male"] = all_sharks_targets["male"] / 5
all_sharks_targets["Total: "] = all_sharks_targets["Total: "] / 12
target_means = pd.DataFrame(round(all_sharks_targets.mean(), 3)).reset_index()
target_means.columns = ["Subset","Mean"]
target_means = target_means.sort_values(by = "Mean", ascending = False)
target_means = pd.merge(target_means,pd.melt(targets.describe().loc[["std"]], var_name = "Subset",value_name = "Standard Deviation"), how = "outer", on = "Subset")

prop_average = target_means[target_means["Subset"]=="Total: "]["Mean"].values[0]
#target_means["color"] = "blue"
Average = []
for i in target_means.Mean:
    if i > prop_average:
        Average.append("Above Average")
    elif i == prop_average:
        Average.append("Average")
    else:
        Average.append("Below Average")
target_means["Average"] = Average
max_y = (target_means["Mean"] + target_means["Standard Deviation"]).max()    
px.bar(target_means, x = "Subset", y = "Mean", color = "Average", error_y = "Standard Deviation", range_y = [0,max_y],title = "Target Means for each subset with 1 standard deviation")

In [55]:
all_sharks_targets = targets.iloc[:,1:]
all_sharks_targets["All_GR"] = all_sharks_targets["All_GR"] / 5
all_sharks_targets["All_BT"] = all_sharks_targets["All_BT"] / 5
all_sharks_targets["All_SS"] = all_sharks_targets["All_SS"] / 2
all_sharks_targets["female"] = all_sharks_targets["female"] / 7
all_sharks_targets["male"] = all_sharks_targets["male"] / 5
all_sharks_targets["Total: "] = all_sharks_targets["Total: "] / 12
target_means = pd.DataFrame(round(all_sharks_targets.mean(), 3)).reset_index()
target_means.columns = ["Subset","Mean"]
target_means = target_means.sort_values(by = "Mean", ascending = False)
ci = {}
for column in targets.iloc[:,1:].columns:
    ci.update({column:mean_confidence_error(targets[column])})
ci = pd.DataFrame(list(ci.items()),columns = ['Subset','SE'])
target_means = pd.merge(target_means, ci, how = 'outer', on  = 'Subset')
prop_average = target_means[target_means["Subset"]=="Total: "]["Mean"].values[0]
Average = []
for i in target_means.Mean:
    if i > prop_average:
        Average.append("Above Average")
    elif i == prop_average:
        Average.append("Average")
    else:
        Average.append("Below Average")
target_means["Average"] = Average  
px.bar(target_means, x = "Subset", y = "Mean", color = "Average", error_y = "SE",title = "Target Means for each subset with Confidence Intervals")

In [56]:
# How many days did the sharks target?

columns = list(All_Sharks) 
  
for i in columns: 
    Days_Targeted = All_Sharks[i] > 0
    print(i, All_Sharks[i][Days_Targeted].count())

Ross 427
Chandler 413
BT1 431
BT2 430
BT3 429
BT4 429
BT5 418
GR1 352
GR2 375
GR3 394
GR4 353
GR5 376


In [57]:
# Proportion of Days Each Shark Targeted Out of Total Days
columns = list(All_Sharks)
proportions = []
sharks_target_proportion_per_day = {}
  
for i in columns: 
    Days_Targeted = All_Sharks[i] > 0
    prop = round((All_Sharks[i][Days_Targeted].count())/All_Sharks[i].count(), 3)
    proportions.append(prop)
proportions

[0.734,
 0.71,
 0.741,
 0.739,
 0.737,
 0.737,
 0.718,
 0.605,
 0.644,
 0.677,
 0.607,
 0.646]

In [58]:
proportions_dict = {'Shark' : columns,
                    'Proportions' : proportions}

sharks_target_proportion_per_day_df = pd.DataFrame(proportions_dict, columns = ['Shark', 'Proportions'])

proportions_sorted = sharks_target_proportion_per_day_df.sort_values(by=['Proportions'], ascending = False)
px.histogram(proportions_sorted, x="Shark", y="Proportions", 
             title = "Proportion of Days Where The Shark Targeted",
             labels={
                     "Shark": "Shark",
                     "Proportions": "Proportions",
                     })


In [59]:
# Look at the proportion of days eaten by each species

# Proportion of Days Each Shark Targeted Out of Total Days
subsets_targets = targets[['All_GR', 'All_BT', 'All_SS', 'male', 'female']]
columns = list(subsets_targets)

for i in columns: 
    Days_Targeted = subsets_targets[i] > 0
    print(i, subsets_targets[i][Days_Targeted].count())


All_GR 430
All_BT 437
All_SS 437
male 558
female 579


In [60]:
# Proportion of Days Each Shark Targeted Out of Total Days
proportions = []
  
for i in columns: 
    Days_Targeted = subsets_targets[i] > 0
    prop = round((subsets_targets[i][Days_Targeted].count())/subsets_targets[i].count(), 3)
    proportions.append(prop)
proportions



[0.739, 0.751, 0.751, 0.959, 0.995]

In [61]:
subset_props = {'Subsets': ['All_GR', 'All_BT', 'All_SS', 'male', 'female'],
        'proportions': proportions}

subset_props_df = pd.DataFrame(subset_props, columns = ['Subsets', 'proportions'])
subset_props_df

Unnamed: 0,Subsets,proportions
0,All_GR,0.739
1,All_BT,0.751
2,All_SS,0.751
3,male,0.959
4,female,0.995


In [62]:
fig = go.Figure(data=[go.Table(
    header=dict(values=list(subset_props_df.columns),
                fill_color='lightskyblue',
                align='left'),
    cells=dict(values=[subset_props_df.Subsets, subset_props_df.proportions],
               fill_color='lightcyan',
               align='left'))
])
fig.update_layout(width=500, height=400)
fig.show()

In [63]:
drop_prop_species = drop_prop_species.rename(columns = {'Proportion of Days Dropped':'Proportion_of_Days_Dropped'})

In [64]:
fig = go.Figure(data=[go.Table(
    header=dict(values=list(drop_prop_species.columns),
                fill_color='lightskyblue',
                align='left'),
    cells=dict(values=[drop_prop_species.Species, drop_prop_species.Proportion_of_Days_Dropped],
               fill_color='lightcyan',
               align='left'))
])
fig.update_layout(width=550, height=400)
fig.show()

In [65]:
drop_prop_gender=drop_prop_gender.rename(columns = {'Proportion of Days Dropped':'Proportion_of_Days_Dropped'})

In [66]:
fig = go.Figure(data=[go.Table(
    header=dict(values=list(drop_prop_gender.columns),
                fill_color='lightskyblue',
                align='left'),
    cells=dict(values=[drop_prop_gender.Gender, drop_prop_gender.Proportion_of_Days_Dropped],
               fill_color='lightcyan',
               align='left'))
])
fig.update_layout(width=550, height=400)
fig.show()

## Looking at the data through time

In [67]:
targets_by_month = targets.groupby(by = [pd.to_datetime(drops['Drops']).dt.year,pd.to_datetime(targets['Date']).dt.month]).sum().reset_index()
targets_by_month["Dates"] = pd.to_datetime(targets_by_month["Drops"].astype(str)+'-'+targets_by_month["Date"].astype(str))
targets_by_month = targets_by_month.iloc[:,[14,20]]
#This line is in case you want to show multiple sharks or groups
#drops_by_month = pd.melt(drops_by_month, id_vars = "Dates", var_name = "Shark", value_name = "Total Dropped")
#This line graph is also shown on the overall line graph but if you want you can look at it too
px.line(targets_by_month, x = "Dates", y = "Total: ", title = "Total Targets Through Time")

In [68]:
eating_by_month = eat.groupby(by = [pd.to_datetime(drops['Drops']).dt.year,pd.to_datetime(targets['Date']).dt.month]).sum().reset_index()
eating_by_month["Dates"] = pd.to_datetime(eating_by_month["Drops"].astype(str)+'-'+eating_by_month["Date"].astype(str))
eating_by_month = eating_by_month.iloc[:,[14,21]]
#This line is in case you want to show multiple sharks or groups
#drops_by_month = pd.melt(drops_by_month, id_vars = "Dates", var_name = "Shark", value_name = "Total Dropped")
#This line graph is also shown on the overall line graph but if you want you can look at it too
px.line(eating_by_month, x = "Dates", y = "Total: ", title = "Total Pieces Eaten Through Time")

In [69]:
drops_by_month = drops.groupby(by = [pd.to_datetime(eat['Date']).dt.year,pd.to_datetime(drops['Drops']).dt.month]).sum().reset_index()
drops_by_month["Dates"] = pd.to_datetime(drops_by_month["Date"].astype(str)+'-'+drops_by_month["Drops"].astype(str))
drops_by_month = drops_by_month.iloc[:,[14,20]]
#This line is in case you want to show multiple sharks or groups
#drops_by_month = pd.melt(drops_by_month, id_vars = "Dates", var_name = "Shark", value_name = "Total Dropped")
#This line graph is also shown on the overall line graph but if you want you can look at it too
px.line(drops_by_month, x = "Dates", y = "Total: ", title = "Total Drops Through Time")

In [70]:
all_by_month = pd.merge(pd.merge(eating_by_month,drops_by_month, how = 'outer', on = "Dates"),targets_by_month, how = 'outer', on = "Dates").rename(columns = {"Total: ":"Targets","Total: _x":"Eaten Pieces","Total: _y":"Dropped"})
all_by_month = pd.melt(all_by_month, id_vars = "Dates", var_name = "Variable", value_name = "Total")
px.line(all_by_month, x = "Dates", y = "Total", color = "Variable", title = "How Has the Sharks Changed Over Time With Eating, Targeting, and Dropping?")

## Here is how long the sharks are fasting

In [71]:
fast_days = pd.DataFrame()
fast_data = eat.iloc[:,1:-8]
for i in fast_data.columns:
    data = fast_data[i]
    data = pd.concat([eat.Date,data], axis = 1)
    fast = []
    row_num = -1
    for pieces in data.iloc[:,1]:
        row_num +=1
        if pieces > 0:
            fast.append(data.iloc[row_num,0])
    fast = pd.Series(fast, name = i).diff(1).dt.days
    fast_days = pd.concat([fast_days, fast], axis = 1)
fast_days = fast_days[1:]
fast_days

Unnamed: 0,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5
1,3.0,5.0,5.0,2.0,5.0,5.0,7.0,5.0,2.0,4.0,4.0,9.0
2,2.0,2.0,4.0,3.0,2.0,2.0,2.0,5.0,7.0,3.0,3.0,2.0
3,2.0,5.0,3.0,2.0,5.0,2.0,3.0,4.0,2.0,2.0,2.0,7.0
4,3.0,2.0,2.0,2.0,2.0,3.0,2.0,7.0,3.0,5.0,5.0,3.0
5,2.0,2.0,7.0,3.0,2.0,4.0,2.0,7.0,4.0,2.0,4.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...
372,3.0,,,,,,,,,,,
373,3.0,,,,,,,,,,,
374,2.0,,,,,,,,,,,
375,5.0,,,,,,,,,,,


In [72]:
fast_days.describe().loc[["count","mean","max"]]

Unnamed: 0,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5
count,376.0,336.0,323.0,354.0,330.0,366.0,369.0,281.0,285.0,306.0,317.0,312.0
mean,2.731383,3.0625,3.185759,2.90678,3.118182,2.811475,2.788618,3.647687,3.585965,3.352941,3.246057,3.288462
max,9.0,12.0,14.0,14.0,14.0,9.0,7.0,24.0,21.0,17.0,17.0,23.0


In [73]:
fast_days_means = pd.melt(fast_days.describe().loc[["mean"]], var_name = "Shark", value_name = "Mean").sort_values(by = "Mean", ascending = False)
fast_days_means["Mean"] = round(fast_days_means["Mean"],2)
fast_days_means

Unnamed: 0,Shark,Mean
7,GR1,3.65
8,GR2,3.59
9,GR3,3.35
11,GR5,3.29
10,GR4,3.25
2,BT1,3.19
4,BT3,3.12
1,Chandler,3.06
3,BT2,2.91
5,BT4,2.81


In [74]:
px.bar(fast_days_means, x = "Shark", y = "Mean", title = "How long are the sharks fasting on average?")
print(fast_days_means.min())
print(fast_days_means.max())

Shark     BT1
Mean     2.73
dtype: object
Shark    Ross
Mean     3.65
dtype: object


In [75]:
fast_days_max = pd.melt(fast_days.describe().loc[["max"]], var_name = "Shark", value_name = "Days").sort_values(by = "Days", ascending = False)
fast_days_max["Days"] = round(fast_days_max["Days"],2)
px.bar(fast_days_max, x = "Shark", y = "Days", title = "What is the longest that each shark will go fasting?")

In [76]:
px.box(pd.melt(fast_days, var_name = "Shark", value_name = "Days"), x = "Shark", y = "Days", category_orders = {"Shark":fast_days_max["Shark"]}, title = "Side by Side Barplots of Each Shark's Fasting")

In [77]:
px.box(pd.melt(fast_days, var_name = "Shark", value_name = "Days"), x = "Shark", y = "Days", category_orders = {"Shark":fast_days_means["Shark"]}, title = "Side by Side Barplots of Each Shark's Fasting")

## Correlations between the groups

In [78]:
#This shows all the correlations for each group
drop_target = {}
drop_eat = {}
eat_target = {}
for column in drops.columns[1:]:
    drop_target.update({column:drops[column].corr(targets[column])})
    drop_eat.update({column:drops[column].corr(eat[column])})
    eat_target.update({column:eat[column].corr(targets[column])})
drop_target = pd.DataFrame(list(drop_target.items()),columns = ['Subset','Correlation of Drops and Targets'])
drop_eat = pd.DataFrame(list(drop_eat.items()),columns = ['Subset','Correlation of Drops and Eating'])
eat_target = pd.DataFrame(list(eat_target.items()),columns = ['Subset','Correlation of Eating and Targets'])
correlations = round(pd.merge(pd.merge(drop_target, drop_eat, how = 'outer', on = 'Subset'),eat_target, how = 'outer', on = 'Subset'),2)

In [79]:
correlations

Unnamed: 0,Subset,Correlation of Drops and Targets,Correlation of Drops and Eating,Correlation of Eating and Targets
0,Ross,0.14,0.11,0.57
1,Chandler,0.29,0.21,0.64
2,BT1,0.34,0.23,0.44
3,BT2,0.29,0.17,0.5
4,BT3,0.29,0.16,0.52
5,BT4,0.22,0.16,0.54
6,BT5,0.32,0.3,0.63
7,GR1,0.13,0.15,0.46
8,GR2,0.16,0.16,0.52
9,GR3,0.27,0.21,0.57


In [80]:
correlations.iloc[:12].describe().loc[["mean"]]

Unnamed: 0,Correlation of Drops and Targets,Correlation of Drops and Eating,Correlation of Eating and Targets
mean,0.23,0.175,0.5375


In [81]:
overall_corr = pd.concat([pd.Series(["Dropping:Eating","Dropping:Targeting","Eating:Targeting"], name = "Variable"),pd.Series([round(drops["Total: "].corr(eat["Total: "]),2),round(drops["Total: "].corr(targets["Total: "]),2),round(targets["Total: "].corr(eat["Total: "]),2)], name = "Correlations")], axis = 1)
overall_corr

Unnamed: 0,Variable,Correlations
0,Dropping:Eating,0.27
1,Dropping:Targeting,0.34
2,Eating:Targeting,0.67


In [82]:
fig = go.Figure(data=[go.Table(
    header=dict(values=list(overall_corr.columns),
                fill_color='lightskyblue',
                align='left'),
    cells=dict(values=[overall_corr.Variable, overall_corr.Correlations],
               fill_color='lightcyan',
               align='left'))
])
fig.update_layout(width=550, height=400)
fig.show()