In [1]:
# Import Packages
import pandas as pd
from patsy import dmatrices
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import datetime

In [2]:
# Read in shark data
eat = pd.read_excel("SharkData.xlsx", sheet_name = 0)
targets = pd.read_excel("SharkData.xlsx", sheet_name = 1)
drops = pd.read_excel("SharkData.xlsx", sheet_name = 2)
other_factors = pd.read_excel("SharkData.xlsx", sheet_name = 3)

# Data Cleaning

In [3]:
# Clean eating dataset
eat = eat.iloc[1:,:-1]
eat.rename(columns = {"Unnamed: 14":'Etc. Comments', "Pieces Eaten": "Date", 'Total: ' : 'Total'}, inplace = True)

#get rid of null values at end of dataset
eat = eat.head(582)

# Change date to just date format, not datetime
eat['Date'] = pd.to_datetime(eat['Date']).dt.date



eat.reset_index(drop=True,inplace = True)
eat.head()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total,Etc. Comments
0,2017-12-05,0.0,5.0,3.0,6.0,5.0,4.0,4.0,0.0,0.0,2.0,3.0,1.0,33.0,
1,2017-12-07,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,
2,2017-12-09,,,,,,,,2.0,0.0,3.0,1.0,0.0,6.0,
3,2017-12-10,5.0,1.0,1.0,3.0,2.0,2.0,0.0,,,,,,14.0,
4,2017-12-12,2.0,2.0,0.0,5.0,2.0,4.0,5.0,0.0,2.0,3.0,2.0,0.0,27.0,


In [4]:
# Clean Drops Dataset
drops = drops.iloc[1:,:-1]
drops['Drops'] = pd.to_datetime(drops['Drops']).dt.date
drops.rename(columns = {"Drops" : "Date", "Total: ":"Total"}, inplace = True)
drops.reset_index(drop=True,inplace = True)
drops.tail()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total
577,2020-09-22,0.0,0.0,1.0,4.0,0.0,2.0,2.0,0.0,0.0,0.0,1.0,0.0,10.0
578,2020-09-24,0.0,0.0,0.0,3.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,7.0
579,2020-09-26,,,,,,,,0.0,0.0,0.0,0.0,1.0,1.0
580,2020-09-27,0.0,0.0,2.0,0.0,0.0,0.0,1.0,,,,,,3.0
581,2020-09-29,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,4.0


In [5]:
# Clean Targets Dataset
targets = targets.iloc[1:,:-1]
targets.rename(columns = {"Unnamed: 14":'Etc. Comments', "Targets": "Date", "Total: ": "Total"}, inplace = True)

#get rid of null values at end of dataset
targets = targets.head(582)

# Change date to just date format, not datetime
targets['Date'] = pd.to_datetime(targets['Date']).dt.date

targets.reset_index(drop=True,inplace = True)
targets.tail()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,GR3,GR4,GR5,Total
577,2020-09-22,6.0,1.0,4.0,6.0,5.0,4.0,7.0,2.0,1.0,5.0,4.0,1.0,46.0
578,2020-09-24,8.0,18.0,2.0,9.0,10.0,10.0,11.0,5.0,6.0,3.0,7.0,9.0,98.0
579,2020-09-26,,,,,,,,6.0,2.0,3.0,5.0,8.0,24.0
580,2020-09-27,10.0,19.0,9.0,10.0,8.0,16.0,3.0,,,,,,75.0
581,2020-09-29,15.0,5.0,6.0,6.0,4.0,4.0,7.0,4.0,1.0,4.0,4.0,1.0,61.0


# Other Factors Data Cleaning and Dummy Variable Creation for Foods


In [6]:
dummy_factors = other_factors[['Saury', 'Blue Runner',
                'Squid', 'Mackerel', 'Herring', 'Sardine',
                'Mazuri Vitamins', 'Garlic', 'Salmon', 'Bonito', 'Bluefish', 'Mahi',
                    'Goggle Eye', 'Humbolt Squid']]
other_dummies = other_factors[['BT/SB Location (1-6)','GR Location (1-6)']]
other_dummies.rename(columns= {"BT/SB Location (1-6)": "BT_SB_Location", "GR Location (1-6)": "GR_Location"}, inplace = True)
for i in dummy_factors:
    dummy_factors[i] = dummy_factors[i].fillna(0)
dummy_factors.rename(columns = {"Blue Runner": "Blue_Runner", "Mazuri Vitamins": "Mazuri_Vitamins", 
                               "Goggle Eye": "Goggle_Eye", "Humbolt Squid": "Humbolt_Squid"}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dummy_factors[i] = dummy_factors[i].fillna(0)


# Combine Datasets to get all factors to perform regression on

In [7]:
# Add seasons variables
def season_of_date(date):
    date = pd.to_datetime(date)
    year = str(date.year)
    seasons = {'spring': pd.date_range(start=year+'/03/21', end=year+'/06/20'),
               'summer': pd.date_range(start=year+'/06/21', end=year+'/09/22'),
               'fall': pd.date_range(start=year+'/09/23', end=year+'/12/20')}
    if date in seasons['spring']:
        return 'spring'
    if date in seasons['summer']:
        return 'summer'
    if date in seasons['fall']:
        return 'fall'
    else:
        return 'winter'

In [8]:
eat_summer = eat[eat.Date.map(season_of_date)=="summer"]
eat_summer['summer'] = 1
eat_summer['fall'] = 0
eat_summer['spring'] = 0
eat_fall = eat[eat.Date.map(season_of_date)=="fall"]
eat_fall['summer'] = 0
eat_fall['fall'] = 1
eat_fall['spring'] = 0
eat_winter = eat[eat.Date.map(season_of_date)=="winter"]
eat_winter['summer'] = 0
eat_winter['fall'] = 0
eat_winter['spring'] = 0
eat_spring = eat[eat.Date.map(season_of_date)=="spring"]
eat_spring['summer'] = 0
eat_spring['fall'] = 0
eat_spring['spring'] = 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eat_summer['summer'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eat_summer['fall'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eat_summer['spring'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the 

In [9]:
eat_all_seasons = pd.concat([eat_summer,eat_fall,eat_winter,eat_spring])
eat_all_seasons_sort = eat_all_seasons.sort_values(by=["Date"])
eat_season_vars = eat_all_seasons_sort[['summer', 'fall', 'spring']]

In [10]:
drops_summer = drops[eat.Date.map(season_of_date)=="summer"]
drops_summer['summer'] = 1
drops_summer['fall'] = 0
drops_summer['spring'] = 0
drops_fall = drops[eat.Date.map(season_of_date)=="fall"]
drops_fall['summer'] = 0
drops_fall['fall'] = 1
drops_fall['spring'] = 0
drops_winter = drops[eat.Date.map(season_of_date)=="winter"]
drops_winter['summer'] = 0
drops_winter['fall'] = 0
drops_winter['spring'] = 0
drops_spring = drops[eat.Date.map(season_of_date)=="spring"]
drops_spring['summer'] = 0
drops_spring['fall'] = 0
drops_spring['spring'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  drops_summer['summer'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  drops_summer['fall'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  drops_summer['spring'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

Se

In [11]:
drops_all_seasons = pd.concat([drops_summer,drops_fall,drops_winter,drops_spring])
drops_all_seasons_sort = drops_all_seasons.sort_values(by=["Date"])
drops_season_vars = drops_all_seasons_sort[['summer', 'fall', 'spring']]

In [12]:
targets_summer = targets[eat.Date.map(season_of_date)=="summer"]
targets_summer['summer'] = 1
targets_summer['fall'] = 0
targets_summer['spring'] = 0
targets_fall = targets[eat.Date.map(season_of_date)=="fall"]
targets_fall['summer'] = 0
targets_fall['fall'] = 1
targets_fall['spring'] = 0
targets_winter = targets[eat.Date.map(season_of_date)=="winter"]
targets_winter['summer'] = 0
targets_winter['fall'] = 0
targets_winter['spring'] = 0
targets_spring = targets[eat.Date.map(season_of_date)=="spring"]
targets_spring['summer'] = 0
targets_spring['fall'] = 0
targets_spring['spring'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  targets_summer['summer'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  targets_summer['fall'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  targets_summer['spring'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value inste

In [13]:
targets_all_seasons = pd.concat([targets_summer,targets_fall,targets_winter,targets_spring])
targets_all_seasons_sort = targets_all_seasons.sort_values(by=["Date"])
targets_season_vars = targets_all_seasons_sort[['summer', 'fall', 'spring']]

In [14]:
# combine eat data
eat_concat1 = pd.concat([eat,dummy_factors,other_dummies, eat_season_vars], axis = 1)
eat_concat = pd.concat([eat_concat1, other_factors["Temperature"]], axis = 1)
eat_concat.head()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,...,Bluefish,Mahi,Goggle_Eye,Humbolt_Squid,BT_SB_Location,GR_Location,summer,fall,spring,Temperature
0,2017-12-05,0.0,5.0,3.0,6.0,5.0,4.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,3.0,0,1,0,73.2
1,2017-12-07,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,3.0,0,1,0,73.2
2,2017-12-09,,,,,,,,2.0,0.0,...,0.0,0.0,0.0,0.0,,3.0,0,1,0,73.6
3,2017-12-10,5.0,1.0,1.0,3.0,2.0,2.0,0.0,,,...,0.0,0.0,0.0,0.0,1.0,,0,1,0,73.6
4,2017-12-12,2.0,2.0,0.0,5.0,2.0,4.0,5.0,0.0,2.0,...,0.0,0.0,0.0,0.0,1.0,3.0,0,1,0,73.4


In [120]:
GroupFeed = []
for row in drops.index:
    if drops.iloc[row,:].isna().sum() > 0:
        GroupFeed.append(0)
    else:
        GroupFeed.append(1)
GroupFeed

#you could do something like this to add it to a dataframe
#eat["GroupFeed"] = GroupFeed


[1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,


In [15]:
# Add eat variable for Covid shut down
start_date = pd.to_datetime('03-14-2020')
end_date = pd.to_datetime('05-09-2020')
conditions = [ (eat_concat['Date'] >= start_date) & (eat_concat['Date'] <= end_date),
              (eat_concat['Date'] < start_date) | (eat_concat['Date'] > end_date)]
values = [1, 0]
eat_concat["covid"] = np.select(conditions, values)

In [16]:
# Add eat variable for light training
light_start = pd.to_datetime('02-15-2018')
conditions_light = [ (eat_concat['Date'] >= light_start), (eat_concat['Date'] < light_start)]
values_light = [1, 0]
eat_concat["light_training"] = np.select(conditions_light, values_light)

In [17]:
# combine drops data
drops_concat1 = pd.concat([drops,dummy_factors,other_dummies, drops_season_vars], axis = 1)
drops_concat = pd.concat([drops_concat1, other_factors["Temperature"]], axis = 1)
drops_concat.head()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,...,Bluefish,Mahi,Goggle_Eye,Humbolt_Squid,BT_SB_Location,GR_Location,summer,fall,spring,Temperature
0,2017-12-05,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,3.0,0,1,0,73.2
1,2017-12-07,0.0,1.0,1.0,2.0,1.0,1.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,3.0,0,1,0,73.2
2,2017-12-09,,,,,,,,0.0,1.0,...,0.0,0.0,0.0,0.0,,3.0,0,1,0,73.6
3,2017-12-10,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,...,0.0,0.0,0.0,0.0,1.0,,0,1,0,73.6
4,2017-12-12,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,3.0,0,1,0,73.4


In [18]:
# Add drop variable for Covid shut down
start_date = pd.to_datetime('03-14-2020')
end_date = pd.to_datetime('05-09-2020')
conditions = [ (drops_concat['Date'] >= start_date) & (drops_concat['Date'] <= end_date),
              (drops_concat['Date'] < start_date) | (drops_concat['Date'] > end_date)]
values = [1, 0]
drops_concat["covid"] = np.select(conditions, values)

In [19]:
# Add drop variable for light training
light_start = pd.to_datetime('02-15-2018')
conditions_light = [ (drops_concat['Date'] >= light_start), (drops_concat['Date'] < light_start)]
values_light = [1, 0]
drops_concat["light_training"] = np.select(conditions_light, values_light)

In [20]:
# combine Target data
targets_concat1 = pd.concat([targets,dummy_factors,other_dummies, targets_season_vars], axis = 1)
targets_concat = pd.concat([targets_concat1, other_factors["Temperature"]], axis = 1)
targets_concat.head()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,...,Bluefish,Mahi,Goggle_Eye,Humbolt_Squid,BT_SB_Location,GR_Location,summer,fall,spring,Temperature
0,2017-12-05,3.0,0.0,4.0,3.0,0.0,2.0,2.0,1.0,1.0,...,0.0,0.0,0.0,0.0,1.0,3.0,0,1,0,73.2
1,2017-12-07,4.0,5.0,2.0,2.0,5.0,3.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,3.0,0,1,0,73.2
2,2017-12-09,,,,,,,,1.0,3.0,...,0.0,0.0,0.0,0.0,,3.0,0,1,0,73.6
3,2017-12-10,4.0,7.0,0.0,2.0,2.0,0.0,0.0,,,...,0.0,0.0,0.0,0.0,1.0,,0,1,0,73.6
4,2017-12-12,3.0,1.0,3.0,2.0,4.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,1.0,3.0,0,1,0,73.4


In [21]:
# Add target variable for Covid shut down
start_date = pd.to_datetime('03-14-2020')
end_date = pd.to_datetime('05-09-2020')
conditions = [ (targets_concat['Date'] >= start_date) & (targets_concat['Date'] <= end_date),
              (targets_concat['Date'] < start_date) | (targets_concat['Date'] > end_date)]
values = [1, 0]
targets_concat["covid"] = np.select(conditions, values)

In [22]:
# Add target variable for light training
light_start = pd.to_datetime('02-15-2018')
conditions_light = [ (targets_concat['Date'] >= light_start), (targets_concat['Date'] < light_start)]
values_light = [1, 0]
targets_concat["light_training"] = np.select(conditions_light, values_light)

# Subset Creation

In [23]:

# All SS Subset
All_SS = eat_concat[['Ross', 'Chandler']]

# All BT Subset
All_BT = eat_concat[['BT1', 'BT2', 'BT3', 'BT4', 'BT5']]

# All GR Subset
All_GR = eat_concat[['GR1', 'GR2', 'GR3', 'GR4', 'GR5']]

# Male Subset
Male = eat_concat[["BT1","BT5","GR1","Ross","Chandler"]]

# Female Subset
female = eat_concat[["BT2","BT3","BT4","GR2","GR3","GR4","GR5"]]

# Append all subsets to original datasets
# eat dataset
eat_concat["All_GR"] = eat['GR1'] + eat['GR2'] + eat['GR3'] + eat['GR4'] + eat['GR5']
eat_concat["All_BT"] = eat['BT1'] + eat['BT2'] + eat['BT3'] + eat['BT4'] + eat['BT5']
eat_concat["All_SS"] = eat['Ross'] + eat['Chandler']
eat_concat["male"] = eat[["BT1","BT5","GR1","Ross","Chandler"]].sum(axis = 1)
eat_concat["female"] = eat[["BT2","BT3","BT4","GR2","GR3","GR4","GR5"]].sum(axis = 1)

# drop dataset
drops_concat["All_GR"] = drops['GR1'] + drops['GR2'] + drops['GR3'] + drops['GR4'] + drops['GR5']
drops_concat["All_BT"] = drops['BT1'] + drops['BT2'] + drops['BT3'] + drops['BT4'] + drops['BT5']
drops_concat["All_SS"] = drops['Ross'] + drops['Chandler']
drops_concat["male"] = drops[["BT1","BT5","GR1","Ross","Chandler"]].sum(axis = 1)
drops_concat["female"] = drops[["BT2","BT3","BT4","GR2","GR3","GR4","GR5"]].sum(axis = 1)

# target dataset
targets_concat["All_GR"] = targets['GR1'] + targets['GR2'] + targets['GR3'] + targets['GR4'] + targets['GR5']
targets_concat["All_BT"] = targets['BT1'] + targets['BT2'] + targets['BT3'] + targets['BT4'] + targets['BT5']
targets_concat["All_SS"] = targets['Ross'] + targets['Chandler']
targets_concat["male"] = targets[["BT1","BT5","GR1","Ross","Chandler"]].sum(axis = 1)
targets_concat["female"] = targets[["BT2","BT3","BT4","GR2","GR3","GR4","GR5"]].sum(axis = 1)


# Create Tables for Poisson Regression Time Series For Pieces Eaten For All Sharks

In [24]:
# create additional time variables for eating
eat_concat['Date'] = pd.to_datetime(eat_concat['Date'])
eat_concat['Day_of_week'] = pd.to_datetime(eat_concat['Date']).dt.dayofweek
eat_concat['Month'] = pd.to_datetime(eat_concat['Date']).dt.month
eat_concat['Day'] = pd.to_datetime(eat_concat['Date']).dt.day
eat_concat.head()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,...,covid,light_training,All_GR,All_BT,All_SS,male,female,Day_of_week,Month,Day
0,2017-12-05,0.0,5.0,3.0,6.0,5.0,4.0,4.0,0.0,0.0,...,0,0,6.0,22.0,5.0,12.0,21.0,1,12,5
1,2017-12-07,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0.0,2.0,1.0,1.0,2.0,3,12,7
2,2017-12-09,,,,,,,,2.0,0.0,...,0,0,6.0,,,2.0,4.0,5,12,9
3,2017-12-10,5.0,1.0,1.0,3.0,2.0,2.0,0.0,,,...,0,0,,8.0,6.0,7.0,7.0,6,12,10
4,2017-12-12,2.0,2.0,0.0,5.0,2.0,4.0,5.0,0.0,2.0,...,0,0,7.0,16.0,4.0,9.0,18.0,1,12,12


In [25]:
# Create training and testing datasets
mask = np.random.rand(len(eat_concat)) < 0.8
eat_train = eat_concat[mask]
eat_test = eat_concat[~mask]
print('Training data set length='+str(len(eat_train)))
print('Testing data set length='+str(len(eat_test)))

Training data set length=465
Testing data set length=117


In [26]:
# setup Poisson regression expression
expr = """Total ~ Day_of_week + Month + Day + Temperature + Saury + Blue_Runner + 
Squid + Mackerel + Herring + Sardine + Mazuri_Vitamins + Garlic + Salmon + Bonito + Bluefish +
Mahi + Goggle_Eye + Humbolt_Squid + BT_SB_Location + GR_Location + covid + light_training + summer + fall + spring"""

In [27]:
y_train, X_train = dmatrices(expr, eat_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, eat_test, return_type='dataframe')

In [28]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  Total   No. Observations:                  233
Model:                            GLM   Df Residuals:                      207
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -781.86
Date:                Sun, 06 Dec 2020   Deviance:                       388.56
Time:                        11:41:26   Pearson chi2:                     361.
No. Iterations:                     4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           2.3517      1.176     

Day of week, Month, temperature, Saury, Blue_runner, Herring, light training, summer, and fall were all significant predictors for the number of pieces eaten for all sharks

# Poisson Regression for Number of Drops for All Sharks

In [29]:
# create additional time variables for dropping
drops_concat['Date'] = pd.to_datetime(drops_concat['Date'])
drops_concat['Day_of_week'] = pd.to_datetime(drops_concat['Date']).dt.dayofweek
drops_concat['Month'] = pd.to_datetime(drops_concat['Date']).dt.month
drops_concat['Day'] = pd.to_datetime(drops_concat['Date']).dt.day
drops_concat.head()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,...,covid,light_training,All_GR,All_BT,All_SS,male,female,Day_of_week,Month,Day
0,2017-12-05,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0,0,2.0,1.0,0.0,0.0,3.0,1,12,5
1,2017-12-07,0.0,1.0,1.0,2.0,1.0,1.0,2.0,0.0,0.0,...,0,0,2.0,7.0,1.0,4.0,6.0,3,12,7
2,2017-12-09,,,,,,,,0.0,1.0,...,0,0,1.0,,,0.0,1.0,5,12,9
3,2017-12-10,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,...,0,0,,1.0,0.0,1.0,0.0,6,12,10
4,2017-12-12,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0,0,1.0,2.0,0.0,0.0,3.0,1,12,12


In [30]:
# Create training and testing datasets
mask = np.random.rand(len(drops_concat)) < 0.8
drops_train = drops_concat[mask]
drops_test = drops_concat[~mask]
print('Training data set length='+str(len(drops_train)))
print('Testing data set length='+str(len(drops_test)))

Training data set length=466
Testing data set length=116


In [31]:
y_train, X_train = dmatrices(expr, drops_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, drops_test, return_type='dataframe')

In [32]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  Total   No. Observations:                  228
Model:                            GLM   Df Residuals:                      202
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -519.42
Date:                Sun, 06 Dec 2020   Deviance:                       325.00
Time:                        11:41:27   Pearson chi2:                     324.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          10.1988      2.985     

Day of week, Month, Temperature, Blue runner, Mackerel, Garlic, and Bluefish were all significant predictors for the number of drops for all sharks

# Poisson Regression for Number of Targets for All Sharks

In [33]:
# create additional time variables for Targeting
targets_concat['Date'] = pd.to_datetime(targets_concat['Date'])
targets_concat['Day_of_week'] = pd.to_datetime(targets_concat['Date']).dt.dayofweek
targets_concat['Month'] = pd.to_datetime(targets_concat['Date']).dt.month
targets_concat['Day'] = pd.to_datetime(targets_concat['Date']).dt.day
targets_concat.head()

Unnamed: 0,Date,Ross,Chandler,BT1,BT2,BT3,BT4,BT5,GR1,GR2,...,covid,light_training,All_GR,All_BT,All_SS,male,female,Day_of_week,Month,Day
0,2017-12-05,3.0,0.0,4.0,3.0,0.0,2.0,2.0,1.0,1.0,...,0,0,8.0,11.0,3.0,10.0,12.0,1,12,5
1,2017-12-07,4.0,5.0,2.0,2.0,5.0,3.0,5.0,0.0,0.0,...,0,0,4.0,17.0,9.0,16.0,14.0,3,12,7
2,2017-12-09,,,,,,,,1.0,3.0,...,0,0,8.0,,,1.0,7.0,5,12,9
3,2017-12-10,4.0,7.0,0.0,2.0,2.0,0.0,0.0,,,...,0,0,,4.0,11.0,11.0,4.0,6,12,10
4,2017-12-12,3.0,1.0,3.0,2.0,4.0,1.0,1.0,1.0,1.0,...,0,0,11.0,11.0,4.0,9.0,17.0,1,12,12


In [34]:
# Create training and testing datasets
mask = np.random.rand(len(targets_concat)) < 0.8
targets_train = targets_concat[mask]
targets_test = targets_concat[~mask]
print('Training data set length='+str(len(targets_train)))
print('Testing data set length='+str(len(targets_test)))

Training data set length=459
Testing data set length=123


In [35]:
y_train, X_train = dmatrices(expr, targets_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, targets_test, return_type='dataframe')

In [36]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  Total   No. Observations:                  229
Model:                            GLM   Df Residuals:                      203
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -1322.0
Date:                Sun, 06 Dec 2020   Deviance:                       1274.3
Time:                        11:41:28   Pearson chi2:                 1.29e+03
No. Iterations:                     4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           1.6985      0.793     

Day of week, Day of month, Blue runner, Mackerel, Herring, Mazuri vitamins, garlic, salmon, humbolt squid, covid, spring, light training were all significant predictors for number of targets for all sharks

# Poisson Regression Across Different Species of Sharks

# Sandbar Sharks

In [37]:
expr = """All_SS ~ Day_of_week + Month + Day + Temperature + Saury + Blue_Runner + 
Squid + Mackerel + Herring + Sardine + Mazuri_Vitamins + Garlic + Salmon + Bonito + Bluefish +
Mahi + Goggle_Eye + Humbolt_Squid + BT_SB_Location + GR_Location + covid + light_training + summer + fall + spring"""

In [38]:
eat_concatSS = eat_concat[eat_concat['All_SS'].notna()]

In [39]:
# Create training and testing datasets
mask = np.random.rand(len(eat_concatSS)) < 0.8
eat_train = eat_concatSS[mask]
eat_test = eat_concatSS[~mask]
print('Training data set length='+str(len(eat_train)))
print('Testing data set length='+str(len(eat_test)))

Training data set length=355
Testing data set length=84


Eating:

In [40]:
y_train, X_train = dmatrices(expr, eat_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, eat_test, return_type='dataframe')

In [41]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 All_SS   No. Observations:                  238
Model:                            GLM   Df Residuals:                      212
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -593.80
Date:                Sun, 06 Dec 2020   Deviance:                       412.34
Time:                        11:41:29   Pearson chi2:                     374.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           7.9037      2.705     

Garlic, Bonito, Bluefish, Location,summer, spring and light training were all significant predictors for the number of fish eaten for Sandbars

Dropping:

In [42]:
drops_concatSS = drops_concat[drops_concat['All_SS'].notna()]

In [43]:
# Create training and testing datasets
mask = np.random.rand(len(drops_concatSS)) < 0.8
drops_train = drops_concatSS[mask]
drops_test = drops_concatSS[~mask]
print('Training data set length='+str(len(drops_train)))
print('Testing data set length='+str(len(drops_test)))

Training data set length=350
Testing data set length=88


In [44]:
y_train, X_train = dmatrices(expr, drops_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, drops_test, return_type='dataframe')

In [45]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 All_SS   No. Observations:                  233
Model:                            GLM   Df Residuals:                      207
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -167.81
Date:                Sun, 06 Dec 2020   Deviance:                       207.82
Time:                        11:41:29   Pearson chi2:                     273.
No. Iterations:                    21                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           1.5048     10.804     

Goggle eye, spring, and light training were the only significant predictors for the number of drops for the Sandbar sharks

Targets:

In [46]:
targets_concatSS = targets_concat[targets_concat['All_SS'].notna()]

In [47]:
# Create training and testing datasets
mask = np.random.rand(len(targets_concatSS)) < 0.8
targets_train = targets_concatSS[mask]
targets_test = targets_concatSS[~mask]
print('Training data set length='+str(len(targets_train)))
print('Testing data set length='+str(len(targets_test)))

Training data set length=354
Testing data set length=83


In [48]:
y_train, X_train = dmatrices(expr, targets_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, targets_test, return_type='dataframe')

In [49]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 All_SS   No. Observations:                  230
Model:                            GLM   Df Residuals:                      204
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -944.44
Date:                Sun, 06 Dec 2020   Deviance:                       868.81
Time:                        11:41:30   Pearson chi2:                     837.
No. Iterations:                     4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           5.4861      1.575     

Mazuri Vitamins, garlic, salmon, bluefish, goggle eye, location, and light training were all significant predictors for number of targets for sandbar sharks

# Blacktip Sharks

Eating:

In [50]:
expr = """All_BT ~ Day_of_week + Month + Day + Temperature + Saury + Blue_Runner + 
Squid + Mackerel + Herring + Sardine + Mazuri_Vitamins + Garlic + Salmon + Bonito + Bluefish +
Mahi + Goggle_Eye + Humbolt_Squid + BT_SB_Location + GR_Location + covid + light_training + summer + fall + spring"""

In [51]:
eat_concatBT = eat_concat[eat_concat['All_BT'].notna()]

In [52]:
# Create training and testing datasets
mask = np.random.rand(len(eat_concatBT)) < 0.8
eat_train = eat_concatBT[mask]
eat_test = eat_concatBT[~mask]
print('Training data set length='+str(len(eat_train)))
print('Testing data set length='+str(len(eat_test)))

Training data set length=338
Testing data set length=98


In [53]:
y_train, X_train = dmatrices(expr, eat_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, eat_test, return_type='dataframe')

In [54]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 All_BT   No. Observations:                  234
Model:                            GLM   Df Residuals:                      208
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -708.97
Date:                Sun, 06 Dec 2020   Deviance:                       395.06
Time:                        11:41:31   Pearson chi2:                     391.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          -4.0286      1.684     

Temperature, Blue Runner, Mackerel, Mazuri vitamins, and light training were all significant predictors for pieces eaten for Black Tips

Dropping:

In [55]:
drops_concatBT = drops_concat[drops_concat['All_BT'].notna()]

In [56]:
# Create training and testing datasets
mask = np.random.rand(len(drops_concatBT)) < 0.8
drops_train = drops_concatBT[mask]
drops_test = drops_concatBT[~mask]
print('Training data set length='+str(len(drops_train)))
print('Testing data set length='+str(len(drops_test)))

Training data set length=359
Testing data set length=77


In [57]:
y_train, X_train = dmatrices(expr, drops_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, drops_test, return_type='dataframe')

In [58]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 All_BT   No. Observations:                  243
Model:                            GLM   Df Residuals:                      217
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -518.55
Date:                Sun, 06 Dec 2020   Deviance:                       359.25
Time:                        11:41:32   Pearson chi2:                     360.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           6.6594      3.165     

Day of week, Temperature, Saury, Garlic, Bluefish, humbolt squid, covid, and light training were all significant predictors for the number of drops for Black Tips.

Targeting:

In [59]:
targets_concatBT = targets_concat[targets_concat['All_BT'].notna()]

In [60]:
# Create training and testing datasets
mask = np.random.rand(len(targets_concatBT)) < 0.8
targets_train = targets_concatBT[mask]
targets_test = targets_concatBT[~mask]
print('Training data set length='+str(len(targets_train)))
print('Testing data set length='+str(len(targets_test)))

Training data set length=346
Testing data set length=91


In [61]:
y_train, X_train = dmatrices(expr, targets_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, targets_test, return_type='dataframe')

In [62]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 All_BT   No. Observations:                  229
Model:                            GLM   Df Residuals:                      203
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -1106.9
Date:                Sun, 06 Dec 2020   Deviance:                       994.61
Time:                        11:41:32   Pearson chi2:                     999.
No. Iterations:                     4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          -0.3366      1.078     

Month, Day of month, Saury, Squid, Mackerel, Herring, Salmon, Bonito, location, covid, summer, spring and light training were all significant predictors for the number of targets for Black Tips

# Gray Reef Sharks

Eating:

In [63]:
expr = """All_GR ~ Day_of_week + Month + Day + Temperature + Saury + Blue_Runner + 
Squid + Mackerel + Herring + Sardine + Mazuri_Vitamins + Garlic + Salmon + Bonito + Bluefish +
Mahi + Goggle_Eye + Humbolt_Squid + BT_SB_Location + GR_Location + covid + light_training + summer + fall + spring"""

In [64]:
eat_concatGR = eat_concat[eat_concat['All_GR'].notna()]

In [65]:
# Create training and testing datasets
mask = np.random.rand(len(eat_concatGR)) < 0.8
eat_train = eat_concatGR[mask]
eat_test = eat_concatGR[~mask]
print('Training data set length='+str(len(eat_train)))
print('Testing data set length='+str(len(eat_test)))

Training data set length=345
Testing data set length=92


In [66]:
y_train, X_train = dmatrices(expr, eat_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, eat_test, return_type='dataframe')

In [67]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 All_GR   No. Observations:                  239
Model:                            GLM   Df Residuals:                      213
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -653.85
Date:                Sun, 06 Dec 2020   Deviance:                       458.04
Time:                        11:41:33   Pearson chi2:                     410.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           1.4009      2.088     

Saury, Blue runner, Squid, Mackerel, Herring, Bonito, Humbolt Squid, summer, fall, and light training were all significant predictors for number of fish eaten for Gray Reefs

Dropping:

In [68]:
drops_concatGR = drops_concat[drops_concat['All_GR'].notna()]

In [69]:
# Create training and testing datasets
mask = np.random.rand(len(drops_concatGR)) < 0.8
drops_train = drops_concatGR[mask]
drops_test = drops_concatGR[~mask]
print('Training data set length='+str(len(drops_train)))
print('Testing data set length='+str(len(drops_test)))

Training data set length=348
Testing data set length=90


In [70]:
y_train, X_train = dmatrices(expr, drops_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, drops_test, return_type='dataframe')

In [71]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 All_GR   No. Observations:                  226
Model:                            GLM   Df Residuals:                      200
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -295.09
Date:                Sun, 06 Dec 2020   Deviance:                       311.50
Time:                        11:41:34   Pearson chi2:                     314.
No. Iterations:                     6                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           6.9423      6.763     

Blue runner and light training were the only significant predictors for number of drops for Gray Reefs

Targeting:

In [72]:
targets_concatGR = targets_concat[targets_concat['All_GR'].notna()]

In [73]:
# Create training and testing datasets
mask = np.random.rand(len(targets_concatGR)) < 0.8
targets_train = targets_concatGR[mask]
targets_test = targets_concatGR[~mask]
print('Training data set length='+str(len(targets_train)))
print('Testing data set length='+str(len(targets_test)))

Training data set length=346
Testing data set length=91


In [74]:
y_train, X_train = dmatrices(expr, targets_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, targets_test, return_type='dataframe')

In [75]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 All_GR   No. Observations:                  233
Model:                            GLM   Df Residuals:                      207
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -910.82
Date:                Sun, 06 Dec 2020   Deviance:                       810.45
Time:                        11:41:35   Pearson chi2:                     783.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          -4.6608      1.513     

Day of week, Temperature, Saury, Blue runner, Squid, Mackerel, Herring, Mazuri vitamins, Bluefish, humbolt squid, location, covid, fall, and light training were all significant predictors for the number of targets for Gray Reefs

# Poisson Regression for Each Gender of Shark

# Male

Eating:

In [76]:
expr = """male ~ Day_of_week + Month + Day + Temperature + Saury + Blue_Runner + 
Squid + Mackerel + Herring + Sardine + Mazuri_Vitamins + Garlic + Salmon + Bonito + Bluefish +
Mahi + Goggle_Eye + Humbolt_Squid + BT_SB_Location + GR_Location + covid + light_training + summer + fall + spring"""

In [77]:
eat_concatM = eat_concat[eat_concat['male'].notna()]

In [78]:
# Create training and testing datasets
mask = np.random.rand(len(eat_concatM)) < 0.8
eat_train = eat_concatM[mask]
eat_test = eat_concatM[~mask]
print('Training data set length='+str(len(eat_train)))
print('Testing data set length='+str(len(eat_test)))

Training data set length=468
Testing data set length=114


In [79]:
y_train, X_train = dmatrices(expr, eat_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, eat_test, return_type='dataframe')

In [80]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                   male   No. Observations:                  239
Model:                            GLM   Df Residuals:                      213
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -671.55
Date:                Sun, 06 Dec 2020   Deviance:                       318.72
Time:                        11:41:35   Pearson chi2:                     305.
No. Iterations:                     4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           7.1089      1.769     

Month, Herring, Bluefish, BT_SB location, summer, and light training were all significant predictors for the number of fish eaten for male sharks

Dropping:

In [81]:
drops_concatM = drops_concat[drops_concat['male'].notna()]

In [82]:
# Create training and testing datasets
mask = np.random.rand(len(drops_concatM)) < 0.8
drops_train = drops_concatM[mask]
drops_test = drops_concatM[~mask]
print('Training data set length='+str(len(drops_train)))
print('Testing data set length='+str(len(drops_test)))

Training data set length=471
Testing data set length=111


In [83]:
y_train, X_train = dmatrices(expr, drops_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, drops_test, return_type='dataframe')

In [84]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                   male   No. Observations:                  242
Model:                            GLM   Df Residuals:                      216
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -414.05
Date:                Sun, 06 Dec 2020   Deviance:                       339.54
Time:                        11:41:36   Pearson chi2:                     316.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          -1.6982      4.254     

Day of week, Squid, BT_SB location, and light training were all significant predictors for number of pieces dropped for male sharks

Targetting:

In [85]:
targets_concatM = targets_concat[targets_concat['male'].notna()]

In [86]:
# Create training and testing datasets
mask = np.random.rand(len(targets_concatM)) < 0.8
targets_train = targets_concatM[mask]
targets_test = targets_concatM[~mask]
print('Training data set length='+str(len(targets_train)))
print('Testing data set length='+str(len(targets_test)))

Training data set length=472
Testing data set length=110


In [87]:
y_train, X_train = dmatrices(expr, targets_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, targets_test, return_type='dataframe')

In [88]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                   male   No. Observations:                  239
Model:                            GLM   Df Residuals:                      213
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -1109.0
Date:                Sun, 06 Dec 2020   Deviance:                       949.62
Time:                        11:41:37   Pearson chi2:                     949.
No. Iterations:                     4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           4.3328      1.010     

Sardine, Day of week, Month, Day of month, Squid, Mackerel, Herring, Mazuri Vitamins, Salmon, Bonito, Goggle eye, humbolt squid, BT_SB location, covid, spring, and light training were all significant predictors for the number of targets for male sharks

# Female

Eating:

In [89]:
expr = """female ~ Day_of_week + Month + Day + Temperature + Saury + Blue_Runner + 
Squid + Mackerel + Herring + Sardine + Mazuri_Vitamins + Garlic + Salmon + Bonito + Bluefish +
Mahi + Goggle_Eye + Humbolt_Squid + BT_SB_Location + GR_Location + covid + light_training + summer + fall + spring"""

In [90]:
eat_concatF = eat_concat[eat_concat['female'].notna()]

In [91]:
# Create training and testing datasets
mask = np.random.rand(len(eat_concatF)) < 0.8
eat_train = eat_concatF[mask]
eat_test = eat_concatF[~mask]
print('Training data set length='+str(len(eat_train)))
print('Testing data set length='+str(len(eat_test)))

Training data set length=468
Testing data set length=114


In [92]:
y_train, X_train = dmatrices(expr, eat_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, eat_test, return_type='dataframe')

In [93]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 female   No. Observations:                  228
Model:                            GLM   Df Residuals:                      202
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -678.32
Date:                Sun, 06 Dec 2020   Deviance:                       357.50
Time:                        11:41:38   Pearson chi2:                     338.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          -3.9935      1.725     

Sardine, Temperature, Saury, Blue runner, Mackerel, Herring, humbolt squid, summer, fall, and BT_SB locations were all significant predictors for number of pieces eaten for female sharks

Dropping:

In [94]:
drops_concatF = drops_concat[drops_concat['female'].notna()]

In [95]:
# Create training and testing datasets
mask = np.random.rand(len(drops_concatF)) < 0.8
drops_train = drops_concatF[mask]
drops_test = drops_concatF[~mask]
print('Training data set length='+str(len(drops_train)))
print('Testing data set length='+str(len(drops_test)))

Training data set length=466
Testing data set length=116


In [96]:
y_train, X_train = dmatrices(expr, drops_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, drops_test, return_type='dataframe')

In [97]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 female   No. Observations:                  234
Model:                            GLM   Df Residuals:                      208
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -449.83
Date:                Sun, 06 Dec 2020   Deviance:                       319.52
Time:                        11:41:38   Pearson chi2:                     306.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           8.2053      3.948     

Sardine, Day of week, Month, Temperature, Blue runner, Squid, Herring, Bluefish, BT_SB Location, GR_location, and covid were all significant predictors for the number of pieces dropped for female sharks

Targetting:

In [98]:
targets_concatF = targets_concat[targets_concat['female'].notna()]

In [99]:
# Create training and testing datasets
mask = np.random.rand(len(targets_concatF)) < 0.8
targets_train = targets_concatF[mask]
targets_test = targets_concatF[~mask]
print('Training data set length='+str(len(targets_train)))
print('Testing data set length='+str(len(targets_test)))

Training data set length=446
Testing data set length=136


In [100]:
y_train, X_train = dmatrices(expr, targets_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, targets_test, return_type='dataframe')

In [101]:
# Fit model and print results of model
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 female   No. Observations:                  221
Model:                            GLM   Df Residuals:                      195
Model Family:                 Poisson   Df Model:                           25
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -946.40
Date:                Sun, 06 Dec 2020   Deviance:                       727.13
Time:                        11:41:39   Pearson chi2:                     724.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          -3.1488      1.094     

Temperature, Blue runner, Mackerel, Herring, Bluefish, Humbolt squid, BT_SB location, spring, and light training were all significant predictors for the number of targets for female sharks