# Notebook showing how to correctly calculate CPUE for CCFRP data

In [1]:
# Imports

import numpy as np
import pandas as pd

In [3]:
# Load data

# Grid-level occurrence table; every later cell in this notebook works from `occ`.
occ = pd.read_csv('CCFRP_grid-level_occurrence.csv')

# MoF table (comma is already read_csv's default delimiter).
# NOTE(review): the ParserError shown in this cell's output (expected 4 fields,
# saw 5 on line 480633) presumably comes from this file, since `occ` is usable
# in the following cells — the offending row needs fixing before `mof` loads.
mof = pd.read_csv('CCFRP_grid-level_mof.csv')

ParserError: Error tokenizing data. C error: Expected 4 fields in line 480633, saw 5


**Note** that I've now included an `organismQuantity` column in occ that contains CPUE for each species during each survey. This was previously only in the MoF file, requiring a merge to get it paired with other survey-level data.

In [4]:
# Preview the first five occurrence rows to see which columns are available
occ.head(n=5)

Unnamed: 0,eventID,eventDate,datasetID,locality,locationRemarks,countryCode,footprintWKT,decimalLatitude,decimalLongitude,coordinateUncertaintyInMeters,...,scientificName,scientificNameID,taxonID,nameAccordingTo,identificationQualifier,occurrenceStatus,basisOfRecord,individualCount,organismQuantity,organismQuantityType
0,AIM09082002,2020-09-08,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01885605 -119.3689897, 34.0219688...",34.020412,-119.372326,354,...,Paralabrax nebulifer,urn:lsid:marinespecies.org:taxname:282059,282059,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
1,AIM09082002,2020-09-08,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01885605 -119.3689897, 34.0219688...",34.020412,-119.372326,354,...,Myliobatis californica,urn:lsid:marinespecies.org:taxname:271485,271485,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
2,AIM09082002,2020-09-08,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01885605 -119.3689897, 34.0219688...",34.020412,-119.372326,354,...,Hippoglossina stomata,urn:lsid:marinespecies.org:taxname:275827,275827,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
3,AIM09082002,2020-09-08,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01885605 -119.3689897, 34.0219688...",34.020412,-119.372326,354,...,Sebastes chrysomelas,urn:lsid:marinespecies.org:taxname:240737,240737,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
4,AIM09082002,2020-09-08,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01885605 -119.3689897, 34.0219688...",34.020412,-119.372326,354,...,Sebastes melanops,urn:lsid:marinespecies.org:taxname:274817,274817,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...


In this data set, `eventID` contains all the information about when and where a survey was done (site code, date, grid cell number). You'll see that each survey has 93 rows, each recording the number of fish caught for one species category.

In [10]:
## Every survey (eventID) should have exactly 93 species rows

EXPECTED_ROWS_PER_SURVEY = 93  # number of species rows recorded for each survey

# count() tallies the non-null scientificName entries within each eventID
num_sp_looked_for = occ.groupby('eventID')['scientificName'].count()

# Vectorised check instead of iterating the Series with the builtin any();
# bool(...) keeps the displayed result a plain True/False.
# True would mean at least one survey deviates from the expected row count.
bool(num_sp_looked_for.ne(EXPECTED_ROWS_PER_SURVEY).any())

False

In [11]:
## The distinct scientific names that appear in the occurrence table

occ.loc[:, 'scientificName'].unique()

array(['Paralabrax nebulifer', 'Myliobatis californica',
       'Hippoglossina stomata', 'Sebastes chrysomelas',
       'Sebastes melanops', 'Chromis punctipinnis', 'Sebastes mystinus',
       'Sebastes paucispinis', 'Sebastes auriculatus', 'Enophrys bison',
       'Enophrys taurina', 'Scorpaenichthys marmoratus', 'Sebastes dalli',
       'Paralichthys californicus', 'Synodus lucioceps',
       'Scorpaena guttata', 'Semicossyphus pulcher', 'Sebastes pinniger',
       'Sebastes goodei', 'Sebastes nebulosus',
       'Oncorhynchus tshawytscha', 'Alopias vulpinus',
       'Sebastes caurinus', 'Sebastes diaconus', 'Xystreurys liolepis',
       'Sebastes lentiginosus', 'Hypsypops rubicundus',
       'Stereolepis gigas', 'Sebastes carnatus', 'Sebastes rastrelliger',
       'Medialuna californiensis', 'Sebastes umbrosus',
       'Trachurus symmetricus', 'Atherinopsis californiensis',
       'Paralabrax clathratus', 'Hexagrammos decagrammus',
       'Sebastes atrovirens', 'Ophiodon elongatus',


**Note** that this list contains only 91 unique names, even though each survey has 93 rows. Two names, `Sebastes` and `Sebastes mystinus`, each appear in two rows per survey because of uncertainty in the naming categories in the original data (e.g. when a fish could have been either *S. serranoides* or *S. flavidus*, it gets generalized to `Sebastes`).

```python
occ[(occ['eventID'] == 'AIM09082002') & (occ['scientificName'] == 'Sebastes mystinus')]
```

In [22]:
## Total CPUE for a single survey: add up the per-species CPUE rows

is_example_survey = occ['eventID'].eq('AIM09082002')
occ.loc[is_example_survey, 'organismQuantity'].sum()

23.190184048000003

In [24]:
## Same per-survey total, computed for every eventID at once

# reset_index() turns the eventID group labels back into an ordinary column
occ.groupby('eventID')['organismQuantity'].sum().reset_index()

Unnamed: 0,eventID,organismQuantity
0,AIM09082002,23.190184
1,AIM09082006,16.408163
2,AIM09102005,21.333333
3,AIM09102008,7.272727
4,AIM09152001,18.981233
...,...,...
2408,TMR09061721,2.000000
2409,TMR10311702,5.666667
2410,TMR10311713,8.426966
2411,TMR10311722,17.865169


In [46]:
## Per-survey total CPUE, with place and date parts broken out into columns

# eventID looks like 'AIM09082002': characters 7-8 hold the two-digit year and
# everything from position 9 onward is the grid cell number.
cpue = (
    occ
    .groupby(['eventID', 'locality', 'locationRemarks'], as_index=False)['organismQuantity']
    .sum()
    .assign(
        year=lambda survey: survey['eventID'].str[7:9],
        gridCellID=lambda survey: survey['eventID'].str[9:],
    )
)
cpue

Unnamed: 0,eventID,locality,locationRemarks,organismQuantity,year,gridCellID
0,AIM09082002,Anacapa Island,marine protected area,23.190184,20,02
1,AIM09082006,Anacapa Island,marine protected area,16.408163,20,06
2,AIM09102005,Anacapa Island,marine protected area,21.333333,20,05
3,AIM09102008,Anacapa Island,marine protected area,7.272727,20,08
4,AIM09152001,Anacapa Island,marine protected area,18.981233,20,01
...,...,...,...,...,...,...
2408,TMR09061721,Ten Mile,fished area,2.000000,17,21
2409,TMR10311702,Ten Mile,fished area,5.666667,17,02
2410,TMR10311713,Ten Mile,fished area,8.426966,17,13
2411,TMR10311722,Ten Mile,fished area,17.865169,17,22


In [47]:
# Summed CPUE for the Anacapa Island surveys from 2019 (year is a two-digit string)
cpue.query("locality == 'Anacapa Island' and year == '19'")

Unnamed: 0,eventID,locality,locationRemarks,organismQuantity,year,gridCellID
8,AIM09171903,Anacapa Island,marine protected area,22.044728,19,3
9,AIM09171904,Anacapa Island,marine protected area,18.03681,19,4
12,AIM09181901,Anacapa Island,marine protected area,21.391304,19,1
13,AIM09181905,Anacapa Island,marine protected area,23.147793,19,5
14,AIM09181907,Anacapa Island,marine protected area,10.337838,19,7
15,AIM09191901,Anacapa Island,marine protected area,20.055249,19,1
16,AIM09191906,Anacapa Island,marine protected area,2.818792,19,6
19,AIM10091905,Anacapa Island,marine protected area,22.008197,19,5
20,AIM10091909,Anacapa Island,marine protected area,6.230769,19,9
23,AIM10231904,Anacapa Island,marine protected area,16.704197,19,4


In [48]:
# Same grouping as the summed CPUE table, but averaging organismQuantity over
# all of a survey's species rows (rows for absent species contribute 0.0).
# NOTE(review): presumably this mirrors the value the mapper displays — confirm.
cpue_mapper = (
    occ
    .groupby(['eventID', 'locality', 'locationRemarks'], as_index=False)['organismQuantity']
    .mean()
    .assign(
        year=lambda survey: survey['eventID'].str[7:9],
        gridCellID=lambda survey: survey['eventID'].str[9:],
    )
)
cpue_mapper

Unnamed: 0,eventID,locality,locationRemarks,organismQuantity,year,gridCellID
0,AIM09082002,Anacapa Island,marine protected area,0.249357,20,02
1,AIM09082006,Anacapa Island,marine protected area,0.176432,20,06
2,AIM09102005,Anacapa Island,marine protected area,0.229391,20,05
3,AIM09102008,Anacapa Island,marine protected area,0.078201,20,08
4,AIM09152001,Anacapa Island,marine protected area,0.204099,20,01
...,...,...,...,...,...,...
2408,TMR09061721,Ten Mile,fished area,0.021505,17,21
2409,TMR10311702,Ten Mile,fished area,0.060932,17,02
2410,TMR10311713,Ten Mile,fished area,0.090613,17,13
2411,TMR10311722,Ten Mile,fished area,0.192099,17,22


In [49]:
# Mean-based values for the Anacapa Island surveys from 2019
cpue_mapper.loc[
    cpue_mapper['locality'].eq('Anacapa Island') & cpue_mapper['year'].eq('19')
]

Unnamed: 0,eventID,locality,locationRemarks,organismQuantity,year,gridCellID
8,AIM09171903,Anacapa Island,marine protected area,0.23704,19,3
9,AIM09171904,Anacapa Island,marine protected area,0.193944,19,4
12,AIM09181901,Anacapa Island,marine protected area,0.230014,19,1
13,AIM09181905,Anacapa Island,marine protected area,0.248901,19,5
14,AIM09181907,Anacapa Island,marine protected area,0.11116,19,7
15,AIM09191901,Anacapa Island,marine protected area,0.215648,19,1
16,AIM09191906,Anacapa Island,marine protected area,0.03031,19,6
19,AIM10091905,Anacapa Island,marine protected area,0.236647,19,5
20,AIM10091909,Anacapa Island,marine protected area,0.066998,19,9
23,AIM10231904,Anacapa Island,marine protected area,0.179615,19,4


In [56]:
## But in the mapper
# NOTE(review): the heading above is unfinished in the original — presumably this
# cell reproduces the location/year selection made by the mapper; confirm intent.
#
# Select the Anacapa Island MPA occurrence rows from 2019 at one latitude.

# eventDate is an ISO 'YYYY-MM-DD' string, so lexical comparison orders dates
# correctly; between() is inclusive, so Jan 1 and Dec 31 are not dropped.
in_2019 = occ['eventDate'].between('2019-01-01', '2019-12-31')

# Compare the float latitude with a tight absolute tolerance rather than exact
# equality, which can miss rows through floating-point round-off.
at_latitude = np.isclose(occ['decimalLatitude'], 34.01169968, rtol=0, atol=1e-6)

test = occ[
    (occ['locality'] == 'Anacapa Island')
    & (occ['locationRemarks'] == 'marine protected area')
    & in_2019
    & at_latitude
]

test

Unnamed: 0,eventID,eventDate,datasetID,locality,locationRemarks,countryCode,footprintWKT,decimalLatitude,decimalLongitude,coordinateUncertaintyInMeters,...,scientificName,scientificNameID,taxonID,nameAccordingTo,identificationQualifier,occurrenceStatus,basisOfRecord,individualCount,organismQuantity,organismQuantityType
837,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Paralabrax nebulifer,urn:lsid:marinespecies.org:taxname:282059,282059,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
838,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Myliobatis californica,urn:lsid:marinespecies.org:taxname:271485,271485,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
839,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Hippoglossina stomata,urn:lsid:marinespecies.org:taxname:275827,275827,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
840,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Sebastes chrysomelas,urn:lsid:marinespecies.org:taxname:240737,240737,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
841,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Sebastes melanops,urn:lsid:marinespecies.org:taxname:274817,274817,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2227,AIM10231904,2019-10-23,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Anarrhichthys ocellatus,urn:lsid:marinespecies.org:taxname:279605,279605,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
2228,AIM10231904,2019-10-23,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Sebastes ruberrimus,urn:lsid:marinespecies.org:taxname:274844,274844,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
2229,AIM10231904,2019-10-23,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Umbrina roncador,urn:lsid:marinespecies.org:taxname:273802,273802,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
2230,AIM10231904,2019-10-23,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Seriola lalandi,urn:lsid:marinespecies.org:taxname:218436,218436,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...


In [60]:
# Keep only the rows from the survey carried out on 2019-09-17
d = test.loc[test['eventDate'].eq('2019-09-17')]
d

Unnamed: 0,eventID,eventDate,datasetID,locality,locationRemarks,countryCode,footprintWKT,decimalLatitude,decimalLongitude,coordinateUncertaintyInMeters,...,scientificName,scientificNameID,taxonID,nameAccordingTo,identificationQualifier,occurrenceStatus,basisOfRecord,individualCount,organismQuantity,organismQuantityType
837,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Paralabrax nebulifer,urn:lsid:marinespecies.org:taxname:282059,282059,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
838,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Myliobatis californica,urn:lsid:marinespecies.org:taxname:271485,271485,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
839,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Hippoglossina stomata,urn:lsid:marinespecies.org:taxname:275827,275827,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
840,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Sebastes chrysomelas,urn:lsid:marinespecies.org:taxname:240737,240737,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
841,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Sebastes melanops,urn:lsid:marinespecies.org:taxname:274817,274817,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
925,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Anarrhichthys ocellatus,urn:lsid:marinespecies.org:taxname:279605,279605,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
926,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Sebastes ruberrimus,urn:lsid:marinespecies.org:taxname:274844,274844,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
927,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Umbrina roncador,urn:lsid:marinespecies.org:taxname:273802,273802,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...
928,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Seriola lalandi,urn:lsid:marinespecies.org:taxname:218436,218436,WoRMS,,absent,HumanObservation,0,0.0,Catch-Per-Unit-Effort in number of fishes caug...


In [61]:
# Average over every species row of this survey, zero-catch rows included
d['organismQuantity'].agg('mean')

0.19394419156989248

In [62]:
# Total CPUE for this survey: the sum over all of its species rows
d.loc[:, 'organismQuantity'].sum()

18.036809816

In [65]:
# Largest single-species CPUE recorded in this survey
d['organismQuantity'].agg('max')

14.72392638

In [66]:
# Smallest single-species CPUE in this survey
d.loc[:, 'organismQuantity'].min()

0.0

In [69]:
# Restrict to the species rows with a positive CPUE in this survey
not_zero = d.loc[d['organismQuantity'].gt(0)]
not_zero

Unnamed: 0,eventID,eventDate,datasetID,locality,locationRemarks,countryCode,footprintWKT,decimalLatitude,decimalLongitude,coordinateUncertaintyInMeters,...,scientificName,scientificNameID,taxonID,nameAccordingTo,identificationQualifier,occurrenceStatus,basisOfRecord,individualCount,organismQuantity,organismQuantityType
853,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Semicossyphus pulcher,urn:lsid:marinespecies.org:taxname:282753,282753,WoRMS,,present,HumanObservation,3,0.552147,Catch-Per-Unit-Effort in number of fishes caug...
867,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Medialuna californiensis,urn:lsid:marinespecies.org:taxname:281541,281541,WoRMS,,present,HumanObservation,1,0.184049,Catch-Per-Unit-Effort in number of fishes caug...
871,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Paralabrax clathratus,urn:lsid:marinespecies.org:taxname:282054,282054,WoRMS,,present,HumanObservation,14,2.576687,Catch-Per-Unit-Effort in number of fishes caug...
878,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Caulolatilus princeps,urn:lsid:marinespecies.org:taxname:276217,276217,WoRMS,,present,HumanObservation,80,14.723926,Catch-Per-Unit-Effort in number of fishes caug...


In [70]:
# Smallest CPUE among the rows with a positive CPUE
not_zero.loc[:, 'organismQuantity'].min()

0.18404908

In [72]:
# Average CPUE over only the rows with a positive CPUE
not_zero['organismQuantity'].agg('mean')

4.509202454

In [73]:
# All Caulolatilus princeps rows within the `test` selection
test.loc[test['scientificName'].eq('Caulolatilus princeps')]

Unnamed: 0,eventID,eventDate,datasetID,locality,locationRemarks,countryCode,footprintWKT,decimalLatitude,decimalLongitude,coordinateUncertaintyInMeters,...,scientificName,scientificNameID,taxonID,nameAccordingTo,identificationQualifier,occurrenceStatus,basisOfRecord,individualCount,organismQuantity,organismQuantityType
878,AIM09171904,2019-09-17,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Caulolatilus princeps,urn:lsid:marinespecies.org:taxname:276217,276217,WoRMS,,present,HumanObservation,80,14.723926,Catch-Per-Unit-Effort in number of fishes caug...
2180,AIM10231904,2019-10-23,CCFRP,Anacapa Island,marine protected area,US,"POLYGON ((34.01392365 -119.38961, 34.00950241 ...",34.0117,-119.392358,354,...,Caulolatilus princeps,urn:lsid:marinespecies.org:taxname:276217,276217,WoRMS,,present,HumanObservation,116,14.247697,Catch-Per-Unit-Effort in number of fishes caug...


In [74]:
# Total Caulolatilus princeps individuals across the surveys in `test`, taken
# from the individualCount column instead of re-typing the counts by hand.
is_c_princeps = test['scientificName'] == 'Caulolatilus princeps'
int(test.loc[is_c_princeps, 'individualCount'].sum())

196