In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

sns.set_theme()

# jupyter notebook full-width display
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# no text wrapping
display(HTML("<style>.dataframe td { white-space: nowrap; }</style>"))

# pandas formatting
pd.set_option('display.float_format', '{:.3f}'.format)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_colwidth', 400)

In [2]:
df_SD = pd.read_pickle('df_SD.pickle')
df_FD = pd.read_pickle('df_FD.pickle')
df_LF = pd.read_pickle('df_LF.pickle')
df_LF_grouped = pd.read_pickle('df_LF_grouped.pickle')
df_Site = pd.read_pickle('df_Site.pickle')
df_TrapSupervisors = pd.read_pickle('df_TrapSupervisors.pickle')

# checking stuff

### why don't these match?
* answer: they do, but there is no matching sample to link them

In [3]:
df_LF[(df_LF.yy==2019) & (df_LF.mm==5) & (df_LF.dd==28) & (df_LF.site=="47")]

Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
11195,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,215,1,215,2019-05-28,47,,,2019052847,21.5,,,,True
11196,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,220,6,220,2019-05-28,47,,,2019052847,22.0,,,,True
11197,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,225,16,225,2019-05-28,47,,,2019052847,22.5,,,,True
11198,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,230,26,230,2019-05-28,47,,,2019052847,23.0,,,,True
11199,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,235,39,235,2019-05-28,47,,,2019052847,23.5,,,,True
11200,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,240,51,240,2019-05-28,47,,,2019052847,24.0,,,,True
11201,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,245,35,245,2019-05-28,47,,,2019052847,24.5,,,,True
11202,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,250,27,250,2019-05-28,47,,,2019052847,25.0,,,,True
11203,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,255,17,255,2019-05-28,47,,,2019052847,25.5,,,,True
11204,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,260,5,260,2019-05-28,47,,,2019052847,26.0,,,,True


In [4]:
df_SD[(df_SD.DATETIME.dt.year==2019) & (df_SD.DATETIME.dt.month==5) & (df_SD.DATETIME.dt.day==28)]

Unnamed: 0,DIST,RIVER,NAME,code,GEAR,SITE_NO,no_nets,YEAR,MM,DD,Week,catch_lbs,catch_kg,hours_fished,zone,last_name,comments,bycatch_sbass,bycatch_shad,bycatch_other,DATETIME,SITE1,SITE2,remarks,id,total_fish_preserved,total_fish_measured,AM_PM_PERIOD,wt_lbs,FLAG_DATETIME,FLAG_HOURS_FISHED,FLAG_SITE,FLAG_AM_PM_PERIOD_DISCREPANCIES
14959,2,SWMARG,Martin E Cameron,,81,12,1.0,2019,5,28,5,350.0,158.8,5,lower,Cameron,,,,,2019-05-28,12,,,2019052812,31.0,195.0,AM,71.9,,,,
14960,2,SWMARG,Robert Peters,,81,25,1.0,2019,5,28,5,25.0,11.3,8,lower,Peters,,,,,2019-05-28,25,,,2019052825,,,,,,,,
14961,2,SWMARG,Pierre Chiasson,,81,26,1.0,2019,5,28,5,200.0,90.7,7,lower,Chiasson,,,,,2019-05-28,26,,,2019052826,,,,,,,,
14962,2,SWMARG,Gerard MacFarlane,,81,28,1.0,2019,5,28,5,200.0,90.7,8,upper,MacFarlane,,,,1 perch,2019-05-28,28,,bycatch_other: 1 perch,2019052828,,,,,,,,
14963,2,SWMARG,Daniel Stewart,,81,33,1.0,2019,5,28,5,150.0,68.0,8,upper,Stewart,,,,,2019-05-28,33,,,2019052833,,,,,,,,
14964,2,SWMARG,Stewart Gillis,,81,41,1.0,2019,5,28,5,30.0,13.6,7,upper,Gillis,,,,,2019-05-28,41,,,2019052841,,,,,,,,
14965,2,SWMARG,Finley Stewart,,81,44,,2019,5,28,5,300.0,136.1,14,upper,Stewart,,,,,2019-05-28,44,,,2019052844,,,,,,,,
14966,2,SWMARG,Mary E Gillis,,81,49,1.0,2019,5,28,5,700.0,317.5,14,upper,Gillis,,,,,2019-05-28,49,,,2019052849,,,,,,,,
14967,2,SWMARG,Bruce MacLellan,,81,51,1.0,2019,5,28,5,15.0,6.8,6,upper,MacLellan,,,,,2019-05-28,51,,,2019052851,,,,,,,,
14968,2,SWMARG,Elizabeth MacKinnon,,81,62,1.0,2019,5,28,5,500.0,226.8,10,upper,MacKinnon,,,,,2019-05-28,62,,,2019052862,36.0,203.0,AM,74.1,,,,


### kevin followup question: how many ambiguous matches don't have a matching sample because there are multiple sample matches

In [5]:
ambiguous = set()
for i in list(df_SD[df_SD.id>2024000000].id):
    while i > 2024000000:
        i -= 1000000000
    ambiguous.add(i)
    
ambiguous = list(ambiguous)

In [6]:
# EXACT MATCHES = NONE

for ambiguous_sample in ambiguous:
    YEAR, MONTH, DAY, SITE = (
        ambiguous_sample//1000000, 
        ambiguous_sample//10000 - 100*(ambiguous_sample//1000000), 
        ambiguous_sample//100 - 100*(ambiguous_sample//10000), 
        ambiguous_sample - 100*(ambiguous_sample//100)
    )
    print()
    print(YEAR, MONTH, DAY, SITE)
    # no exact matches
    display(df_LF[(df_LF.yy==YEAR) & (df_LF.mm==MONTH) & (df_LF.dd==DAY) & (df_LF.site==str(SITE))])
    display(df_FD[(df_FD.YEAR==YEAR) & (df_FD.MM==MONTH) & (df_FD.DD==DAY) & (df_FD.SITE==SITE)])


2004 6 10 1


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1997 6 12 58


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1988 5 23 48


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1988 4 0 37


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


In [7]:
# POTENTIAL MATCHES WITH MESSY SITE NAMES = NONE

for ambiguous_sample in ambiguous:
    YEAR, MONTH, DAY, SITE = (
        ambiguous_sample//1000000, 
        ambiguous_sample//10000 - 100*(ambiguous_sample//1000000), 
        ambiguous_sample//100 - 100*(ambiguous_sample//10000), 
        ambiguous_sample - 100*(ambiguous_sample//100)
    )
    print()
    print(YEAR, MONTH, DAY, SITE)
    # partial matches
    display(df_LF[(df_LF.yy==YEAR) & (df_LF.mm==MONTH) & (df_LF.dd==DAY) & (df_LF.site.str.contains(str(SITE))==True)])
    display(df_FD[(df_FD.YEAR==YEAR) & (df_FD.MM==MONTH) & (df_FD.DD==DAY) & (df_FD.SITE.str.contains(str(SITE))==True)])


2004 6 10 1


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1997 6 12 58


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1988 5 23 48


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1988 4 0 37


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


In [8]:
# A FEW MATCHES WHEN SITE IS NOT CONSIDERED
# maybe there the site number was written down wrong, some partial matches (excluding site)

for ambiguous_sample in ambiguous:
    YEAR, MONTH, DAY, SITE = (
        ambiguous_sample//1000000, 
        ambiguous_sample//10000 - 100*(ambiguous_sample//1000000), 
        ambiguous_sample//100 - 100*(ambiguous_sample//10000), 
        ambiguous_sample - 100*(ambiguous_sample//100)
    )
    print()
    print(YEAR, MONTH, DAY, SITE)
    # very partial matches
    display(df_LF[(df_LF.yy==YEAR) & (df_LF.mm==MONTH) & (df_LF.dd==DAY)].head())
    display(df_FD[(df_FD.YEAR==YEAR) & (df_FD.MM==MONTH) & (df_FD.DD==DAY)].head())


2004 6 10 1


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1997 6 12 58


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
3024,1997,6,12,,SW MARGAREE,7,26,LOWER,AM,118.0,53.5,220,3,220,1997-06-12,26,,,1997061226,22.0,,,,
3025,1997,6,12,,SW MARGAREE,7,26,LOWER,AM,118.0,53.5,225,4,225,1997-06-12,26,,,1997061226,22.5,,,,
3026,1997,6,12,,SW MARGAREE,7,26,LOWER,AM,118.0,53.5,230,19,230,1997-06-12,26,,,1997061226,23.0,,,,
3027,1997,6,12,,SW MARGAREE,7,26,LOWER,AM,118.0,53.5,235,52,235,1997-06-12,26,,,1997061226,23.5,,,,
3028,1997,6,12,,SW MARGAREE,7,26,LOWER,AM,118.0,53.5,240,70,240,1997-06-12,26,,,1997061226,24.0,,,,


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
15016,1997,6,12,7,26,AM,Frozen,1,,230,238.0,171.0,A,M,4,,,3,3,,,,,,,,,,,1997-06-12,3,3,,,,,26,26,,,230,SITE_notes: 26; AGE_notes_1: 3; FSP_notes_1: 3,1997061226,,,,,,,,,,,,,,
15017,1997,6,12,7,26,AM,Frozen,2,,224,232.0,166.0,A,M,4,,,3,3,,,,,,,,,,,1997-06-12,3,3,,,,,26,26,,,224,SITE_notes: 26; AGE_notes_1: 3; FSP_notes_1: 3,1997061226,,,,,,,,,,,,,,
15018,1997,6,12,7,26,AM,Frozen,3,,272,280.0,307.0,B,F,4,39.9,,7,3,,,,,,,,,,,1997-06-12,7,3,,,,,26,26,,,272,SITE_notes: 26; AGE_notes_1: 7; FSP_notes_1: 3,1997061226,,,,,,,,,,,,,,
15019,1997,6,12,7,26,AM,Frozen,4,,212,220.0,138.0,A,M,4,,,3,3,,,,,,,,,,,1997-06-12,3,3,,,,,26,26,,,212,SITE_notes: 26; AGE_notes_1: 3; FSP_notes_1: 3,1997061226,,,,,,,,,,,,,,
15020,1997,6,12,7,26,AM,Frozen,5,,220,228.0,143.0,A,M,4,,,3,3,,,,,,,,,,,1997-06-12,3,3,,,,,26,26,,,220,SITE_notes: 26; AGE_notes_1: 3; FSP_notes_1: 3,1997061226,,,,,,,,,,,,,,



1988 5 23 48


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
4503,1988,5,23,,12,,Fresh,1,253,,253.0,208.0,A,M,4,,,4,4,,,,,,,,,,,1988-05-23,4,4,,,,,12,12,,,253,SITE_notes: 12; AGE_notes_1: 4; FSP_notes_1: 4,1988052312,,,,,,,,,,,,,,
4504,1988,5,23,,12,,Fresh,2,258,,258.0,236.0,A,M,4,,,4,4,,,,,,,,,,,1988-05-23,4,4,,,,,12,12,,,258,SITE_notes: 12; AGE_notes_1: 4; FSP_notes_1: 4,1988052312,,,,,,,,,,,,,,
4505,1988,5,23,,12,,Fresh,3,245,,245.0,,A,M,4,,,4,4,,,,,,,,,,,1988-05-23,4,4,,,,,12,12,,,245,SITE_notes: 12; AGE_notes_1: 4; FSP_notes_1: 4,1988052312,,,,,,,,,,,,,,
4506,1988,5,23,,12,,Fresh,4,250,,250.0,234.0,A,M,4,,,4,4,,,,,,,,,,,1988-05-23,4,4,,,,,12,12,,,250,SITE_notes: 12; AGE_notes_1: 4; FSP_notes_1: 4,1988052312,,,,,,,,,,,,,,
4507,1988,5,23,,12,,Fresh,5,245,,245.0,209.0,A,F,4,29.1,,3,3,,,,,,,,,,,1988-05-23,3,3,,,,,12,12,,,245,SITE_notes: 12; AGE_notes_1: 3; FSP_notes_1: 3,1988052312,,,,,,,,,,,,,,



1988 4 0 37


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


### how do we flag and leave the potential to merge ghost/ambiguous samples?

In [9]:
id = 1990061312
date = id //100
date

19900613

In [10]:
from datetime import datetime
datetime.strptime(str(id//100), "%Y%m%d").date()

datetime.date(1990, 6, 13)

In [11]:
id = 4990061312
date = id // 100
while date > 20240000:
    date -= 10000000
datetime.strptime(str(date), "%Y%m%d").date()

datetime.date(1990, 6, 13)

In [12]:
sum(df_LF.id.isna()), sum(df_FD.id.isna())

(0, 0)

In [13]:
id - 100*(id//100)

12

# When making ghost samples, will any of our ambiguous id match between fish details and length frequencies?

In [14]:
ambiguous_FD = set(df_FD[df_FD.id>2024000000].id)
ambiguous_LF = set(df_LF[df_LF.id>2024000000].id)
ambiguous_SD = set(df_SD[df_SD.id>2024000000].id)
# yes, we need to further disambiguate: maybe add 20 to months, this number would never occur naturally

In [15]:
# let's make a summary of ambiguous dates

df_A_FD = pd.DataFrame(ambiguous_FD, columns=['id_FD'])
df_A_FD['date'] = df_A_FD['id_FD'] // 100
while df_A_FD.date.max() > 20240000:
    df_A_FD.loc[df_A_FD.date>20240000, 'date'] -= 10000000
    
df_A_LF = pd.DataFrame(ambiguous_LF, columns=['id_LF'])
df_A_LF['date'] = df_A_LF['id_LF'] // 100
while df_A_LF.date.max() > 20240000:
    df_A_LF.loc[df_A_LF.date>20240000, 'date'] -= 10000000
    
df_A_SD = pd.DataFrame(ambiguous_SD, columns=['id_SD'])
df_A_SD['date'] = df_A_SD['id_SD'] // 100
while df_A_SD.date.max() > 20240000:
    df_A_SD.loc[df_A_SD.date>20240000, 'date'] -= 10000000

# group and split into columns
df_A_FD = df_A_FD.groupby('date')['id_FD'].apply(lambda x: pd.Series(x.values)).unstack()
df_A_FD.columns = ['id_FD' for _ in range(df_A_FD.shape[1])]

df_A_LF = df_A_LF.groupby('date')['id_LF'].apply(lambda x: pd.Series(x.values)).unstack()
df_A_LF.columns = ['id_LF' for _ in range(df_A_LF.shape[1])]

df_A_SD = df_A_SD.groupby('date')['id_SD'].apply(lambda x: pd.Series(x.values)).unstack()
df_A_SD.columns = ['id_SD' for _ in range(df_A_SD.shape[1])]

# create a dataframe summarising ambiguous data
df_AMBIGUOUS = pd.merge(
    df_A_FD,
    pd.merge(
        df_A_SD,
        df_A_LF,
        on='date', 
        how='outer'
    ),
    on='date', 
    how='outer'
).sort_index().astype('Int64')

df_AMBIGUOUS.loc[df_AMBIGUOUS['id_FD'].any(axis=1), 'FLAG_FD_AMBIGUOUS'] = True
df_AMBIGUOUS.loc[df_AMBIGUOUS['id_LF'].any(axis=1), 'FLAG_LF_AMBIGUOUS'] = True
df_AMBIGUOUS.loc[df_AMBIGUOUS['id_SD'].any(axis=1), 'FLAG_SD_AMBIGUOUS'] = True

df_AMBIGUOUS['FLAG_AMBIGUITY_OVERLAP'] = df_AMBIGUOUS.FLAG_FD_AMBIGUOUS.notnull()*1 + df_AMBIGUOUS.FLAG_LF_AMBIGUOUS.notnull()*1 + df_AMBIGUOUS.FLAG_SD_AMBIGUOUS.notnull()*1

In [16]:
# these are our problems if we are making ghost samples

df_AMBIGUOUS.loc[df_AMBIGUOUS.FLAG_AMBIGUITY_OVERLAP > 1].dropna(axis=1).T

date,19980501,20000607,20000609,20010516,20090520,20090522,20100511,20100519,20100527
id_FD,2998050105,3000060726,4000060926,3001051605,4009052026,3009052226,3010051126,3010051926,3010052726
id_FD,3998050105,4000060726,3000060926,4001051605,3009052026,4009052226,4010051126,4010051926,4010052726
id_LF,3998050105,4000060726,3000060926,3001051605,3009052026,3009052226,3010051126,3010051926,3010052726
id_LF,2998050105,3000060726,4000060926,4001051605,4009052026,4009052226,4010051126,4010051926,4010052726
FLAG_FD_AMBIGUOUS,True,True,True,True,True,True,True,True,True
FLAG_LF_AMBIGUOUS,True,True,True,True,True,True,True,True,True
FLAG_AMBIGUITY_OVERLAP,2,2,2,2,2,2,2,2,2


# Potential Sorting Issues? LF? FD?

In [46]:
for i in ambiguous_FD:
#     print('\n', i)
    display(df_FD[df_FD.id==i][['YEAR', 'MM', 'DD', 'SITE', 'PERIOD', 'FISH_NO', 'id']].head(1))

Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
35924,1989,5,14,12,AM,1,2989051412


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36456,1989,5,14,12,PM,1,3989051412


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
35995,1993,5,29,52,AM,1,2993052952


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36505,1993,5,29,52,AM,11,3993052952


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36677,2009,5,20,26,PM,1,4009052026


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36590,2000,6,9,26,PM,1,4000060926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36048,2000,6,7,26,AM,1,3000060726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36380,2014,5,30,47 or 62,PM,1,3014053047


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36224,2010,5,11,26,AM,1,3010051126


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36047,1998,5,1,5,PM,1,2998050105


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36515,1998,5,1,5,PM,2,3998050105


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36545,2000,6,7,26,PM,1,4000060726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36768,2010,5,11,26,PM,1,4010051126


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36319,2010,5,29,25,PM,1,3010052925


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36859,2010,5,29,25,PM,18,4010052925


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36861,2014,5,30,47,AM,1,4014053047


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36891,1998,5,1,5,PM,1,4998050105


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
35972,1993,5,27,37,AM,1,2993052737


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36192,2009,5,22,26,AM,1,3009052226


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36502,1993,5,27,37,AM,15,3993052737


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36708,2009,5,22,26,PM,1,4009052226


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36024,1993,6,9,33,AM,1,2993060933


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36512,1993,6,9,33,AM,14,3993060933


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
35878,1989,5,13,35,AM,1,2989051335


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36410,1989,5,13,35,PM,1,3989051335


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36349,2010,6,11,41,AM,1,3010061141


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36118,2001,5,16,5,PM,1,3001051605


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36253,2010,5,19,26,AM,1,3010051926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36630,2001,5,16,526,AM,1,4001051605


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36794,2010,5,19,26,PM,1,4010051926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36860,2010,6,11,41,AM,18,4010061141


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36223,2010,5,5,41,AM,33,3010050541


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36737,2010,5,5,41,PM,1,4010050541


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36911,2010,5,5,41,PM,24,5010050541


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36290,2010,5,27,26,AM,1,3010052726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36830,2010,5,27,26,PM,1,4010052726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36163,2009,5,20,26,AM,1,3009052026


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36084,2000,6,9,26,AM,1,3000060926


In [53]:
# sort all dataframes before analysing ambiguous data
# df_FD = df_FD.sort_values(['DATETIME', 'FISH_NO']).reset_index(drop=True)
df_LF.sort_values(['DATETIME', 'SITE1', 'id', 'lgth']).reset_index()

Unnamed: 0,index,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
0,0,1990,5,7,,,2,12,LOWER,PM,,,250,1,250,1990-05-07,12,,,1990050712,25.000,,,,
1,1,1990,5,7,,,2,12,LOWER,PM,,,253,1,250,1990-05-07,12,,,1990050712,25.000,,,,
2,2,1990,5,7,,,2,12,LOWER,PM,,,255,2,255,1990-05-07,12,,,1990050712,25.500,,,,
3,3,1990,5,7,,,2,12,LOWER,PM,,,258,2,255,1990-05-07,12,,,1990050712,25.500,,,,
4,4,1990,5,7,,,2,12,LOWER,PM,,,260,7,260,1990-05-07,12,,,1990050712,26.000,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11435,11409,2019,6,28,,MARGAREE,9,41,UPPER,AM,75.400,34.201,240,27,240,2019-06-28,41,,,2019062841,24.000,,,,
11436,11410,2019,6,28,,MARGAREE,9,41,UPPER,AM,75.400,34.201,245,11,245,2019-06-28,41,,,2019062841,24.500,,,,
11437,11411,2019,6,28,,MARGAREE,9,41,UPPER,AM,75.400,34.201,250,6,250,2019-06-28,41,,,2019062841,25.000,,,,
11438,11412,2019,6,28,,MARGAREE,9,41,UPPER,AM,75.400,34.201,255,2,255,2019-06-28,41,,,2019062841,25.500,,,,


In [43]:
for i in ambiguous_FD:
    print('\n', i)
    display(df_FD[df_FD.id==i][['YEAR', 'MM', 'DD', 'SITE', 'PERIOD', 'FISH_NO']].head())


 2989051412


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
35924,1989,5,14,12,AM,1
35925,1989,5,14,12,AM,2
35926,1989,5,14,12,AM,3
35927,1989,5,14,12,AM,4
35928,1989,5,14,12,AM,5



 3989051412


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36456,1989,5,14,12,PM,1
36457,1989,5,14,12,PM,2
36458,1989,5,14,12,PM,3
36459,1989,5,14,12,PM,4
36460,1989,5,14,12,PM,5



 2993052952


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
35995,1993,5,29,52,AM,1
35996,1993,5,29,52,AM,2
35997,1993,5,29,52,AM,3
35998,1993,5,29,52,AM,4
35999,1993,5,29,52,AM,5



 3993052952


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36505,1993,5,29,52,AM,11
36506,1993,5,29,52,AM,12
36507,1993,5,29,52,AM,13
36508,1993,5,29,52,AM,14
36509,1993,5,29,52,AM,15



 4009052026


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36677,2009,5,20,26,PM,1
36678,2009,5,20,26,PM,2
36679,2009,5,20,26,PM,3
36680,2009,5,20,26,PM,4
36681,2009,5,20,26,PM,5



 4000060926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36590,2000,6,9,26,PM,1
36591,2000,6,9,26,PM,2
36592,2000,6,9,26,PM,3
36593,2000,6,9,26,PM,4
36594,2000,6,9,26,PM,5



 3000060726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36048,2000,6,7,26,AM,1
36049,2000,6,7,26,AM,2
36050,2000,6,7,26,AM,3
36051,2000,6,7,26,AM,4
36052,2000,6,7,26,AM,5



 3014053047


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36380,2014,5,30,47 or 62,PM,1
36381,2014,5,30,47 or 62,PM,2
36382,2014,5,30,47 or 62,PM,3
36383,2014,5,30,47 or 62,PM,4
36384,2014,5,30,47 or 62,PM,5



 3010051126


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36224,2010,5,11,26,AM,1
36225,2010,5,11,26,AM,2
36226,2010,5,11,26,AM,3
36227,2010,5,11,26,AM,4
36228,2010,5,11,26,AM,5



 2998050105


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36047,1998,5,1,5,PM,1



 3998050105


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36515,1998,5,1,5,PM,2
36516,1998,5,1,5,PM,3
36517,1998,5,1,5,PM,4
36518,1998,5,1,5,PM,5
36519,1998,5,1,5,PM,6



 4000060726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36545,2000,6,7,26,PM,1
36546,2000,6,7,26,PM,2
36547,2000,6,7,26,PM,3
36548,2000,6,7,26,PM,4
36549,2000,6,7,26,PM,5



 4010051126


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36768,2010,5,11,26,PM,1
36769,2010,5,11,26,PM,2
36770,2010,5,11,26,PM,3
36771,2010,5,11,26,PM,4
36772,2010,5,11,26,PM,5



 3010052925


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36319,2010,5,29,25,PM,1
36320,2010,5,29,25,PM,2
36321,2010,5,29,25,PM,3
36322,2010,5,29,25,PM,4
36323,2010,5,29,25,PM,5



 4010052925


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36859,2010,5,29,25,PM,18



 4014053047


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36861,2014,5,30,47,AM,1
36862,2014,5,30,47,AM,2
36863,2014,5,30,47,AM,3
36864,2014,5,30,47,AM,4
36865,2014,5,30,47,AM,5



 4998050105


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36891,1998,5,1,5,PM,1
36892,1998,5,1,5,PM,2
36893,1998,5,1,5,PM,3
36894,1998,5,1,5,PM,4
36895,1998,5,1,5,PM,5



 2993052737


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
35972,1993,5,27,37,AM,1
35973,1993,5,27,37,AM,2
35974,1993,5,27,37,AM,3
35975,1993,5,27,37,AM,4
35976,1993,5,27,37,AM,5



 3009052226


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36192,2009,5,22,26,AM,1
36193,2009,5,22,26,AM,2
36194,2009,5,22,26,AM,3
36195,2009,5,22,26,AM,4
36196,2009,5,22,26,AM,5



 3993052737


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36502,1993,5,27,37,AM,15
36503,1993,5,27,37,AM,16
36504,1993,5,27,37,AM,17



 4009052226


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36708,2009,5,22,26,PM,1
36709,2009,5,22,26,PM,2
36710,2009,5,22,26,PM,3
36711,2009,5,22,26,PM,4
36712,2009,5,22,26,PM,5



 2993060933


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36024,1993,6,9,33,AM,1
36025,1993,6,9,33,AM,2
36026,1993,6,9,33,AM,3
36027,1993,6,9,33,AM,4
36028,1993,6,9,33,AM,5



 3993060933


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36512,1993,6,9,33,AM,14
36513,1993,6,9,33,AM,15
36514,1993,6,9,33,AM,16



 2989051335


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
35878,1989,5,13,35,AM,1
35879,1989,5,13,35,AM,2
35880,1989,5,13,35,AM,3
35881,1989,5,13,35,AM,4
35882,1989,5,13,35,AM,5



 3989051335


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36410,1989,5,13,35,PM,1
36411,1989,5,13,35,PM,2
36412,1989,5,13,35,PM,3
36413,1989,5,13,35,PM,4
36414,1989,5,13,35,PM,5



 3010061141


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36349,2010,6,11,41,AM,1
36350,2010,6,11,41,AM,2
36351,2010,6,11,41,AM,3
36352,2010,6,11,41,AM,4
36353,2010,6,11,41,AM,5



 3001051605


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36118,2001,5,16,5,PM,1
36119,2001,5,16,5,PM,2
36120,2001,5,16,5,PM,3
36121,2001,5,16,5,PM,4
36122,2001,5,16,5,PM,5



 3010051926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36253,2010,5,19,26,AM,1
36254,2010,5,19,26,AM,2
36255,2010,5,19,26,AM,3
36256,2010,5,19,26,AM,4
36257,2010,5,19,26,AM,5



 4001051605


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36630,2001,5,16,526,AM,1
36631,2001,5,16,526,AM,2
36632,2001,5,16,526,AM,3
36633,2001,5,16,526,AM,4
36634,2001,5,16,526,AM,5



 4010051926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36794,2010,5,19,26,PM,1
36795,2010,5,19,26,PM,2
36796,2010,5,19,26,PM,3
36797,2010,5,19,26,PM,4
36798,2010,5,19,26,PM,5



 4010061141


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36860,2010,6,11,41,AM,18



 3010050541


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36223,2010,5,5,41,AM,33



 4010050541


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36737,2010,5,5,41,PM,1
36738,2010,5,5,41,PM,2
36739,2010,5,5,41,PM,3
36740,2010,5,5,41,PM,4
36741,2010,5,5,41,PM,5



 5010050541


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36911,2010,5,5,41,PM,24



 3010052726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36290,2010,5,27,26,AM,1
36291,2010,5,27,26,AM,2
36292,2010,5,27,26,AM,3
36293,2010,5,27,26,AM,4
36294,2010,5,27,26,AM,5



 4010052726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36830,2010,5,27,26,PM,1
36831,2010,5,27,26,PM,2
36832,2010,5,27,26,PM,3
36833,2010,5,27,26,PM,4
36834,2010,5,27,26,PM,5



 3009052026


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36163,2009,5,20,26,AM,1
36164,2009,5,20,26,AM,2
36165,2009,5,20,26,AM,3
36166,2009,5,20,26,AM,4
36167,2009,5,20,26,AM,5



 3000060926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36084,2000,6,9,26,AM,1
36085,2000,6,9,26,AM,2
36086,2000,6,9,26,AM,3
36087,2000,6,9,26,AM,4
36088,2000,6,9,26,AM,5
