In [137]:
import re
import pandas as pd
from helpers import sql

# pandas display options for this notebook
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
# fully qualified key: the short form 'max_colwidth' only resolves through pandas'
# pattern matching of option names, which breaks if another option ever matches too
pd.set_option('display.max_colwidth', 200)
# render floats without decimals (affects display only, not the stored values)
pd.set_option('display.float_format', '{:.0f}'.format)

In [138]:
# Load the three tables used by the rest of the notebook.
# Every query is a `SELECT *` (with joins for documents and processes), so the
# joined tables' columns come along too; in the displayed documents frame the
# second `id` column shows up as `id_1`.
# NOTE(review): `sql` comes from the local `helpers` module - presumably it runs the
# query and returns a DataFrame (later cells index the results that way); confirm.
MEETINGS_QUERY = "SELECT * FROM csas2_meeting"
DOCUMENTS_QUERY = """
    SELECT * FROM csas2_document 
        JOIN csas2_documenttype ON csas2_document.document_type_id = csas2_documenttype.id;
"""
PROCESSES_QUERY = """
    SELECT * FROM csas2_process
        JOIN csas2_csasoffice ON csas2_process.lead_office_id = csas2_csasoffice.id
        JOIN shared_models_region ON csas2_csasoffice.region_id = shared_models_region.id;
    """

meetings = sql(MEETINGS_QUERY)
documents = sql(DOCUMENTS_QUERY)
processes = sql(PROCESSES_QUERY)

In [139]:
# names transcribed manually from an image; the image gave no ids or pub numbers
fsar_pilots = [
    ('Pacific Ocean Perch', 'PAC'),
    ('Salmon (7 SMUs)', 'PAC'),
    ('Dolly Varden', 'Arctic'),
    ('EAZ/WAZ Shrimp', 'Arctic'),
    ('4RST Capelin', 'QC'),
    ('Unit 1 & 2 Redfish', 'QC'),
    ('Lobster LFA 35', 'MAR'),
    ('4TVn Atlantic Cod', 'GULF'),
    ('4T Spring and Fall Herring', 'GULF'),
    ('Gulf of St. Lawrence Snow Crab', 'GULF'),
    ('Southern Gulf of St. Lawrence Scallop', 'GULF'),
    ('3Ps Atlantic Cod', 'NL'),
    ('Northern Shrimp SFA 4-6', 'NL'),
    ('Northern Cod', 'NL'),
]
fsar_srr_2024 = [
    '2024/016',
    '2024/017',
    '2024/039',
    '2024/040',
    '2024/041',
]
fsar_sar_2024 = [
    '2024/012',
    '2024/016',
    '2024/020',
    '2024/009',
    '2024/021',
    '2024/026',
    '2024/030',
    '2024/029',
    '2024/037',
    '2024/049',
    '2024/055',
    '2024/057',
    '2024/061',
]
fsar_srr_2025 = [
    '2025/001',
    '2025/002',
    '2025/003',
    '2025/008',
    '2025/009',
    '2025/010',
    '2025/011',
    '2025/012',
    '2025/015',
    '2025/016',
    '2025/019',
    '2025/020',
    '2025/021',
]
fsar_sar_2025 = [
    '2025/007',
    '2025/009',
    '2025/010',
    '2025/011',
    '2025/012',
    '2025/013',
    '2025/014',
    '2025/017',
    '2025/018',
    '2025/022',
    '2025/024',
    '2025/026',
    '2025/029',
]
# raw data from the spreadsheet, cleaned as best I can
fsar_coordinators = [
    # NL
    'SAR 2024-016',
    'SAR 2024/061',
    'SAR 2024_049',
    'CSAS - Process 761 - 3Ps cod ',
    'CSAS - Process 756 - 3KLPs herring',
    'CSAS - Process 757 - Snow Crab ',
    'CSAS - Process 758 - Capelin',
    'CSAS - Process 759 - Striped and Northern Shrimp ',
    'CSAS - Process 795 - Northern cod ',
    # QC
    'SAR 2024/021',
    'SR2024/008',
    'SR2025/002',
    'SAR2025/017',
    'SCAS - Processus 807 - Estuary and Northern Gulf Snow crab',
    'SAR2025/011',
    'SAR2025/012',
    'SAR2025/013',
    'SAR2025/009',
    'SAR2025/014',
    'SAR2025/024',
    'SR2025/016',
    'SR2025/015',
    # ON (n/a)
    # ARCTIC
    'Great Slave Lake Inconnu (Stenodus leucichthys) Stock Assessment, 2022',
    'Great Slave Lake Lake Whitefish Stock Assessment, 2022',
    'Great Slave Lake Lake Trout Stock Assessment, 2022',
    # PACIFIC (n/a)
    # GULF
    'https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_026-eng.html',
    'Science Advisory Report 2024/029',
    'Science Advisory Report 2024/030',
    'Science Advisory Report 2024/037',
    'SAR 2024/055',
    '4T striped bass',
    '4T groundfish',
    '4T snow crab',
    '4T salmon',
    # MAR
    'SAR 2024/056',
    'SAR 2024/057',
    'SR 2024/039',
    'SR 2024/040',
    'SR 2025/008',
    'Western Component Pollock Management Strategy Evaluation Interim Update',
    'SR 2025/009',
    '2024 Update of the Eastern Scotian Shelf Shrimp Stock Assessment',
    'Assessment of Maritimes Region Snow Crab (N-ENS (CFA 20-22), S-ENS(CFA 23 and CFA 24) and 4X)',
    'LFA 35 Lobster Stock Status Update',
    'LFA 36 Lobster Stock Status Update', 
    'LFA 38 Lobster Stock Status Update',
]  
fsar_coordinators_sar = [
    '2024/016',
    '2024/061',
    '2024/049',
    '2024/021',
    '2025/017',
    '2025/011',
    '2025/012',
    '2025/013',
    '2025/009',
    '2025/014',
    '2025/024',
    '2024/026',
    '2024/029',
    '2024/030',
    '2024/037',
    '2024/055',
    '2024/056',
    '2024/057',
]
fsar_coordinators_srr = [
    '2024/008',
    '2025/002',
    '2025/016',
    '2025/015',
    '2024/039',
    '2024/040',
    '2025/008',
    '2025/009',
]
# process ids referenced by the coordinators' list
fsar_coordinators_process_id = [
    # the numbers from the 'Process NNN' / 'Processus NNN' entries in fsar_coordinators
    761,
    756,
    757,
    758,
    759,
    795,
    807,
    
    # added based on MAR requests below (fsar_requests)
    782,
    750,
    784,
    777,
    
]
# doc name doesn't generally match the name used by the process or document
fsar_coordinators_doc_titles_approx = [
    # ARCTIC
    ('Great Slave Lake Inconnu (Stenodus leucichthys) Stock Assessment, 2022', 'Arctic'),
    ('Great Slave Lake Lake Whitefish Stock Assessment, 2022', 'Arctic'),
    ('Great Slave Lake Lake Trout Stock Assessment, 2022', 'Arctic'),
    # GULF
    ('4T striped bass', 'GULF'),
    ('4T groundfish', 'GULF'),
    ('4T snow crab', 'GULF'),
    ('4T salmon', 'GULF'),
    # MAR - these are all requests based on links in spreadsheet
    # ('Western Component Pollock Management Strategy Evaluation Interim Update', 'MAR'),
    # ('2024 Update of the Eastern Scotian Shelf Shrimp Stock Assessment', 'MAR'),
    # ('Assessment of Maritimes Region Snow Crab (N-ENS (CFA 20-22), S-ENS(CFA 23 and CFA 24) and 4X)', 'MAR'),
    # ('LFA 35 Lobster Stock Status Update', 'MAR'),
    # ('LFA 36 Lobster Stock Status Update', 'MAR'),
    # ('LFA 38 Lobster Stock Status Update', 'MAR'),
]
# request ids taken from the MAR request links in the spreadsheet
# NOTE(review): 446 is listed twice - presumably two links point at the same request;
# confirm it is not a typo
fsar_requests = [407, 464, 433, 449, 446, 446] # from MAR request links
# note: requests 449/446 are both process 777 
# process 777 has 3 meetings: 1441, 1442, 1443 all stock status updates for lobster (35, 36, 38)


# docs from pub numbers

In [140]:
# columns shown when inspecting individual documents
doc_columns = ['id', 'title_en', 'pub_number', 'process_id', 'acronym_en', ]

# every pub-number list paired with the document-type acronym it is matched against
# (a pub number alone is ambiguous: e.g. '2024/016' is on both an SRR and a SAR list)
fsar_pub_number_lists = [
    [fsar_srr_2024, 'SRR'],
    [fsar_sar_2024, 'SAR'],
    [fsar_srr_2025, 'SRR'],
    [fsar_sar_2025, 'SAR'],
    [fsar_coordinators_srr, 'SRR'],
    [fsar_coordinators_sar, 'SAR'],
]

In [141]:
# confirm that all fsar docs are accounted for and single counted - GOOD
# Comparing row counts alone can pass by accident (one pub number missing while
# another matches two rows), so compare the matched pub numbers themselves.
# Prints one True/False per list, in the order of fsar_pub_number_lists.
for fsar_list, doc_type in fsar_pub_number_lists:
    matched_pub_numbers = documents.loc[
        documents.pub_number.isin(fsar_list) & (documents.acronym_en == doc_type),
        'pub_number',
    ]
    print(
        matched_pub_numbers.is_unique                     # no pub number matched twice
        and len(set(fsar_list)) == len(fsar_list)         # no repeats typed into the list
        and set(matched_pub_numbers) == set(fsar_list)    # nothing missing, nothing extra
    )

True
True
True
True
True
True


In [142]:
# check that no doc sits on more than one list: total number of
# (doc type + pub number) keys vs number of distinct keys - BAD, they differ
fsar_doc_keys = [
    doc_type + pub_number
    for pub_numbers, doc_type in fsar_pub_number_lists
    for pub_number in pub_numbers
]
len(fsar_doc_keys), len(set(fsar_doc_keys))

(70, 46)

In [143]:
# every repeat traces back to the coordinators list, which also brings 2 docs
# that are not on the original lists
sorted(
    doc_type + pub_number
    for pub_numbers, doc_type in fsar_pub_number_lists
    for pub_number in pub_numbers
)

['SAR2024/009',
 'SAR2024/012',
 'SAR2024/016',
 'SAR2024/016',
 'SAR2024/020',
 'SAR2024/021',
 'SAR2024/021',
 'SAR2024/026',
 'SAR2024/026',
 'SAR2024/029',
 'SAR2024/029',
 'SAR2024/030',
 'SAR2024/030',
 'SAR2024/037',
 'SAR2024/037',
 'SAR2024/049',
 'SAR2024/049',
 'SAR2024/055',
 'SAR2024/055',
 'SAR2024/056',
 'SAR2024/057',
 'SAR2024/057',
 'SAR2024/061',
 'SAR2024/061',
 'SAR2025/007',
 'SAR2025/009',
 'SAR2025/009',
 'SAR2025/010',
 'SAR2025/011',
 'SAR2025/011',
 'SAR2025/012',
 'SAR2025/012',
 'SAR2025/013',
 'SAR2025/013',
 'SAR2025/014',
 'SAR2025/014',
 'SAR2025/017',
 'SAR2025/017',
 'SAR2025/018',
 'SAR2025/022',
 'SAR2025/024',
 'SAR2025/024',
 'SAR2025/026',
 'SAR2025/029',
 'SRR2024/008',
 'SRR2024/016',
 'SRR2024/017',
 'SRR2024/039',
 'SRR2024/039',
 'SRR2024/040',
 'SRR2024/040',
 'SRR2024/041',
 'SRR2025/001',
 'SRR2025/002',
 'SRR2025/002',
 'SRR2025/003',
 'SRR2025/008',
 'SRR2025/008',
 'SRR2025/009',
 'SRR2025/009',
 'SRR2025/010',
 'SRR2025/011',
 'SRR202

In [144]:
fsar_pub_number_lists

[[['2024/016', '2024/017', '2024/039', '2024/040', '2024/041'], 'SRR'],
 [['2024/012',
   '2024/016',
   '2024/020',
   '2024/009',
   '2024/021',
   '2024/026',
   '2024/030',
   '2024/029',
   '2024/037',
   '2024/049',
   '2024/055',
   '2024/057',
   '2024/061'],
  'SAR'],
 [['2025/001',
   '2025/002',
   '2025/003',
   '2025/008',
   '2025/009',
   '2025/010',
   '2025/011',
   '2025/012',
   '2025/015',
   '2025/016',
   '2025/019',
   '2025/020',
   '2025/021'],
  'SRR'],
 [['2025/007',
   '2025/009',
   '2025/010',
   '2025/011',
   '2025/012',
   '2025/013',
   '2025/014',
   '2025/017',
   '2025/018',
   '2025/022',
   '2025/024',
   '2025/026',
   '2025/029'],
  'SAR'],
 [['2024/008',
   '2025/002',
   '2025/016',
   '2025/015',
   '2024/039',
   '2024/040',
   '2025/008',
   '2025/009'],
  'SRR'],
 [['2024/016',
   '2024/061',
   '2024/049',
   '2024/021',
   '2025/017',
   '2025/011',
   '2025/012',
   '2025/013',
   '2025/009',
   '2025/014',
   '2025/024',
   '2024/026',

In [145]:
# flatten to one (doc type acronym, pub number) pair per list entry; repeats are kept
all_fsar_pub_numbers = [
    (doc_type, pub_number)
    for pub_numbers, doc_type in fsar_pub_number_lists
    for pub_number in pub_numbers
]

In [146]:
# resolve every (acronym, pub number) pair to its document id, echoing each match
fsar_doc_id = []

for doc_type, number in all_fsar_pub_numbers:
    is_match = (documents['acronym_en'] == doc_type) & (documents['pub_number'] == number)
    # the first matching row wins; a pair with no matching row raises IndexError here
    matched_id = int(documents.loc[is_match, 'id'].iloc[0])
    print(doc_type, number, matched_id)
    fsar_doc_id.append(matched_id)

# the pairs contain repeats, so collapse to the unique ids in ascending order
fsar_doc_id = sorted(set(fsar_doc_id))
len(fsar_doc_id)

SRR 2024/016 1230
SRR 2024/017 1229
SRR 2024/039 1346
SRR 2024/040 1347
SRR 2024/041 1348
SAR 2024/012 1123
SAR 2024/016 1211
SAR 2024/020 1187
SAR 2024/009 1172
SAR 2024/021 1274
SAR 2024/026 1213
SAR 2024/030 1250
SAR 2024/029 1251
SAR 2024/037 1272
SAR 2024/049 1255
SAR 2024/055 1326
SAR 2024/057 1316
SAR 2024/061 1270
SRR 2025/001 1359
SRR 2025/002 1373
SRR 2025/003 1302
SRR 2025/008 1378
SRR 2025/009 1379
SRR 2025/010 1377
SRR 2025/011 1376
SRR 2025/012 1380
SRR 2025/015 1446
SRR 2025/016 1444
SRR 2025/019 1417
SRR 2025/020 1382
SRR 2025/021 1303
SAR 2025/007 1352
SAR 2025/009 1419
SAR 2025/010 1398
SAR 2025/011 1416
SAR 2025/012 1425
SAR 2025/013 1424
SAR 2025/014 1445
SAR 2025/017 1415
SAR 2025/018 1355
SAR 2025/022 1426
SAR 2025/024 1466
SAR 2025/026 1407
SAR 2025/029 1458
SRR 2024/008 1202
SRR 2025/002 1373
SRR 2025/016 1444
SRR 2025/015 1446
SRR 2024/039 1346
SRR 2024/040 1347
SRR 2025/008 1378
SRR 2025/009 1379
SAR 2024/016 1211
SAR 2024/061 1270
SAR 2024/049 1255
SAR 2024/0

46

# processes from names and ids

In [147]:
process_lists = [fsar_pilots, fsar_coordinators_doc_titles_approx, fsar_coordinators_process_id]

In [148]:
# pilot image tab 1
# For each pilot, count the processes whose name contains the pilot name as a
# literal, case-sensitive substring and whose abbrev equals the paired one.
for name, abbrev in fsar_pilots:
    name_hit = processes['name'].str.contains(name, regex=False)
    abbrev_hit = processes['abbrev'] == abbrev
    hits = processes[name_hit & abbrev_hit]
    print(f'{abbrev:<10} {len(hits):<5} {name} ')

PAC        1     Pacific Ocean Perch 
PAC        0     Salmon (7 SMUs) 
Arctic     2     Dolly Varden 
Arctic     0     EAZ/WAZ Shrimp 
QC         0     4RST Capelin 
QC         0     Unit 1 & 2 Redfish 
MAR        0     Lobster LFA 35 
GULF       0     4TVn Atlantic Cod 
GULF       0     4T Spring and Fall Herring 
GULF       0     Gulf of St. Lawrence Snow Crab 
GULF       2     Southern Gulf of St. Lawrence Scallop 
NL         9     3Ps Atlantic Cod 
NL         0     Northern Shrimp SFA 4-6 
NL         7     Northern Cod 


In [149]:
# Manual database search by inspection, expanding acronyms and making a best guess.
#  -> these are not confirmed
#  Typically guessed the newest process that fits the criteria.
#  There are no meeting dates to confirm against.
# Each entry is (pilot name, abbrev, guessed process id); the names and abbrevs
# repeat fsar_pilots in the same order.
possible_pilot_process_id = [
    ('Pacific Ocean Perch', 'PAC', 176),
    ('Salmon (7 SMUs)', 'PAC', 241),
    ('Dolly Varden', 'Arctic', 628),
    ('EAZ/WAZ Shrimp', 'Arctic', 629),
    ('4RST Capelin', 'QC', 356),
    ('Unit 1 & 2 Redfish', 'QC', 804),
    ('Lobster LFA 35', 'MAR', 777),
    ('4TVn Atlantic Cod', 'GULF', 648),
    ('4T Spring and Fall Herring', 'GULF', 825),
    ('Gulf of St. Lawrence Snow Crab', 'GULF', 822),
    ('Southern Gulf of St. Lawrence Scallop', 'GULF', 99),
    ('3Ps Atlantic Cod', 'NL', 840),
    ('Northern Shrimp SFA 4-6', 'NL', 645),
    ('Northern Cod', 'NL', 795),
]

In [150]:
# coordinator tab entries that came without a pub number or process number
# Same literal, case-sensitive substring match on the process name, restricted
# to the paired abbrev; prints the number of matching processes per title.
for name, abbrev in fsar_coordinators_doc_titles_approx:
    is_candidate = (
        processes['name'].str.contains(name, regex=False)
        & (processes['abbrev'] == abbrev)
    )
    candidates = processes.loc[is_candidate]
    print(f'{abbrev:<10} {len(candidates):<5} {name} ')

Arctic     0     Great Slave Lake Inconnu (Stenodus leucichthys) Stock Assessment, 2022 
Arctic     0     Great Slave Lake Lake Whitefish Stock Assessment, 2022 
Arctic     0     Great Slave Lake Lake Trout Stock Assessment, 2022 
GULF       0     4T striped bass 
GULF       0     4T groundfish 
GULF       0     4T snow crab 
GULF       0     4T salmon 


In [151]:
# Best-guess process id for each coordinator title that came without a pub or process number.
# Each entry is (title, abbrev, guessed process id); the comments record how each guess was checked.
possible_coord_doc_name_to_process_id = [
    ('Great Slave Lake Inconnu (Stenodus leucichthys) Stock Assessment, 2022', 'Arctic', 627),  # 2022 seems wrong, maybe the request, maybe that applies to 741 as well?
        # no date to confirm against
    ('Great Slave Lake Lake Whitefish Stock Assessment, 2022', 'Arctic', 741),  # there are two candidates that look the same (251, 741)
    ('Great Slave Lake Lake Trout Stock Assessment, 2022', 'Arctic', 741),  # no idea which is correct (or if both are wrong)
        # both of the above are the same process, maybe 741?
        # 741 doesn't match the meeting dates (May 22-24 2024 vs expected May 14 2024)
        # 251 matches the meeting dates even less (May 14-15 2023 vs expected May 14 2024)
    ('4T striped bass', 'GULF', 746),  # assume 4T means southern Gulf of St. Lawrence, and that the most recent one is the FSAR
        # 746 matches one of the meeting dates (Dec 4 2024)
    ('4T groundfish', 'GULF', 745),
        # 745 matches one of the meeting dates (Dec 11 2024)
    ('4T snow crab', 'GULF', 743),
        # 743 matches one of the meeting dates (Jan 22-23 2025)
    ('4T salmon', 'GULF', 764),
        # 764 matches one of the meeting dates (Feb 27 2025)
]

In [152]:
# keep only the guessed process id (the last tuple field) from both best-guess
# tables, pilots first, then the coordinator titles
possible_process_id = [
    guess[-1]
    for guess in possible_pilot_process_id + possible_coord_doc_name_to_process_id
]

In [153]:
# pool the process ids given explicitly with the best-guess ones
all_process_id = [*fsar_coordinators_process_id, *possible_process_id]

# (total, distinct) - only a few duplicates
total_count, distinct_count = len(all_process_id), len(set(all_process_id))
total_count, distinct_count

(32, 29)

In [154]:
# ids of every document attached to one of the pooled process ids
linked_to_pooled_process = documents['process_id'].isin(all_process_id)
doc_ids_from_fsar_processes = documents.loc[linked_to_pooled_process, 'id'].to_list()

In [155]:
# doc ids resolved from pub numbers that are also attached to a pooled process
[doc_id for doc_id in fsar_doc_id if doc_id in doc_ids_from_fsar_processes]

[1123,
 1213,
 1229,
 1230,
 1272,
 1352,
 1376,
 1377,
 1380,
 1382,
 1398,
 1417,
 1426,
 1445,
 1458]

In [156]:
# docs attached to a pooled process that were NOT resolved from any pub number
[doc_id for doc_id in doc_ids_from_fsar_processes if doc_id not in fsar_doc_id]

[1422,
 1432,
 1441,
 1442,
 1443,
 1451,
 81,
 73,
 1080,
 1414,
 1429,
 1081,
 1292,
 1293,
 1294,
 1351,
 1413,
 1423,
 1430,
 1433,
 1452,
 1453,
 1454,
 74,
 75,
 114,
 1273,
 1124,
 1388,
 1427,
 1082,
 72,
 94,
 1397]

In [157]:
# doc ids resolved from pub numbers whose process is NOT among the pooled process ids
[doc_id for doc_id in fsar_doc_id if doc_id not in doc_ids_from_fsar_processes]

[1172,
 1187,
 1202,
 1211,
 1250,
 1251,
 1255,
 1270,
 1274,
 1302,
 1303,
 1316,
 1317,
 1326,
 1346,
 1347,
 1348,
 1355,
 1359,
 1373,
 1378,
 1379,
 1407,
 1415,
 1416,
 1419,
 1424,
 1425,
 1444,
 1446,
 1466]

# documents linked to FSAR processes

In [158]:
# full document rows (not just the ids) for the pooled process ids
in_pooled_process = documents['process_id'].isin(all_process_id)
docs_from_fsar_processes = documents.loc[in_pooled_process]

In [159]:
# all of the SAR?
# none of the PRO/RES?
# all of the SRR?
docs_from_fsar_processes['acronym_en'].value_counts()

acronym_en
SAR    19
RES    19
SRR     8
PRO     3
Name: count, dtype: int64

In [160]:
# print the type acronym and English title of every document attached to the pooled processes
for acronym, title in zip(
    docs_from_fsar_processes['acronym_en'],
    docs_from_fsar_processes['title_en'],
):
    print(acronym, title)

SAR Stock Assessment for Striped Shrimp (Pandalus montagui) in Shrimp Fishing Area 4, the Eastern Assessment Zone, and the Western Assessment Zone for the 2025-26 fishing season
SAR Stock Assessment for Northern Shrimp (Pandalus borealis) in the North and South Stock Assessment Regions for the 2025-26 fishing season
SAR STOCK STATUS UPDATE OF LOBSTER (HOMARUS AMERICANUS) IN LOBSTER FISHING AREA 35 FOR 2024
SAR STOCK STATUS UPDATE OF LOBSTER (HOMARUS AMERICANUS) IN LOBSTER FISHING AREA 36 FOR 2024
SAR STOCK STATUS UPDATE OF LOBSTER (HOMARUS AMERICANUS) IN LOBSTER FISHING AREA 38 FOR 2024
SAR NAFO Divisions 2J3KL Northern Cod (Gadus morhua) Stock Assessment to 2025
SAR Assessment of the Estuary and Gulf of St. Lawrence (Divisions 4RST) Capelin Stock in 2021
SAR Methodologies and Guidelines for Defining Limit Reference Points for Pacific Salmon
SAR Assessment of Northern Shrimp (Pandalus borealis) in Shrimp Fishing Areas 4-6 in 2021
SAR Pacific Ocean Perch (Sebastes alutus) Stock Assessme

# all FSAR

In [161]:
# independent copy of the document rows whose id was resolved from a pub number,
# re-indexed from 0
is_fsar_doc = documents['id'].isin(fsar_doc_id)
fsar_docs = documents.loc[is_fsar_doc].copy().reset_index(drop=True)

In [162]:
fsar_docs

Unnamed: 0,id,created_at,updated_at,title_en,title_fr,title_iku,pub_number,pages_en,status,old_id,created_by_id,process_id,updated_by_id,url_en,url_fr,dev_link_en,dev_link_fr,ekme_gcdocs_en,ekme_gcdocs_fr,cat_number_en,cat_number_fr,document_type_id,translation_status,pub_number_request_date,due_date,is_confirmed,lead_office_id,pages_fr,pdf_size_kb_en,pdf_size_kb_fr,cat_number_iku,library_link_en,library_link_fr,library_link_iku,pdf_size_kb_iku,isbn_en,isbn_fr,isbn_iku,pages_iku,ekme_gcdocs_iku,pub_number_assigned_date,posting_notification_sent_date,has_data_links,has_third_language,third_language,urgency_notes,urgent,media_attention,sharepoint_archive_en,sharepoint_archive_fr,theme_id,id_1,name,nom,hide_from_list,days_due,acronym_en,acronym_fr,deprecated
0,1172,2023-12-18 14:41:37.556112,2025-05-29 13:37:19.743523,2022 Assessment of Atlantic Halibut on the Scotian Shelf and Southern Grand Banks (NAFO Divisions 3NOPS4VWX5Zc),Évaluation du flétan de l’Atlantique du plateau néo-écossais et du sud des Grands Bancs (divisions 3NOPs4VWX5Zc de l’OPANO) pour 2022,,2024/009,21.0,12,,2889,44,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_009-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_009-fra.html,http://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_009-eng.html,http://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_009-fra.html,4356110.0,4356111.0,Fs70-6/2024-009E-PDF,Fs70-6/2024-009F-PDF,2,3,NaT,NaT,1,2,22.0,2262.0,1147.0,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41230425.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41230437.pdf,,,978-0-660-69772-7,978-0-660-69773-4,,,,2024-01-18 04:00:00,2024-02-15 19:40:06.498926,,,Inuktitut,,,,,,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0
1,1123,2023-11-14 20:49:53.456915,2025-05-29 13:37:58.595949,Pacific Ocean Perch (Sebastes alutus) Stock Assessment for British Columbia in 2023,Évaluation des stocks de sébaste à longue mâchoire (Sebastes alutus) de la Colombie-Britannique en 2023,,2024/012,16.0,12,,2483,176,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_012-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_012-fra.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_012-eng.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_012-fra.html,4357885.0,4357886.0,Fs70-6/2024-012E-PDF,Fs70-6/2024-012F-PDF,2,1,NaT,2023-11-07 12:00:00,1,5,19.0,2108.0,2185.0,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41234911.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41234935.pdf,,,978-0-660-70159-2,978-0-660-70160-8,,,,2024-02-02 04:00:00,2024-03-07 19:26:48.957638,,,Inuktitut,,,,,,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0
2,1211,2024-02-20 16:12:23.482459,2025-05-29 13:38:01.218386,NAFO Subdivision 3Ps Atlantic cod (Gadus morhua) Stock Assessment in 2023,Évaluation du stock de morue franche (Gadus morhua) de la sous-division 3Ps de l’OPANO en 2023,,2024/016,13.0,12,,2547,654,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_016-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_016-fra.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_016-eng.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_016-fra.html,4360009.0,4360011.0,Fs70-6/2024-016E-PDF,Fs70-6/2024-016F-PDF,2,0,NaT,NaT,1,6,14.0,684.0,895.0,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41234959.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41234984.pdf,,,978-0-660-70779-2,978-0-660-70780-8,,,,2024-02-20 04:00:00,2024-03-14 17:39:12.422518,,,Inuktitut,,,,,,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0
3,1187,2024-01-24 18:07:39.109829,2025-05-29 13:38:14.629604,"Southern Gulf of St. Lawrence (CFAs 12, 12E, 12F, 19) Snow Crab (Chionoecetes opilio) Stock Assessment in 2023","Évaluation du stock de crabe des neiges (Chionoecetes opilio) en 2023 dans le sud du golfe du Saint-Laurent (ZPC 12, 12E, 12F, 19)",,2024/020,10.0,12,,292,88,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_020-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_020-fra.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_020-eng.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_020-fra.html,4362452.0,4362454.0,Fs70-6/2024-020E-PDF,Fs70-6/2024-020F-PDF,2,3,NaT,2024-05-25 15:00:00,1,1,11.0,677.0,578.0,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41240091.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41240108.pdf,,,978-0-660-70789-1,978-0-660-70790-7,,,,2024-03-11 03:00:00,2024-04-11 17:15:29.102156,,,Inuktitut,,,,,,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0
4,1274,2024-04-02 18:19:27.757792,2025-05-29 13:38:13.208822,Assessment of the Estuary and Gulf of St. Lawrence (Divisions 4RST) Capelin (Mallotus villosus) Stock in 2022 and 2023,Évaluation du stock de capelan (Mallotus villosus) de l’estuaire et du golfe du Saint-Laurent (divisions 4RST) en 2022 et 2023,,2024/021,10.0,12,,1045,690,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_021-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_021-fra.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_021-eng.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_021-fra.html,,,Fs70-6/2024-021E-PDF,Fs70-6/2024-021F-PDF,2,1,2024-04-02 18:21:53.929719,2024-04-24 15:00:00,1,3,10.0,442.0,469.0,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41260831.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41260843.pdf,,,978-0-660-73120-9,978-0-660-73121-6,,,,2023-11-30 04:00:00,2024-09-05 17:34:17.580719,0.0,0.0,Inuktitut,,0.0,1.0,https://086gc.sharepoint.com/:w:/r/sites/CSASWebandPubTeam/Shared%20Documents/Document%20Archive/QC/SAR-AS2024-021%20QC%20English%20(Erratum).docx?d=w0c276da6b8ca4bb4b6e6c14453fc207f&csf=1&web=1&e...,https://086gc.sharepoint.com/:w:/r/sites/CSASWebandPubTeam/Shared%20Documents/Document%20Archive/QC/SAR-AS2024-021%20QC%20French%20(Erratum).docx?d=w7edc575f9af34138b353b2518fd8dfae&csf=1&web=1&e=...,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0
5,1213,2024-02-21 18:47:31.362533,2025-05-29 13:38:12.315748,"Southern Gulf of St. Lawrence, NAFO Division 4T-4Vn (November-April), Atlantic cod (Gadus morhua) Stock Assessment to 2023","Évaluation du stock de morue franche (Gadus morhua) jusqu'en 2023 dans le sud du golfe du Saint-Laurent, zone 4T-4Vn (novembre-avril) de l’OPANO",,2024/026,11.0,12,,292,648,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_026-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_026-fra.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_026-eng.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_026-fra.html,4367228.0,4367230.0,Fs70-6/2024-026E-PDF,Fs70-6/2024-026F-PDF,2,0,NaT,2024-04-22 15:00:00,1,1,11.0,460.0,505.0,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41243833.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41243882.pdf,,,978-0-660-71472-1,978-0-660-71473-8,,,,2024-04-16 03:00:00,2024-05-16 13:17:09.291725,,,Inuktitut,,,,,,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0
6,1251,2024-03-19 15:57:28.403384,2025-05-29 13:38:13.501800,Southern Gulf of St. Lawrence (NAFO Division 4TVn) Spring Spawning Atlantic Herring (Clupea harengus) Stock Assessment to 2023,Évaluation de la composante reproductrice de printemps du hareng de l’Atlantique (Clupea harengus) dans le sud du Golfe du Saint-Laurent (Division OPANO 4TVn) jusqu’en 2023,,2024/029,12.0,12,,292,101,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_029-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_029-fra.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_029-eng.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_029-fra.html,4371958.0,4371960.0,Fs70-6/2024-029E-PDF,Fs70-6/2024-029F-PDF,2,3,NaT,2024-05-16 15:00:00,1,1,14.0,667.0,704.0,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41249343.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41249355.pdf,,,978-0-660-71610-7,978-0-660-71611-4,,,,2024-04-25 03:00:00,2024-06-03 16:19:25.668322,,,Inuktitut,,,,,,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0
7,1250,2024-03-19 15:56:14.268819,2025-05-29 13:38:13.402606,Southern Gulf of St. Lawrence (NAFO Division 4TVn) Fall Spawning Atlantic Herring (Clupea harengus) Stock Assessment to 2023,Évaluation de la composante reproductrice d’automne du hareng de l’Atlantique (Clupea harengus) dans le sud du Golfe du Saint-Laurent (Division OPANO 4TVn) jusqu’en 2023,,2024/030,15.0,12,,292,101,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_030-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_030-fra.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_030-eng.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_030-fra.html,4372397.0,4372400.0,Fs70-6/2024-030E-PDF,Fs70-6/2024-030F-PDF,2,3,NaT,2024-05-16 15:00:00,1,1,16.0,996.0,953.0,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41249276.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41249331.pdf,,,978-0-660-71612-1,978-0-660-71613-8,,,,2024-04-25 03:00:00,2024-06-03 16:21:53.606868,,,Inuktitut,,,,,,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0
8,1272,2024-04-02 13:58:18.374630,2025-05-29 13:38:12.214960,"Southern Gulf of St. Lawrence, NAFO Division 4T, Sea Scallop (Placopecten magellanicus) Stock Assessment to 2023","Évaluation du stock de pétoncle (Placopecten magellanicus) jusqu’en 2023 dans le sud du golfe du Saint-Laurent, zone 4T de l’OPANO",,2024/037,9.0,12,,292,99,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_037-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_037-fra.html,http://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_037-eng.html,http://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_037-fra.html,4373621.0,4373623.0,Fs70-6/2024-037E-PDF,Fs70-6/2024-037F-PDF,2,1,NaT,2024-05-28 15:00:00,1,1,9.0,537.0,599.0,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41250163.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41250175.pdf,,,978-0-660-71858-3,978-0-660-71859-0,,,,2024-05-07 03:00:00,2024-06-13 15:49:36.341992,,,Inuktitut,,,,,,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0
9,1255,2024-03-21 17:24:47.614717,2025-05-29 13:38:00.721609,NAFO Divisions 2J3KL Northern Cod (Gadus morhua) Stock Assessment to 2024,Évaluation des stocks de morue du nord (Gadus morhua) dans les divisions 2J3KL de l’opano jusqu’en 2024,,2024/049,12.0,12,,2260,658,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_049-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_049-fra.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_049-eng.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_049-fra.html,,,Fs70-6/2024-049E-PDF,Fs70-6/2024-049F-PDF,2,0,NaT,NaT,1,6,14.0,505.0,698.0,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41260946.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41260958.pdf,,,978-0-660-73280-0,978-0-660-73440-8,,,,2024-08-21 03:00:00,2024-09-12 16:04:49.541984,0.0,0.0,Inuktitut,,0.0,1.0,https://086gc.sharepoint.com/:w:/r/sites/CSASWebandPubTeam/Shared%20Documents/Document%20Archive/NL/SAR-AS2024-049%20NL%20English.docx?d=w0314b443eb5e4043a459618b2472c661&csf=1&web=1&e=fdgDXf,https://086gc.sharepoint.com/:w:/r/sites/CSASWebandPubTeam/Shared%20Documents/Document%20Archive/NL/SAR-AS2024-049%20NL%20French.docx?d=w360e4d86146e45e98eaeddb8aeebb983&csf=1&web=1&e=ofb8EM,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0


In [163]:
# check if any confirmed FSAR docs have SAR or SRR in their process that are not FSAR 
#  ie, check that the two values are the same
fsar_docs.shape[0]

46

In [164]:
# distinct ids of the processes (present in the processes frame) that own at least one FSAR doc
fsar_doc_process_ids = fsar_docs['process_id'].to_list()
fsar_process_id_confirmed = set(
    processes.loc[processes['id'].isin(fsar_doc_process_ids), 'id'].to_list()
)

In [165]:
fsar_process_id_confirmed

{44,
 88,
 96,
 99,
 101,
 176,
 629,
 648,
 654,
 658,
 663,
 670,
 690,
 693,
 701,
 713,
 715,
 717,
 743,
 744,
 745,
 746,
 750,
 761,
 762,
 764,
 765,
 770,
 780,
 781,
 782,
 784,
 793,
 800,
 801,
 802,
 804,
 805,
 806,
 807,
 809,
 812,
 814}

In [166]:
# how many SAR/SRR documents belong to the confirmed FSAR processes?
in_confirmed_process = documents['process_id'].isin(fsar_process_id_confirmed)
is_sar_or_srr = documents['acronym_en'].isin(["SAR", "SRR"])
len(documents[in_confirmed_process & is_sar_or_srr])
# SOOOO CLOSE!!!

47

In [167]:
# the SAR/SRR documents in confirmed FSAR processes that are NOT among the FSAR doc ids
in_confirmed_process = documents['process_id'].isin(fsar_process_id_confirmed)
is_sar_or_srr = documents['acronym_en'].isin(["SAR", "SRR"])
already_counted = documents['id'].isin(fsar_doc_id)
documents.loc[in_confirmed_process & is_sar_or_srr & ~already_counted, doc_columns]

Unnamed: 0,id,title_en,pub_number,process_id,acronym_en
318,1327,"Science Advice to Support the Rebuilding Plan for Southern Gulf of St. Lawrence, NAFO Division 4T, White Hake (Urophycis tenuis)",2024/054,744,SAR


In [168]:
# columns shown when inspecting a process
process_columns = ['id', 'name', 'scope', 'type', 'abbrev']
# process 744 is the process_id of the one extra SAR found above
processes.loc[processes['id'] == 744, process_columns]

Unnamed: 0,id,name,scope,type,abbrev
41,744,"Southern Gulf of St. Lawrence, NAFO Division 4T, White Hake (Urophycis tenuis) Stock Assessment and Science Advice to Support the Rebuilding Plan",1,1,GULF


In [169]:
# (integer code, label) pairs.
# NOTE(review): presumably these decode the integer `scope` and `type` columns of the
# processes frame and were copied from the app's model choices; no other cell visible
# here references them - confirm before relying on them.
process_scope_choices = (
    (1, 'Regional'),
    (2, 'Zonal'),
    (3, 'National'),
    (4, 'Multi-Regional'),
)

process_type_choices = (
    (1, 'Science Advisory Meeting'),
    (2, 'Science Response Process'),
)

In [170]:
# check all the docs from this process
documents.loc[documents.process_id == 744, doc_columns]

Unnamed: 0,id,title_en,pub_number,process_id,acronym_en
318,1327,"Science Advice to Support the Rebuilding Plan for Southern Gulf of St. Lawrence, NAFO Division 4T, White Hake (Urophycis tenuis)",2024/054,744,SAR
319,1326,"Southern Gulf of St. Lawrence, NAFO Division 4T, White Hake (Urophycis tenuis) Stock Assessment to 2022",2024/055,744,SAR
538,1328,"Mitigating Bycatch of the Southern Gulf of St. Lawrence, NAFO Division 4T, of White Hake (Urophycis tenuis)",,744,RES
955,1329,"Southern Gulf of St. Lawrence, NAFO Division 4T, White Hake (Urophycis tenuis): Stock Assessment to 2022 and Rebuilding Plan Scientific Requirements",2025/008,744,RES


In [171]:
fsar_docs.loc[fsar_docs.process_id == 744, doc_columns]

Unnamed: 0,id,title_en,pub_number,process_id,acronym_en
10,1326,"Southern Gulf of St. Lawrence, NAFO Division 4T, White Hake (Urophycis tenuis) Stock Assessment to 2022",2024/055,744,SAR


In [172]:
# is this an FSAR?
documents.loc[documents.id == 1327]

Unnamed: 0,id,created_at,updated_at,title_en,title_fr,title_iku,pub_number,pages_en,status,old_id,created_by_id,process_id,updated_by_id,url_en,url_fr,dev_link_en,dev_link_fr,ekme_gcdocs_en,ekme_gcdocs_fr,cat_number_en,cat_number_fr,document_type_id,translation_status,pub_number_request_date,due_date,is_confirmed,lead_office_id,pages_fr,pdf_size_kb_en,pdf_size_kb_fr,cat_number_iku,library_link_en,library_link_fr,library_link_iku,pdf_size_kb_iku,isbn_en,isbn_fr,isbn_iku,pages_iku,ekme_gcdocs_iku,pub_number_assigned_date,posting_notification_sent_date,has_data_links,has_third_language,third_language,urgency_notes,urgent,media_attention,sharepoint_archive_en,sharepoint_archive_fr,theme_id,id_1,name,nom,hide_from_list,days_due,acronym_en,acronym_fr,deprecated
318,1327,2024-08-21 17:50:44.205456,2025-05-29 13:38:28.829646,"Science Advice to Support the Rebuilding Plan for Southern Gulf of St. Lawrence, NAFO Division 4T, White Hake (Urophycis tenuis)","Avis scientifique à l'appui du plan de rétablissement du stock de merluche blanche (Urophycis tenuis) dans le sud du golfe du Saint-Laurent, zone 4T de l’OPANO",,2024/054,12,12,,292,744,2359,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_054-eng.html,https://www.dfo-mpo.gc.ca/csas-sccs/Publications/SAR-AS/2024/2024_054-fra.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_054-eng.html,https://wwwdev.ncr.dfo-mpo.ca/csas-sccs/Publications/SAR-AS/2024/2024_054-fra.html,,,Fs70-6/2024-054E-PDF,Fs70-6/2024-054F-PDF,2,3,NaT,2024-10-17 15:00:00,1,1,13,898,646,,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41266079.pdf,https://waves-vagues.dfo-mpo.gc.ca/library-bibliotheque/41266080.pdf,,,978-0-660-73635-8,978-0-660-73636-5,,,,2024-09-11 03:00:00,2024-10-17 17:37:38.821131,1,0,Inuktitut,,0,1,https://086gc.sharepoint.com/:w:/r/sites/CSASWebandPubTeam/Shared%20Documents/Document%20Archive/GLF/SAR-AS2024-054%20GLF%20English.docx?d=wb5f438f8729a4f36b2acd0a93b9445bf&csf=1&web=1&e=mWKuvO,https://086gc.sharepoint.com/:w:/r/sites/CSASWebandPubTeam/Shared%20Documents/Document%20Archive/GLF/SAR-AS2024-054%20GLF%20French.docx?d=wb1664457e08c4b11abafbd1af96788a8&csf=1&web=1&e=VXDBfn,,2,Science Advisory Report,Avis scientifique,0,56,SAR,AS,0


# how many of each type of doc?

In [173]:
fsar_docs['acronym_en'].value_counts()

acronym_en
SAR    27
SRR    19
Name: count, dtype: int64

In [174]:
fsar_docs[fsar_docs.id == 1446].T

Unnamed: 0,41
id,1446
created_at,2025-03-20 20:45:48.349587
updated_at,2025-05-29 13:38:27.200263
title_en,Quebec North Shore (4S) Herring (Clupea harengus) Stocks Update in 2024
title_fr,Mise à jour de l'état des stocks de hareng (Clupea harengus) de la Côte-Nord du Québec (4S) en 2024
title_iku,
pub_number,2025/015
pages_en,10
status,12
old_id,


# check other documents

In [None]:
# NOTE: this is already too messy to justify the method we implemented in the app. 
# I'm going to leave this analysis, at least for now, and fix the app...

