In [1]:
import atn_nc_to_dwc

## Create mapping between NCEI accessions and ATN deployments

In [2]:
# Build the NCEI accession mapping once; `df_map` is later passed to
# atn_nc_to_dwc.convert_to_dwc_individual() in the conversion cell.
df_map = atn_nc_to_dwc.get_ncei_accession_mapping()

Fetching NCEI accession mapping table...
Successfully built mapping for 289 files.


## Download all available ATN netCDF files from NCEI

In [13]:
# --- Main execution block ---
# Remote directory listing to start from, and the local base directory that
# receives everything that is downloaded.
start_url = "https://www.ncei.noaa.gov/data/oceans/ioos/atn/"
download_directory = "data/src/"

# Banner: one print call, one line per argument. The trailing "\n" on the last
# argument leaves a blank line before the download log starts.
print(
    "--- Starting Recursive Download ---",
    f"Source URL: {start_url}",
    f"Local Directory: {download_directory}\n",
    sep="\n",
)

atn_nc_to_dwc.recursive_wget(start_url, download_directory)

print("\n--- Recursive Download Finished ---")

--- Starting Recursive Download ---
Source URL: https://www.ncei.noaa.gov/data/oceans/ioos/atn/
Local Directory: data/src/

Accessing: https://www.ncei.noaa.gov/data/oceans/ioos/atn/
  Downloading file: ACCESSION_UPDATE_LOG.TXT

Entering directory: https://www.ncei.noaa.gov/data/oceans/ioos/atn/california_state_university_long_beach/
Accessing: https://www.ncei.noaa.gov/data/oceans/ioos/atn/california_state_university_long_beach/
  Downloading file: atn_45866_great-white-shark_trajectory_20090923-20091123.nc
  Downloading file: atn_45869_great-white-shark_trajectory_20090923-20091213.nc

Entering directory: https://www.ncei.noaa.gov/data/oceans/ioos/atn/cascadia_research_collective/
Accessing: https://www.ncei.noaa.gov/data/oceans/ioos/atn/cascadia_research_collective/
  Downloading file: atn_53631_false-killer-whale_trajectory_20100927-20101001.nc
  Downloading file: atn_53644_false-killer-whale_trajectory_20100927-20101118.nc
  Downloading file: atn_53652_false-killer-whale_trajector

## Create the Darwin Core Archive packages

In [7]:
import glob
import os

import atn_nc_to_dwc

# Collect the downloaded netCDF files that sit directly under data/src.
# The pattern is assembled with os.path.join so it uses the running platform's
# separator; the previous hard-coded 'data\\src\\*.nc' only matched on Windows.
local_files = glob.glob(os.path.join("data", "src", "*.nc"))  # add [:10] for a quick trial run

# For testing, restrict the run to a few known files instead:
# local_files = [
#     os.path.join("data", "src", "atn_137491_spotted-seal_trajectory_20180418-20180526.nc"),
#     os.path.join("data", "src", "atn_137494_ribbon-seal_trajectory_20140426-20140426.nc"),
#     os.path.join("data", "src", "atn_38553_bearded-seal_trajectory_20110618-20120314.nc"),
# ]

# Convert the downloaded files to individual Darwin Core CSVs
# (`df_map` comes from the accession-mapping cell above).
atn_nc_to_dwc.convert_to_dwc_individual(local_files, df_map, output_dir="data/dwc")



--- 2. Starting Darwin Core Conversion (Individual Files) ---
Processing atn_131373_ribbon-seal_trajectory_20140428-20141213.nc...
Found 1215 records.
  Extracted 389 occurrences with valid locations.
  Extracted 179 occurrences to first row in hour.
  Saved data to 'data\dwc\atn_131373_ribbon-seal_trajectory_20140428-20141213\atn_131373_ribbon-seal_trajectory_20140428-20141213_occurrence.csv'
  Created 1 events.
  Saved data to data\dwc\atn_131373_ribbon-seal_trajectory_20140428-20141213\atn_131373_ribbon-seal_trajectory_20140428-20141213_event.csv
  Created 6 emofs.
  Saved data to data\dwc\atn_131373_ribbon-seal_trajectory_20140428-20141213\atn_131373_ribbon-seal_trajectory_20140428-20141213_emof.csv
  Meta XML has been written to 'c:\Users\Mathew.Biddle\Documents\GitProjects\atn2obis\atn2obis\data\dwc\atn_131373_ribbon-seal_trajectory_20140428-20141213\meta.xml'.
  EML metadata has been written to 'c:\Users\Mathew.Biddle\Documents\GitProjects\atn2obis\atn2obis\data\dwc\atn_131373_

## Test push data and metadata to the IPT

In [12]:
import os

from dotenv import dotenv_values

# IPT credentials are read from a local .env file rather than written in the notebook.
config = dotenv_values(".env")

ipt_auth = {
    'email': config['IPT_ADMIN_EMAIL'],
    'password': config['IPT_PASSWORD'],
}

ipt_url = 'https://ipt-obis.gbif.us/'

ipt_session = atn_nc_to_dwc.open_ipt_session(ipt_auth, ipt_url)

# This test cell only pushes the package(s) whose file name contains "38553".
packages = glob.glob('data/dwc/*/*38553*.zip')

# for testing
# packages = ['data/dwc/atn_38553_bearded-seal_trajectory_20110618-20120314/atn_38553_bearded-seal_trajectory_20110618-20120314.zip',
#             'data/dwc/atn_137491_spotted-seal_trajectory_20180418-20180526/atn_137491_spotted-seal_trajectory_20180418-20180526.zip',
#             'data/dwc/atn_137494_ribbon-seal_trajectory_20140426-20140426/atn_137494_ribbon-seal_trajectory_20140426-20140426.zip']


def _ask_yes_no(prompt):
    """Show `prompt` via input() and return True when the reply is 'y' or 'yes' (any case)."""
    return input(prompt).lower() in ['y', 'yes']


for filepath in packages:

    # Project name = archive file name without its ".zip" extension.
    # os.path is used instead of splitting on "\\" so the name is right for
    # forward-slash paths too (e.g. the hard-coded test list above, or any
    # non-Windows machine), where the old split returned the whole path.
    projname = os.path.splitext(os.path.basename(filepath))[0]

    # Create/refresh IPT project
    if atn_nc_to_dwc.check_if_project_exists(projname, ipt_url, ipt_session):
        if _ask_yes_no(f"Project {projname} already exists. Do you want to refresh it? (y/n): "):
            atn_nc_to_dwc.refresh_ipt_project_files(projname, filepath, ipt_url, ipt_session)
            # The EML file is expected next to the archive, in the same folder.
            eml_file = os.path.join(os.path.dirname(filepath), "eml.xml")
            atn_nc_to_dwc.refresh_ipt_project_metadata(projname, eml_file, ipt_url, ipt_session)
    else:
        if not _ask_yes_no(f"Project {projname} does not already exist. Do you want to create it? (y/n): "):
            # The project is not on the IPT and will not be created, so there
            # is nothing to configure or publish for this package.
            continue
        print(f"Creating new IPT project: {projname}")
        atn_nc_to_dwc.create_new_ipt_project(projname, filepath, ipt_url, ipt_session)

    new_publishing_org_name = "NOAA Integrated Ocean Observing System"
    if _ask_yes_no(f"Do you want to change the publishing org to {new_publishing_org_name}? (y/n): "):
        print(f"Changing publishing organization to: {new_publishing_org_name}")
        atn_nc_to_dwc.change_publishing_org_ipt_project(projname, ipt_url, ipt_session, new_publishing_org_name)

    if _ask_yes_no("Do you want to make the IPT project files public? (y/n): "):
        print(f"Making public IPT project: {projname}")
        atn_nc_to_dwc.make_public_ipt_project(projname, ipt_url, ipt_session) # this will make discoverable via OBIS due to rss feed harvest.

    if _ask_yes_no("Do you want to register the IPT project with GBIF? (y/n): "):
        print(f"Registering IPT project: {projname}")
        atn_nc_to_dwc.register_ipt_project(projname, ipt_url, ipt_session)

    if _ask_yes_no("Do you want to publish the IPT project files? (y/n): "):
        publishing_notes = "Published using the atn2obis publishing script."
        print(f"Publishing IPT project: {projname}")
        atn_nc_to_dwc.publish_ipt_project(projname, ipt_url, ipt_session, publishing_notes)

    ## TODO add OBIS as a network


Found existing project by name: 'atn_38553_bearded-seal_trajectory_20110618-20120314'
Publication successful
Changing publishing organization to: NOAA Integrated Ocean Observing System
Making public IPT project: atn_38553_bearded-seal_trajectory_20110618-20120314
Registering IPT project: atn_38553_bearded-seal_trajectory_20110618-20120314
Publishing IPT project: atn_38553_bearded-seal_trajectory_20110618-20120314


In [6]:
# Scratch check: display the part of the current `projname` after its last backslash.
projname.rsplit('\\', 1)[-1]

'atn_137490_spotted-seal_trajectory_20160414-20160414'

## Bulk push to OBIS-USA IPT

In [None]:
import os

from dotenv import dotenv_values

# IPT credentials are read from a local .env file rather than written in the notebook.
config = dotenv_values(".env")

ipt_auth = {
    'email': config['IPT_ADMIN_EMAIL'],
    'password': config['IPT_PASSWORD'],
}

ipt_url = 'https://ipt-obis.gbif.us/'

ipt_session = atn_nc_to_dwc.open_ipt_session(ipt_auth, ipt_url)

# Network identifiers handed to add_network_ipt_project(), in the order they are added.
NETWORK_IDS = (
    "2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6",  # OBIS
    "ab013f3a-3c00-42cb-9fdb-cb5f4ba20a4b",  # biologging
)

# packages = ['data/dwc/atn_38553_bearded-seal_trajectory_20110618-20120314/atn_38553_bearded-seal_trajectory_20110618-20120314.zip',
#             'data/dwc/atn_137491_spotted-seal_trajectory_20180418-20180526/atn_137491_spotted-seal_trajectory_20180418-20180526.zip',
#             'data/dwc/atn_137494_ribbon-seal_trajectory_20140426-20140426/atn_137494_ribbon-seal_trajectory_20140426-20140426.zip']

packages = glob.glob('data/dwc/*/*.zip')

for filepath in packages:

    # Project name = archive file name without its ".zip" extension.
    # os.path is used instead of splitting on "\\" so forward-slash paths
    # (the hard-coded list above, or a non-Windows machine) also work.
    projname = os.path.splitext(os.path.basename(filepath))[0]

    # Create/refresh IPT project
    if atn_nc_to_dwc.check_if_project_exists(projname, ipt_url, ipt_session):
        ans = input(f"Project {projname} already exists. Do you want to refresh it? (y/n): ")
        if ans.lower() not in ['y', 'yes']:
            continue

        # NOTE(review): as in the original flow, answering "no" to the metadata
        # prompt skips the files prompt too, and answering "no" to the files
        # prompt skips publishing even if the metadata was just refreshed.
        # Confirm this is intended before relying on it.
        ans = input("Do you want to refresh the metadata? (y/n): ")
        if ans.lower() not in ['y', 'yes']:
            continue
        # The EML file is expected next to the archive, in the same folder.
        eml_file = os.path.join(os.path.dirname(filepath), "eml.xml")
        atn_nc_to_dwc.refresh_ipt_project_metadata(projname, eml_file, ipt_url, ipt_session)

        ans = input("Do you want to refresh the project files? (y/n): (THIS UPLOADS EVERYTHING AS SOURCE DATA)")
        if ans.lower() not in ['y', 'yes']:
            continue
        atn_nc_to_dwc.refresh_ipt_project_files(projname, filepath, ipt_url, ipt_session)

    else:
        # New projects are created without prompting in bulk mode.
        print(f"Creating new IPT project: {projname}: {ipt_url}resource?r={projname}")
        atn_nc_to_dwc.create_new_ipt_project(projname, filepath, ipt_url, ipt_session)

    new_publishing_org_name = "NOAA Integrated Ocean Observing System"
    print(f"   Changing publishing organization to: {new_publishing_org_name}")
    atn_nc_to_dwc.change_publishing_org_ipt_project(projname, ipt_url, ipt_session, new_publishing_org_name)

    print(f"   Making public IPT project: {projname}")
    atn_nc_to_dwc.make_public_ipt_project(projname, ipt_url, ipt_session) # this will make discoverable via OBIS due to rss feed harvest.

    # The log indent belongs on the printed line, not on the note that is sent
    # to the IPT (the note previously started with three stray spaces).
    publishing_notes = "Published using the atn2obis publishing script."
    print(f"   Publishing IPT project: {projname}: {publishing_notes}")
    atn_nc_to_dwc.publish_ipt_project(projname, ipt_url, ipt_session, publishing_notes)

    print(f"   Registering IPT project: {projname}")
    publishing_notes = "Registering with GBIF."
    atn_nc_to_dwc.register_ipt_project(projname, ipt_url, ipt_session)
    print(f"   Publishing IPT project: {projname}: {publishing_notes}")
    atn_nc_to_dwc.publish_ipt_project(projname, ipt_url, ipt_session, publishing_notes)

    print(f"   Adding IPT project to OBIS and biologging Networks: {projname}")
    # `network_id` rather than `id`, so the builtin id() is not shadowed.
    for network_id in NETWORK_IDS:
        atn_nc_to_dwc.add_network_ipt_project(projname, ipt_url, ipt_session, network_id)
    publishing_notes = "Added to OBIS and biologging network"
    atn_nc_to_dwc.publish_ipt_project(projname, ipt_url, ipt_session, publishing_notes)



Found existing project by name: 'atn_131373_ribbon-seal_trajectory_20140428-20141213'
Found existing project by name: 'atn_137487_ribbon-seal_trajectory_20140412-20140413'
Found existing project by name: 'atn_137490_spotted-seal_trajectory_20160414-20160414'
Found existing project by name: 'atn_137491_spotted-seal_trajectory_20180418-20180526'
Found existing project by name: 'atn_137494_ribbon-seal_trajectory_20140426-20140426'
No existing repository by this name: 'atn_137495_ribbon-seal_trajectory_20140426-20140427'
Creating new IPT project: atn_137495_ribbon-seal_trajectory_20140426-20140427: https://ipt-obis.gbif.us/resource?r=atn_137495_ribbon-seal_trajectory_20140426-20140427
data/dwc\atn_137495_ribbon-seal_trajectory_20140426-20140427 atn_137495_ribbon-seal_trajectory_20140426-20140427.zip
data/dwc\atn_137495_ribbon-seal_trajectory_20140426-20140427\atn_137495_ribbon-seal_trajectory_20140426-20140427.zip
   Changing publishing organization to: NOAA Integrated Ocean Observing Syst

In [11]:
# Scratch check: list the .zip package(s) one folder below data/dwc whose
# file name contains "38553".
glob.glob('data/dwc/*/*38553*.zip')

['data/dwc\\atn_38553_bearded-seal_trajectory_20110618-20120314\\atn_38553_bearded-seal_trajectory_20110618-20120314.zip']

In [1]:
from dotenv import dotenv_values
import atn_nc_to_dwc

# Read the IPT settings from the local .env file.
config = dotenv_values(".env")

# Base URL of the IPT instance the session talks to.
ipt_url = 'https://ipt-obis.gbif.us/'

# Login payload: each auth field is filled from its .env entry.
ipt_auth = {
    field: config[env_key]
    for field, env_key in (
        ('email', 'IPT_ADMIN_EMAIL'),
        ('password', 'IPT_PASSWORD'),
    )
}

ipt_session = atn_nc_to_dwc.open_ipt_session(ipt_auth, ipt_url)

def add_obis_network_ipt_project(
    projname: str,
    ipt_url: str,
    ipt_session,
    network_id: str = "ab013f3a-3c00-42cb-9fdb-cb5f4ba20a4b",
):
    """
    Add an IPT project to a network by POSTing to ``manage/resource-addNetwork.do``.

    Author: Mathew Biddle

    NOTE: despite the function name, the default ``network_id`` is the value
    this function previously had hard-coded, which other cells of this notebook
    label as the *biologging* network. It is kept as the default so existing
    calls behave exactly as before. To add a project to OBIS, pass
    ``network_id="2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6"`` explicitly.

    :param projname: IPT project short name, sent as the ``r`` form field
    :param ipt_url: base URL of the IPT; the endpoint path is appended directly,
        so it must end with "/"
    :param ipt_session: authenticated session object exposing ``post(url, data=...)``
    :param network_id: identifier of the network to add, sent as the ``id`` form field

    :return: whatever ``ipt_session.post`` returns (the response, not a URL)
    """
    pub_params = {
        "r": projname,
        "id": network_id,
    }
    contents = ipt_session.post(
        ipt_url + "manage/resource-addNetwork.do", data=pub_params
    )

    return contents

# Apply add_obis_network_ipt_project() to each project in turn, in the same
# order as before; `projname` ends up holding the last name.
for projname in (
    "atn_137487_ribbon-seal_trajectory_20140412-20140413",
    "atn_131373_ribbon-seal_trajectory_20140428-20141213",
    "atn_137490_spotted-seal_trajectory_20160414-20160414",
):
    response = add_obis_network_ipt_project(projname, ipt_url, ipt_session)

# Trailing expression so the cell still displays the final call's return value.
response

<Response [200]>

In [None]:
from dotenv import dotenv_values

# IPT credentials are read from a local .env file rather than written in the notebook.
config = dotenv_values(".env")

ipt_auth = {
    'email': config['IPT_ADMIN_EMAIL'],
    'password': config['IPT_PASSWORD'],
}

ipt_url = 'https://ipt-obis.gbif.us/'

ipt_session = atn_nc_to_dwc.open_ipt_session(ipt_auth, ipt_url)

projname="atn_131373_ribbon-seal_trajectory_20140428-20141213"

# Both networks are added below, so the note names both (it previously
# mentioned OBIS only, unlike the same step in the other push cells).
publishing_notes = "Added to OBIS and biologging network"

# `network_id` rather than `id`, so the builtin id() is not shadowed for the
# rest of the kernel session.
for network_id in (
    "2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6",  # OBIS
    "ab013f3a-3c00-42cb-9fdb-cb5f4ba20a4b",  # biologging
):
    atn_nc_to_dwc.add_network_ipt_project(projname, ipt_url, ipt_session, network_id)

# Trailing expression: the cell displays the publish call's return value.
atn_nc_to_dwc.publish_ipt_project(projname, ipt_url, ipt_session, publishing_notes)

<Response [200]>

In [18]:
import glob
import os

from dotenv import dotenv_values
import atn_nc_to_dwc

# IPT credentials are read from a local .env file rather than written in the notebook.
config = dotenv_values(".env")

ipt_auth = {
    'email': config['IPT_ADMIN_EMAIL'],
    'password': config['IPT_PASSWORD'],
}

ipt_url = 'https://ipt-obis.gbif.us/'

ipt_session = atn_nc_to_dwc.open_ipt_session(ipt_auth, ipt_url)

# Numeric IDs (the number after "atn_" in the package file names) of the
# projects that need to be registered and added to the networks.
id_2_fix = [
    85875,
    85868,
    137494,
    64471,
    74653,
    65931,
    144001,
    99298,
    64453,
    99293,
    58005,
    137491,
    64465,
    74651,
    99283,
]

# Network identifiers handed to add_network_ipt_project(), in the order they are added.
network_ids = (
    "2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6",  # OBIS
    "ab013f3a-3c00-42cb-9fdb-cb5f4ba20a4b",  # biologging
)

# `target_id` / `network_id` replace the old `id`, which shadowed the builtin
# and was also overwritten with a network id inside its own loop.
for target_id in id_2_fix:
    print(target_id)

    # Anchor the pattern on "atn_<id>_" so an ID cannot match as a substring
    # of a longer ID or of the date range in another package's file name.
    matches = glob.glob(f'data/dwc/*/atn_{target_id}_*.zip')
    if not matches:
        # Previously this case raised IndexError and aborted the whole batch.
        print(f"   No package found for {target_id}; skipping.")
        continue

    # Project name = archive file name without ".zip"; os.path copes with both
    # "/" and "\\" separators on Windows and with "/" elsewhere.
    projname = os.path.splitext(os.path.basename(matches[0]))[0]

    if atn_nc_to_dwc.check_if_project_exists(projname, ipt_url, ipt_session):
        print(projname)
        publishing_notes = "Registering with GBIF."
        atn_nc_to_dwc.register_ipt_project(projname, ipt_url, ipt_session)
        print(f"   Publishing IPT project: {projname}: {publishing_notes}")
        atn_nc_to_dwc.publish_ipt_project(projname, ipt_url, ipt_session, publishing_notes)

        print(f"   Adding IPT project to OBIS and biologging Networks: {projname}")
        for network_id in network_ids:
            atn_nc_to_dwc.add_network_ipt_project(projname, ipt_url, ipt_session, network_id)
        publishing_notes = "Added to OBIS and biologging network"
        atn_nc_to_dwc.publish_ipt_project(projname, ipt_url, ipt_session, publishing_notes)

85875
Found existing project by name: 'atn_85875_spotted-seal_trajectory_20090531-20090921'
atn_85875_spotted-seal_trajectory_20090531-20090921
   Publishing IPT project: atn_85875_spotted-seal_trajectory_20090531-20090921: Registering with GBIF.
   Adding IPT project to OBIS and biologging Networks: atn_85875_spotted-seal_trajectory_20090531-20090921
85868
Found existing project by name: 'atn_85868_ribbon-seal_trajectory_20090603-20100223'
atn_85868_ribbon-seal_trajectory_20090603-20100223
   Publishing IPT project: atn_85868_ribbon-seal_trajectory_20090603-20100223: Registering with GBIF.
   Adding IPT project to OBIS and biologging Networks: atn_85868_ribbon-seal_trajectory_20090603-20100223
137494
Found existing project by name: 'atn_137494_ribbon-seal_trajectory_20140426-20140426'
atn_137494_ribbon-seal_trajectory_20140426-20140426
   Publishing IPT project: atn_137494_ribbon-seal_trajectory_20140426-20140426: Registering with GBIF.
   Adding IPT project to OBIS and biologging Net