### Before you start: 

- Get a Crucible API key from https://crucible.lbl.gov/api/v1/user_apikey
- pip install mfid
- pip install git+https://github.com/MolecularFoundryCrucible/pycrucible

The notebook below assumes you have not configured Crucible and reads your API key from a .env file to authenticate. To use this setup, create a .env file in this directory with the variable crucible_apikey set to your API key. Alternatively, run the following command in your terminal and follow the on-screen prompts to configure your Crucible setup:

```
crucible config init
```


#### Set Up

In [None]:
import os
from pprint import pprint
from dotenv import load_dotenv
from crucible.models import BaseDataset
from crucible import CrucibleClient

In [None]:
# Pull the variables defined in the local .env file into the environment.
load_dotenv()

# Build the client from the Crucible API endpoint and the key stored in the
# `crucible_apikey` environment variable.
crucible_url = "https://crucible.lbl.gov/api/v1"
crucible_apikey = os.getenv("crucible_apikey")
client = CrucibleClient(crucible_url, crucible_apikey)

#### Create a project

In [None]:
# Look up the demo project, adding it if needed (as the method name suggests).
client.get_or_add_project(
    project_id="DEMO-0001",
    organization="Molecular Foundry",
    project_lead_email="mkwall@lbl.gov",
)

#### Add Existing Crucible Users to the Project
**this currently requires an admin API key**

In [None]:
# ORCID iD of an existing Crucible user to attach to the demo project.
fabrice_orcid = "0000-0001-6402-3752"
client.add_user_to_project(fabrice_orcid, "DEMO-0001")

#### Add New Crucible Users 
**this currently requires an admin API key**

In [None]:
# Details of the new user; replace the placeholder values with real ones.
user_info = {
    "first_name": "Morgan",
    "last_name": "Wall",
    "email": "xxx@lbl.gov",
    # "lbl_email": "",  # optional
    "orcid": "0000-XXXX-XXXX-XXXX",
    # "employee_number": "",  # optional
    "projects": ["DEMO-0001"],
}

### Create a new dataset

In [None]:
# Show the built-in help for the BaseDataset model before constructing one.
help(BaseDataset)

In [None]:
# `mfid` (installed in "Before you start") is needed to mint the dataset ID;
# it is imported here because the Set Up cell does not import it.
import mfid

# Local file(s) to upload as the contents of the new dataset.
data_files = ['./test-data/0sdazahr0nxh300075jj73j2kg_240119_144139_hyperspec_picam_mcl.h5']

# mfid.mfid() is unpacked into two values; only the first is used as the ID.
dsid, _ = mfid.mfid()

# Describe the dataset record that will be created in Crucible.
my_dataset = BaseDataset(
    unique_id=dsid,
    dataset_name='TEST - Dataset with File',
    owner_orcid='0009-0001-9493-2006',
    project_id='DEMO-0001',
    instrument_name='hip_microscope',
    measurement='hyperspectral_image',
    session_name='2026-02-19 demo',
    data_format='h5',
    public=False,
)

# Create the record, upload the files, and wait for the ingestion response.
result = client.create_new_dataset_from_files(
    dataset=my_dataset,
    files_to_upload=data_files,
    # this can be any nested serializable dictionary
    scientific_metadata={'notes': 'this is a test dataset we keep reusing'},
    keywords=['test'],
    ingestor='HyperspecScopeFoundryH5Ingestor',  # Optional: specify ingestion class
    wait_for_ingestion_response=True,
)

# Keep the ID reported back in the response for the later linking cells.
dsid = result['created_record']['unique_id']
print(f"Created dataset with file: {dsid}")
print(f"Ingestion status: {result['ingestion_request']['status']}")

### Create a new sample

In [None]:
# Create a new sample
# `datetime` is imported here because the Set Up cell does not import it.
from datetime import datetime

# Convert the human-readable creation date into an ISO 8601 string.
creation_date_isoformat = datetime.strptime('1/16/2026', '%m/%d/%Y').isoformat()

# Keep the returned record so its unique ID can be read below.
sample = client.add_sample(
    sample_name='TEST339',
    description='Example: Au Nanoparticles Batch 42',
    creation_date=creation_date_isoformat,
    owner_orcid='0009-0001-9493-2006',
    project_id='DEMO-0001',  # the project created earlier in this notebook
)

sample_id = sample['unique_id']
print(f"Created sample: {sample_id}")

In [None]:
# Fetch the record for the sample created in the previous cell.
client.get_sample(sample_id)

### Link the dataset to the sample

In [None]:
# Link the dataset and the sample created above. To link other records,
# assign their IDs to `dataset_id` and `sample_id` before this call.
dataset_id = dsid

link = client.add_sample_to_dataset(dataset_id, sample_id)

In [None]:
# Fetch the sample record again now that it has been linked to the dataset.
client.get_sample(sample_id)

### Link the dataset to another dataset

In [None]:
# Placeholder for the ID of the derived (child) dataset; fill it in before
# uncommenting the link_datasets call below.
derived_dsid = '' # assuming you processed the data in some way and uploaded the results as a new dataset with a new mfid
#client.link_datasets(parent_dataset_id = dsid, child_dataset_id = derived_dsid)

# Show the built-in help for link_datasets.
help(client.link_datasets)

### Link the sample to another sample

In [None]:
# Show the built-in help for link_samples.
help(client.link_samples)

### Find your datasets

In [None]:
# List datasets (at most 1000 records are requested).
datasets = client.list_datasets(limit=1000)
print(f"Found {len(datasets)} datasets")

# Only index into the result when it is non-empty, to avoid an IndexError.
if datasets:
    print(f"\nFirst dataset: {datasets[0]['dataset_name']}")

##### Filter the results

In [None]:
# Restrict the listing to a single keyword.
keyword_datasets = client.list_datasets(keyword="tem", limit=5)
print(f"Datasets with keyword 'tem': {len(keyword_datasets)}")

# Restrict the listing to a single instrument.
instrument_datasets = client.list_datasets(instrument_name="titanx", limit=5)
print(f"Datasets from 'titanx' instrument: {len(instrument_datasets)}")

# Restrict the listing to a single owner, identified by ORCID iD.
owner_datasets = client.list_datasets(owner_orcid="0009-0001-9493-2006", limit=5)
print(f"Datasets by owner: {len(owner_datasets)}")

# Several filters can be passed in the same call.
filtered = client.list_datasets(keyword="tem", instrument_name="titanx", limit=5)
print(f"Datasets matching multiple filters: {len(filtered)}")

##### List with scientific metadata included

In [None]:
# Request the listing with scientific metadata included in each record.
datasets = client.list_datasets(include_metadata=True)

# Only index into the result when it is non-empty, to avoid an IndexError.
if datasets:
    pprint(datasets[0])
else:
    print("No datasets found")

### Find your samples

In [None]:
# List the samples in the demo project ('DEMO-0001', the project ID created
# earlier in this notebook).
client.list_samples(project_id='DEMO-0001')

In [None]:
# `child_sample_id` is not defined in the earlier cells; default to the sample
# created above and replace it with the ID of the sample you want to inspect.
child_sample_id = sample_id
client.list_parents_of_sample(child_sample_id)

In [None]:
# `parent_sample_id` is not defined in the earlier cells; default to the sample
# created above and replace it with the ID of the sample you want to inspect.
parent_sample_id = sample_id
client.list_children_of_sample(parent_sample_id)

### Downloading Data

Generate Signed URLs for each file in a dataset to access the file from Google Cloud Storage
- URLs are valid for one hour
- URLs can be shared; anyone with the link will be able to download the file

In [None]:
# Request download links for the files of the example dataset.
client.get_dataset_download_links("0tb1m9nvqsvff000qmzk6bkp1c")

Download files from a dataset
- Specify the dataset ID for which you want the files
- Optionally:
   - specify a single file to download - regular expressions and wildcards can be used to avoid providing the full path (default is all the files in the dataset)
   - specify an output directory (default is ```current-working-directory/crucible-downloads```)
   - set overwrite_existing to False to skip files that you already have locally (set it to True to redownload them)

In [None]:
# Download the dataset's files using the default file selection and output directory.
client.download_dataset("0tb1m9nvqsvff000qmzk6bkp1c")

In [None]:
# Download only the file selected by the `file_name` pattern.
client.download_dataset(
    "0tb1m9nvqsvff000qmzk6bkp1c",
    file_name=".*2025-12-17_TRAY5_6.jpg",
)

In [None]:
# Download the file selected by the `file_name` pattern into a custom output directory.
client.download_dataset(
    "0tb1m9nvqsvff000qmzk6bkp1c",
    file_name=".*2025-12-17_TRAY5_6.jpg",
    output_dir="test-outputdir",
)