In [None]:
# import os
from IPython.display import JSON, display
from typing import Optional, List, Dict, Any

from md_python import MDClient, Experiment



# Create the API client with its default configuration and query the service health endpoint.
client = MDClient()
health = client.health.check()

# Run and minimally test: the health response is expected to be a dict,
# which is then rendered as an expanded JSON tree in the notebook output.
assert isinstance(health, dict)
display(JSON(health, expanded=True))


## Metadata files




In [None]:
import os

# Directory that holds the metadata CSV files. Set the MD_METADATA_DIR environment
# variable to point at your own copy; when it is unset the original author's
# local path is used so existing runs behave exactly as before.
metadata_path = os.environ.get(
    "MD_METADATA_DIR",
    "/Users/giuseppeinfusini/wd/Data_for_upload_md/MD-format/Small_data",
)
# File names (relative to metadata_path) of the two metadata tables.
experiment_design_filename = "experiment_design_COMBINED.csv"
sample_metadata_filename = "sample_metadata_COMBINED.csv"

In [None]:
import os
from md_python.models import ExperimentDesign, SampleMetadata

def load_experiment_design(dir_path: str, filename: str, delimiter: str = ",") -> ExperimentDesign:
    """Build an ExperimentDesign from the CSV file ``filename`` inside ``dir_path``.

    ``delimiter`` is forwarded unchanged to ``ExperimentDesign.from_csv``.
    """
    csv_path = os.path.join(dir_path, filename)
    design = ExperimentDesign.from_csv(csv_path, delimiter=delimiter)
    return design

def load_sample_metadata(dir_path: str, filename: str, delimiter: str = ",") -> SampleMetadata:
    """Build a SampleMetadata from the CSV file ``filename`` inside ``dir_path``.

    ``delimiter`` is forwarded unchanged to ``SampleMetadata.from_csv``.
    """
    csv_path = os.path.join(dir_path, filename)
    metadata = SampleMetadata.from_csv(csv_path, delimiter=delimiter)
    return metadata

# Load both metadata tables from metadata_path using the loader helpers defined in this cell.
exp_design = load_experiment_design(metadata_path, experiment_design_filename)
sample_metadata = load_sample_metadata(metadata_path, sample_metadata_filename)
# Sanity-check the returned types and print a short preview of each object.
assert isinstance(exp_design, ExperimentDesign)
print(exp_design)  # brief preview via __str__
assert isinstance(sample_metadata, SampleMetadata)
print(sample_metadata)  # brief preview via __str__

## Create experiment

In [None]:

# Describe the experiment to upload: its name, source format, labelling method,
# the S3 location and file names of the data, and the metadata tables loaded earlier.
exp = Experiment(
    name="test_api_client_04",
    source="md_format",
    labelling_method="lfq",
    s3_bucket="md-development-test-data",
    s3_prefix="small_drc_api_test/",
    filenames=["proteomics_proteins_COMBINED.tsv", "proteomics_peptides_COMBINED.tsv"],
    experiment_design=exp_design,
    sample_metadata=sample_metadata,
)

# NOTE: the actual creation call is currently disabled; a fixed experiment_id is
# assigned in a later cell instead. Re-enable these lines to create the experiment.
# experiment_id = client.experiments.create(exp)
# print(experiment_id)
# assert isinstance(experiment_id, str) and len(experiment_id) > 0


In [None]:
# Temporary, for development: fixed experiment id used while the
# client.experiments.create(exp) call is commented out.
experiment_id = "cbd62af2-19da-476d-8bb1-bda6b3823c73"

## Wait for experiment to complete

In [None]:
import time
from md_python import MDClient

def wait_for_experiment(client: "MDClient", experiment_id: str, poll_s: int = 5, timeout_s: int = 1800):
    """Poll an experiment until it reaches a terminal status or the timeout expires.

    Each status change is printed as ``status=<value>``.

    Args:
        client: Client whose ``experiments.get_by_id`` method is polled.
        experiment_id: Identifier passed to ``get_by_id`` on every poll.
        poll_s: Seconds to wait between two polls.
        timeout_s: Maximum number of seconds to keep polling.

    Returns:
        The last fetched experiment object once its status (compared
        case-insensitively) is COMPLETED, FAILED, ERROR or CANCELLED.
        Failure statuses are returned, not raised, so inspect ``.status``.

    Raises:
        TimeoutError: If no terminal status is observed within ``timeout_s`` seconds.
    """
    terminal_statuses = {"COMPLETED", "FAILED", "ERROR", "CANCELLED"}
    deadline = time.monotonic() + timeout_s
    last_status = None
    while time.monotonic() < deadline:
        current = client.experiments.get_by_id(experiment_id)
        status = current.status
        if status != last_status:
            print(f"status={status}")
            last_status = status
        # A missing status is treated as "not finished yet" instead of crashing on None.upper().
        if status is not None and status.upper() in terminal_statuses:
            return current
        # Never sleep past the deadline, so the timeout is honoured even when poll_s is large.
        remaining = deadline - time.monotonic()
        time.sleep(max(0.0, min(poll_s, remaining)))
    raise TimeoutError(f"Experiment {experiment_id} not completed within {timeout_s}s")


In [None]:
# Block until the experiment reports a terminal status; wait_for_experiment returns the
# experiment for failure statuses as well and raises TimeoutError if its timeout elapses.
final_exp = wait_for_experiment(client, experiment_id)

In [None]:
# Fetch the datasets associated with the experiment id used above.
datasets = client.datasets.list_by_experiment(experiment_id=experiment_id)

In [None]:
# Pick the dataset whose name equals the experiment name. Fail with an explanatory
# message (still an IndexError) instead of a bare "list index out of range" when
# no dataset matches, e.g. because experiment_id does not belong to `exp`.
matching_datasets = [d for d in datasets if d.name == exp.name]
if not matching_datasets:
    available_names = [d.name for d in datasets]
    raise IndexError(
        f"No dataset named {exp.name!r} found for experiment {experiment_id}; "
        f"available dataset names: {available_names}"
    )
dataset = matching_datasets[0]
str(dataset.id)

In [None]:
from typing import List, Dict, Any
from uuid import UUID
from md_python import Dataset
from md_python.models import SampleMetadata

def create_pairwise_comparison_dataset(
    input_dataset_ids: List[str],
    dataset_name: str,
    sample_metadata: SampleMetadata,
    condition_column: str,
    condition_comparisons: List[List[str]],
    filter_valid_values_logic: str = "at least one condition", # ["all conditions", "at least one condition", "full experiment"]
    filter_values_criteria: Dict[str, Any] = {"method": "percentage", "filter_threshold_percentage": 0.5}, # 'count', 'filter_threshold_count'
    fit_separate_models: bool = True,
    limma_trend: bool = True,
    robust_empirical_bayes: bool = True,
    control_variables: Optional[List[Dict[str, str]]] = None,
    entity_type: str = "protein",
    job_slug: str = "pairwise_comparison",
) -> Dataset:
    """Build (but do not submit) a Dataset describing a pairwise-comparison job.

    Args:
        input_dataset_ids: Ids of the input datasets as strings; each is parsed with ``UUID``.
        dataset_name: Name given to the new dataset.
        sample_metadata: Its ``to_columns()`` output is sent as the
            ``experiment_design`` job parameter.
        condition_column: Name of the column holding the condition labels.
        condition_comparisons: List of ``[condition_a, condition_b]`` pairs, wrapped
            under the ``condition_comparison_pairs`` key.
        filter_valid_values_logic: One of "all conditions", "at least one condition"
            or "full experiment" (per the inline note on the parameter).
        filter_values_criteria: Filtering settings; the inline note suggests a
            ``count`` method with ``filter_threshold_count`` as an alternative
            to the default percentage method.
        fit_separate_models, limma_trend, robust_empirical_bayes, control_variables,
        entity_type: Forwarded unchanged as job parameters of the same name.
        job_slug: Job identifier stored on the dataset.

    Returns:
        A ``Dataset`` carrying the job slug and the assembled ``job_run_params``.

    Raises:
        ValueError: If an entry of ``input_dataset_ids`` is not a valid UUID string.
    """
    # Copy the criteria so the returned Dataset never aliases the shared default dict
    # (or the caller's dict); mutating one dataset's params must not affect later calls.
    criteria = dict(filter_values_criteria)
    job_run_params = {
        "condition_column": condition_column,
        "condition_comparisons": {"condition_comparison_pairs": condition_comparisons},
        "experiment_design": sample_metadata.to_columns(),
        "filter_valid_values_logic": filter_valid_values_logic,
        "filter_values_criteria": criteria,
        "fit_separate_models": fit_separate_models,
        "limma_trend": limma_trend,
        "robust_empirical_bayes": robust_empirical_bayes,
        "control_variables": control_variables,
        "entity_type": entity_type,
    }
    return Dataset(
        input_dataset_ids=[UUID(x) for x in input_dataset_ids],
        name=dataset_name,
        job_slug=job_slug,
        job_run_params=job_run_params,
    )

In [None]:


# Build the pairwise-comparison job on top of the dataset selected above:
# two comparisons are requested, both against "md00001_a".
pw_dataset = create_pairwise_comparison_dataset(
    input_dataset_ids=[str(dataset.id)],
    dataset_name="Pairwise test",
    sample_metadata=sample_metadata,
    condition_column="condition",
    condition_comparisons=[["md00001_a", "md00001_b"], ["md00001_a", "md00003_a"]],
    # optional params keep defaults...
)
# Submit the dataset; the return value is presumably the id of the new dataset (TODO confirm).
dataset_id = client.datasets.create(pw_dataset)


In [None]:
# Display the value returned by client.datasets.create(pw_dataset).
dataset_id

In [None]:
# NOTE(review): this overwrites the dataset_id returned by client.datasets.create
# with a fixed id, presumably for development. Remove once no longer needed.
dataset_id = "c48bb19c-2e0d-44e2-96c4-54448f2ab2fb"


In [None]:
import json
from collections.abc import Callable

def find_non_jsonable_in(obj, path="root"):
    """Recursively collect the entries of ``obj`` that ``json.dumps`` cannot serialise.

    Dicts, lists and tuples are traversed; every other value is probed with
    ``json.dumps``. Dict keys are checked as well, because ``json.dumps`` only
    accepts ``str``, ``int``, ``float``, ``bool`` or ``None`` keys.

    Args:
        obj: The object (typically a request payload) to inspect.
        path: Dotted/indexed location of ``obj`` used as the prefix of reported paths.

    Returns:
        A list of ``(path, type_name, repr)`` tuples, one per offending value or key.
        Offending keys are reported with a ``[key]`` suffix on their path.
        The list is empty when everything is serialisable.
    """
    bad = []
    if isinstance(obj, dict):
        for k, v in obj.items():
            # A bad key makes json.dumps fail even when every value is fine.
            if not (k is None or isinstance(k, (str, int, float, bool))):
                bad.append((f"{path}.{k}[key]", type(k).__name__, repr(k)))
            bad += find_non_jsonable_in(v, f"{path}.{k}")
    elif isinstance(obj, (list, tuple)):
        # Tuples serialise as JSON arrays, so descend into them to pinpoint the element.
        for i, v in enumerate(obj):
            bad += find_non_jsonable_in(v, f"{path}[{i}]")
    else:
        try:
            json.dumps(obj)
        except TypeError:
            bad.append((path, type(obj).__name__, repr(obj)))
    return bad

def debug_dataset_payload(ds):
    """Assemble the request-style payload for ``ds`` and report unserialisable entries.

    The payload nests the dataset's input ids (as strings), name, job slug and
    job run params (``{}`` when falsy) under a ``"dataset"`` key. The findings of
    ``find_non_jsonable_in`` and the first 2000 characters of the JSON rendering
    are printed.

    Returns:
        A ``(payload, problems)`` tuple.
    """
    dataset_section = {}
    dataset_section["input_dataset_ids"] = list(map(str, ds.input_dataset_ids))
    dataset_section["name"] = ds.name
    dataset_section["job_slug"] = ds.job_slug
    dataset_section["job_run_params"] = ds.job_run_params or {}
    payload = {"dataset": dataset_section}

    problems = find_non_jsonable_in(payload)
    print("Non-JSONable entries:", problems)

    # default=str lets the preview render even when problems were found.
    rendered = json.dumps(payload, indent=2, default=str)
    print(rendered[:2000])
    return payload, problems

# Use:
payload, problems = debug_dataset_payload(pw_dataset)
assert not problems, "Fix these entries (likely a method without parentheses)"

In [None]:
# Literal example of a "condition_comparison_pairs" mapping: a list of
# [condition_a, condition_b] pairs, the same shape create_pairwise_comparison_dataset
# builds for its "condition_comparisons" job parameter.
# NOTE(review): appears to be a reference/scratch value only; it is not assigned to a name.
{
    "condition_comparison_pairs": [
        [
            "Control",
            "Stage 1"
        ],
        [
            "Control",
            "Stage 3"
        ],
        [
            "Control",
            "Stage 5"
        ],
        [
            "Control",
            "Outlier"
        ]
    ]
}