In [None]:
import os
from IPython.display import JSON, display
from typing import Optional, List, Dict, Any


from md_python import MDClient, Experiment
from md_python import ExperimentDesign, SampleMetadata

from md_python import PairwiseComparisonDataset

from dotenv import load_dotenv
load_dotenv()

# Create the API client. load_dotenv() has already run above, so values from a
# local .env file are in the process environment at this point (presumably
# where MDClient reads its configuration from — confirm).
client = MDClient()
# Ask the service for its health status as a first connectivity check.
health = client.health.check()

# Run and minimally test
assert isinstance(health, dict)
# Render the health payload with IPython's JSON viewer, fully expanded.
display(JSON(health, expanded=True))


## Parameter blocks

#### Experiment parameters



In [None]:

# Name given to the experiment built from the uploaded data
# (passed as Experiment(name=...) further down).
uploaded_dataset_name = "test_api_e2e_script_3"
source = "md_format"
labelling_method = "lfq"

# S3 location of the already-uploaded files. Values come from the environment
# (MD_S3_BUCKET / MD_S3_KEY); an unset or empty variable falls back to the
# placeholder on the right.
s3_bucket = os.getenv("MD_S3_BUCKET") or "s3_bucket"
s3_prefix = os.getenv("MD_S3_KEY") or "s3_prefix/"
# The prefix must end with a slash; add it when the environment value omits it.
if not s3_prefix.endswith("/"):
    s3_prefix += "/"


#### Custom workflow parameters


#### Pairwise comparison parameters



In [None]:
# Sample-metadata column whose values define the conditions to compare.
sample_metadata_column = "condition"

# Condition that serves as the control in the comparisons.
control_condition = "md00001_a"

# Name under which the pairwise-comparison dataset is created.
pairwise_dataset_name = "Pairwise_e2e_script-05"

# Other parameters


## Metadata files

Metadata files must be stored in a local directory.
For LFQ experiments, the experiment design and sample metadata can be combined into a single file, provided that the columns "filename", "sample_name", and "condition" are included.

The `files_to_upload` variable is a list of files that have already been uploaded to `s3_bucket`/`s3_prefix` and are expected to be picked up by the API. These files typically include the Spectronaut output.

In [None]:
# Local directory holding the metadata CSVs. Set MD_METADATA_PATH to point the
# notebook at your own copy; when it is unset or empty, the original author's
# location below is used.
metadata_path = (
    os.getenv("MD_METADATA_PATH")
    or "/Users/giuseppeinfusini/wd/Data_for_upload_md/MD-format/Small_data"
)
# The same combined file is used for both experiment design and sample metadata.
experiment_design_filename = "experiment_design_COMBINED.csv"
sample_metadata_filename = "experiment_design_COMBINED.csv"

# S3 bucket files
files_to_upload = [
    "proteomics_proteins_COMBINED.tsv",
    "proteomics_peptides_COMBINED.tsv",
]

In [None]:

def load_experiment_design(dir_path: str, filename: str, delimiter: str = ",") -> ExperimentDesign:
    """Read the experiment-design CSV ``filename`` found inside ``dir_path``.

    Args:
        dir_path: Directory containing the file.
        filename: Name of the CSV file within ``dir_path``.
        delimiter: Field separator forwarded to ``ExperimentDesign.from_csv``.

    Returns:
        Whatever ``ExperimentDesign.from_csv`` builds from the joined path.
    """
    csv_path = os.path.join(dir_path, filename)
    return ExperimentDesign.from_csv(csv_path, delimiter=delimiter)

def load_sample_metadata(dir_path: str, filename: str, delimiter: str = ",") -> SampleMetadata:
    """Read the sample-metadata CSV ``filename`` found inside ``dir_path``.

    Args:
        dir_path: Directory containing the file.
        filename: Name of the CSV file within ``dir_path``.
        delimiter: Field separator forwarded to ``SampleMetadata.from_csv``.

    Returns:
        Whatever ``SampleMetadata.from_csv`` builds from the joined path.
    """
    csv_path = os.path.join(dir_path, filename)
    return SampleMetadata.from_csv(csv_path, delimiter=delimiter)

# Load both metadata objects from the local metadata directory.
exp_design = load_experiment_design(metadata_path, experiment_design_filename)
sample_metadata = load_sample_metadata(metadata_path, sample_metadata_filename)

# Check each object's type, then print it for a brief preview via __str__.
for _loaded, _expected_type in (
    (exp_design, ExperimentDesign),
    (sample_metadata, SampleMetadata),
):
    assert isinstance(_loaded, _expected_type)
    print(_loaded)

## Create experiment

In [None]:
# Describe the experiment: its name, source format and labelling method, the
# S3 location and names of the already-uploaded files, and the metadata
# objects loaded above.
exp = Experiment(
    name=uploaded_dataset_name,
    source=source,
    labelling_method=labelling_method,
    s3_bucket=s3_bucket,
    s3_prefix=s3_prefix,
    filenames=files_to_upload,
    experiment_design=exp_design,
    sample_metadata=sample_metadata,
)

# Experiment creation is currently disabled: the lines below are commented out
# and a later cell assigns experiment_id by hand instead. Uncomment them to
# create a new experiment from `exp`.
# experiment_id = client.experiments.create(exp)
# print(experiment_id)
# assert isinstance(experiment_id, str) and len(experiment_id) > 0


In [None]:
# Show the loaded experiment design as the cell output.
exp_design

In [None]:
# ID of an existing experiment, entered by hand because the create call in
# the earlier cell is commented out.
# NOTE(review): presumably valid only for the author's account/environment —
# replace it with the ID of your own experiment before running further.
experiment_id = "5de39b17-fe8b-4924-ab12-95a74943c223"

## Wait for experiment to complete

In [None]:
# Wait for the experiment identified by experiment_id to complete and keep
# the value the call returns.
# NOTE(review): timeout/polling behaviour is not visible here — confirm.
completed_experiment = client.experiments.wait_until_complete(experiment_id)

## Pairwise comparison


#### Find the initial intensity dataset.

In [None]:
# Look up the experiment's initial dataset; its `id` is used below as the
# input of the pairwise comparison.
# NOTE(review): what is returned when no dataset is found is not visible
# here — confirm before relying on `dataset.id`.
dataset = client.datasets.find_initial_dataset(experiment_id)
# Show the dataset as the cell output.
dataset

#### Define pairwise comparisons by selecting a control.


In [None]:
# Build the comparisons from the chosen metadata column, using
# control_condition as the control.
comparisons = sample_metadata.pairwise_vs_control(column=sample_metadata_column, control=control_condition)
# Show the generated comparisons as the cell output.
comparisons


In [None]:
# Print the help text that PairwiseComparisonDataset provides about itself.
print(PairwiseComparisonDataset.help())

In [None]:
# Configure the pairwise comparison on the initial dataset found above.
pw = PairwiseComparisonDataset(
    input_dataset_ids=[str(dataset.id)],
    dataset_name=pairwise_dataset_name,
    sample_metadata=sample_metadata,
    condition_column=sample_metadata_column,
    condition_comparisons=comparisons,
    # NOTE(review): the value nests a "control_variables" key inside the
    # control_variables argument and refers to a "dose" column, which is not
    # among the columns the notebook text lists as required — confirm both
    # against PairwiseComparisonDataset.help().
    control_variables={'control_variables': [{"column": "dose", "type": "numerical"}]}
)
# Run the comparison through the client; the returned value is used below as
# the dataset ID to wait on.
dataset_id = pw.run(client)

In [None]:
# Show the value returned by pw.run(client) as the cell output.
dataset_id

In [None]:
# Wait for the pairwise-comparison dataset to complete and keep the value the
# call returns.
# NOTE(review): timeout/polling behaviour is not visible here — confirm.
state = client.datasets.wait_until_complete(
    experiment_id=experiment_id,
    dataset_id=dataset_id,
)
# Show the returned state as the cell output.
state