## Extract all Metadata for Broad IDs Assayed with Cell Painting

In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import os
import pathlib
import pandas as pd

<IPython.core.display.Javascript object>

In [3]:
# Build one de-duplicated metadata table per platemap file in the batch.
# Each table keeps the sample identifier, the extracted Broad ID, the platemap
# name, and the solvent; the tables are collected in `all_platemaps`.
all_platemaps = list()

batch = "2016_04_01_a549_48hr_batch1"
platemap_dir = pathlib.Path(batch, "platemap")

platemap_suffix = ".txt"

for platemap_file in os.listdir(platemap_dir):
    # Load platemap
    # Remove only a trailing ".txt". `str.strip(".txt")` would instead strip
    # any leading/trailing ".", "t" or "x" characters and could corrupt names
    # that happen to begin or end with those letters.
    if platemap_file.endswith(platemap_suffix):
        platemap = platemap_file[: -len(platemap_suffix)]
    else:
        platemap = platemap_file

    platemap_df = pd.read_csv(platemap_dir / platemap_file, sep="\t")

    # Sanity check: the file name must agree with the name recorded in the file
    recorded_name = platemap_df.plate_map_name.unique()[0]
    assert platemap == recorded_name, (
        f"File name '{platemap_file}' does not match plate_map_name "
        f"'{recorded_name}'"
    )

    # Process platemap
    # Pull the leading "BRD-..." token out of the full sample string;
    # rows whose broad_sample does not match the pattern get a missing broad_id.
    # expand=False returns a Series, which is what a single new column needs.
    platemap_df = platemap_df.assign(
        broad_id=platemap_df.broad_sample.str.extract(
            r"(BRD[-N][A-Z0-9]+)", expand=False
        )
    )

    # Keep one row per unique (sample, id, platemap, solvent) combination
    platemap_df = (
        platemap_df.loc[:, ["broad_sample", "broad_id", "plate_map_name", "solvent"]]
        .drop_duplicates()
        .reset_index(drop=True)
    )

    all_platemaps.append(platemap_df)

<IPython.core.display.Javascript object>

In [4]:
# Destination of the combined table (tab-separated, no index column)
output_file = "broad_sample_info.tsv"

# Stack the per-platemap tables, then discard rows that are exact repeats
all_platemap_df = pd.concat(all_platemaps, axis="rows")
all_platemap_df = all_platemap_df.drop_duplicates()

all_platemap_df.to_csv(output_file, sep="\t", index=False)

# Report the table size and preview the first rows
print(all_platemap_df.shape)
all_platemap_df.head()

(1652, 4)


Unnamed: 0,broad_sample,broad_id,plate_map_name,solvent
0,,,C-7161-01-LM6-018,DMSO
1,BRD-A62035778-004-03-6,BRD-A62035778,C-7161-01-LM6-018,DMSO
2,BRD-K62196610-001-01-6,BRD-K62196610,C-7161-01-LM6-018,DMSO
3,BRD-K62277907-001-01-6,BRD-K62277907,C-7161-01-LM6-018,DMSO
4,BRD-A62071884-001-04-6,BRD-A62071884,C-7161-01-LM6-018,DMSO


<IPython.core.display.Javascript object>