COMBINATION GENERATION NOTEBOOK

This notebook creates the multiplex combinations from the singleplex data.<br>
It is the starting point for the similarity measure concept:<br>
- First we have the singleplex data from the instrument.
- We then create all the possible combinations based on the targets we want to use.
- At the end we save a DataFrame with all the possible combinations and name each MULTIPLEX.

# IMPORT LIBRARIES

In [2]:
import pandas as pd
import numpy as np
import itertools
from pathlib import Path

In [3]:
import python_libraries.data_loading as loadfunc
import python_libraries.utilities as utils

In [4]:
# Number of leading columns of the loaded frame that are sliced off as
# metadata (df_raw_rb.iloc[:, :NMETA] in the "Creating combinations" section).
NMETA = 6

# LOAD DATA

## Select Data

In [5]:
# Lookup table keyed by an integer; the selected keys are passed together with
# this dict to utils.list_from_key to build the list of experiments to load.
singleplex_dict = {0: '20210706_01'}
# NOTE(review): DATA_TYPE is not referenced in the cells visible here --
# presumably informational or used elsewhere; confirm before removing.
DATA_TYPE = 'singleplex'

In [6]:
# The data folder sits next to the directory the notebook is executed from.
# Paths are kept as plain strings because they are handed to the loader as-is.
parent_dir = Path.cwd().parent
data_path = f'{parent_dir}/data'

# Dataset folder used throughout this notebook (input and output).
data_selected_path = f'{data_path}/20210716_7_plex'

In [7]:
# Keys of singleplex_dict selecting which entries to load.
id_key = [0]
exp_id_list = utils.list_from_key(singleplex_dict, id_key)

# Load the 'raw_rb' processed data for the selected entries from the dataset folder.
df_raw_rb = loadfunc.load_processed_data_by_expid(
    data_selected_path,
    exp_id_list,
    'raw_rb',
)

# Quick look at the first rows only.
df_raw_rb.head(3)

Unnamed: 0,Channel,PrimerMix,Target,Assay,Conc,Exp_ID,1.0,2.0,3.0,4.0,...,36.0,37.0,38.0,39.0,40.0,41.0,42.0,43.0,44.0,45.0
0,panel01,singleplex,ADE,ADE_HEX_03,100000.0,20210706_01,-0.013272,-0.013059,-0.010748,-0.007808,...,1.487606,1.512341,1.539635,1.56342,1.582187,1.600661,1.615349,1.629333,1.643214,1.654212
1,panel01,singleplex,ADE,ADE_HEX_03,100000.0,20210706_01,-0.013221,-0.011776,-0.009906,-0.007202,...,1.30137,1.34911,1.392268,1.428976,1.464463,1.495932,1.525779,1.552536,1.57069,1.580326
2,panel01,singleplex,ADE,ADE_HEX_03,100000.0,20210706_01,-0.012398,-0.012193,-0.010444,-0.007357,...,1.298298,1.346353,1.389671,1.428277,1.464866,1.494147,1.518763,1.544194,1.565675,1.579926


# Creating combinations

In [8]:
# Metadata = the first NMETA columns of the raw frame.
df_meta = df_raw_rb.iloc[:, :NMETA]

# Every assay value recorded for each target (duplicates still included).
assays_by_target = df_meta.groupby('Target')['Assay'].apply(list)

# Target -> sorted list of its distinct assays.
assay_dict = {
    target: list(np.unique(assays))
    for target, assays in assays_by_target.items()
}

# Distinct targets present in the data.
targets = np.unique(df_meta['Target'])

assay_dict

{'ADE': ['ADE_HEX_03', 'ADE_HEX_06', 'ADE_HEX_09', 'ADE_HEX_12'],
 'C22': ['C22_N_01', 'C22_N_02'],
 'CHK': ['CHK_N_02', 'CHK_N_04', 'CHK_N_06', 'CHK_N_08'],
 'CNL': ['CNL_N_01', 'CNL_N_02', 'CNL_N_03', 'CNL_N_04'],
 'COC': ['COC_N_01', 'COC_N_02', 'COC_N_04'],
 'COV': ['COV_N_01', 'COV_N_02', 'COV_N_03'],
 'MER': ['MER_N_01', 'MER_N_02', 'MER_N_03', 'MER_N_04']}

In [9]:
# Cartesian product over the per-target assay lists: each combination picks
# exactly one assay per target, in the key order of assay_dict.
per_target_assays = assay_dict.values()
assay_combos = list(itertools.product(*per_target_assays))

n_combos = len(assay_combos)
print(f'There are {n_combos} combinations!')

There are 4608 combinations!


In [29]:
# Minimum total width of the numeric part of the label: str.zfill left-pads
# with zeros up to this width and leaves longer numbers untouched.
# NOTE(review): with a width of 2 and several thousand combinations the labels
# do not all have the same length (e.g. 'PM7.01' vs 'PM7.4608'), so they will
# not sort lexicographically -- confirm whether downstream code relies on the
# current label format before widening it.
ID_ZERO_BEFORE = 2

# Labels follow 'PM<number of targets>.<1-based combination number>'.
labels = [
    f'PM{len(targets)}.{str(number).zfill(ID_ZERO_BEFORE)}'
    for number in range(1, len(assay_combos) + 1)
]

# One row per combination, one column per target, indexed by the label.
df_assay_match = (
    pd.DataFrame(assay_combos, columns=list(assay_dict.keys()))
    .assign(Label=labels)
    .set_index('Label')
)

In [30]:
# Saving
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists first; otherwise a fresh checkout fails with OSError.
output_dir = Path(data_selected_path) / 'selected_combinations'
output_dir.mkdir(parents=True, exist_ok=True)

# The index (the multiplex label) is written as the first CSV column.
df_assay_match.to_csv(f'{data_selected_path}/selected_combinations/assay_combinations_all.csv')