# cm967's Fluorophore Set Orthogonality Evaluator (FSOE) 
author: Camillo Moschner | start: Jan 2021 | last modified: 18.01.2023 (polished & uploaded to thesis GitHub)

## Import Statements

In [57]:
import numpy as np
import pandas as pd
import os
from itertools import product, combinations, chain
from IPython.display import clear_output, Audio
import re
import math
import random
import pickle

from joblib import Parallel, delayed
from tqdm.notebook import tqdm
import seaborn as sns
from ipywidgets import interact

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.patches as patches

## Function Definitions

In [2]:
def wavelength_to_rgb(wavelength, gamma=0.8):
    ''' taken from http://www.noah.org/wiki/Wavelength_to_RGB_in_Python
    This converts a given wavelength of light to an approximate RGB color value. The wavelength must be given
    in nanometers in the range from 380 nm to 750 nm(789 THz through 400 THz). Additionally alpha value set to 
    0.5 outside range. Based on code by Dan Bruton http://www.physics.sfasu.edu/astro/color/spectra.html
    '''
    wavelength = float(wavelength)
    if wavelength >= 380 and wavelength <= 750:
        A = 1.
    else:
        A=0.2
    if wavelength < 380:
        wavelength = 380.
    if wavelength >750:
        wavelength = 750.
    if wavelength >= 380 and wavelength <= 440:
        attenuation = 0.3 + 0.7 * (wavelength - 380) / (440 - 380)
        R = ((-(wavelength - 440) / (440 - 380)) * attenuation) ** gamma
        G, B = 0.0, (1.0 * attenuation) ** gamma
    elif wavelength >= 440 and wavelength <= 490:
        R = 0.0
        G = ((wavelength - 440) / (490 - 440)) ** gamma
        B = 1.0
    elif wavelength >= 490 and wavelength <= 510:
        R, G = 0.0, 1.0
        B = (-(wavelength - 510) / (510 - 490)) ** gamma
    elif wavelength >= 510 and wavelength <= 580:
        R = ((wavelength - 510) / (580 - 510)) ** gamma
        G, B = 1.0, 0.0
    elif wavelength >= 580 and wavelength <= 645:
        R = 1.0
        G = (-(wavelength - 645) / (645 - 580)) ** gamma
        B = 0.0
    elif wavelength >= 645 and wavelength <= 750:
        attenuation = 0.3 + 0.7 * (750 - wavelength) / (750 - 645)
        R = (1.0 * attenuation) ** gamma
        G, B = 0.0, 0.0
    else:
        R, G, B = 0.0, 0.0, 0.0
    return (R,G,B,A)

# special colourmaps
YFP_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["black","yellow"])
CFP_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["black","cyan"])
RFP_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["black","red"])
GFP_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["black","lime"])
BFP_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["black","dodgerblue"])
Cy5_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["black","darkmagenta"])
ATTO550_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["black","coral"])

def plot_spectrum(axis, df, column_name, f_type=''):
    if ('em' in column_name) or ('EM' in column_name):
        line = '-'
    elif ('ex' in column_name) or ('EX' in column_name):
        line = '--'
    elif ('ab' in column_name) or ('AB' in column_name):
        line = '--'
        column_name = column_name.split(' ')+' ab'
    try:
        fluorophore_data = df[column_name]
    except:
        column_name = column_name.split(' ')[0] + ' ab'
        fluorophore_data = df[column_name]
    fluorophore_data = fluorophore_data.loc[pd.notna(fluorophore_data)]
    axis.plot(fluorophore_data.index, fluorophore_data, linestyle=line, 
              color=wavelength_to_rgb(df.index[df[column_name] == df[column_name].max()].tolist()[0],gamma=1.8),
              label = f"{column_name} ({f_type})")

# Load Files

In [3]:
fp_spectra_df = pd.read_csv(f"spectra_data{os.path.sep}all_fp_spectra.csv",index_col=0)
print(f"fp_spectra_df contains data located in a range of {len(fp_spectra_df)} nm (visible spectrum of light = 500 nm).")
avail_fluorophore_spectra_df = pd.read_csv(f"spectra_data{os.path.sep}avail_fluorophore_spectra.csv",index_col=0)
avail_nuclear_dye_spectra_df = pd.read_csv(f"spectra_data{os.path.sep}nuclear_dye_spectra.csv",index_col=0)
# LEDs
LED_list = [365,440,488, 514,561,594, 640,730]
all_LED_data_df = pd.read_csv(f"spectra_data{os.path.sep}all_LED_data_SynBioLab.csv",index_col=0)
LED_active_lambda_df = pd.read_csv(f"spectra_data{os.path.sep}LED_active_lambda_SynBioLab.csv",index_col=0)
LED_active_lambda_df.lambda_range = LED_active_lambda_df.lambda_range.apply(lambda cell: [int(x) for x in re.findall(r'\d{3}', cell)])
# Dichroic Cubes
CFP_YFP_mCherry_A_filter_cube_data = pd.read_csv(f"spectra_data{os.path.sep}LED-CFP_YFP_mCherry-A-000 Filter Cube (32mm).csv",index_col=0)
DA_FI_TR_Cy5_B_filter_cube_data = pd.read_csv(f"spectra_data{os.path.sep}LED-DA_FI_TR_Cy5-B Filter Cube (32mm).csv",index_col=0)
dichroic_dic = {'CFP_YFP_mCherry': CFP_YFP_mCherry_A_filter_cube_data, 
                'DA_FI_TR_Cy5_B' : DA_FI_TR_Cy5_B_filter_cube_data}
# Emission Filters
em_filter_list = [{'center':435,'width':26}, {'center':475,'width':20},
                  {'center':515,'width':30}, {'center':540,'width':21},
                  {'center':595,'width':40}, {'center':632,'width':60},
                  {'center':705,'width':72}]            

fp_spectra_df contains data located in a range of 1400 nm (visible spectrum of light = 500 nm).


# Plot w/ Lists

In [4]:
fp_list = ['EBFP',
   'T-Sapphire',
   'iFP1.4',
   'mTangerine',
   'mGarnet2',
   'mKillerOrange',
   'efasCFP' #,'Superfolder GFP', 'mCerulean'
          ]
chem_fluorophore_list = [# '6-FAM'
                         ]
nuclear_dye_list = [] # 'DAPI'
all_fluorophores = fp_list + chem_fluorophore_list + nuclear_dye_list
all_fluorophores_df = pd.concat([fp_spectra_df, avail_fluorophore_spectra_df, avail_nuclear_dye_spectra_df])
# Quick check that you have data for all fluorophores you want to investigate
result = any( [ (f"{fluo} ex" in all_fluorophores_df.columns) for fluo in all_fluorophores] )
print(f" -> Is data available for all fluorophores to investigate? : { result }")

 -> Is data available for all fluorophores to investigate? : True


---
# FP Investigation

## Orthogonal fluorophore identification

### 0. Final FP filter

In [5]:
fp_spectra_df.drop([x for x in fp_spectra_df if any(
    ['On' in x, 
     'OFF' in x, 
     '(Green' in x, 
     '(Red)' in x,
     '(Off)' in x,
     '(off)' in x,
     '(on)' in x,
     '(Far-red' in x,
     '(Orange)' in x, 
     '(CyRFP1)' in x, 
     '(calcium saturated)' in x, 
     '(In presence of Ca2+)' in x, 
     '(pre-conversion)' in x, 
     '(post-conversion)' in x, 
     '(Early)' in x, 
     '(Late)' in x, 
     '(calcium free)' in x, 
     '(calcium saturated)' in x, 
     '2p' in x, 
     '(Pr)' in x, 
     '(Fret)' in x, 
     '(Red' in x, 
     'A44-KR' in x,
     'Montipora' in x
                                 ])]
,axis=1,inplace=True)

In [6]:
# Problem: excitation can be given in terms of exciation OR and AND emission
# Solution: remove fps that don't have both 1) an ex or ab label, and 2) an em label
# 1- Calculate names of each category
ex_fps = [x[:-3] for x in fp_spectra_df if 'ex' in x]
ab_fps = [x[:-3] for x in fp_spectra_df if 'ab' in x]
em_fps = [x[:-3] for x in fp_spectra_df if 'em' in x]
print(f"ex_fps: {len(ex_fps)} & ab_fps:{len(ab_fps)}\nem_fps: {len(em_fps)}")
# 2- Use sets to identify a) all ex, b) choose ex if both ex and ab are available, c) choose ab if only ab available
choose_ex_if_both_fps = [f"{fp} ex" for fp in set(ab_fps).intersection(set(ex_fps))]
choose_ab = [f"{fp} ab" for fp in set(ab_fps).difference(set(ex_fps))]
choose_ex = [f"{fp} ex" for fp in set(ex_fps).difference(set(ab_fps))]
# 3- Combine all excitation FPs in one list
all_excitation_fps = list(choose_ex) + list(choose_ex_if_both_fps) + list(choose_ab)
print(f"Total unique excitation fp data available: {len(all_excitation_fps)}")
# 4- Ensure you have excitation and emission data for each fp by identifying the ones who only have one
# and remove them from the total list
for fp_remove in set([x[:-3] for x in all_excitation_fps]).difference(set(em_fps)):
    print(f"Removing: {fp_remove} ab")
    all_excitation_fps.remove(f"{fp_remove} ab")
# 5- combine excitation and emission fps back together
usable_fps = all_excitation_fps + [f"{fp} em" for fp in em_fps]
# 6- Select only usable FP data from the original, reduced FP spectral dataset
fp_spectra_df = fp_spectra_df[usable_fps]
len(all_excitation_fps), len([x[:-3] for x in fp_spectra_df if 'em' in x])

ex_fps: 189 & ab_fps:49
em_fps: 218
Total unique excitation fp data available: 220
Removing: AausFP3 ab
Removing: AausFP2 ab


(218, 218)

### 1. nth Optimal LED & EM_filter / FP

In [7]:
def EX_spectrum(fluoro_name,spectra_df):
    try:
        return spectra_df[f"{fluoro_name} ex"]
    except:
        return spectra_df[f"{fluoro_name} ab"]
def EM_spectrum(fluoro_name,spectra_df):
    return spectra_df[f"{fluoro_name} em"]
# ----------------------------------------------------------------------------------------------------------------------------------------------

def fluorophore_EX_values_for_LEDs_df(fluoro_name,spectra_df, LED_data_df_l, LED_list_l):
    """ Calculate all LEDs' excitation integral for a given fluorophore
    """
    EX_val_dic = {}
    for idx, LED_name in enumerate(LED_data_df_l.columns):#[all_LED_data_df.notna()]
        mask = LED_data_df_l[LED_name].notna()
        # Set LED intensity value threshold for OFF/ON cut (simplification)
        LED_lambda_ON = list(set(LED_data_df_l[LED_name].loc[LED_data_df_l[LED_name] > 3].index.astype(int)))
        EX_val_dic[idx] = {'LED': int(LED_list_l[idx]), 
                           'LED_name' : LED_name,
                            fluoro_name : EX_spectrum(fluoro_name, spectra_df)[LED_lambda_ON].sum()
                          } # 'excitation_integral'
    return pd.DataFrame(EX_val_dic).T.set_index(['LED'])

def fluoros_EM_filter_df(fluoro_name,spectra_df, em_filter_list_l):
    """ Calculate all EM_filters' passthrough integral for a given fluorophore
    """
    fluoro_EM_filter_data = []
    for em_filter in em_filter_list_l:
        em_filter_start = int(em_filter['center']-em_filter['width']/2)
        em_filter_end = int(em_filter['center']+em_filter['width']/2)
        fluoro_EM_filter_data.append((em_filter['center'],EM_spectrum(fluoro_name,fp_spectra_df).loc[em_filter_start:em_filter_end].sum()))
    return pd.DataFrame(fluoro_EM_filter_data,columns=['EM_filter',fluoro_name]).set_index('EM_filter')
# ----------------------------------------------------------------------------------------------------------------------------------------------

def best_nth_LED_for_FP(fluoro_name,spectra_df, LED_active_lambda_df_l, LED_list_l, n_best=0):
    """ Identify (1st, 2nd, 3rd, ..., nth) best LED for a given fluorophore
    """
    current_fp_EX = EX_spectrum(fluoro_name,fp_spectra_df)
    ranges = [x for x in LED_active_lambda_df.lambda_range]
    local_LED_data = []
    for idx, LED_range in enumerate(ranges):
        local_LED_data.append((current_fp_EX.loc[LED_range].sum()))
    results_df = pd.DataFrame(local_LED_data)
    results_df.index = LED_list
    results_df.sort_values(by=0, ascending=False, inplace=True)
    return (results_df.index[n_best], round(results_df[0].iloc[n_best],4))

def best_nth_EMfilter_for_FP(fluoro_name, spectra_df, em_filter_list_l, n_best=0):
    """ Identify (1st, 2nd, 3rd, ..., nth) best EM_filter for a given fluorophore
    """
    current_EM_data = fluoros_EM_filter_df(fluoro_name,spectra_df, em_filter_list_l).sort_values(by=fluoro_name, ascending=False)
    return (current_EM_data.index[n_best], round(current_EM_data[fluoro_name].iloc[n_best],4))
# ----------------------------------------------------------------------------------------------------------------------------------------------

def slice_into_chunks(seq_l, chunk_length):
    """
    """
    return [seq_l[i:i+chunk_length] for i in range(0, len(seq_l), chunk_length)]

#### a.) All FPs' 1st & 2nd best LED

In [8]:
best_LEDs = {}
for idx, name in enumerate(fp_spectra_df.columns):
    if all(['em' in name]):
        results = best_nth_LED_for_FP(name[:-3],fp_spectra_df, LED_active_lambda_df, LED_list, n_best=0)
        best_LEDs[name[:-3]] = {'idx': idx,
                                      'best_LED' : int(results[0]),
                                      'best_LED_exc_val' : results[1]
                                     }
secondbest_LEDs = {}
for idx, name in enumerate(fp_spectra_df.columns):
    if all(['em' in name]):
        results = best_nth_LED_for_FP(name[:-3],fp_spectra_df, LED_active_lambda_df, LED_list, n_best=1)
        secondbest_LEDs[name[:-3]] = {'idx': idx,
                                      'secondbest_LED' : int(results[0]),
                                      'secondbest_LED_exc_val' : results[1]
                                     }
twobest_LEDs_df = pd.concat([pd.DataFrame(best_LEDs),pd.DataFrame(secondbest_LEDs).drop(['idx'])])
twobest_LEDs_df

Unnamed: 0,Aquamarine,Citrine,CyOFP1,CyPet,DsRed,EBFP,EBFP2,ECFP,EGFP,EYFP,...,Lumazine binding protein,amFP486,dTFP0.2,meleCFP,meffCFP,efasCFP,dsFP483,KCY-G4219,SuperNova2 (SuperNova2),FusionRed-MQV
idx,218.0,219.0,220.0,221.0,222.0,223.0,224.0,225.0,226.0,227.0,...,426.0,427.0,428.0,429.0,430.0,431.0,432.0,433.0,434.0,435.0
best_LED,440.0,514.0,488.0,440.0,561.0,365.0,365.0,440.0,488.0,514.0,...,365.0,440.0,440.0,440.0,488.0,440.0,440.0,440.0,594.0,561.0
best_LED_exc_val,19.8527,17.6806,24.998,20.5305,20.6072,20.7553,22.5845,20.5305,25.7896,18.0951,...,16.3909,19.6523,18.3897,20.4093,17.3351,17.8755,19.4192,19.157,25.7916,20.6692
secondbest_LED,365.0,488.0,514.0,488.0,514.0,440.0,440.0,488.0,440.0,488.0,...,440.0,488.0,488.0,488.0,440.0,488.0,365.0,488.0,561.0,594.0
secondbest_LED_exc_val,9.379,7.1625,21.7333,8.6867,14.9961,0.0,0.1482,8.6867,7.0682,7.8926,...,14.1555,10.6064,15.8367,9.9529,15.7538,16.3736,12.7849,12.9615,20.1491,20.0415


In [9]:
# calculate all fluorophores' EXCITATION data at all EM_filters 
FPs_excitationdata_to_LEDs = []
for fp_name in tqdm([name[:-3] for name in fp_spectra_df.columns if 'em' in name]):
    FPs_excitationdata_to_LEDs.append(fluorophore_EX_values_for_LEDs_df(fp_name,fp_spectra_df, all_LED_data_df, LED_list).drop(['LED_name'],axis=1))
FPs_excitationdata_to_LEDs_df = pd.concat(FPs_excitationdata_to_LEDs,axis=1).T
FPs_excitationdata_to_LEDs_df = FPs_excitationdata_to_LEDs_df.sort_values(by=FPs_excitationdata_to_LEDs_df.columns.to_list(), ascending=False).T

  0%|          | 0/218 [00:00<?, ?it/s]

#### b.) All FPs' 1st & 2nd best EM filter

In [10]:
best_EM_filters = {}
for idx, name in enumerate(fp_spectra_df.columns):
    if all(['em' in name]):
        results = best_nth_EMfilter_for_FP(name[:-3], fp_spectra_df, em_filter_list, n_best=0)
        best_EM_filters[name[:-3]] = {'idx': idx,
                                      'best_EM_filter' : int(results[0]),
                                      'best_EM_passthrough_val' : results[1]
                                     }
secondbest_EM_filters = {}
for idx, name in enumerate(fp_spectra_df.columns):
    if all(['em' in name]):
        results = best_nth_EMfilter_for_FP(name[:-3], fp_spectra_df, em_filter_list, n_best=1)
        secondbest_EM_filters[name[:-3]] = {'idx': idx,
                                      'secondbest_EM_filter' : int(results[0]),
                                      'secondbest_EM_passthrough_val' : results[1]
                                     }
twobest_EM_filters_df = pd.concat([pd.DataFrame(best_EM_filters),pd.DataFrame(secondbest_EM_filters).drop(['idx'])])
twobest_EM_filters_df

Unnamed: 0,Aquamarine,Citrine,CyOFP1,CyPet,DsRed,EBFP,EBFP2,ECFP,EGFP,EYFP,...,Lumazine binding protein,amFP486,dTFP0.2,meleCFP,meffCFP,efasCFP,dsFP483,KCY-G4219,SuperNova2 (SuperNova2),FusionRed-MQV
idx,218.0,219.0,220.0,221.0,222.0,223.0,224.0,225.0,226.0,227.0,...,426.0,427.0,428.0,429.0,430.0,431.0,432.0,433.0,434.0,435.0
best_EM_filter,475.0,540.0,595.0,515.0,595.0,435.0,435.0,515.0,515.0,540.0,...,475.0,515.0,515.0,515.0,515.0,515.0,515.0,515.0,632.0,595.0
best_EM_passthrough_val,18.8489,16.2004,37.9134,22.027,29.3436,25.2175,21.3354,22.027,24.0535,16.2004,...,20.1995,23.2874,21.1893,20.7332,20.5995,20.0058,21.8632,20.8345,41.3502,31.3352
secondbest_EM_filter,515.0,515.0,632.0,475.0,632.0,475.0,475.0,475.0,540.0,515.0,...,515.0,475.0,475.0,475.0,475.0,475.0,475.0,475.0,595.0,632.0
secondbest_EM_passthrough_val,15.1479,13.3973,30.86,18.4601,18.3467,11.1299,13.3635,18.4601,8.4748,13.3973,...,14.4264,13.1796,11.5636,14.8189,9.9187,10.6741,14.86,12.6209,28.1231,18.267


In [11]:
twobest_LEDs_df['LSSmOrange']

idx                       301.0000
best_LED                  440.0000
best_LED_exc_val           20.4844
secondbest_LED            365.0000
secondbest_LED_exc_val     12.3130
Name: LSSmOrange, dtype: float64

In [12]:
twobest_EM_filters_df['LSSmOrange']

idx                              301.0000
best_EM_filter                   595.0000
best_EM_passthrough_val           27.3516
secondbest_EM_filter             632.0000
secondbest_EM_passthrough_val     19.3081
Name: LSSmOrange, dtype: float64

In [13]:
# calculate all fluorophores' EXCITATION data at all EM_filters 
FPs_EMdata_to_EMfilters = []
for fp_name in tqdm([name[:-3] for name in fp_spectra_df.columns if 'em' in name]):
    FPs_EMdata_to_EMfilters.append(fluoros_EM_filter_df(fp_name,fp_spectra_df, em_filter_list))
EM_passthrough_at_EMfilters_df = pd.concat(FPs_EMdata_to_EMfilters,axis=1).T
EM_passthrough_at_EMfilters_df = EM_passthrough_at_EMfilters_df.sort_values(by=EM_passthrough_at_EMfilters_df.columns.to_list(), ascending=False).T

  0%|          | 0/218 [00:00<?, ?it/s]

### 2. Dichroic lookup table (i.e. allowed LED-EM combos)

In [14]:
# create master dictionary of dichroically-allowed LED-to-EMfilter combos
allowed_LED_EMfilter_sets = []
for dichroic_name in dichroic_dic.keys():
    current_dichroic_data = dichroic_dic[dichroic_name]
    # identify which LEDs can bu used with current dichroic
    LED_per_dichroic = []
    for LED in LED_list:
        if current_dichroic_data.excitation[LED_active_lambda_df.loc[LED].lambda_range].max() > 0.7:
            LED_per_dichroic.append(LED)
    # identify which EM_filters can bu used with current dichroic
    EMfilter_per_dichroic = []
    for EM_filter in em_filter_list:
        start, end = EM_filter['center']-int(EM_filter['width']/2), EM_filter['center']+int(EM_filter['width']/2)
        current_EM_filter_range = list(range(start,end))
        if current_dichroic_data.emission[current_EM_filter_range].mean() > 0.7:
            EMfilter_per_dichroic.append(EM_filter['center'])
    # create factorial combinations of LEDs and EM_filters that work together
    allowed_LED_EMfilter_sets.extend(list(product(tuple(LED_per_dichroic), tuple(EMfilter_per_dichroic))))
allowed_LED_EMfilter_sets = sorted(set(allowed_LED_EMfilter_sets))
# allowed_LED_EMfilter_sets = set([combo for combo in allowed_LED_EMfilter_sets if combo[0] < combo[1]])
len(allowed_LED_EMfilter_sets)

34

In [15]:
allowed_LED_EMfilter_sets

[(365, 435),
 (365, 515),
 (365, 595),
 (365, 705),
 (440, 475),
 (440, 540),
 (440, 632),
 (440, 705),
 (488, 435),
 (488, 515),
 (488, 595),
 (488, 705),
 (514, 475),
 (514, 540),
 (514, 632),
 (514, 705),
 (561, 435),
 (561, 475),
 (561, 515),
 (561, 540),
 (561, 595),
 (561, 632),
 (561, 705),
 (594, 435),
 (594, 475),
 (594, 515),
 (594, 540),
 (594, 595),
 (594, 632),
 (594, 705),
 (640, 435),
 (640, 515),
 (640, 595),
 (640, 705)]

### 3. Categorisation Visualisation (-)

### 4. Reduce FP number by dichroic constraints

In [16]:
""" All best combinations are allowed ... unsurprisingly :D
"""
fps_LED_EMfilter_data_df = pd.concat([twobest_LEDs_df.drop(['idx']), twobest_EM_filters_df.drop(['idx'])],axis=0)

dichroich_check_best_LED_and_best_EM_filter = []
for fp_name in fps_LED_EMfilter_data_df.columns:
    check = tuple(fps_LED_EMfilter_data_df.loc[['best_LED','best_EM_filter']]['mKalama1'].astype(int).to_list()) in allowed_LED_EMfilter_sets
    dichroich_check_best_LED_and_best_EM_filter.append(check)
dichroich_check_best_LED_and_best_EM_filter_df = pd.DataFrame(dichroich_check_best_LED_and_best_EM_filter)
dichroich_check_best_LED_and_best_EM_filter_df.describe()

Unnamed: 0,0
count,218
unique,1
top,True
freq,218


### 5. Reduce FP number by mVenus & mCherry similars removal

In [17]:
def find_equivalent_fluorophores(f_name, f_EXsource_EMfilter_df, n_EXsource=0, n_EMfilter=0):
    """ Identifies all fluorophores with equivalent excitation and emission spectra.
    """
    if n_EXsource==0:
        fs_best_LED = fps_LED_EMfilter_data_df.loc[['best_LED','best_EM_filter']][f_name].best_LED
    if n_EMfilter==0:
        fs_best_EMfilter = fps_LED_EMfilter_data_df.loc[['best_LED','best_EM_filter']][f_name].best_EM_filter
    equivalent_fluorophores_df = f_EXsource_EMfilter_df.T.loc[(f_EXsource_EMfilter_df.T.best_LED == fs_best_LED) & 
                                                              (f_EXsource_EMfilter_df.T.best_EM_filter == fs_best_EMfilter) 
                                                             ]
    return equivalent_fluorophores_df

In [18]:
find_equivalent_fluorophores('mCherry', fps_LED_EMfilter_data_df).T.columns

Index(['FusionRed', 'mRFP1', 'mApple', 'mCherry', 'mKate', 'mKate2',
       'mNeptune', 'mPlum', 'mRFP1.2', 'mRaspberry', 'mScarlet', 'mScarlet-I',
       'mStable', 'mCherry2', 'E2-Crimson', 'mKate S158A', 'RDSmCherry1',
       'Maroon0.1', 'eqFP650', 'Katushka', 'mGrape3', 'Neptune', 'Katushka2S',
       'Jred', 'KillerRed', 'SuperNova Red', 'AdRed', 'mKelly1', 'mKelly2',
       'mGinger1', 'mGinger2', 'HcRed7', 'HcRed', 'pHuji', 'mKate S158C',
       'mNeptune2.5', 'AzaleaB5', 'plobRFP', 'SuperNova2 (SuperNova2)'],
      dtype='object')

In [19]:
def remove_equivalent_fps_from_optimal_setup_data(fps_to_remove_from_calculations_l, fps_LED_EMfilter_data_df_l):
    """ Takes list of already used FPs, finds their equivalents, and removes them from the spectral data & the 
    optimal LED & EM filter dataset.
    Out: reduced fps_LED_EMfilter_data_df (equivalents removed)
    """
    # Identify all FPs equivalent to the ones you are already using
    equ_fp_to_remove_list = []
    for fp_name in fps_to_remove_from_calculations_l:
        equivalent_fp_names = list(find_equivalent_fluorophores(fp_name, fps_LED_EMfilter_data_df_l).T.columns)
        equivalent_fp_names.remove(fp_name)
        equ_fp_to_remove_list.extend(equivalent_fp_names)
    # Remove the equivalent FPs from the full FP dataset
    equ_fp_list = []
    for equ_fp_name in equ_fp_to_remove_list:
        for fp_name in fp_spectra_df.columns:
            if equ_fp_name in fp_name:
                equ_fp_list.extend([fp_name])
        try:
            equ_fp_list.remove(equ_fp_name)
        except:
            pass
    # Remove the equivalent FPs from the optimal LED & EM filter-identified FP dataset
    return  fps_LED_EMfilter_data_df_l.drop(equ_fp_to_remove_list, axis=1) # fp_spectra_df_l.drop(equ_fp_list ,axis=1),

In [20]:
fp_spectra_df = pd.read_csv(f"spectra_data{os.path.sep}all_fp_spectra.csv",index_col=0)


### 6. Combination calculations and lazy (orthogonality) evaluation, threshold=7

In [21]:
def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))

def allDone():
    display(Audio(url='http://codeskulptor-demos.commondatastorage.googleapis.com/GalaxyInvaders/theme_01.mp3', autoplay=True))
    # https://simpleguics2pygame.readthedocs.io/en/latest/_static/links/snd_links.html

def assess_combo_orthogonality(combo_l,FPs_excitationdata_to_LEDs_df_l, EM_passthrough_at_EMfilters_df_l, fps_LED_EMfilter_data_df_l, threshold=10):
    """ Check orthogonality of fluorophore combination by
    1) assessing whether the best EXCITATION LED of each fluorophore significantly excites another fluorophore in the set
    2) assessing whether the best EMISSION filter of each fluorophore allows significant passthrough of another fluorophore in the set
    3) ONLY calls a set orthogonal if both 1) AND 2) is False! (either alone is fine)
    """
    curr_EX_data_df = FPs_excitationdata_to_LEDs_df_l[combo_l].T
    curr_EM_data_df = EM_passthrough_at_EMfilters_df_l[combo_l].T
    combo_summary = []
    for fp in combo_l:
        current_LED, current_EM = fps_LED_EMfilter_data_df_l[combo_l].loc[['best_LED','best_EM_filter']][fp].to_list()
        #print(fp, current_LED, current_EM)
        fp_EX_EM_comparison_df = pd.concat([curr_EX_data_df[current_LED],curr_EM_data_df[current_EM]],axis=1).drop([fp])
        #display(np.array(fp_EX_EM_comparison_df > 10))
        fP_summary = []
        for row in np.array(fp_EX_EM_comparison_df > threshold):
            fP_summary.append(all(row))
        combo_summary.extend(fP_summary)
    return any(combo_summary) == False

def assess_combo_orthogonality2(combo_l,FPs_excitationdata_to_LEDs_df_l, EM_passthrough_at_EMfilters_df_l, fps_LED_EMfilter_data_df_l, threshold=10):
    """ Check orthogonality of fluorophore combination by
    1) assessing whether the best EXCITATION LED of each fluorophore significantly excites another fluorophore in the set
    2) assessing whether the best EMISSION filter of each fluorophore allows significant passthrough of another fluorophore in the set
    3) ONLY calls a set orthogonal if both 1) AND 2) is False! (either alone is fine)
    """
    curr_EX_data_df = FPs_excitationdata_to_LEDs_df_l[combo_l].T
    curr_EM_data_df = EM_passthrough_at_EMfilters_df_l[combo_l].T
    combo_summary = []
    for fp in combo_l:
        current_LED, current_EM = fps_LED_EMfilter_data_df_l[combo_l].loc[['best_LED','best_EM_filter']][fp].to_list()
        #print(fp, current_LED, current_EM)
        fp_EX_EM_comparison_df = pd.concat([curr_EX_data_df[current_LED],curr_EM_data_df[current_EM]],axis=1).drop([fp])
        #display(np.array(fp_EX_EM_comparison_df > 10))
        fP_summary = []
        for row in np.array(fp_EX_EM_comparison_df > threshold):
            fP_summary.append(all(row))
        combo_summary.extend(fP_summary)
    if any(combo_summary) == False:
        return combo_l

----
#### Choose 3

In [22]:
%%time
set_size = 3
first_selected_choose_3_combos = []
mandatory_fps = ['mVenus','mCherry']
combination_no_left_to_calculate = set_size - len(mandatory_fps)
reduced_fps_LED_EMfilter_data_df = remove_equivalent_fps_from_optimal_setup_data(mandatory_fps, fps_LED_EMfilter_data_df)
print(f"Need to calculate combinations of size {combination_no_left_to_calculate} -> {math.comb(len(reduced_fps_LED_EMfilter_data_df.columns)- len(mandatory_fps), combination_no_left_to_calculate)}:")
names_for_combinations = [name for name in reduced_fps_LED_EMfilter_data_df.columns if name not in mandatory_fps]
no_of_combinations = math.comb(len(reduced_fps_LED_EMfilter_data_df.columns), set_size)
threshold_chosen=7
counter = 0

first_selected_choose_3_combos = Parallel(n_jobs=6)(delayed(assess_combo_orthogonality2)(list(fp_comb_list)+mandatory_fps,
                                      FPs_excitationdata_to_LEDs_df, 
                                      EM_passthrough_at_EMfilters_df, 
                                      reduced_fps_LED_EMfilter_data_df,
                                      threshold=threshold_chosen) 
                                               for fp_comb_list in tqdm(list(combinations(names_for_combinations, combination_no_left_to_calculate)))
                                     )
selected_first_selected_choose_3_combos = [x for x in first_selected_choose_3_combos if x != None]
print(f"\nthreshold_chosen = {threshold_chosen}")
print(f"Fluorophore#: {len(reduced_fps_LED_EMfilter_data_df.columns)}\nCombinations#: {no_of_combinations}")
print(f"\nOrthog first set length: {len(selected_first_selected_choose_3_combos)}\n")
# f = open('allinone_168_choose_5.p','wb')
# pickle.dump(first_selected_choose_5_combos,f)

Need to calculate combinations of size 1 -> 166:


  0%|          | 0/166 [00:00<?, ?it/s]


threshold_chosen = 7
Fluorophore#: 168
Combinations#: 776216

Orthog first set length: 66

CPU times: user 293 ms, sys: 69 ms, total: 362 ms
Wall time: 1.07 s


In [23]:
%%time
set_size = 3
first_selected_choose_3_combos = []
mandatory_fps = ['mVenus','mCherry']
combination_no_left_to_calculate = set_size - len(mandatory_fps)
reduced_fps_LED_EMfilter_data_df = remove_equivalent_fps_from_optimal_setup_data(mandatory_fps, fps_LED_EMfilter_data_df)
names_for_combinations = [name for name in reduced_fps_LED_EMfilter_data_df.columns if name not in mandatory_fps]
no_of_combinations = math.comb(len(fps_LED_EMfilter_data_df.columns), set_size)
threshold_chosen=7
counter = 0

for fp_comb_list in combinations(names_for_combinations, combination_no_left_to_calculate):
    fp_comb_list = list(fp_comb_list)+mandatory_fps
    if assess_combo_orthogonality(fp_comb_list,
                                  FPs_excitationdata_to_LEDs_df, 
                                  EM_passthrough_at_EMfilters_df, 
                                  reduced_fps_LED_EMfilter_data_df,
                                  threshold=threshold_chosen):
        first_selected_choose_3_combos.append(fp_comb_list)
        # if counter%10_000 ==0:
        #     clear_output()
        #     print(counter)
    counter += 1
    
#print(counter)
print(f"\nthreshold_chosen = {threshold_chosen}")
print(f"Fluorophore#: {len(reduced_fps_LED_EMfilter_data_df.columns)}\nCombinations#: {no_of_combinations}")
print(f"\nOrthog first set length: {len(first_selected_choose_3_combos)}\n")
#f = open('allinone_168_choose_3.p','wb')
#pickle.dump(first_selected_choose_3_combos,f)


threshold_chosen = 7
Fluorophore#: 168
Combinations#: 1703016

Orthog first set length: 66

CPU times: user 415 ms, sys: 19.2 ms, total: 434 ms
Wall time: 425 ms


In [28]:
# Choose 3
with open('allinone_168_choose_3.p', 'rb') as file:
    # Call load method to deserialze
    first_selected_choose_3_combos = pickle.load(file)

In [29]:
orthogonal_choose3_combos_threshold_dict = {}
for threshold_ in tqdm(range(0,2)):
    orthogonal_choose3_combos = []
    # 
    for combo in tqdm(first_selected_choose_3_combos):#brightness_filtered_combos_of_4_list_LED):
        if assess_combo_orthogonality(list(combo),
                                      FPs_excitationdata_to_LEDs_df, 
                                      EM_passthrough_at_EMfilters_df, 
                                      fps_LED_EMfilter_data_df,
                                      threshold=threshold_):
            orthogonal_choose3_combos.append(combo)
            
    orthogonal_choose3_combos_threshold_dict[threshold_] = orthogonal_choose3_combos
    print(f"Threshold = {threshold_} : \n   {len(first_selected_choose_3_combos)} -> {len(orthogonal_choose3_combos_threshold_dict[threshold_])}")

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

Threshold = 0 : 
   99 -> 0


  0%|          | 0/99 [00:00<?, ?it/s]

Threshold = 1 : 
   99 -> 52


----
#### Choose 4

In [30]:
# Choose 4
with open('allinone_168_choose_4.p', 'rb') as file:
    # Call load method to deserialze
    first_selected_choose_4_combos = pickle.load(file)

In [75]:
# Threshold scan 4
orthogonal_choose4_combos_threshold_dict = {}
for threshold_ in tqdm(range(0,5)):
    orthogonal_choose5_combos = []
    results = Parallel(n_jobs=6)(delayed(assess_combo_orthogonality2)(list(combo),
                                                                      FPs_excitationdata_to_LEDs_df, 
                                                                      EM_passthrough_at_EMfilters_df, 
                                                                      reduced_fps_LED_EMfilter_data_df,
                                                                      threshold=threshold_) 
                                 for combo in tqdm(first_selected_choose_4_combos)
                                )
    orthogonal_choose4_combos_threshold_dict[threshold_] = [x for x in results if x != None]
    print(f"Threshold = {threshold_} : \n   {len(first_selected_choose_4_combos)} -> {len(orthogonal_choose4_combos_threshold_dict[threshold_])}")

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/3048 [00:00<?, ?it/s]

Threshold = 0 : 
   3048 -> 0


  0%|          | 0/3048 [00:00<?, ?it/s]

Threshold = 1 : 
   3048 -> 575


  0%|          | 0/3048 [00:00<?, ?it/s]

Threshold = 2 : 
   3048 -> 813


  0%|          | 0/3048 [00:00<?, ?it/s]

Threshold = 3 : 
   3048 -> 1065


  0%|          | 0/3048 [00:00<?, ?it/s]

Threshold = 4 : 
   3048 -> 1091


In [32]:
[x for x in orthogonal_choose4_combos_threshold_dict[1] if all(['mCerulean' in x, ]) ]

[['EBFP', 'mCherry', 'mVenus', 'mCerulean'],
 ['EBFP2', 'mCherry', 'mVenus', 'mCerulean'],
 ['iFP2.0', 'mCherry', 'mVenus', 'mCerulean'],
 ['iRFP702', 'mCherry', 'mVenus', 'mCerulean'],
 ['iRFP713', 'mCherry', 'mVenus', 'mCerulean'],
 ['iRFP720', 'mCherry', 'mVenus', 'mCerulean'],
 ['mCherry', 'mKalama1', 'mVenus', 'mCerulean'],
 ['mCherry', 'Sirius', 'mVenus', 'mCerulean'],
 ['mCherry', 'P4-3E', 'mVenus', 'mCerulean'],
 ['mCherry', 'mVenus', 'miRFP720', 'mCerulean'],
 ['mCherry', 'mVenus', 'SNIFP', 'mCerulean'],
 ['mCherry', 'mVenus', 'oxBFP', 'mCerulean'],
 ['mCherry', 'mVenus', 'mCerulean', 'P4'],
 ['mCherry', 'mVenus', 'mCerulean', 'mIFP']]

----
#### Choose 5

In [33]:
# Choose 5
with open('allinone_168_choose_5.p', 'rb') as file:
    # Call load method to deserialze
    first_selected_choose_5_combos = pickle.load(file)
print(f"first_selected_choose_5_combos: {len(first_selected_choose_5_combos)}")

first_selected_choose_5_combos: 11482


In [34]:
# Threshold scan 5
orthogonal_choose5_combos_threshold_dict = {}
for threshold_ in tqdm(range(0,2)):
    orthogonal_choose5_combos = []
    results = Parallel(n_jobs=6)(delayed(assess_combo_orthogonality2)(list(combo),
                                                                      FPs_excitationdata_to_LEDs_df, 
                                                                      EM_passthrough_at_EMfilters_df, 
                                                                      reduced_fps_LED_EMfilter_data_df,
                                                                      threshold=threshold_) 
                                 for combo in tqdm(first_selected_choose_5_combos)
                                )
    orthogonal_choose5_combos_threshold_dict[threshold_] = [x for x in results if x != None]
    print(f"Threshold = {threshold_} : \n   {len(first_selected_choose_5_combos)} -> {len(orthogonal_choose5_combos_threshold_dict[threshold_])}")

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11482 [00:00<?, ?it/s]

Threshold = 0 : 
   11482 -> 0


  0%|          | 0/11482 [00:00<?, ?it/s]

Threshold = 1 : 
   11482 -> 2072


----
#### Choose 6

In [35]:
# Choose 6
with open('allinone_168_choose_6.p', 'rb') as file:
    # Call load method to deserialze
    first_selected_choose_6_combos = pickle.load(file)
print(f"first_selected_choose_6_combos: {len(first_selected_choose_6_combos)}")

first_selected_choose_6_combos: 34333


In [36]:
# Threshold scan
orthogonal_choose6_combos_threshold_dict = {}
for threshold_ in tqdm(range(0,2)):
    orthogonal_choose6_combos = []
    results = Parallel(n_jobs=6)(delayed(assess_combo_orthogonality2)(list(combo),
                                                                      FPs_excitationdata_to_LEDs_df, 
                                                                      EM_passthrough_at_EMfilters_df, 
                                                                      reduced_fps_LED_EMfilter_data_df,
                                                                      threshold=threshold_) 
                                 for combo in tqdm(first_selected_choose_6_combos)
                                )
    orthogonal_choose6_combos_threshold_dict[threshold_] = [x for x in results if x != None]
    print(f"Threshold = {threshold_} : \n   {len(first_selected_choose_6_combos)} -> {len(orthogonal_choose6_combos_threshold_dict[threshold_])}")

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/34333 [00:00<?, ?it/s]

Threshold = 0 : 
   34333 -> 0


  0%|          | 0/34333 [00:00<?, ?it/s]

Threshold = 1 : 
   34333 -> 1764


----
### 7. Filtering using experiment-specific heuristics

#### Opt. 1: Normalised brightness 

In [37]:
def EX_efficiency_score(fp_name_l, LED_name_l, FPs_excitationdata_to_LEDs_df_l):
    ex_test_df = FPs_excitationdata_to_LEDs_df_l.T.sort_values(by=LED_name_l, ascending=False)
    return list(ex_test_df.index).index(fp_name_l)

def EM_passthrough_efficiency_score(fp_name_l, EMfilter_name_l, EM_passthrough_at_EMfilters_df_l):
    em_test_df = EM_passthrough_at_EMfilters_df_l.T.sort_values(by=EMfilter_name_l, ascending=False)
    return list(em_test_df.index).index(fp_name_l)

##### Choose 3

In [38]:
minimal_threshold_choose3 = 1
excitation_scores_choose3 = []
emission_scores_choose3 = []
# Iterate through every orthogonal combination & calculate exciation and emission scores for every combination
for combo in tqdm(orthogonal_choose3_combos_threshold_dict[minimal_threshold_choose3]):
    current_combo_excitation_scores = []
    current_combo_emission_scores= []
    for fp in combo:
        current_LED, current_EM = fps_LED_EMfilter_data_df.loc[['best_LED','best_EM_filter']][fp].to_list()
        current_combo_excitation_scores.append(EX_efficiency_score(fp, current_LED, FPs_excitationdata_to_LEDs_df))
        current_combo_emission_scores.append(EM_passthrough_efficiency_score(fp, current_EM, EM_passthrough_at_EMfilters_df))
    excitation_scores_choose3.append(sum(current_combo_excitation_scores))
    emission_scores_choose3.append(sum(current_combo_emission_scores))
choose3_orthogonal_fp_set = pd.concat([pd.DataFrame({"fp_combo_choose3" : orthogonal_choose3_combos_threshold_dict[minimal_threshold_choose3]}),#columns=['fp_combo_choose3']),#[f"fp_{idx}" for idx in range(len(orthogonal_choose3_combos_threshold_dict[minimal_threshold_choose3][0]))]), 
                                       pd.DataFrame({"exc_score": excitation_scores_choose3, "em_score" : emission_scores_choose3})], 
                                      axis=1)

  0%|          | 0/52 [00:00<?, ?it/s]

In [39]:
choose3_orthogonal_fp_set.loc[choose3_orthogonal_fp_set.exc_score.nlargest(5).index]

Unnamed: 0,fp_combo_choose3,exc_score,em_score
43,"(mCherry, mVenus, AausGFP)",71,76
20,"(mCherry, Sirius, mVenus)",70,54
15,"(mCherry, mTFP1, mVenus)",68,115
38,"(mCherry, mVenus, miniSOG2)",66,58
29,"(mCherry, mVenus, SNIFP)",65,73


In [40]:
choose3_orthogonal_fp_set.loc[choose3_orthogonal_fp_set.em_score.nlargest(5).index]

Unnamed: 0,fp_combo_choose3,exc_score,em_score
12,"(mAmetrine, mCherry, mVenus)",47,130
17,"(mCherry, amCyan1, mVenus)",58,129
7,"(TagCFP, mCherry, mVenus)",59,121
15,"(mCherry, mTFP1, mVenus)",68,115
49,"(mCherry, mVenus, efasCFP)",65,112


In [41]:
overal_best_choose3 = []
counter = 1
while len(overal_best_choose3) == 0:
    results = choose3_orthogonal_fp_set.loc[(choose3_orthogonal_fp_set.exc_score > choose3_orthogonal_fp_set.exc_score.nlargest(counter).iloc[-1]) &
                                  (choose3_orthogonal_fp_set.em_score > choose3_orthogonal_fp_set.em_score.nlargest(counter).iloc[-1])
                                 ]
    if len(results) != 0:
        overal_best_choose3.append(results)
    counter += 1
print(counter-1)
overal_best_choose3[0]

5


Unnamed: 0,fp_combo_choose3,exc_score,em_score
15,"(mCherry, mTFP1, mVenus)",68,115


##### Choose 4

In [42]:
minimal_threshold_choose4 = 1
excitation_scores_choose4 = []
emission_scores_choose4 = []
# Iterate through every orthogonal combination & calculate exciation and emission scores for every combination
for combo in tqdm(orthogonal_choose4_combos_threshold_dict[minimal_threshold_choose4]):
    current_combo_excitation_scores = []
    current_combo_emission_scores= []
    for fp in combo:
        current_LED, current_EM = fps_LED_EMfilter_data_df.loc[['best_LED','best_EM_filter']][fp].to_list()
        current_combo_excitation_scores.append(EX_efficiency_score(fp, current_LED, FPs_excitationdata_to_LEDs_df))
        current_combo_emission_scores.append(EM_passthrough_efficiency_score(fp, current_EM, EM_passthrough_at_EMfilters_df))
    excitation_scores_choose4.append(sum(current_combo_excitation_scores))
    emission_scores_choose4.append(sum(current_combo_emission_scores))
choose4_orthogonal_fp_set = pd.concat([pd.DataFrame({"fp_combo_choose4" : orthogonal_choose4_combos_threshold_dict[minimal_threshold_choose4]}),#columns=['fp_combo_choose3']),#[f"fp_{idx}" for idx in range(len(orthogonal_choose3_combos_threshold_dict[minimal_threshold_choose3][0]))]), 
                                       pd.DataFrame({"exc_score": excitation_scores_choose4, "em_score" : emission_scores_choose4})], 
                                      axis=1)

  0%|          | 0/575 [00:00<?, ?it/s]

In [43]:
choose4_orthogonal_fp_set.loc[choose4_orthogonal_fp_set.exc_score.nlargest(5).index]

Unnamed: 0,fp_combo_choose4,exc_score,em_score
345,"[mCherry, mTFP1, Sirius, mVenus]",104,115
398,"[mCherry, Sirius, mVenus, miniSOG2]",102,58
494,"[mCherry, mVenus, SNIFP, AausGFP]",102,95
392,"[mCherry, Sirius, mVenus, SNIFP]",101,73
406,"[mCherry, Sirius, mVenus, efasCFP]",101,112


In [44]:
choose4_orthogonal_fp_set.loc[choose4_orthogonal_fp_set.em_score.nlargest(5).index]

Unnamed: 0,fp_combo_choose4,exc_score,em_score
361,"[mCherry, amCyan1, LSSmOrange, mVenus]",68,172
254,"[iRFP720, mAmetrine, mCherry, mVenus]",67,161
259,"[iRFP720, mCherry, amCyan1, mVenus]",78,160
344,"[mCherry, mTFP1, LSSmOrange, mVenus]",78,158
291,"[mAmetrine, mCherry, mVenus, miRFP720]",66,157


In [45]:
overal_best_choose4 = []
counter = 1
while len(overal_best_choose4) == 0:
    results = choose4_orthogonal_fp_set.loc[(choose4_orthogonal_fp_set.exc_score > choose4_orthogonal_fp_set.exc_score.nlargest(counter).iloc[-1]) &
                                  (choose4_orthogonal_fp_set.em_score > choose4_orthogonal_fp_set.em_score.nlargest(counter).iloc[-1])
                                 ]
    if len(results) != 0:
        overal_best_choose4.append(results)
    counter += 1
print(counter-1)
overal_best_choose4[0]

22


Unnamed: 0,fp_combo_choose4,exc_score,em_score
137,"[TagCFP, mCherry, mVenus, SNIFP]",90,140


##### Choose 5

In [46]:
minimal_threshold_choose5 = 1
excitation_scores_choose5 = []
emission_scores_choose5 = []
# Iterate through every orthogonal combination & calculate exciation and emission scores for every combination
for combo in tqdm(orthogonal_choose5_combos_threshold_dict[minimal_threshold_choose5]):
    current_combo_excitation_scores = []
    current_combo_emission_scores= []
    for fp in combo:
        current_LED, current_EM = fps_LED_EMfilter_data_df.loc[['best_LED','best_EM_filter']][fp].to_list()
        current_combo_excitation_scores.append(EX_efficiency_score(fp, current_LED, FPs_excitationdata_to_LEDs_df))
        current_combo_emission_scores.append(EM_passthrough_efficiency_score(fp, current_EM, EM_passthrough_at_EMfilters_df))
    excitation_scores_choose5.append(sum(current_combo_excitation_scores))
    emission_scores_choose5.append(sum(current_combo_emission_scores))
choose5_orthogonal_fp_set = pd.concat([pd.DataFrame({"fp_combo_choose5" : orthogonal_choose5_combos_threshold_dict[minimal_threshold_choose5]}),#columns=['fp_combo_choose3']),#[f"fp_{idx}" for idx in range(len(orthogonal_choose3_combos_threshold_dict[minimal_threshold_choose3][0]))]), 
                                       pd.DataFrame({"exc_score": excitation_scores_choose5, "em_score" : emission_scores_choose5})], 
                                      axis=1)

  0%|          | 0/2072 [00:00<?, ?it/s]

In [47]:
choose5_orthogonal_fp_set.loc[choose5_orthogonal_fp_set.exc_score.nlargest(5).index]

Unnamed: 0,fp_combo_choose5,exc_score,em_score
1613,"[mTFP1, Sirius, SNIFP, mVenus, mCherry]",135,134
1745,"[Sirius, SNIFP, miniSOG2, mVenus, mCherry]",133,77
1752,"[Sirius, SNIFP, efasCFP, mVenus, mCherry]",132,131
1750,"[Sirius, SNIFP, dTFP0.2, mVenus, mCherry]",130,118
1747,"[Sirius, SNIFP, CFP, mVenus, mCherry]",127,75


In [48]:
choose5_orthogonal_fp_set.loc[choose5_orthogonal_fp_set.em_score.nlargest(5).index]

Unnamed: 0,fp_combo_choose5,exc_score,em_score
1290,"[iRFP720, amCyan1, LSSmOrange, mVenus, mCherry]",88,203
1652,"[amCyan1, LSSmOrange, miRFP720, mVenus, mCherry]",87,199
1117,"[iRFP713, amCyan1, LSSmOrange, mVenus, mCherry]",82,196
1653,"[amCyan1, LSSmOrange, SNIFP, mVenus, mCherry]",99,191
1279,"[iRFP720, mTFP1, LSSmOrange, mVenus, mCherry]",98,189


In [49]:
overal_best_choose5 = []
counter = 1
while len(overal_best_choose5) == 0:
    results = choose5_orthogonal_fp_set.loc[(choose5_orthogonal_fp_set.exc_score > choose5_orthogonal_fp_set.exc_score.nlargest(counter).iloc[-1]) &
                                  (choose5_orthogonal_fp_set.em_score > choose5_orthogonal_fp_set.em_score.nlargest(counter).iloc[-1])
                                 ]
    if len(results) != 0:
        overal_best_choose5.append(results)
    counter += 1
print(counter-1)
overal_best_choose5[0]

46


Unnamed: 0,fp_combo_choose5,exc_score,em_score
1291,"[iRFP720, amCyan1, Sirius, mVenus, mCherry]",114,160


##### Choose 6

In [50]:
minimal_threshold_choose6 = 1
excitation_scores_choose6 = []
emission_scores_choose6 = []
# Iterate through every orthogonal combination & calculate exciation and emission scores for every combination
for combo in tqdm(orthogonal_choose6_combos_threshold_dict[minimal_threshold_choose6]):
    current_combo_excitation_scores = []
    current_combo_emission_scores= []
    for fp in combo:
        current_LED, current_EM = fps_LED_EMfilter_data_df.loc[['best_LED','best_EM_filter']][fp].to_list()
        current_combo_excitation_scores.append(EX_efficiency_score(fp, current_LED, FPs_excitationdata_to_LEDs_df))
        current_combo_emission_scores.append(EM_passthrough_efficiency_score(fp, current_EM, EM_passthrough_at_EMfilters_df))
    excitation_scores_choose6.append(sum(current_combo_excitation_scores))
    emission_scores_choose6.append(sum(current_combo_emission_scores))
choose6_orthogonal_fp_set = pd.concat([pd.DataFrame({"fp_combo_choose6" : orthogonal_choose6_combos_threshold_dict[minimal_threshold_choose6]}),#columns=['fp_combo_choose3']),#[f"fp_{idx}" for idx in range(len(orthogonal_choose3_combos_threshold_dict[minimal_threshold_choose3][0]))]), 
                                       pd.DataFrame({"exc_score": excitation_scores_choose6, "em_score" : emission_scores_choose6})], 
                                      axis=1)

  0%|          | 0/1764 [00:00<?, ?it/s]

In [51]:
choose6_orthogonal_fp_set.loc[choose6_orthogonal_fp_set.exc_score.nlargest(5).index]

Unnamed: 0,fp_combo_choose6,exc_score,em_score
1525,"[mCherry, mTFP1, LSSmOrange, Sirius, mVenus, S...",145,177
1417,"[mCherry, mKeima, mTFP1, Sirius, mVenus, SNIFP]",139,136
1538,"[mCherry, mTFP1, Sirius, mVenus, SNIFP, tKeima]",137,142
1452,"[mCherry, mKeima, Sirius, mVenus, SNIFP, efasCFP]",136,133
1539,"[mCherry, mTFP1, Sirius, mVenus, SNIFP, dKeima]",136,143


In [52]:
choose6_orthogonal_fp_set.loc[choose6_orthogonal_fp_set.em_score.nlargest(5).index]

Unnamed: 0,fp_combo_choose6,exc_score,em_score
1196,"[iRFP720, mCherry, mKalama1, amCyan1, LSSmOran...",88,209
1263,"[iRFP720, mCherry, amCyan1, LSSmOrange, mVenus...",94,208
540,"[EBFP2, iRFP720, mCherry, amCyan1, LSSmOrange,...",93,207
1264,"[iRFP720, mCherry, amCyan1, LSSmOrange, mVenus...",96,206
1262,"[iRFP720, mCherry, amCyan1, LSSmOrange, P4-3E,...",102,205


In [53]:
overal_best_choose6 = []
counter = 1
while len(overal_best_choose6) == 0:
    results = choose6_orthogonal_fp_set.loc[(choose6_orthogonal_fp_set.exc_score > choose6_orthogonal_fp_set.exc_score.nlargest(counter).iloc[-1]) &
                                  (choose6_orthogonal_fp_set.em_score > choose6_orthogonal_fp_set.em_score.nlargest(counter).iloc[-1])
                                 ]
    if len(results) != 0:
        overal_best_choose6.append(results)
    counter += 1
print(counter-1)
overal_best_choose6[0]

36


Unnamed: 0,fp_combo_choose6,exc_score,em_score
1561,"[mCherry, amCyan1, LSSmOrange, Sirius, mVenus,...",135,191


In [54]:
overal_best_choose6[0].iloc[0,0]

['mCherry', 'amCyan1', 'LSSmOrange', 'Sirius', 'mVenus', 'SNIFP']

---
#### Opt. 2: Real, host brightness

Specialised function

In [None]:
def random_combination(iterable, r):
    "Random selection from itertools.combinations(iterable, r)"
    pool = tuple(iterable)
    n = len(pool)
    indices = sorted(random.sample(range(n), r))
    return tuple(pool[i] for i in indices)

def create_and_assess_random_combo(set_size_l,FPs_excitationdata_to_LEDs_df_l, EM_passthrough_at_EMfilters_df_l, fps_LED_EMfilter_data_df_l, mandatory_fps = [], threshold=10):
    """ Check orthogonality of fluorophore combination by
    1) assessing whether the best EXCITATION LED of each fluorophore significantly excites another fluorophore in the set
    2) assessing whether the best EMISSION filter of each fluorophore allows significant passthrough of another fluorophore in the set
    3) ONLY calls a set orthogonal if both 1) AND 2) is False! (either alone is fine)
    """
    combination_no_left_to_calculate = set_size_l - len(mandatory_fps)
    names_for_combinations = [name for name in fps_LED_EMfilter_data_df_l.columns if name not in mandatory_fps]
    while True:
        new_fp_combo = list(random_combination(names_for_combinations, combination_no_left_to_calculate))+mandatory_fps 
        # Do not waste time calculating useless combinations!
        if assess_combo_orthogonality(list(new_fp_combo),
                                      FPs_excitationdata_to_LEDs_df_l, 
                                      EM_passthrough_at_EMfilters_df_l, 
                                      fps_LED_EMfilter_data_df_l,
                                      threshold=threshold):
            return new_fp_combo

In [58]:
%%time
set_size = 7
CPU_cores = 1
results_per_CPU = 1

first_selected_choose_7_combos = []
mandatory_fps_list = [ ] # 'mVenus'
reduced_fps_LED_EMfilter_data_df = remove_equivalent_fps_from_optimal_setup_data(mandatory_fps_list, fps_LED_EMfilter_data_df)
fp_size = len(reduced_fps_LED_EMfilter_data_df.columns)
no_of_combinations = math.comb(len(reduced_fps_LED_EMfilter_data_df.columns)-len(mandatory_fps_list), set_size)
threshold_chosen = 5
result_size = CPU_cores*results_per_CPU
print(f"FPs: {fp_size} -> find {result_size} orthogonal sets of size {set_size} / {no_of_combinations}\n")
counter = 0

first_selected_choose_7_combos = create_and_assess_random_combo(set_size,FPs_excitationdata_to_LEDs_df,
                                                                EM_passthrough_at_EMfilters_df,
                                                                reduced_fps_LED_EMfilter_data_df,
                                                                mandatory_fps = mandatory_fps_list,
                                                                threshold = threshold_chosen) 

    
print(counter)
print(len(first_selected_choose_7_combos))
first_selected_choose_7_combos

FPs: 218 -> find 1 orthogonal sets of size 7 / 4212182115144



KeyboardInterrupt: 

##### *E.coli*

In [126]:
fp_spectra_df = pd.read_csv(f"spectra_data{os.path.sep}all_fp_spectra.csv",index_col=0)
Balleza_2017_data_df = pd.read_csv(f"spectra_data{os.path.sep}Balleza_2017_data.csv", index_col=0)
# Create sets for overlap identification
Balleza_fps_set = set(Balleza_2017_data_df.index)
FPbase_fps_set = set(list(fps_LED_EMfilter_data_df.columns)+['mVenus', 'mCherry'])
# Trim Ballexa data for usability
Balleza_2017_data_df = Balleza_2017_data_df.T[[x for x in Balleza_fps_set if '2nd' not in x]]
Balleza_2017_data_df.rename(columns={"mVenusNB": "mVenus", "mCherry-L": "mCherry"}, inplace=True)
Balleza_2017_data_df = Balleza_2017_data_df.T
Balleza_fps_set = set(Balleza_2017_data_df.index)
# Identify Intersection between published/Balleza data and FPbase database
allowed_fps = Balleza_fps_set.intersection(FPbase_fps_set)
print(f"FPs in Balleza study for which FPbase offers spectral data:\n{allowed_fps}")

FPs in Balleza study for which FPbase offers spectral data:
{'TagRFP-T', 'mScarlet-I', 'mScarlet', 'mCerulean3', 'Clover', 'mRuby3', 'Sapphire', 'mEmerald', 'mEGFP', 'mNeptune2.5', 'mRFP1', 'mTurquoise', 'T-Sapphire', 'moxGFP', 'TagRFP', 'mCherry', 'mClover3', 'mVenus', 'moxCerulean3', 'mTurquoise2', 'mCerulean', 'mKate2', 'Katushka', 'TurboRFP', 'mNeonGreen'}


In [127]:
# Choose 3
choose3_dict = {}
counter=0
print(f"Choose 3 Results:")
for idx, fp_combo in enumerate(orthogonal_choose3_combos_threshold_dict[1]):
    if all([x in allowed_fps for x in fp_combo]):
        display(fp_combo)
        choose3_dict[counter] = {'combo': fp_combo,
                                 'brightness': Balleza_2017_data_df['Brightness(t_chlor)'].loc[list(fp_combo)].sum(),
                                 'maturation_t50' :  Balleza_2017_data_df['t50'].loc[list(fp_combo)].sum(),
                                 'maturation_t90' :  Balleza_2017_data_df['t90'].loc[list(fp_combo)].sum()
                                }
        counter+=1
        
# Choose 4
print(f"\nChoose 4 Results:")
for fp_combo in orthogonal_choose4_combos_threshold_dict[1]:
    if all([x in allowed_fps for x in fp_combo]):
        print(fp_combo)

# Choose 5
print(f"\nChoose 5 Results:")
for fp_combo in orthogonal_choose5_combos_threshold_dict[1]:
    if all([x in allowed_fps for x in fp_combo]):
        print(fp_combo)

# Choose 6
print(f"\nChoose 6 Results:")
for fp_combo in orthogonal_choose6_combos_threshold_dict[1]:
    if all([x in allowed_fps for x in fp_combo]):
        print(fp_combo)

Choose 3 Results:


('Sapphire', 'mCherry', 'mVenus')

('T-Sapphire', 'mCherry', 'mVenus')

('mCherry', 'mVenus', 'mTurquoise')

('mCherry', 'mVenus', 'mTurquoise2')

('mCherry', 'mVenus', 'moxCerulean3')

('mCherry', 'mVenus', 'mCerulean')

('mCherry', 'mVenus', 'mCerulean3')


Choose 4 Results:

Choose 5 Results:

Choose 6 Results:


In [129]:
#Balleza_2017_data_df['Brightness(t_chlor)'].loc[list(fp_combo)].sum()
Balleza_2017_data_df
pd.DataFrame(choose3_dict).T.sort_values(by='brightness',ascending=False)

Unnamed: 0,combo,brightness,maturation_t50,maturation_t90
3,"(mCherry, mVenus, mTurquoise2)",267638.130525,74.599653,196.977645
5,"(mCherry, mVenus, mCerulean)",248681.700211,47.716347,125.007976
6,"(mCherry, mVenus, mCerulean3)",185938.543896,110.851257,280.268028
2,"(mCherry, mVenus, mTurquoise)",147891.18426,153.339594,423.431902
4,"(mCherry, mVenus, moxCerulean3)",111404.974692,141.537054,339.474159
0,"(Sapphire, mCherry, mVenus)",76039.766738,79.482778,204.685315
1,"(T-Sapphire, mCherry, mVenus)",72669.716479,197.556847,583.340512


---
#### Opt. 3: Maturation times

In [130]:
pd.DataFrame(choose3_dict).T.sort_values(by='maturation_t50')

Unnamed: 0,combo,brightness,maturation_t50,maturation_t90
5,"(mCherry, mVenus, mCerulean)",248681.700211,47.716347,125.007976
3,"(mCherry, mVenus, mTurquoise2)",267638.130525,74.599653,196.977645
0,"(Sapphire, mCherry, mVenus)",76039.766738,79.482778,204.685315
6,"(mCherry, mVenus, mCerulean3)",185938.543896,110.851257,280.268028
4,"(mCherry, mVenus, moxCerulean3)",111404.974692,141.537054,339.474159
2,"(mCherry, mVenus, mTurquoise)",147891.18426,153.339594,423.431902
1,"(T-Sapphire, mCherry, mVenus)",72669.716479,197.556847,583.340512


In [131]:
pd.DataFrame(choose3_dict).T.sort_values(by='maturation_t90')

Unnamed: 0,combo,brightness,maturation_t50,maturation_t90
5,"(mCherry, mVenus, mCerulean)",248681.700211,47.716347,125.007976
3,"(mCherry, mVenus, mTurquoise2)",267638.130525,74.599653,196.977645
0,"(Sapphire, mCherry, mVenus)",76039.766738,79.482778,204.685315
6,"(mCherry, mVenus, mCerulean3)",185938.543896,110.851257,280.268028
4,"(mCherry, mVenus, moxCerulean3)",111404.974692,141.537054,339.474159
2,"(mCherry, mVenus, mTurquoise)",147891.18426,153.339594,423.431902
1,"(T-Sapphire, mCherry, mVenus)",72669.716479,197.556847,583.340512


## Random search for optimal sets

WARNING: can take multiple days to copmute

In [None]:
%%time
set_size = 8
CPU_cores = 1
results_per_CPU = 1

first_selected_choose_7_combos = []
mandatory_fps_list = ['LSSmOrange' ] # 'mVenus'
reduced_fps_LED_EMfilter_data_df = remove_equivalent_fps_from_optimal_setup_data(mandatory_fps_list, fps_LED_EMfilter_data_df)
fp_size = len(reduced_fps_LED_EMfilter_data_df.columns)
no_of_combinations = math.comb(len(reduced_fps_LED_EMfilter_data_df.columns)-len(mandatory_fps_list), set_size)
threshold_chosen = 4
result_size = CPU_cores*results_per_CPU
print(f"FPs: {fp_size} -> find {result_size} orthogonal sets of size {set_size} / {no_of_combinations}\n")
counter = 0

first_selected_choose_7_combos = create_and_assess_random_combo(set_size,FPs_excitationdata_to_LEDs_df,
                                                                EM_passthrough_at_EMfilters_df,
                                                                reduced_fps_LED_EMfilter_data_df,
                                                                mandatory_fps = mandatory_fps_list,
                                                                threshold = threshold_chosen) 

    
print(counter)
print(len(first_selected_choose_7_combos))
first_selected_choose_7_combos

FPs: 217 -> find 1 orthogonal sets of size 8 / 103073959989495



In [None]:
""" Sets of 8
"""
# threshold = 8 =>  min
['EYFP', 'mKeima', 'mMaroon1', 'mTurquoise', 'oxBFP', 'mKO&kappa;', 'miniSOG', 'mIFP']

# threshold = 7 =>  min
['EBFP', 'T-Sapphire', 'LSSmOrange', 'amFP486', 'SYFP2', 'KO', 'mKelly2', 'miRFP709']

# threshold = 6 =>  5h 40min 40s
['Aquamarine', 'iFP1.4', 'mMiCy', 'Sirius', 'mCarmine', 'phiYFP', 'cgfTagRFP', 'LSSmOrange']

# threshold = 5 => 1d 9h 9min 10s
['P4-3E', 'dsFP483', 'LSSmOrange', 'NowGFP', 'mPapaya', 'mOrange', 'mKelly1',  'iRFP713']

# threshold = 4 =>  min

# threshold = 3

# threshold = 2

# threshold = 1 ?

## Visualis check for orthogonality

In [134]:
fp_list_global = ['P4-3E', 'dsFP483', 'LSSmOrange', 'NowGFP', 'mPapaya', 'mOrange', 'mKelly1',  'iRFP713']
# ['mBanana', 'Cerulean', 'hmGFP', 
#                   'miRFP720', 'Neptune', 'dKeima', 'P4']
#['Sapphire', 'mVenus', 'mCherry'] Sirius, Aquamarine, LSSmOrange, mMiCy, phiYFP, cgfTagRFP, mCarmine, iFP1.4]

In [135]:
@interact
def show(combo_no=(0,len(fp_list_global[1])-1)
        ):
    fp_list = fp_list_global
    LED_no =(0,len(LED_list)-1),
    all_EM = False,
    EM_filter = em_filter_list
    plt.style.use('default')
    fig = plt.figure(figsize=(14,9), tight_layout=True)
    gs = gridspec.GridSpec(2,1,figure=fig)
    ax_fluo, ax_dichroic = fig.add_subplot(gs[0,0]), fig.add_subplot(gs[1,0])
    fluorophore_names = ' - '.join([', '.join(fluo) for fluo in [fp_list, chem_fluorophore_list, nuclear_dye_list]])
    # Fluorophores
    if len(fp_list) != 0:
        [(plot_spectrum(ax_fluo, fp_spectra_df, f"{fluo} ex", f_type=''), plot_spectrum(ax_dichroic, fp_spectra_df, f"{fluo} em", f_type='')) for fluo in fp_list]
    # LEDs
    [ax_fluo.vlines(wavelength, 1.05,1.14,linewidth=3,color= wavelength_to_rgb(wavelength, gamma=0.8)) for wavelength in range(300,800)]
    # Dichroic
    dichroic = 'CFP_YFP_mCherry' # 'CFP_YFP_mCherry', 'DA_FI_TR_Cy5_B'
    [ax_fluo.plot(dichroic_dic[dichroic][spectrum].index, dichroic_dic[dichroic][spectrum], '-', color = c_colour, alpha=0.5,linewidth=2, markersize=12,label='dichroic 1') 
     for (spectrum,c_colour) in [('excitation','black'), ]]
    [ax_dichroic.plot(dichroic_dic[dichroic][spectrum].index, dichroic_dic[dichroic][spectrum], '-', color = c_colour, alpha=0.5,linewidth=2, markersize=12,label='dichroic 1') 
     for (spectrum,c_colour) in [ ('emission','black')]]

    dichroic =  'DA_FI_TR_Cy5_B'
    [ax_fluo.plot(dichroic_dic[dichroic][spectrum].index, dichroic_dic[dichroic][spectrum], '-', color = c_colour, alpha=0.5,linewidth=2, markersize=12,label='dichroic 2') 
     for (spectrum,c_colour) in [('excitation','grey'), ]]
    [ax_dichroic.plot(dichroic_dic[dichroic][spectrum].index, dichroic_dic[dichroic][spectrum], '-', color = c_colour, alpha=0.5,linewidth=2, markersize=12,label='dichroic 2') 
     for (spectrum,c_colour) in [ ('emission','grey')]]
    # Emission Filters
    for em_data in em_filter_list:
        em_filter_start, em_filter_end = em_data['center']-em_data['width']/2, em_data['center']+em_data['width']/2                       
        plt.rcParams["hatch.linewidth"] = 4
        em_filter_patch = patches.Rectangle((em_filter_start, 0),em_data['width'], 1, linewidth=1, alpha = 0.1, linestyle='--',
                                            edgecolor=wavelength_to_rgb(em_data['center']), facecolor=wavelength_to_rgb(em_data['center']))
        ax_dichroic.add_patch(em_filter_patch)
    # Nikon-given Lumencore LED spectra:
    for column_name in all_LED_data_df.columns:
        current_LED_data = all_LED_data_df.loc[pd.notna(all_LED_data_df[column_name])][column_name]
        normalised_LED_data = current_LED_data/all_LED_data_df.describe().loc['max'].max()
        ax_fluo.fill_between(current_LED_data.index, normalised_LED_data,color= wavelength_to_rgb(current_LED_data.idxmax(), gamma=0.7))
    ax_fluo.legend(loc = 'center right'), ax_dichroic.legend(loc = 'center left')
    # subplot modification
    #ax_fluo.set_title(f"Fluorophores: {fluorophore_names}",size=12)
    [(subplot.set_ylim(0,1.15),subplot.set_xlim(300,801),
      subplot.set_xticks(np.arange(300, 800+1, 25)), #subplot.set_xlabel('Wavelength (nm)'),
      subplot.spines['right'].set_visible(False), subplot.spines['top'].set_visible(False),
      subplot.grid(color='grey', linestyle='--', linewidth=0.5, alpha=0.25)) for subplot in [ax_fluo,ax_dichroic]]
    ax_dichroic.set_xlabel('Wavelength (nm)')
    #plt.savefig(f"final_set8_thresh5.png", dpi=300)
    plt.show(); 

interactive(children=(IntSlider(value=3, description='combo_no', max=6), Output()), _dom_classes=('widget-inte…