In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import analytics_core_V04 as ac

import helper_functions as cf

# Figure 1C

In [35]:
# Whisper80: in-solution digest vs. the three SPEC phases

# All four experiments of this figure read from the same parquet file.
e178_parquet = r'Z:\H032_E178\H032_E178_full.parquet'

# File-tag patterns, one list per method (see the pairing below).
tags1 = cf.generate_pattern_list(['D'], 1, 4, 'E178_')  # In sol
tags2 = cf.generate_pattern_list(['C'], 1, 4, 'E178_')  # C18
tags7 = cf.generate_pattern_list(['E'], 1, 4, 'E178_')  # SCX
tags9 = cf.generate_pattern_list(['A'], 1, 4, 'E178_')  # SAX

# One spec per method; the list order is the order in which they are processed.
experiment_dict = [
    {'instrument': 'No detergent', 'method': method, 'file_tags': tags, 'path': e178_parquet}
    for method, tags in (
        ('In sol', tags1),
        ('SAX', tags9),
        ('SCX', tags7),
        ('C18', tags2),
    )
]

In [36]:
from collections import defaultdict

# Bucket the experiment specs by their parquet path so each file is loaded once.
experiments_by_file = defaultdict(list)
for exp in experiment_dict:
    experiments_by_file[exp['path']].append(exp)

# Per file: keep rows with PG.Q.Value below 0.01, then aggregate every
# experiment that reads from that file.
agg_stat_df = []
for path, experiments in experiments_by_file.items():
    df_full = cf.load_parquet_cached(path)
    q_value_ok = df_full['PG.Q.Value'] < 0.01
    df_full = df_full[q_value_ok]

    for experiment in experiments:
        agg_stat_df.append(
            cf.process_experiment(
                df_full,
                experiment,
                protease='trypsin',  # original note lists 'lysc', 'argc', etc. as alternatives
                max_missed_cleavages=2,
            )
        )

# Stack the per-experiment results into a single frame with a fresh index.
agg_stat_df = pd.concat(agg_stat_df, ignore_index=True)











In [17]:
# Short alias for the Figure 1C aggregate. This binds the same object as
# `agg_stat_df` (no copy is made).
df = agg_stat_df

In [18]:
# Distinct values of the 'method' column, in order of first appearance in `df`.
methods = df['method'].unique().tolist()   

In [19]:
# One list of 'protein' values per method, in the same order as `methods`.
fin = [df.loc[df['method'] == el, 'protein'].tolist() for el in methods]

In [20]:
### Creating custom order
# Build the reordered lists from `df` and `methods` rather than permuting `fin`
# from itself: a self-referential permutation reshuffles the lists again every
# time the cell is re-run. The positions index into `methods` (order of first
# appearance in `df`), so the first run gives the same order as before and any
# re-run gives that same order again.
custom_order = (-1, 0, 2, 1)
fin = [df.loc[df['method'] == methods[i], 'protein'].tolist() for i in custom_order]

In [21]:
# One colour per box trace; the plotting cell picks them by position against `fin`.
color_palette = ['#206d46', '#6D25AD','#2576AD', '#db4c2e']

In [22]:
# One box per entry of `fin`, with every value drawn as a point on top of the
# box (pointpos=0) and coloured by position from `color_palette`.
fig = go.Figure(
    data=[
        go.Box(
            y=el,
            boxpoints='all',
            pointpos=0,
            marker_color=color_palette[i],
            marker=dict(size=12, line=dict(width=0.5)),
        )
        for i, el in enumerate(fin)
    ]
)
fig.update_layout(width=600, height=600, template='none', showlegend=False)
# Fixed y-range; keep this call last so the figure is the cell output.
fig.update_yaxes(range=[0, 9300])
#fig.write_image(r'D:\Projects\SPEC\figs_raw\Figure1c.pdf', width = 600, height = 600)

# Figure 1D

In [37]:
# Whisper80, SAX only: one experiment per detergent condition

# All three experiments of this figure read from the same parquet file.
e178_parquet = r'Z:\H032_E178\H032_E178_full.parquet'

# File-tag patterns, one list per detergent condition (see the pairing below).
tags9 = cf.generate_pattern_list(['A'], 1, 4, 'E178_')    # No detergent
tags10 = cf.generate_pattern_list(['A'], 5, 8, 'E178_')   # 2 % SDS
tags11 = cf.generate_pattern_list(['A'], 9, 12, 'E178_')  # 2 % SDC

# The 'instrument' key carries the detergent label here; 'method' is SAX throughout.
experiment_dict = [
    {'instrument': detergent, 'method': 'SAX', 'file_tags': tags, 'path': e178_parquet}
    for detergent, tags in (
        ('No detergent', tags9),
        ('2 % SDS', tags10),
        ('2 % SDC', tags11),
    )
]

In [38]:
from collections import defaultdict

# Bucket the experiment specs by their parquet path so each file is loaded once.
experiments_by_file = defaultdict(list)
for exp in experiment_dict:
    experiments_by_file[exp['path']].append(exp)

# Per file: keep rows with PG.Q.Value below 0.01, then aggregate every
# experiment that reads from that file.
agg_stat_df = []
for path, experiments in experiments_by_file.items():
    df_full = cf.load_parquet_cached(path)
    q_value_ok = df_full['PG.Q.Value'] < 0.01
    df_full = df_full[q_value_ok]

    for experiment in experiments:
        agg_stat_df.append(
            cf.process_experiment(
                df_full,
                experiment,
                protease='trypsin',  # original note lists 'lysc', 'argc', etc. as alternatives
                max_missed_cleavages=2,
            )
        )

# Stack the per-experiment results into a single frame with a fresh index.
agg_stat_df = pd.concat(agg_stat_df, ignore_index=True)









In [25]:
# Short alias for the Figure 1D aggregate. This binds the same object as
# `agg_stat_df` (no copy is made).
df = agg_stat_df

In [26]:
# Distinct values of the 'instrument' column (which holds the detergent label
# for this figure), in order of first appearance in `df`.
detergent_list = df['instrument'].unique().tolist()

In [27]:
# One list of 'protein' values per detergent condition, in `detergent_list` order.
fin = [df.loc[df['instrument'] == el, 'protein'].tolist() for el in detergent_list]

In [28]:
# One colour per box trace; the plotting cell picks them by position against `fin`.
color_palette_red = ['#FBA08D', '#FA7A61', '#db4c2e']

In [29]:
# One box per entry of `fin`, with every value drawn as a point on top of the
# box (pointpos=0) and coloured by position from `color_palette_red`.
fig = go.Figure(
    data=[
        go.Box(
            y=el,
            boxpoints='all',
            pointpos=0,
            marker_color=color_palette_red[i],
            marker=dict(size=12, line=dict(width=0.5)),
        )
        for i, el in enumerate(fin)
    ]
)
fig.update_layout(width=600, height=600, template='none', showlegend=False)
# Fixed y-range; keep this call last so the figure is the cell output.
fig.update_yaxes(range=[0, 10100])
#fig.write_image(r'D:\Projects\SPEC\figs_raw\Figure1d.pdf', width = 600, height = 600)

# Figure 1E

In [39]:
# Kinetics: digestion time course, in-solution vs. SPEC

path_used = r'Z:\20250423SPEC_times_dil_series_Denys\time\SPEC\200ng\SPEC_kinetics_200ng.parquet'
path_used2 = r'Z:\20250423SPEC_times_dil_series_Denys\time\insol\200ng\insol_kinteics_200ng.parquet'

# Digestion times (minutes) shared by both arms.
digestion_minutes = [5, 10, 15, 30, 60, 90]

# (method label, file-name prefix, parquet path). In-solution entries come
# first, then SPEC, matching the original hand-written list order.
kinetics_arms = [
    ('In solution', 'InSol', path_used2),
    ('SPEC', 'SPEC', path_used),
]

# Define the experiment dictionary: one spec per (arm, time point). The
# 'instrument' key carries the time label. Building the tag patterns inline
# replaces twelve copy-pasted `tagsN = ...` lines and avoids rebinding
# tags9-tags11, which hold unrelated patterns in the Figure 1C/1D cells.
experiment_dict = [
    {
        'instrument': f'{minutes} min',
        'method': method,
        'file_tags': cf.generate_pattern_list(
            [''], 1, 3, f'{prefix}_{minutes}min_digestion_HeLa_200ng_0'
        ),
        'path': path,
    }
    for method, prefix, path in kinetics_arms
    for minutes in digestion_minutes
]

In [40]:
from collections import defaultdict

# Bucket the experiment specs by their parquet path so each file is loaded once.
experiments_by_file = defaultdict(list)
for exp in experiment_dict:
    experiments_by_file[exp['path']].append(exp)

# Per file: keep rows with PG.Q.Value below 0.01, then aggregate every
# experiment that reads from that file.
agg_stat_df = []
for path, experiments in experiments_by_file.items():
    df_full = cf.load_parquet_cached(path)
    q_value_ok = df_full['PG.Q.Value'] < 0.01
    df_full = df_full[q_value_ok]

    for experiment in experiments:
        agg_stat_df.append(
            cf.process_experiment(
                df_full,
                experiment,
                protease='trypsin',  # original note lists 'lysc', 'argc', etc. as alternatives
                max_missed_cleavages=2,
            )
        )

# Stack the per-experiment results into a single frame with a fresh index.
agg_stat_df = pd.concat(agg_stat_df, ignore_index=True)



























In [41]:
# Work on a copy: this cell adds an 'ID' column and rescales 'protein', and
# doing that through a plain alias would silently rewrite `agg_stat_df` too.
df = agg_stat_df.copy()
df['ID'] = df['method'] + '_' + df['instrument']
id_list = df['ID'].unique().tolist()

# Split the IDs by method label instead of by position (`id_list[:6]` / `[6:]`),
# so the split no longer depends on which parquet file was processed first.
# Assumes the 'method' column carries the labels given in `experiment_dict`;
# the check below fails loudly if that does not hold.
id_list_insol = df.loc[df['method'] == 'In solution', 'ID'].unique().tolist()
id_list_SPEC = df.loc[df['method'] == 'SPEC', 'ID'].unique().tolist()
if not id_list_insol or not id_list_SPEC:
    raise ValueError("Expected both 'In solution' and 'SPEC' rows in the 'method' column")

# Scale 'protein' to the overall maximum across both arms. Series.max() skips
# NaN, whereas the builtin max() gives order-dependent results when NaN is present.
max_val = df['protein'].max()
df['protein'] = df['protein'] / max_val

# One list of scaled values per ID, in order of first appearance.
fin_insol = [df.loc[df['ID'] == el, 'protein'].tolist() for el in id_list_insol]
fin_SPEC = [df.loc[df['ID'] == el, 'protein'].tolist() for el in id_list_SPEC]
flattened_spec = [item for sublist in fin_SPEC for item in sublist]

In [42]:
# Time-point labels, positionally matched to the per-ID lists in `fin_SPEC` / `fin_insol`.
time_labels = ['5', '10', '15', '30', '60', '90']

# Repeat each label once per value actually present for that time point,
# instead of assuming exactly three replicates everywhere. With three
# replicates per time point the result is the same as np.repeat(time_labels, 3).
id_list_SPEC1 = [label for label, values in zip(time_labels, fin_SPEC) for _value in values]
df_SPEC = pd.DataFrame({'Selectivity': flattened_spec, 'ID': id_list_SPEC1})
####
flattened_insol = [item for sublist in fin_insol for item in sublist]
id_list_insol1 = [label for label, values in zip(time_labels, fin_insol) for _value in values]
df_insol = pd.DataFrame({'Selectivity': flattened_insol, 'ID': id_list_insol1})

In [43]:
# Marker styling shared by both series.
marker_size = 18
spec_color = '#db4c2e'
insol_color = '#6D25AD'

# SPEC values as a strip plot over the time-point labels.
# NOTE(review): orientation='h' is passed although the numeric column is on y
# and the labels are on x -- confirm this is intended.
fig = px.strip(df_SPEC, y='Selectivity', x='ID', orientation='h')

# In-solution values overlaid as a plain marker trace named 'df_insol'.
fig.add_scatter(
    y=df_insol['Selectivity'],
    x=df_insol['ID'],
    mode='markers',
    marker=dict(size=marker_size, color=insol_color, line=dict(width=0.5, color='black')),
    name='df_insol',
)

fig.update_layout(width=600, height=600, template='plotly_white', showlegend=False)

# The strip trace is selected by its empty name and given the SPEC colour.
fig.update_traces(
    marker=dict(size=marker_size, color=spec_color, line=dict(width=0.5, color='black')),
    selector=dict(name=''),
)

# Re-apply size and outline to the in-solution trace (its colour is left as set above).
fig.update_traces(
    marker=dict(size=marker_size, line=dict(width=0.5, color='black')),
    selector=dict(name='df_insol'),
)

# Keep this call last so the figure is the cell output.
fig.update_yaxes(
    range=[0, 1.1],
    showgrid=True,
    gridwidth=0.1,
    gridcolor='#F3F2F2',
    griddash='solid',
)
#fig.write_image(r'D:\Projects\SPEC\figs_raw\Figure1_SPEC_vs_insol.pdf', width = 600, height = 600)