In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import analytics_core_V04 as ac
import matplotlib.pyplot as plt
from matplotlib_venn import venn3

from typing import List, Dict, Tuple
import pandas as pd
import matplotlib.pyplot as plt

# Figure 3B

In [None]:
# Figure 3B: identifications per replicate for three sample-prep workflows,
# drawn as one median bar per workflow with the replicates overlaid as points.

# --- Load and filter ---------------------------------------------------------
df_comp = pd.read_parquet(r'Z:\Tim_SPEC\figure3\insol_vs_spec\mouse_liver_FFPE.parquet')
# Keep rows with PG.Q.Value below 0.01. The explicit .copy() makes the
# 'reduced_id' assignment below write to an independent frame rather than to a
# filtered slice (which raises pandas' SettingWithCopyWarning).
df_comp = df_comp[df_comp['PG.Q.Value'] < 0.01].copy()

to_keep = ['C1', 'C2', 'C3', 'C4', 'A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
# Sample id = last '_'-separated token of the run name.
df_comp['reduced_id'] = df_comp['Run'].str.split('_').str[-1]
df_comp = df_comp[df_comp['reduced_id'].isin(to_keep)]

# Replicate groups, listed in the same order as `labels` / `colors` below.
# `df_comp` and `to_keep1` are read again by the Figure 3C and 3D cells.
to_keep1 = [['B1', 'B2', 'B3', 'B4'], ['C1', 'C2', 'C3', 'C4'], ['A1', 'A2', 'A3', 'A4']]

# --- Unique protein groups per replicate -------------------------------------
# dropna=False mirrors drop_duplicates(), which keeps one row for a missing key;
# .get(..., 0) yields 0 for a sample id that has no rows after filtering.
pg_counts_by_run = df_comp.groupby('reduced_id')['Protein.Group'].nunique(dropna=False)
computed_counts = [
    [int(pg_counts_by_run.get(run_id, 0)) for run_id in group]
    for group in to_keep1
]

# NOTE(review): the plotted values are hard-coded and do NOT come from
# `computed_counts` above. The y-axis title below also says "Phosphosites"
# while the code above counts protein groups. Presumably these numbers were
# pasted from another analysis -- confirm their provenance, or plot
# `computed_counts` instead.
fin = [[5395, 6383, 4381, 6115], [6939, 6910, 6920, 6832], [7672, 7740, 7736, 7645]]
labels = ['Single shot InSol', 'Bulk InSol', 'SPEC']
colors = ['#bd8078', '#623a73', '#ff0000']  # one bar colour per workflow

# Bar height = median of the replicates in each group.
medians = [np.median(reps) for reps in fin]

# Long-format table (one row per replicate) for the point overlay.
strip_df = pd.DataFrame([
    {'Category': label, 'Value': rep_val}
    for label, reps in zip(labels, fin)
    for rep_val in reps
])

# --- Plot --------------------------------------------------------------------
fig = px.strip(strip_df, x='Category', y='Value',
               category_orders={'Category': labels})

# Style the points. This must run BEFORE the bars are added, because
# update_traces() applies to every trace present at the time of the call.
fig.update_traces(
    jitter=0.8,  # horizontal spread of the points (0-1)
    marker=dict(size=18, color='black', line=dict(width=0.5, color='white'))
)

# One bar per workflow at the replicate median.
for label, median, color in zip(labels, medians, colors):
    fig.add_trace(go.Bar(
        x=[label],
        y=[median],
        width=0.5,
        marker=dict(color=color, line=dict(width=1, color='black')),
        name=label,
        showlegend=False
    ))

fig.update_layout(
    width=600,
    height=600,
    template='plotly_white',
    xaxis_title='',
    yaxis_title='Phosphosites Identified',
    yaxis=dict(range=[0, 9000]),
    showlegend=False
)
#fig.write_image(r'D:\Projects\SPEC\figs_raw\Figure_3b.pdf', width=600, height=600)

# Figure 3C

In [None]:
# Figure 3C: distribution of per-protein-group coefficients of variation (CV)
# across the replicates of each group in `to_keep1`.
# Depends on `df_comp` and `to_keep1` as defined in the Figure 3B cell.

# One colour per group, shared by the box and its median line.
color_palette = ['#bd8078', '#623a73', '#ff0000']

cv_list = []
for group_ids in to_keep1:
    group_long = df_comp[df_comp['reduced_id'].isin(group_ids)]
    # Wide table: one row per protein group, one column per run.
    group_wide = pd.pivot_table(group_long, columns='Run', index='Protein.Group', values='PG.MaxLFQ')
    linear_values = group_wide.iloc[:, :4]  # at most the first four run columns
    n_valid = linear_values.notna().sum(axis=1)

    # CV = standard deviation / mean of the (non-log) intensities, per row.
    cv = linear_values.std(axis=1) / linear_values.mean(axis=1)
    cv = cv.replace(0, np.nan)  # a CV of exactly 0 is treated as missing

    # A CV needs at least two quantified replicates.
    cv[n_valid < 2] = np.nan

    cv_list.append(cv.values)

fig = go.Figure()
for cv_values, color in zip(cv_list, color_palette):
    fig.add_trace(go.Box(y=cv_values, marker_color=color))
fig.update_layout(width = 600, height = 600, showlegend = False, template = 'plotly_white')

# Dashed horizontal line at each group's median CV, in that group's colour.
for cv_values, color in zip(cv_list, color_palette):
    fig.add_hline(y=np.nanmedian(cv_values), line={'dash': 'dash', 'width': 2, 'color': color})
#fig.write_image(r'D:\Projects\SPEC\figs_raw\Figure_3c.pdf', width=600, height=600)

# Bare expression so the notebook renders the figure as the cell output.
fig

# Figure 3D

In [None]:
# Figure 3D: density-coloured scatter of mean intensity per protein group,
# second group of `to_keep1` (bulk) against the third (SPEC).
# Depends on `df_comp` and `to_keep1` as defined in the Figure 3B cell.
from scipy.stats import gaussian_kde, pearsonr, spearmanr

# Wide tables: one row per protein group, one column per run.
bulk_pivot = pd.pivot_table(df_comp[df_comp['reduced_id'].isin(to_keep1[1])], columns = 'Run', index = 'Protein.Group', values= 'PG.MaxLFQ')
spec_pivot = pd.pivot_table(df_comp[df_comp['reduced_id'].isin(to_keep1[2])], columns = 'Run', index = 'Protein.Group', values= 'PG.MaxLFQ')

# Row-wise mean over the runs; DataFrame.mean skips missing values.
# Note: this appends a 'mean' column to both tables. The Figure 3E cell reads
# spec_pivot by column position, so the run columns must stay in front.
bulk_pivot['mean'] = bulk_pivot.mean(axis=1)
spec_pivot['mean'] = spec_pivot.mean(axis=1)

# Inner join on the index: only protein groups present in both tables remain.
merged = pd.merge(bulk_pivot[['mean']], spec_pivot[['mean']], left_index=True, right_index=True, suffixes=('_bulk', '_spec'))

# log10-transform the group means.
x = np.log10(merged['mean_bulk'])
y = np.log10(merged['mean_spec'])

# Drop pairs where either value is NaN or +/-inf.
mask = np.isfinite(x) & np.isfinite(y)
x_clean = x[mask]
y_clean = y[mask]

# Gaussian KDE evaluated at every point; used only to colour the markers.
xy = np.vstack([x_clean, y_clean])
z = gaussian_kde(xy)(xy)

# Correlation of the log10 means. These are computed but not drawn on the
# figure; inspect the variables directly if the numbers are needed.
pearson_r, pearson_p = pearsonr(x_clean, y_clean)
spearman_r, spearman_p = spearmanr(x_clean, y_clean)

# Scatter plot with density colouring.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=x_clean,
    y=y_clean,
    mode='markers',
    marker=dict(
        size=5,
        color=z,  # colour by density
        colorscale='inferno',
        showscale=True,
        colorbar=dict(title="Density"),
        line=dict(width=0)
    )
))

fig.update_layout(
    width=600,
    height=600,
    template='simple_white',
    xaxis_title='log10(mean_bulk)',
    yaxis_title='log10(mean_spec)'
)

fig.show()
#fig.write_image(r'D:\Projects\SPEC\figs_raw\Figure_3d.pdf', width=600, height=600)

# Figure 3E

In [None]:
# Figure 3E: density-coloured scatter of the first two run columns of
# `spec_pivot` against each other.
# Depends on `spec_pivot` as built in the Figure 3D cell.
from scipy.stats import gaussian_kde, pearsonr, spearmanr

# The two runs being compared (first and second column of the pivot).
run_x = spec_pivot.iloc[:, 0]
run_y = spec_pivot.iloc[:, 1]

# log10-transform the intensities.
x = np.log10(run_x)
y = np.log10(run_y)

# Drop pairs where either value is NaN or +/-inf.
mask = np.isfinite(x) & np.isfinite(y)
x_clean = x[mask]
y_clean = y[mask]

# Gaussian KDE evaluated at every point; used only to colour the markers.
xy = np.vstack([x_clean, y_clean])
z = gaussian_kde(xy)(xy)

# Axis labels come from the plotted columns. The previous labels
# ('log10(mean_bulk)' / 'log10(mean_spec)') were carried over from the
# Figure 3D cell and did not describe this data. The sample id is taken as the
# last '_'-separated token of the run name, the same way 'reduced_id' is
# derived elsewhere in this notebook.
x_label = f"log10(spec_{str(run_x.name).split('_')[-1]})"
y_label = f"log10(spec_{str(run_y.name).split('_')[-1]})"

# Scatter plot with density colouring.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=x_clean,
    y=y_clean,
    mode='markers',
    marker=dict(
        size=5,
        color=z,  # colour by density
        colorscale='inferno',
        showscale=True,
        colorbar=dict(title="Density"),
        line=dict(width=0)
    )
))

fig.update_layout(
    width=600,
    height=600,
    template='simple_white',
    xaxis_title=x_label,
    yaxis_title=y_label
)

fig.show()
#fig.write_image(r'D:\Projects\SPEC\figs_raw\Figure_3e.pdf', width=600, height=600)

# Figure 3F

In [None]:
# Figure 3F: unique protein groups per run, one box (with all points shown)
# per replicate group; colours alternate between consecutive groups.

df = pd.read_parquet(r'Z:\Tim_SPEC\figure3\tissues_SPECvsPAC\H032_E170.parquet')
# Keep rows with PG.Q.Value below 0.01. The explicit .copy() makes the
# 'reduced_id' assignment below write to an independent frame rather than to a
# filtered slice (which raises pandas' SettingWithCopyWarning).
df = df[df['PG.Q.Value'] < 0.01].copy()

# Replicate groups in plotting order.
# NOTE(review): the 'B' group on the second row has three entries, and one
# entry in the 'C' group is a numeric token ('20250520122254') rather than a
# well id -- presumably a run whose file name ends differently. Confirm both.
labels = [['E9', 'E10', 'E11', 'E12'], ['E1', 'E2', 'E3', 'E4'], ['A9', 'A10', 'A11', 'A12'], ['A1', 'A2', 'A3', 'A4'],
          ['B9', 'B10', 'B11', 'B12'], ['B2', 'B3', 'B4'],['D9', 'D10', 'D11', 'D12'], ['D1', 'D2', 'D3', 'D4'],
          ['C9', 'C10', 'C11', 'C12'], ['C1', '20250520122254', 'C3', 'C4'],['F9', 'F10', 'F11', 'F12'], ['F1', 'F2', 'F3', 'F4']]
labels_flattened = [item for sublist in labels for item in sublist]

# Sample id = last '_'-separated token of the run name.
df['reduced_id'] = df['Run'].str.split('_').str[-1]
df = df[df['reduced_id'].isin(labels_flattened)]

# Unique protein groups per run. dropna=False mirrors drop_duplicates(), which
# keeps one row for a missing key; .get(..., 0) yields 0 for an id with no rows.
pg_counts_by_run = df.groupby('reduced_id')['Protein.Group'].nunique(dropna=False)
fin = [[int(pg_counts_by_run.get(run_id, 0)) for run_id in group] for group in labels]

color_palette = ['#623a73', '#ff0000']
fig = go.Figure()
for i, group_counts in enumerate(fin):
    fig.add_trace(go.Box(
        y=group_counts,
        boxpoints='all',
        pointpos=0,
        # even-indexed groups take the first colour, odd-indexed the second
        marker=dict(color=color_palette[i % 2], size=12, line=dict(width=0.5)),
    ))
fig.update_layout(width = 1000, height = 600, template = 'none', showlegend = False)
fig.update_yaxes(range = [0,10100])
#fig.write_image(r'D:\Projects\SPEC\figs_raw\Figure_3f.pdf', width=1000, height=600)

# Figure 3G

In [None]:
# Figure 3G: unique protein groups per run for the two replicate groups
# (A1-A4 and B1-B4) of the EDL_and_Soleus dataset.
# `df` as left by this cell is read again by the Figure 3H cell.

df = pd.read_parquet(r'Z:\Tim_SPEC\figure3\EDL_and_Soleus\H032_E177.parquet')
# Keep rows with PG.Q.Value below 0.01. The explicit .copy() makes the
# 'reduced_id' assignment below write to an independent frame rather than to a
# filtered slice (which raises pandas' SettingWithCopyWarning).
df = df[df['PG.Q.Value'] < 0.01].copy()

# NOTE(review): `to_keep` / `to_keep1` are rebound here. The Figure 3C and 3D
# cells read `to_keep1`, so re-running them after this cell uses these two
# groups instead of the three defined in the Figure 3B cell.
to_keep = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
# Sample id = last '_'-separated token of the run name.
df['reduced_id'] = df['Run'].str.split('_').str[-1]
df = df[df['reduced_id'].isin(to_keep)]
to_keep1 = [['A1', 'A2', 'A3', 'A4'], ['B1', 'B2', 'B3', 'B4']]

# Unique protein groups per run. dropna=False mirrors drop_duplicates(), which
# keeps one row for a missing key; .get(..., 0) yields 0 for an id with no rows.
pg_counts_by_run = df.groupby('reduced_id')['Protein.Group'].nunique(dropna=False)
fin = [[int(pg_counts_by_run.get(run_id, 0)) for run_id in group] for group in to_keep1]

color_palette = ['#09316c','#1faa76']
fig = go.Figure()
for group_counts, color in zip(fin, color_palette):
    fig.add_trace(go.Box(
        y=group_counts,
        boxpoints='all',
        pointpos=0,
        marker=dict(color=color, size=12, line=dict(width=0.5)),
    ))
fig.update_layout(width = 400, height = 600, template = 'none', showlegend = False)
fig.update_yaxes(range = [0,7100])
# NOTE(review): the export path below says Figure_3h although this section is
# headed "Figure 3G" -- confirm which name is intended before re-enabling.
#fig.write_image(r'D:\Projects\SPEC\figs_raw\Figure_3h.pdf', width=400, height=600)

# Figure 3H

In [None]:
# Figure 3H: volcano plot of the Soleus-vs-EDL comparison.
# Depends on `df` as left by the Figure 3G cell (samples A1-A4 and B1-B4).

LOG2FC_CUTOFF = 0.585       # numerically ~log2(1.5)
PADJ_CUTOFF = 0.05
MIN_VALID_FRACTION = 0.7    # keep columns quantified in at least 70% of samples

# Sample-id prefix -> muscle. This is the assignment the original positional
# labels implied (pivot_table sorts the ids, so A1-A4 came first as Soleus and
# B1-B4 second as EDL).
GROUP_BY_PREFIX = {'A': 'Soleus', 'B': 'EDL'}


def _regulation_label(log2fc, padj):
    """Classify one test result for colouring the volcano plot.

    Branch order matters: a value exactly on a cut-off takes the first
    matching label. A NaN in either argument falls through to 'noreg'.
    """
    if not padj < PADJ_CUTOFF:
        return 'noreg'
    if log2fc >= LOG2FC_CUTOFF:
        return 'upreg'
    if 0 <= log2fc <= LOG2FC_CUTOFF:
        return 'almost_upreg'
    if log2fc <= -LOG2FC_CUTOFF:
        return 'downreg'
    if -LOG2FC_CUTOFF <= log2fc <= 0:
        return 'almost_downreg'
    return 'noreg'


# Samples as rows, genes as columns, log2 intensities; log2(0) -> NaN.
df_pivot =np.log2(pd.pivot_table(df, columns = 'reduced_id', index = 'Genes', values= 'PG.MaxLFQ')).T.replace([np.inf, -np.inf], np.nan)

# Derive the annotation columns from the sample ids instead of relying on row
# order, so a missing or re-ordered sample cannot be silently mislabelled.
sample_ids = df_pivot.index.to_series()
sample_groups = sample_ids.str[0].map(GROUP_BY_PREFIX)
assert sample_groups.notna().all(), "sample id with a prefix not listed in GROUP_BY_PREFIX"
sample_names = sample_groups + '_' + sample_ids.str[1:]   # e.g. 'A1' -> 'Soleus_1'
df_pivot['group'] = sample_groups
df_pivot['sample'] = sample_names
df_pivot['subject'] = sample_names

# Drop columns with too many missing values (the three annotation columns have
# none and are therefore always kept).
df_pivot_filt = df_pivot.loc[:, (1 - (df_pivot.isna().sum() / len(df_pivot))) >= MIN_VALID_FRACTION]

# Imputation and testing are delegated to the project's analytics_core module.
df_pivot_imp = ac.imputation_mixed_norm_KNN(df_pivot_filt).reset_index()
ttest = ac.run_ttest(df_pivot_imp, 'Soleus', 'EDL')

ttest['ID'] = [
    _regulation_label(log2fc, padj)
    for log2fc, padj in zip(ttest['log2FC'], ttest['padj'])
]

# Marker gene lists. `genes` is not used further in this cell.
soleus_extended = ['Myh7', 'Myh7b', 'Atp2a2', 'Tnni1', 'Tnnt1', 'Tnnc1', 'Mb', 'Ppargc1a']

edl_extended = ['Myh4', 'Atp2a1', 'Tnni2', 'Tnnt3', 'Pvalb']

genes = soleus_extended + edl_extended

# Explicit category -> colour mapping. A bare colour sequence is assigned in
# order of first appearance of each category in the data, so the colours would
# depend on row order.
# NOTE(review): the mapping follows the order of the original palette
# (grey, dark blue, light blue, light orange, red) -- confirm it is the
# intended one.
id_colors = {
    'noreg': '#BDBDBD',
    'downreg': '#0B6299',
    'almost_downreg': '#77CDE6',
    'almost_upreg': '#F7D291',
    'upreg': '#EE4811',
}

fig = px.scatter(ttest, x='log2FC', y=-np.log10(ttest['padj']), color='ID',
                 color_discrete_map=id_colors, hover_name = 'identifier')

fig.update_traces(marker=dict(
    size=10,
    line=dict(width=0.5, color='black')
))

# Guide lines at the cut-offs used for the classification above.
fig.add_vline(x=-LOG2FC_CUTOFF, line_dash='dash', line_color='black')
fig.add_vline(x=LOG2FC_CUTOFF, line_dash='dash', line_color='black')
fig.add_hline(y=-np.log10(PADJ_CUTOFF), line_dash='dash', line_color='black')

fig.update_xaxes(title="log2FC (Soleus vs EDL)")
fig.update_yaxes(title='-log10(p-adjusted)')
fig.update_layout(width=600, height=600, template='plotly_white', showlegend=False)
# NOTE(review): the export path below says Figure_3g although this section is
# headed "Figure 3H" -- confirm which name is intended before re-enabling.
#fig.write_image(r'D:\Projects\SPEC\figs_raw\Figure_3g.pdf', width=600, height=600)