In [5]:
import pandas as pd
import numpy as np

In [9]:
# Load the filtered peptide table.
# low_memory=False makes pandas infer each column's dtype from the whole file
# in one pass instead of chunk by chunk. The saved output of this cell shows a
# warning raised on this call (presumably a mixed-type DtypeWarning -- TODO
# confirm); chunked inference is what triggers that warning.
df = pd.read_csv('filtered_data.csv', low_memory=False)

# Identify the starting column for drug-concentration data
start_col = 'aew541_1000nm'
start_idx = df.columns.get_loc(start_col)

# Extract all columns from the drug data onward
drug_columns = df.columns[start_idx:]

# Exclude any columns that end with '_pdpd' (those are controls we want to ignore)
filtered_drug_columns = [col for col in drug_columns if not col.endswith('_pdpd')]

# Row-wise summary statistics over the retained drug-concentration columns.
# The columns are sliced once, before any statistic is written back to df, so
# all three statistics are computed from exactly the same values.
# skipna=True: missing measurements are ignored instead of propagating NaN.
abundance = df[filtered_drug_columns]
df['Mean_Abundance'] = abundance.mean(axis=1, skipna=True)
df['Median_Abundance'] = abundance.median(axis=1, skipna=True)
df['StdDev_Abundance'] = abundance.std(axis=1, skipna=True)

# Save the updated DataFrame to a new file (optional)
# df.to_csv('peptide_stats_with_abundance.csv', index=False)

# (Optional) Preview first few rows
print(df[['Variant', 'Mean_Abundance', 'Median_Abundance', 'StdDev_Abundance', 'aew541_1000nm']].head())


                  Variant  Mean_Abundance  Median_Abundance  StdDev_Abundance  \
0        .ESESTAGSFSLSVR.    8.159651e+06         5893250.0      6.782810e+06   
1         .IQDKEGIPPDQQR.    6.133915e+06         3777700.0      8.890830e+06   
2          .IFTSIGEDYDER.    2.837567e+07        12692500.0      4.175965e+07   
3      .TAVC+57.021DIPPR.    6.198521e+06         1627300.0      1.404239e+07   
4  .IITHPNFNGNTLDNDIMLIK.    1.492264e+08        49244000.0      2.368004e+08   

   aew541_1000nm  
0     15921000.0  
1      4450700.0  
2      8153300.0  
3      4442300.0  
4    134780000.0  


  df = pd.read_csv('filtered_data.csv')


In [7]:
# Replace every retained drug column with the natural log of
# (value / row median abundance). Rows whose median is not strictly positive
# (or is missing) end up as NaN in all of those columns.
# NOTE: this overwrites the raw values held in df, so running the cell a second
# time without reloading df would take the log of already-transformed numbers.
median_abundance = df['Median_Abundance']
# Mask unusable medians up front; dividing by NaN yields NaN, and log(NaN) is NaN.
usable_median = median_abundance.where(median_abundance > 0)
df[filtered_drug_columns] = np.log(df[filtered_drug_columns].div(usable_median, axis=0))

# Preview: identifier, the three summary statistics, and the first five drug columns.
preview_columns = ['Variant', 'Mean_Abundance', 'Median_Abundance', 'StdDev_Abundance'] + filtered_drug_columns[:5]
print(df[preview_columns].head())

                  Variant  Mean_Abundance  Median_Abundance  StdDev_Abundance  \
0        .ESESTAGSFSLSVR.    8.159651e+06         5893250.0      6.782810e+06   
1         .IQDKEGIPPDQQR.    6.133915e+06         3777700.0      8.890830e+06   
2          .IFTSIGEDYDER.    2.837567e+07        12692500.0      4.175965e+07   
3      .TAVC+57.021DIPPR.    6.198521e+06         1627300.0      1.404239e+07   
4  .IITHPNFNGNTLDNDIMLIK.    1.492264e+08        49244000.0      2.368004e+08   

   aew541_1000nm  aew541_100nm  aew541_10nm  aew541_30000nm  aew541_3000nm  
0       0.993831      1.784593     1.383618        0.172645       0.708930  
1       0.163946      0.620730     0.173205       -0.190866      -0.126804  
2      -0.442589      0.081917    -0.304000       -0.748152      -0.433420  
3       1.004250      1.536108     1.380084        0.665863       0.825133  
4       1.006856      1.810090     1.221506        0.782841       1.152491  


In [13]:
# Write the current contents of df to disk as CSV; index=False leaves the
# DataFrame's row index out of the file.
df.to_csv(path_or_buf='normalized_peptide_trend_data.csv', index=False)

Unnamed: 0,rowid,ccms_row_id,Variant,Variant ID,Unmod variant,Total,Total- Unmodified sequence,Variants- Unmodified sequence,Proteins,Mass,...,baricitib_30000nm,baricitib_3000nm,baricitib_300nm,baricitib_30nm,baricitib_3nm,baricitib_dmso,baricitib_pdpd,Mean_Abundance,Median_Abundance,StdDev_Abundance
1,7,7,.IQDKEGIPPDQQR.,39596,.IQDKEGIPPDQQR.,6836,6882,7,sp|P0CG47|UBB_HUMAN;sp|P0CG48|UBC_HUMAN;sp|P62...,1523.8,...,2505000.0,3093800.0,4726100.0,7930900.0,8424800.0,11643000.0,5830600.0,6133915.0,3777700.0,8890830.0
2,11,11,.IFTSIGEDYDER.,36599,.IFTSIGEDYDER.,5284,5412,7,sp|P35232-2|PHB_HUMAN;sp|P35232|PHB_HUMAN;tr|C...,1444.6,...,46563000.0,37421000.0,54811000.0,96800000.0,78860000.0,75034000.0,70372000.0,28375670.0,12692500.0,41759650.0
3,14,14,.TAVC+57.021DIPPR.,87369,.TAVCDIPPR.,4837,4837,1,sp|A6NNZ2|TBB8B_HUMAN;sp|P04350|TBB4A_HUMAN;sp...,1085.5,...,987920.0,1095500.0,2920500.0,4860600.0,7768400.0,2558000.0,3113400.0,6198521.0,1627300.0,14042390.0


In [31]:
# Look up a single value purely by position: row 2, column 8.
# A single .iloc[row, col] call replaces the chained df.iloc[2][8], whose
# second step indexed a label-indexed row Series with an integer -- a
# positional fallback that pandas has deprecated (the saved output shows the
# warning pointing at that expression).
# NOTE(review): per the saved output, column 8 looks like 'Proteins' -- confirm
# before switching to a label-based lookup.
df.iloc[2, 8]

  df.iloc[2][8]


'sp|P35232-2|PHB_HUMAN;sp|P35232|PHB_HUMAN;tr|C9JW96|C9JW96_HUMAN;tr|C9JZ20|C9JZ20_HUMAN;tr|D6RBK0|D6RBK0_HUMAN;tr|E7ESE2|E7ESE2_HUMAN;tr|E9PCW0|E9PCW0_HUMAN'