In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy.stats import linregress

In [2]:
# Folder locations, given relative to the notebook's working directory.
# input_dir is joined with the DBTL CSV file names when the data is loaded.
input_dir = 'Figure_Data'
# NOTE(review): presumably the destination for exported figures/tables; it is
# not referenced by the cells visible in this part of the notebook — confirm.
output_dir = 'Figure_Export'

In [8]:
# Choose which DBTL cycle files to load. Only cycle 0 (DBTL0.csv) is read at
# present; change the tuple to range(7) to load DBTL0.csv through DBTL6.csv.
dbtl_cycles = (0,)
filenames = [f'DBTL{cycle}.csv' for cycle in dbtl_cycles]
input_paths = [os.path.join(input_dir, name) for name in filenames]

# Read every selected CSV, then stack the frames row-wise into a single
# DataFrame; ignore_index=True discards the per-file row labels and renumbers
# the rows from 0.
dataframes = list(map(pd.read_csv, input_paths))
df = pd.concat(dataframes, ignore_index=True)

In [14]:
# Rank proteins by their mean abundance across the 'Control' samples and
# export the ranked table to CSV.
abundance_col = '%_of protein_abundance_Top3-method'
protein_keys = ['Protein.Group', 'Protein.Names', 'Protein', 'Protein.Description']

# Keep only the rows whose Sample label is exactly 'Control'.
control_df = df.loc[df['Sample'] == 'Control']

# Collapse to one row per unique combination of the identifier columns,
# holding the mean of the abundance column over the control rows.
# NOTE(review): groupby drops rows with a missing value in any key column by
# default — confirm no protein has an empty identifier/description.
grouped_control_df = (
    control_df
    .groupby(protein_keys)
    .agg({abundance_col: 'mean'})
    .reset_index()
)

# Order from highest to lowest mean abundance and attach the rank
# (1 = most abundant). method='first' gives tied values distinct ranks in the
# order they appear in the sorted frame.
sorted_grouped_control_df = (
    grouped_control_df
    .sort_values(by=abundance_col, ascending=False)
    .assign(Rank=lambda frame: frame[abundance_col].rank(ascending=False, method='first'))
)

# Final table ordered by rank, written to the current working directory
# without the row index.
ranked_df = sorted_grouped_control_df.sort_values(by='Rank')
ranked_df.to_csv('ranked_proteins.csv', index=False)

print("Dataframe saved to 'ranked_proteins.csv'")
ranked_df.head(25)

Dataframe saved to 'ranked_proteins.csv'


Unnamed: 0,Protein.Group,Protein.Names,Protein,Protein.Description,%_of protein_abundance_Top3-method,Rank
0,P00552,KKA2_KLEPN,Neo,Aminoglycoside 3'-phosphotransferase,3.352559,1.0
26,P32377,MVD1_YEAST,Mvd1,Diphosphomevalonate decarboxylase,2.507166,2.0
1434,Q88N55,CH60_PSEPK,Grol,60 kDa chaperonin,2.283663,3.0
25,P23181,AACC1_PSEAI,Aacc1,Gentamicin 3-N-acetyltransferase,1.973718,4.0
1857,Q88QN7,EFTU2_PSEPK,Tufb,Elongation factor Tu-B,1.813564,5.0
44,Q835L3,Q835L3_ENTFA,Ef_1364,Acetyl-CoA acetyltransferase/hydroxymethylglut...,1.460456,6.0
18,P0A157,RL7_PSEPK,Rpll,50S ribosomal protein L7/L12,1.33431,7.0
411,Q88DU2,DNAK_PSEPK,Dnak,Chaperone protein DnaK,1.094669,8.0
1864,Q88QP4,RL1_PSEPK,Rpla,50S ribosomal protein L1,1.03785,9.0
649,Q88FB2,SUCC_PSEPK,Succ,Succinyl-CoA ligase [ADP-forming] subunit beta,0.991579,10.0
