In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os 

# Module-level labeling efficiencies (percentages). main() assigns them via
# `global`, and the chart code under the __main__ guard reads them; all four
# start at zero.
efficiency_light_label = efficiency_medium_label = efficiency_heavy_label = efficiency = 0

def save_pie_chart(sizes, labels, colors, explode, title, file_name):
    """Draw a pie chart and save it as ``Figures/<file_name>``.

    Args:
        sizes: Wedge sizes, passed straight to ``plt.pie``.
        labels: One label per wedge.
        colors: One colour per wedge.
        explode: Radial offset per wedge (same length as ``sizes``).
        title: Title drawn above the chart.
        file_name: Name of the output file inside the ``Figures`` directory;
            the extension selects the output format.
    """
    output_dir = "Figures"
    # Create the target directory here so the function works on its own
    # instead of depending on the caller having prepared it.
    os.makedirs(output_dir, exist_ok=True)

    fig = plt.figure(figsize=(12, 6))
    try:
        plt.pie(
            sizes,
            explode=explode,
            labels=labels,
            colors=colors,
            autopct='%1.2f%%',
            startangle=140,
            textprops={'fontsize': 16, 'color': 'black'},
        )
        plt.title(title)
        plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
        plt.savefig(f"{output_dir}/{file_name}", dpi=600)
    finally:
        # Release the figure even when drawing or saving fails, so failed
        # calls do not accumulate open figures.
        plt.close(fig)

def _rows_with_modification(df, pattern):
    """Return the rows of ``df`` whose "Assigned Modifications" match ``pattern``.

    ``pattern`` is treated as a regular expression by ``Series.str.contains``
    (so ``.`` matches any character). Rows with no modification entry (NaN)
    never match.
    """
    return df[df["Assigned Modifications"].str.contains(pattern, na=False)]


def _report_label(df, pattern, name):
    """Print how many peptides in ``df`` carry a label; return the percentage.

    Returns ``None`` (after printing only the count) when ``df`` is empty,
    because no percentage can be computed.
    """
    labeled = _rows_with_modification(df, pattern)
    print(f"{len(labeled)} Peptides with {name} label")
    if len(df) == 0:
        return None
    share = (len(labeled) / len(df)) * 100
    print(f"{share:.2f}% of all Peptides were {name}-labeled")
    return share


def main():
    """Interactively compute labeling efficiencies from a peptide table.

    Prompts for a tab-separated file, keeps the peptides whose sequence
    contains a lysine ('K'), and reports which share of them has a 'K' entry
    in "Assigned Modifications". It then prompts for optional light, medium
    and heavy label patterns and reports the share for each one given.

    Results are stored in the module-level globals ``efficiency``,
    ``efficiency_light_label``, ``efficiency_medium_label`` and
    ``efficiency_heavy_label`` (percentages; 0 when not computed).

    Note: every percentage uses the lysine-containing peptides as its
    denominator, even though the printed text says "of all Peptides".
    """
    global efficiency_light_label, efficiency_medium_label, efficiency_heavy_label, efficiency

    # Start every run from zero so results of an earlier run cannot leak into
    # whatever reads these globals afterwards.
    efficiency_light_label = efficiency_medium_label = efficiency_heavy_label = efficiency = 0

    # Prompt user for input file
    file_path = input("Enter the file name and then press enter (e.g. peptide.tsv): ")

    try:
        # Read data from the tab-separated file into a DataFrame
        df = pd.read_csv(file_path, sep='\t')
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return

    # Report missing columns explicitly instead of failing with a KeyError.
    missing = [col for col in ("Peptide", "Assigned Modifications") if col not in df.columns]
    if missing:
        print(f"Missing required column(s): {', '.join(missing)}")
        return

    # Print the number of peptides before any filtering
    print(f"{len(df)} Peptides before filtering")

    # Keep peptides containing a lysine ('K'). The previous pattern
    # 'K(?<!^)' matched exactly the same rows as a plain 'K' (the lookbehind
    # is always true right after a matched character). na=False drops rows
    # with an empty sequence instead of raising on a NaN mask.
    # NOTE(review): if N-terminal lysines were meant to be excluded, the
    # pattern would have to be '(?<!^)K' - confirm the intent.
    df = df[df['Peptide'].str.contains('K', regex=False, na=False)]

    # Print the number of peptides containing a lysine
    print(f"{len(df)} Peptide sequences contain a lysine")

    # The modification pattern is fixed to lysine; it is not prompted for.
    modification_pattern = 'K'

    # Filter peptides based on the specific modification pattern
    df_filtered = _rows_with_modification(df, modification_pattern)
    print(f"{len(df_filtered)} Peptides with the modification pattern '{modification_pattern}'")

    # Calculate and print the efficiency of labeling peptides with the specified modification
    if len(df) > 0:
        efficiency = (len(df_filtered) / len(df)) * 100
        print(f"{efficiency:.2f}% Peptides were labeled with '{modification_pattern}'")

    # Prompt user for label types (all prompts are shown before any result)
    light_label_pattern = input("Enter the light label pattern (e.g., 28.0313) or press Enter to skip: ").strip()
    medium_label_pattern = input("Enter the medium label pattern (e.g., 34.0631) or press Enter to skip: ").strip()
    heavy_label_pattern = input("Enter the heavy label pattern (e.g., 36...) or press Enter to skip: ").strip()

    if light_label_pattern:
        share = _report_label(df, light_label_pattern, "light")
        if share is not None:
            efficiency_light_label = share

    if medium_label_pattern:
        share = _report_label(df, medium_label_pattern, "medium")
        if share is not None:
            efficiency_medium_label = share

    if heavy_label_pattern:
        share = _report_label(df, heavy_label_pattern, "heavy")
        if share is not None:
            efficiency_heavy_label = share

if __name__ == "__main__":
    main()

    # Create the directory for the figures; exist_ok makes this safe whether
    # or not it already exists, without a separate existence check.
    figures_dir = "Figures"
    os.makedirs(figures_dir, exist_ok=True)

    # (display name, efficiency in percent, wedge colour) for each label type.
    # Only labels with a positive efficiency get a chart.
    label_charts = [
        ('Light', efficiency_light_label, 'lightblue'),
        ('Medium', efficiency_medium_label, 'lightgreen'),
        ('Heavy', efficiency_heavy_label, 'darkblue'),
    ]
    measured = [entry for entry in label_charts if entry[1] > 0]
    charts_saved = 0

    # One chart per label: labeled share against the remainder up to 100 %.
    for name, value, color in measured:
        save_pie_chart(
            [value, 100 - value],
            [f'{name} Label', 'Unlabeled Peptides'],
            [color, 'gray'],
            (0.1, 0),  # Explode the label slice
            f'{name} Label Efficiency',
            f'{name}_Label_Efficiency.pdf',
        )
        charts_saved += 1

    # Save combined pie chart only if more than one label is provided
    if len(measured) > 1:
        save_pie_chart(
            [value for _, value, _ in measured],
            [f'{name} Label' for name, _, _ in measured],
            [color for _, _, color in measured],
            [0.1] * len(measured),
            'Label Efficiency distribution',
            'Label_Efficiency_all.pdf',
        )
        charts_saved += 1

    # Overview chart: overall 'K' modification efficiency against the rest.
    if efficiency > 0:
        save_pie_chart(
            [efficiency, 100 - efficiency],
            ['Labeled Peptides', 'Unlabeled Peptides'],
            ['darkblue', 'gray'],
            (0.1, 0),
            'Label Efficiency Overview',
            'Label_Efficiency_Overview.pdf',
        )
        charts_saved += 1

    # Notify the user; only claim charts were saved when at least one was
    # (e.g. nothing is saved when the input file was not found).
    if charts_saved:
        print("Pie charts saved in the 'Figures' directory.")
    else:
        print("No pie charts were generated.")


Enter the file name and then press enter (e.g. peptide.tsv): peptide.tsv
3111 Peptides before filtering
1615 Peptide sequences contain a lysine
1489 Peptides with the modification pattern 'K'
92.20% Peptides were labeled with 'K'
Enter the light label pattern (e.g., 28.0313) or press Enter to skip: 28.
Enter the medium label pattern (e.g., 34.0631) or press Enter to skip: 34.
Enter the heavy label pattern (e.g., 36...) or press Enter to skip: 
1148 Peptides with light label
71.08% of all Peptides were light-labeled
1317 Peptides with medium label
81.55% of all Peptides were medium-labeled
Pie charts saved in the 'Figures' directory.
