In [10]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
from scipy.cluster.hierarchy import linkage, dendrogram # For clustering
from scipy.spatial.distance import pdist # For distance calculation

# --- Configuration ---
RESULTS_DIR = "/projectnb/cancergrp/Philipp/results/RITA_peptides"
os.makedirs(RESULTS_DIR, exist_ok=True) # Ensure results directory exists

# Define the 20 standard amino acids for consistent ordering in plots
AMINO_ACIDS = sorted(list('ACDEFGHIKLMNPQRSTVWY'))

# Define the peptide property types for consistent ordering in plots.
PEPTIDE_PROPERTY_TYPES = ['Disorder', 'Helix', 'Sheet', 'Coil', 'Buried', 'Exposed']



# --- Helper Functions (Plotting Only) ---

def plot_composition(composition_series_dict, title, filename_prefix, results_dir):
    """
    Plots amino acid composition for one or more groups using grouped bar plots.
    composition_series_dict: dict of {group_name: pandas.Series of composition}
    """
    if not composition_series_dict:
        print(f"Skipping plot '{title}': No data provided.")
        return

    plot_df_data = []
    for group_name, series in composition_series_dict.items():
        if series is not None and not series.empty:
            temp_df = series.reset_index()
            temp_df.columns = ['Amino Acid', 'Percentage']
            temp_df['Group'] = group_name
            plot_df_data.append(temp_df)
        else:
            print(f"Warning: No valid composition data for group '{group_name}' in '{title}'.")

    if not plot_df_data:
        print(f"Skipping plot '{title}': No valid dataframes to concatenate.")
        return

    plot_df = pd.concat(plot_df_data)

    plt.figure(figsize=(14, 7))
    sns.barplot(data=plot_df, x='Amino Acid', y='Percentage', hue='Group', palette='viridis', ci=None)
    plt.title(f'Amino Acid Composition: {title}', fontsize=16)
    plt.xlabel('Amino Acid', fontsize=12)
    plt.ylabel('Percentage (%)', fontsize=12)
    plt.xticks(rotation=0)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.legend(title='Group', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plot_path = os.path.join(results_dir, f"{filename_prefix}_amino_acid_composition.png")
    plt.savefig(plot_path, dpi=300)
    plt.close()
    print(f"Saved plot: {plot_path}")

def plot_average_properties(average_properties_dict, title, filename_prefix, results_dir):
    """
    Plots average peptide properties (Disorder, Helix, Sheet, Coil, Buried, Exposed) for different groups.
    average_properties_dict: dict of {group_name: pandas.Series of average properties}
    """
    if not average_properties_dict:
        print(f"Skipping plot '{title}': No data provided.")
        return

    plot_data = []
    for group_name, series in average_properties_dict.items():
        if series is not None and not series.empty:
            # Reorder the series according to PEPTIDE_PROPERTY_TYPES for consistent plotting order
            # Filter the series to include only the defined PEPTIDE_PROPERTY_TYPES with '_perc' suffix
            ordered_prop_names = [f'{prop}_perc' for prop in PEPTIDE_PROPERTY_TYPES if f'{prop}_perc' in series.index]
            ordered_series = series[ordered_prop_names]

            temp_df = ordered_series.to_frame(name='Percentage').reset_index()
            temp_df.columns = ['Property', 'Percentage']
            # Clean up property names for plotting (remove '_perc')
            temp_df['Property'] = temp_df['Property'].str.replace('_perc', '')
            temp_df['Group'] = group_name
            plot_data.append(temp_df)
        else:
            print(f"Warning: No valid average property data for group '{group_name}' in '{title}'.")

    if not plot_data:
        print(f"Skipping plot '{title}': No valid dataframes to concatenate.")
        return

    plot_df = pd.concat(plot_data)

    plt.figure(figsize=(16, 8))
    sns.barplot(data=plot_df, x='Property', y='Percentage', hue='Group', palette='Spectral', ci=None)
    plt.title(f'Average Peptide Properties: {title}', fontsize=16)
    plt.xlabel('Peptide Property', fontsize=12)
    plt.ylabel('Average Percentage (%)', fontsize=12)
    plt.xticks(rotation=0)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.legend(title='Group', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plot_path = os.path.join(results_dir, f"{filename_prefix}_peptide_properties.png")
    plt.savefig(plot_path, dpi=300)
    plt.close()
    print(f"Saved plot: {plot_path}")

def plot_composition_heatmap(df_composition, title, filename_prefix, results_dir):
    """
    Plots a heatmap of amino acid compositions for different groups.
    df_composition: pandas.DataFrame with amino acids as index and groups as columns.
    """
    if df_composition.empty:
        print(f"Skipping heatmap '{title}': No data provided.")
        return

    # It's often clearer to have groups as rows and amino acids as columns in a heatmap,
    # so let's transpose the DataFrame.
    plot_df = df_composition.T # Transpose: groups become index, amino acids become columns

    plt.figure(figsize=(16, max(6, len(plot_df) * 0.7))) # Adjust height based on number of groups
    sns.heatmap(plot_df, annot=True, fmt=".1f", cmap="YlGnBu", linewidths=.5, cbar_kws={'label': 'Average Percentage (%)'})
    plt.title(f'Amino Acid Composition Heatmap: {title}', fontsize=16)
    plt.xlabel('Amino Acid', fontsize=12)
    plt.ylabel('Peptide Group', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plot_path = os.path.join(results_dir, f"{filename_prefix}_amino_acid_composition_heatmap.png")
    plt.savefig(plot_path, dpi=300)
    plt.close()
    print(f"Saved heatmap: {plot_path}")

def plot_properties_heatmap(df_properties, title, filename_prefix, results_dir):
    """
    Plots a heatmap of average peptide properties (Disorder, SS, Buried/Exposed) for different groups.
    df_properties: pandas.DataFrame with properties as index and groups as columns.
    """
    if df_properties.empty:
        print(f"Skipping heatmap '{title}': No data provided.")
        return

    # Similar to AA composition, transpose for groups as rows, properties as columns
    plot_df = df_properties.T # Transpose: groups become index, properties become columns
    
    # Rename columns to be more readable for the plot (remove '_perc')
    plot_df.columns = [col.replace('_perc', '') for col in plot_df.columns]

    plt.figure(figsize=(14, max(6, len(plot_df) * 0.7))) # Adjust height based on number of groups
    sns.heatmap(plot_df, annot=True, fmt=".1f", cmap="YlOrRd", linewidths=.5, cbar_kws={'label': 'Average Percentage (%)'})
    plt.title(f'Peptide Structural Properties Heatmap: {title}', fontsize=16)
    plt.xlabel('Property Type', fontsize=12)
    plt.ylabel('Peptide Group', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plot_path = os.path.join(results_dir, f"{filename_prefix}_peptide_properties_heatmap.png")
    plt.savefig(plot_path, dpi=300)
    plt.close()
    print(f"Saved heatmap: {plot_path}")


# --- Helper Function for Clustering DataFrame Rows ---
def cluster_dataframe_rows(df, metric='euclidean', method='ward'):
    """
    Clusters the rows of a DataFrame and returns the DataFrame reordered by the clustering.
    
    Args:
        df (pd.DataFrame): The DataFrame to cluster. Must not be empty.
        metric (str): Distance metric for pdist (e.g., 'euclidean', 'correlation').
        method (str): Linkage method for hierarchical clustering (e.g., 'ward', 'average').
    
    Returns:
        pd.DataFrame: The DataFrame with rows reordered based on clustering.
        list: The row order (indices) after clustering.
    """
    if df.empty:
        return df, []
    
    # If there's only one row, pdist will fail, so handle that case
    if len(df) == 1:
        return df, [0] # Already "clustered" with a single element

    # Calculate the distance matrix
    distance_matrix = pdist(df.values, metric=metric)
    
    # Perform hierarchical clustering
    Z = linkage(distance_matrix, method=method)
    
    # Get the order of indices from the dendrogram.
    dendrogram_leaves = dendrogram(Z, no_plot=True)['leaves']
    
    # Reorder the DataFrame rows
    clustered_df = df.iloc[dendrogram_leaves]
    
    return clustered_df, dendrogram_leaves


def plot_individual_peptide_heatmaps_aa(comprehensive_aa_df, results_dir):
    """
    Generates clustered heatmaps for individual peptide amino acid compositions,
    separated by 'Non-Significant', 'Upregulated', and 'Downregulated' groups,
    and displays them as subplots in one figure.
    """
    print("\nGenerating individual peptide amino acid composition heatmaps...")

    # Ensure log2FoldChange is numeric for filtering
    comprehensive_aa_df['log2FoldChange'] = pd.to_numeric(comprehensive_aa_df['log2FoldChange'], errors='coerce')

    # Filter peptides into groups, selecting only AA composition columns
    #non_sig_aa_df_raw = comprehensive_aa_df[comprehensive_aa_df['sig'] == 'No'][AMINO_ACIDS]
    up_aa_df_raw = comprehensive_aa_df[(comprehensive_aa_df['sig'] == 'Yes') & (comprehensive_aa_df['log2FoldChange'] > 0)][AMINO_ACIDS]
    down_aa_df_raw = comprehensive_aa_df[(comprehensive_aa_df['sig'] == 'Yes') & (comprehensive_aa_df['log2FoldChange'] < 0)][AMINO_ACIDS]

    # Cluster rows for each group
    # Ensure to only cluster if the dataframe is not empty
    #clustered_non_sig_aa_df, _ = cluster_dataframe_rows(non_sig_aa_df_raw) if not non_sig_aa_df_raw.empty else (non_sig_aa_df_raw, [])
    clustered_up_aa_df, _ = cluster_dataframe_rows(up_aa_df_raw) if not up_aa_df_raw.empty else (up_aa_df_raw, [])
    clustered_down_aa_df, _ = cluster_dataframe_rows(down_aa_df_raw) if not down_aa_df_raw.empty else (down_aa_df_raw, [])

    # Prepare dataframes and titles for subplots
    plot_data_frames = [clustered_up_aa_df, clustered_down_aa_df]
    plot_titles = ['Upregulated Significant Peptides', 'Downregulated Significant Peptides']

    #plot_data_frames = [clustered_non_sig_aa_df, clustered_up_aa_df, clustered_down_aa_df]
    #plot_titles = ['Non-Significant Peptides', 'Upregulated Significant Peptides', 'Downregulated Significant Peptides']
    
    # Filter out empty dataframes and their corresponding titles to avoid empty subplots
    valid_plots = [(df, title) for df, title in zip(plot_data_frames, plot_titles) if not df.empty]
    if not valid_plots:
        print("Skipping individual AA composition heatmaps: No valid data for any group after filtering.")
        return

    # Calculate height ratios for subplots based on number of peptides
    # A base height for a reasonable number of peptides to make plots visible
    base_row_height_aa = 0.012 # Adjust this value if the plots are too squished or too tall

    subplot_heights = [max(df.shape[0] * base_row_height_aa, 2) for df, _ in valid_plots] # Min height of 2 inches
    total_figure_height = sum(subplot_heights) + 2 # Add padding for main title, etc.

    # Create figure and subplots
    fig, axes = plt.subplots(len(valid_plots), 1, figsize=(18, total_figure_height),
                             gridspec_kw={'height_ratios': subplot_heights},
                             sharex=True)
    
    # If there's only one subplot, axes might not be an array, so make it one
    if len(valid_plots) == 1:
        axes = [axes]

    # Create a single colorbar for the entire figure
    # We determine the max value across all AA composition data for consistent color scale
    max_aa_value = comprehensive_aa_df[AMINO_ACIDS].max().max()
    norm = plt.Normalize(vmin=0, vmax=max(20, max_aa_value)) # Max 20% seems reasonable for AAs
    cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7]) # [left, bottom, width, height] for colorbar
    plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap="YlGnBu"), cax=cbar_ax, label='Amino Acid Percentage (%)')
    
    for i, (df, title) in enumerate(valid_plots):
        ax = axes[i]
        sns.heatmap(df, ax=ax, cmap="YlGnBu", cbar=False, # We use the shared colorbar
                    yticklabels=False, # Hide individual peptide IDs (too many)
                    xticklabels=True if i == len(valid_plots) - 1 else False, # Only show x-labels on bottom plot
                    linewidths=0.0) # No lines between cells for very large plots
        
        ax.set_ylabel('') # Remove default y-label
        ax.set_title(title, fontsize=14, pad=10) # Title for each subplot
        if i == len(valid_plots) - 1:
            ax.set_xlabel('Amino Acid', fontsize=14)
            plt.setp(ax.get_xticklabels(), rotation=45, ha='right') # Rotate for readability
        else:
            ax.set_xlabel('') # Remove x-label for upper plots

    plt.suptitle('Individual Peptide Amino Acid Composition by Group (Clustered)', fontsize=24, y=0.96) # Main title
    plt.tight_layout(rect=[0, 0, 0.9, 0.96]) # Adjust layout to make space for suptitle and colorbar
    
    plot_path = os.path.join(results_dir, "individual_peptide_aa_composition_clustered_heatmap.png")
    plt.savefig(plot_path, dpi=300)
    plt.close()
    print(f"Saved individual peptide AA composition heatmap: {plot_path}")


def plot_individual_peptide_heatmaps_properties(comprehensive_properties_df, results_dir):
    """
    Generates clustered heatmaps for individual peptide structural properties,
    separated by 'Non-Significant', 'Upregulated', and 'Downregulated' groups,
    and displays them as subplots in one figure.
    """
    print("\nGenerating individual peptide structural properties heatmaps...")

    # Ensure log2FoldChange is numeric for filtering
    comprehensive_properties_df['log2FoldChange'] = pd.to_numeric(comprehensive_properties_df['log2FoldChange'], errors='coerce')

    # Select the columns for properties (e.g., 'Disorder_perc')
    property_cols = [f'{prop}_perc' for prop in PEPTIDE_PROPERTY_TYPES]
    
    # Filter peptides into groups, selecting only property columns
    #non_sig_prop_df_raw = comprehensive_properties_df[comprehensive_properties_df['sig'] == 'No'][property_cols]
    up_prop_df_raw = comprehensive_properties_df[(comprehensive_properties_df['sig'] == 'Yes') & (comprehensive_properties_df['log2FoldChange'] > 0)][property_cols]
    down_prop_df_raw = comprehensive_properties_df[(comprehensive_properties_df['sig'] == 'Yes') & (comprehensive_properties_df['log2FoldChange'] < 0)][property_cols]

    # Cluster rows for each group
    #clustered_non_sig_prop_df, _ = cluster_dataframe_rows(non_sig_prop_df_raw) if not non_sig_prop_df_raw.empty else (non_sig_prop_df_raw, [])
    clustered_up_prop_df, _ = cluster_dataframe_rows(up_prop_df_raw) if not up_prop_df_raw.empty else (up_prop_df_raw, [])
    clustered_down_prop_df, _ = cluster_dataframe_rows(down_prop_df_raw) if not down_prop_df_raw.empty else (down_prop_df_raw, [])

    # Prepare dataframes and titles for subplots
    plot_data_frames = [clustered_up_prop_df, clustered_down_prop_df]
    plot_titles = ['Upregulated Significant Peptides', 'Downregulated Significant Peptides']
    
    #plot_data_frames = [clustered_non_sig_prop_df, clustered_up_prop_df, clustered_down_prop_df]
    #plot_titles = ['Non-Significant Peptides', 'Upregulated Significant Peptides', 'Downregulated Significant Peptides']
    
    # Filter out empty dataframes and their corresponding titles
    valid_plots = [(df, title) for df, title in zip(plot_data_frames, plot_titles) if not df.empty]
    if not valid_plots:
        print("Skipping individual structural properties heatmaps: No valid data for any group after filtering.")
        return

    # Calculate height ratios for subplots based on number of peptides
    base_row_height_prop = 0.02 # Adjust this value as needed
    
    subplot_heights = [max(df.shape[0] * base_row_height_prop, 2) for df, _ in valid_plots] # Min height of 2 inches
    total_figure_height = sum(subplot_heights) + 2

    fig, axes = plt.subplots(len(valid_plots), 1, figsize=(16, total_figure_height),
                             gridspec_kw={'height_ratios': subplot_heights},
                             sharex=True)
    
    if len(valid_plots) == 1:
        axes = [axes]

    # Create a single colorbar for the entire figure
    max_prop_value = comprehensive_properties_df[property_cols].max().max()
    norm = plt.Normalize(vmin=0, vmax=max(100, max_prop_value)) # Properties can go up to 100%
    cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7]) # [left, bottom, width, height] for colorbar
    plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap="YlOrRd"), cax=cbar_ax, label='Average Percentage (%)')

    # Rename x-tick labels for clarity (e.g., 'Disorder_perc' -> 'Disorder')
    clean_xticklabels = [col.replace('_perc', '') for col in property_cols]

    for i, (df, title) in enumerate(valid_plots):
        ax = axes[i]
        sns.heatmap(df, ax=ax, cmap="YlOrRd", cbar=False,
                    yticklabels=False,
                    xticklabels=clean_xticklabels if i == len(valid_plots) - 1 else False,
                    linewidths=0.0) # No lines between cells
        
        ax.set_ylabel('')
        ax.set_title(title, fontsize=14, pad=10)
        if i == len(valid_plots) - 1:
            ax.set_xlabel('Peptide Property', fontsize=14)
            plt.setp(ax.get_xticklabels(), rotation=45, ha='right') # Rotate for readability
        else:
            ax.set_xlabel('')

    plt.suptitle('Individual Peptide Structural Properties by Group (Clustered)', fontsize=24, y=0.96) # Main title
    plt.tight_layout(rect=[0, 0, 0.9, 0.96])
    
    plot_path = os.path.join(results_dir, "individual_peptide_properties_clustered_heatmap.png")
    plt.savefig(plot_path, dpi=300)
    plt.close()
    print(f"Saved individual peptide structural properties heatmap: {plot_path}")



# --- Load Pre-calculated Data ---
print("\nLoading pre-calculated summary data...")

# Amino Acid Composition Data
aa_comp_summary_path = os.path.join(RESULTS_DIR, "amino_acid_composition_summary.csv")
all_compositions = pd.read_csv(aa_comp_summary_path, index_col=0) # Amino acids are the index

# Peptide Properties Data
prop_summary_path = os.path.join(RESULTS_DIR, "peptide_properties_summary_with_RSA.csv")
all_peptide_properties_avg = pd.read_csv(prop_summary_path, index_col=0) # Properties are the index

print("Data loaded successfully.")

# --- Prepare Data for Plotting Functions ---

# For plot_composition and plot_average_properties (bar plots),
# we need to convert the DataFrame columns back into a dictionary of Series.
composition_series_dict = {col: all_compositions[col] for col in all_compositions.columns}
average_properties_series = {col: all_peptide_properties_avg[col] for col in all_peptide_properties_avg.columns}


# --- Plotting the Compositions (Bar Plots - Existing Style) ---
print("\nGenerating Amino Acid Composition Bar Plots...")

plot_composition(
    {'Full Library (VT/VP)': composition_series_dict['Full_Library_VT_VP'],
     'Experiment Used (VT/VP)': composition_series_dict['Experiment_Used_VT_VP']},
    'Full Library vs. Experiment Used Peptides (VT/VP)',
    'full_vs_used',
    RESULTS_DIR
)
plot_composition(
    {'Experiment Used (VT/VP)': composition_series_dict['Experiment_Used_VT_VP'],
     'Experiment Not Used (VT/VP)': composition_series_dict['Experiment_Not_Used_VT_VP']},
    'Experiment Used vs. Not Used Peptides (VT/VP)',
    'used_vs_not_used',
    RESULTS_DIR
)
plot_composition(
    {'Significant (VT/VP)': composition_series_dict['Experiment_Significant_VT_VP'],
     'Non-Significant (VT/VP)': composition_series_dict['Experiment_NonSignificant_VT_VP']},
    'Experiment Significant vs. Non-Significant Peptides (VT/VP)',
    'significant_vs_nonsignificant',
    RESULTS_DIR
)
plot_composition(
    {'Upregulated Significant (VT/VP)': composition_series_dict['Experiment_Upregulated_VT_VP'],
     'Downregulated Significant (VT/VP)': composition_series_dict['Experiment_Downregulated_VT_VP']},
    'Upregulated vs. Downregulated Significant Peptides (VT/VP)',
    'upregulated_vs_downregulated',
    RESULTS_DIR
)
plot_composition(
    {'Full Library (VT/VP)': composition_series_dict['Full_Library_VT_VP'],
    'Significant (VT/VP)': composition_series_dict['Experiment_Significant_VT_VP'],
    'Non-Significant (VT/VP)': composition_series_dict['Experiment_NonSignificant_VT_VP'],
    'Upregulated Significant (VT/VP)': composition_series_dict['Experiment_Upregulated_VT_VP'],
    'Downregulated Significant (VT/VP)': composition_series_dict['Experiment_Downregulated_VT_VP']},
    'Comparison of Peptides (VT/VP)',
    'comparison_peptide_amino_acid_comparison',
    RESULTS_DIR
)


# --- Plotting the Peptide Properties (Bar Plots - Existing Style) ---
print("\nGenerating Peptide Structural Properties Bar Plots...")

plot_average_properties(
    {'Full Library (VT/VP)': average_properties_series['Full_Library_VT_VP'],
     'Experiment Used (VT/VP)': average_properties_series['Experiment_Used_VT_VP']},
    'Full Library vs. Experiment Used Peptides (VT/VP)',
    'full_vs_used_properties_with_RSA',
    RESULTS_DIR
)
plot_average_properties(
    {'Experiment Used (VT/VP)': average_properties_series['Experiment_Used_VT_VP'],
     'Experiment Not Used (VT/VP)': average_properties_series['Experiment_Not_Used_VT_VP']},
    'Experiment Used vs. Not Used Peptides (VT/VP)',
    'used_vs_not_used_properties_with_RSA',
    RESULTS_DIR
)
plot_average_properties(
    {'Significant (VT/VP)': average_properties_series['Experiment_Significant_VT_VP'],
     'Non-Significant (VT/VP)': average_properties_series['Experiment_NonSignificant_VT_VP']},
    'Experiment Significant vs. Non-Significant Peptides (VT/VP)',
    'significant_vs_nonsignificant_properties_with_RSA',
    RESULTS_DIR
)
plot_average_properties(
    {'Upregulated Significant (VT/VP)': average_properties_series['Experiment_Upregulated_VT_VP'],
     'Downregulated Significant (VT/VP)': average_properties_series['Experiment_Downregulated_VT_VP']},
    'Upregulated vs. Downregulated Significant Peptides (VT/VP)',
    'upregulated_vs_downregulated_properties_with_RSA',
    RESULTS_DIR
)
plot_average_properties(
    {'Full Library (VT/VP)': average_properties_series['Full_Library_VT_VP'],
     'Significant (VT/VP)': average_properties_series['Experiment_Significant_VT_VP'],
     'Non-Significant (VT/VP)': average_properties_series['Experiment_NonSignificant_VT_VP'],
     'Upregulated Significant (VT/VP)': average_properties_series['Experiment_Upregulated_VT_VP'],
     'Downregulated Significant (VT/VP)': average_properties_series['Experiment_Downregulated_VT_VP']},
    'Comparison of Peptides (VT/VP)',
    'comparison_peptide_properties',
    RESULTS_DIR
)

# --- Generate Heatmaps (New Visualization Style) ---
print("\nGenerating Heatmap Visualizations...")

# Heatmap for Amino Acid Composition
plot_composition_heatmap(
    all_compositions,
    'All Peptide Groups (VT/VP)',
    'all_groups',
    RESULTS_DIR
)

# Heatmap for Peptide Structural Properties
plot_properties_heatmap(
    all_peptide_properties_avg,
    'All Peptide Groups (VT/VP)',
    'all_groups_structural',
    RESULTS_DIR
)



# Get the detailed plots for individual peptides
print("\nLoading comprehensive peptide data for individual heatmaps...")

# Load comprehensive amino acid composition data
comprehensive_aa_table_path = os.path.join(RESULTS_DIR, "comprehensive_peptide_amino_acid_composition_and_metadata.csv")
if os.path.exists(comprehensive_aa_table_path):
    comprehensive_peptide_table_aa = pd.read_csv(comprehensive_aa_table_path, index_col='identifier')
    print(f"Loaded amino acid data: {comprehensive_peptide_table_aa.shape[0]} peptides.")
else:
    print(f"Error: Amino acid comprehensive data not found at {comprehensive_aa_table_path}. Skipping AA heatmaps.")
    comprehensive_peptide_table_aa = pd.DataFrame() # Create empty to avoid errors

# Load comprehensive peptide structural properties data
comprehensive_prop_table_path = os.path.join(RESULTS_DIR, "comprehensive_peptide_properties_and_metadata_with_RSA.csv")
if os.path.exists(comprehensive_prop_table_path):
    comprehensive_peptide_table_prop = pd.read_csv(comprehensive_prop_table_path, index_col='identifier')
    print(f"Loaded structural properties data: {comprehensive_peptide_table_prop.shape[0]} peptides.")
else:
    print(f"Error: Structural properties comprehensive data not found at {comprehensive_prop_table_path}. Skipping properties heatmaps.")
    comprehensive_peptide_table_prop = pd.DataFrame() # Create empty to avoid errors

print("Comprehensive data loaded successfully. Generating individual heatmaps...")

# Call the new plotting functions only if data was loaded
if not comprehensive_peptide_table_aa.empty:
    plot_individual_peptide_heatmaps_aa(comprehensive_peptide_table_aa, RESULTS_DIR)

if not comprehensive_peptide_table_prop.empty:
    plot_individual_peptide_heatmaps_properties(comprehensive_peptide_table_prop, RESULTS_DIR)

print("\nIndividual peptide heatmap generation complete.")



print("\nAll Plotting Complete! Check your results directory for the generated images.")


Loading pre-calculated summary data...
Data loaded successfully.

Generating Amino Acid Composition Bar Plots...



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=plot_df, x='Amino Acid', y='Percentage', hue='Group', palette='viridis', ci=None)


Saved plot: /projectnb/cancergrp/Philipp/results/RITA_peptides/full_vs_used_amino_acid_composition.png



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=plot_df, x='Amino Acid', y='Percentage', hue='Group', palette='viridis', ci=None)


Saved plot: /projectnb/cancergrp/Philipp/results/RITA_peptides/used_vs_not_used_amino_acid_composition.png



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=plot_df, x='Amino Acid', y='Percentage', hue='Group', palette='viridis', ci=None)


Saved plot: /projectnb/cancergrp/Philipp/results/RITA_peptides/significant_vs_nonsignificant_amino_acid_composition.png



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=plot_df, x='Amino Acid', y='Percentage', hue='Group', palette='viridis', ci=None)


Saved plot: /projectnb/cancergrp/Philipp/results/RITA_peptides/upregulated_vs_downregulated_amino_acid_composition.png



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=plot_df, x='Amino Acid', y='Percentage', hue='Group', palette='viridis', ci=None)


Saved plot: /projectnb/cancergrp/Philipp/results/RITA_peptides/comparison_peptide_amino_acid_comparison_amino_acid_composition.png

Generating Peptide Structural Properties Bar Plots...



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=plot_df, x='Property', y='Percentage', hue='Group', palette='Spectral', ci=None)


Saved plot: /projectnb/cancergrp/Philipp/results/RITA_peptides/full_vs_used_properties_with_RSA_peptide_properties.png



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=plot_df, x='Property', y='Percentage', hue='Group', palette='Spectral', ci=None)


Saved plot: /projectnb/cancergrp/Philipp/results/RITA_peptides/used_vs_not_used_properties_with_RSA_peptide_properties.png



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=plot_df, x='Property', y='Percentage', hue='Group', palette='Spectral', ci=None)


Saved plot: /projectnb/cancergrp/Philipp/results/RITA_peptides/significant_vs_nonsignificant_properties_with_RSA_peptide_properties.png



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=plot_df, x='Property', y='Percentage', hue='Group', palette='Spectral', ci=None)


Saved plot: /projectnb/cancergrp/Philipp/results/RITA_peptides/upregulated_vs_downregulated_properties_with_RSA_peptide_properties.png



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=plot_df, x='Property', y='Percentage', hue='Group', palette='Spectral', ci=None)


Saved plot: /projectnb/cancergrp/Philipp/results/RITA_peptides/comparison_peptide_properties_peptide_properties.png

Generating Heatmap Visualizations...
Saved heatmap: /projectnb/cancergrp/Philipp/results/RITA_peptides/all_groups_amino_acid_composition_heatmap.png
Saved heatmap: /projectnb/cancergrp/Philipp/results/RITA_peptides/all_groups_structural_peptide_properties_heatmap.png

Loading comprehensive peptide data for individual heatmaps...
Loaded amino acid data: 28112 peptides.
Loaded structural properties data: 28112 peptides.
Comprehensive data loaded successfully. Generating individual heatmaps...

Generating individual peptide amino acid composition heatmaps...


  plt.tight_layout(rect=[0, 0, 0.9, 0.96]) # Adjust layout to make space for suptitle and colorbar


Saved individual peptide AA composition heatmap: /projectnb/cancergrp/Philipp/results/RITA_peptides/individual_peptide_aa_composition_clustered_heatmap.png

Generating individual peptide structural properties heatmaps...


  plt.tight_layout(rect=[0, 0, 0.9, 0.96])


Saved individual peptide structural properties heatmap: /projectnb/cancergrp/Philipp/results/RITA_peptides/individual_peptide_properties_clustered_heatmap.png

Individual peptide heatmap generation complete.

All Plotting Complete! Check your results directory for the generated images.
