In [2]:
'''Separate enrichment analysis'''
from gprofiler import GProfiler
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np

In [5]:

# Initialize g:Profiler
gp = GProfiler(return_dataframe=True)

# Define some analysis parameters
significance_threshold = 0.01  # Significance threshold for filtering significant proteins
fold_change_threshold = 1      # Fold-change threshold for upregulated/downregulated proteins
organism = 'mmusculus'          # Organism for enrichment analysis
top_n = 10                    # Number of top enriched terms to plot
enrichment_sources = ['KEGG']  # g:Profiler data sources, used for BOTH directions so results are comparable

# Directory that receives the result workbooks and the plots
output_dir = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/NetAppSaver'

# Path to the uploaded Excel file
file_path = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/NetAppSaver/R_InputHandled_VolcanoPlots.xlsx'

# Load the entire Excel file (all sheets) as a dict of {sheet name: DataFrame}
excel_data = pd.read_excel(file_path, sheet_name=None)


def _enrich_and_plot(proteins_df, group1, group2, direction, suffix):
    """Run g:Profiler on one protein set, then save a workbook and a bar plot.

    Parameters
    ----------
    proteins_df : pandas.DataFrame
        Filtered proteins for one direction; must contain a 'Protein' column.
    group1, group2 : str
        Names of the two compared groups (used in file names and titles).
    direction : str
        Human-readable label, e.g. 'upregulated' or 'downregulated'.
    suffix : str
        Short tag appended to file and sheet names ('u' or 'd').

    Returns
    -------
    pandas.DataFrame or None
        The enrichment table returned by g:Profiler, or None when
        ``proteins_df`` is empty and no query was sent.
    """
    comparison = f'{group1} vs {group2}'
    if proteins_df.empty:
        print(f"No {direction} proteins pass the thresholds for {comparison}; skipping enrichment.")
        return None

    # Missing identifiers cannot be queried, so drop them before building the list
    query_proteins = proteins_df['Protein'].dropna().tolist()
    enriched_terms = gp.profile(organism=organism, sources=enrichment_sources, query=query_proteins)
    if enriched_terms.empty:
        print(f"No enriched terms found for {direction} proteins in {comparison}.")
        return enriched_terms

    # Excel limits sheet names to 31 characters; cap the group part so the
    # 4-character '_x_s' / '_x_e' tag always survives
    sheet_prefix = f'{group1}_{group2}'[:27]
    output_file = f'{output_dir}/enrichment_analysis_results_{group1}_vs_{group2}_{suffix}.xlsx'
    with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
        proteins_df.to_excel(writer, sheet_name=f'{sheet_prefix}_{suffix}_s', index=False)
        enriched_terms.to_excel(writer, sheet_name=f'{sheet_prefix}_{suffix}_e', index=False)

    # Sort BEFORE taking the top rows so the plot really shows the most significant terms
    top_terms = enriched_terms.sort_values(by='p_value', ascending=True).head(top_n)
    top_terms = top_terms.assign(**{'-log10(p_value)': -np.log10(top_terms['p_value'])})

    fig, ax = plt.subplots(figsize=(20, 15))
    sns.barplot(x='-log10(p_value)', y='name', data=top_terms, palette='viridis', ax=ax)
    ax.set_xlabel('-log10(p-value)', fontsize=14)
    ax.set_ylabel('Enriched Terms', fontsize=14)
    ax.set_title(f'Top {top_n} Enriched Terms ({direction.capitalize()}) for {group1} vs {group2}', fontsize=15)
    fig.tight_layout()
    plot_filename = f'{output_dir}/enriched_terms_{group1}_vs_{group2}_{suffix}.png'
    fig.savefig(plot_filename, dpi=300)
    plt.show()
    plt.close(fig)  # release the figure; this runs once per sheet and direction
    print(f"Enrichment analysis for {direction} proteins complete for {group1} vs {group2}. Plot saved as {plot_filename}")
    return enriched_terms


# Iterate over each sheet in the Excel file
for sheet_name, results_df_filtered in excel_data.items():
    # Extract group names from the sheet name (expected format: group1_vs_group2)
    name_parts = sheet_name.split('_vs_')
    if len(name_parts) != 2:
        print(f"Skipping sheet '{sheet_name}': name is not formatted as group1_vs_group2")
        continue
    group1, group2 = name_parts

    # Significance is shared by both directions; the sign of the fold change separates them.
    # NOTE(review): assumes each sheet has a 'log2_fold_change' column — confirm against the input file.
    is_significant = results_df_filtered['p_adj'] < significance_threshold
    log2_fold_change = results_df_filtered['log2_fold_change']

    # Filter for upregulated proteins
    upregulated_proteins_df = results_df_filtered[
        is_significant & (log2_fold_change > fold_change_threshold)
    ]

    # Filter for downregulated proteins
    downregulated_proteins_df = results_df_filtered[
        is_significant & (log2_fold_change < -fold_change_threshold)
    ]

    # Enrichment analysis for upregulated proteins
    enriched_terms_upregulated = _enrich_and_plot(
        upregulated_proteins_df, group1, group2, direction='upregulated', suffix='u'
    )

    # Enrichment analysis for downregulated proteins
    enriched_terms_downregulated = _enrich_and_plot(
        downregulated_proteins_df, group1, group2, direction='downregulated', suffix='d'
    )

TypeError: GProfiler.profile() got an unexpected keyword argument 'source'

In [5]:
import pandas as pd
import os

# Specify the directory where the Excel files are stored
folder_path = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/NetAppSaver/EnrichOut/only_enrich'

# Specify the output file path
output_file = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/NetAppSaver/EnrichOut/comb_enrich_out.xlsx'

# Read every workbook first. The output file is only created once at least one
# sheet is ready, because an openpyxl workbook without sheets cannot be saved.
sheets_to_write = []   # (source filename, sheet name, DataFrame)
used_sheet_names = set()

# os.listdir returns entries in arbitrary order; sort for a reproducible sheet order
for filename in sorted(os.listdir(folder_path)):
    # Skip non-Excel files and the '~$...' lock files Excel creates while a workbook is open
    if filename.startswith('~$') or not filename.lower().endswith(('.xlsx', '.xls')):
        continue

    file_path = os.path.join(folder_path, filename)
    try:
        # Read the Excel file (first sheet) into a DataFrame
        df = pd.read_excel(file_path)
    except Exception as e:
        # Best effort: report the unreadable file and carry on with the rest
        print(f"Failed to process {filename}: {e}")
        continue

    # Use the filename without extension as sheet name, limited to 31 characters for Excel compatibility
    sheet_name = os.path.splitext(filename)[0][:31]
    if sheet_name in used_sheet_names:
        print(f"Failed to process {filename}: sheet name '{sheet_name}' is already used by another file")
        continue

    used_sheet_names.add(sheet_name)
    sheets_to_write.append((filename, sheet_name, df))

if sheets_to_write:
    # Combine all collected DataFrames into one workbook, one sheet per source file
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        for filename, sheet_name, df in sheets_to_write:
            df.to_excel(writer, sheet_name=sheet_name, index=False)
            print(f"Added {filename} as sheet '{sheet_name}'")

    print(f'Combined Excel document saved as {output_file}')
else:
    print(f'No readable Excel files found in {folder_path}; {output_file} was not written')


Added enrich_2DG_vs_Con_d.xlsx as sheet 'enrich_2DG_vs_Con_d'
Added enrich_2DG_vs_Con_u.xlsx as sheet 'enrich_2DG_vs_Con_u'
Added enrich_2DG_vs_SDT_d.xlsx as sheet 'enrich_2DG_vs_SDT_d'
Added enrich_2DG_vs_SDT_u.xlsx as sheet 'enrich_2DG_vs_SDT_u'
Added enrich_SDT_vs_Con_d.xlsx as sheet 'enrich_SDT_vs_Con_d'
Added enrich_SDT_vs_Con_u.xlsx as sheet 'enrich_SDT_vs_Con_u'
Combined Excel document saved as /media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/NetAppSaver/EnrichOut/comb_enrich_out.xlsx
