In [16]:
import os
import pandas as pd

# Overlap based on the top 15 rows of each enrichment table

In [17]:
def find_csv_files(base_path):
    """
    Find all folders under base_path that contain both
    'mummichog_pathway_enrichment.csv' and 'enrichment.csv'.

    Args:
        base_path: Root directory to search recursively (base_path itself is
            also checked).

    Returns:
        list: Paths of the matching folders, in a deterministic order
        (sub-directories are visited alphabetically, parents before children).
        Empty when nothing matches or base_path cannot be walked.
    """
    required_files = ('mummichog_pathway_enrichment.csv', 'enrichment.csv')
    matching_folders = []
    for root, dirs, files in os.walk(base_path):
        # os.walk yields entries in arbitrary filesystem order; sorting the
        # directory list in place fixes the traversal order so results are
        # reproducible between runs and machines.
        dirs.sort()
        if all(name in files for name in required_files):
            matching_folders.append(root)
    return matching_folders

def process_csv_files(folder_path, top_n=15):
    """
    Compare the leading rows of the two enrichment CSV files in a folder.

    The first `top_n` rows of 'mummichog_pathway_enrichment.csv' and of
    'enrichment.csv' are read; the first column of the former and the second
    column of the latter are extracted and intersected.

    Args:
        folder_path: Folder containing both CSV files.
        top_n: Number of leading rows to take from each file (default 15).

    Returns:
        tuple: (mummichog_list, enrichment_list, common_elements) where the
        first two are lists in file order and the last is a set of the values
        present in both lists.

    Raises:
        ValueError: If top_n is negative.
    """
    # DataFrame.head(-n) means "all rows except the last n", which is never
    # what a "top N" comparison wants, so reject it explicitly.
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Paths to the CSV files
    mummichog_path = os.path.join(folder_path, 'mummichog_pathway_enrichment.csv')
    enrichment_path = os.path.join(folder_path, 'enrichment.csv')

    # Keep only the leading rows of each table
    mummichog_df = pd.read_csv(mummichog_path).head(top_n)
    enrichment_df = pd.read_csv(enrichment_path).head(top_n)

    # First column of mummichog, second column of enrichment
    mummichog_list = mummichog_df.iloc[:, 0].tolist()
    enrichment_list = enrichment_df.iloc[:, 1].tolist()

    # Values that appear in both lists
    common_elements = set(mummichog_list) & set(enrichment_list)

    return mummichog_list, enrichment_list, common_elements

def main(base_path):
    """
    Compare the two enrichment CSV files in every matching subfolder of base_path.

    Folders are located with find_csv_files and each one is passed to
    process_csv_files. Nothing is printed; the results are returned so the
    caller can decide what to display instead of being computed and dropped.

    Args:
        base_path: Root directory to search.

    Returns:
        dict: Maps each matching folder path to the value returned by
        process_csv_files for that folder.
    """
    return {folder: process_csv_files(folder) for folder in find_csv_files(base_path)}

In [18]:
if __name__ == "__main__":
    # Root folder holding the enrichment results. The machine-specific default
    # can be overridden by setting the ENRICHMENT_RESULT_DIR environment
    # variable, so the notebook can run elsewhere without editing this cell.
    base_path = os.environ.get(
        'ENRICHMENT_RESULT_DIR',
        '/Users/bowen/Desktop/multi_omics_analysis/2408_pea 2/scripts/240924_Enrichment_Result',
    )
    main(base_path)

# Overlap based on P value (P < 0.05)

In [21]:
import os
import pandas as pd

def find_csv_files(base_path):
    """
    Find all folders under base_path that contain both
    'mummichog_pathway_enrichment.csv' and 'enrichment.csv'.

    Args:
        base_path: Root directory to search recursively (base_path itself is
            also checked).

    Returns:
        list: Paths of the matching folders, in a deterministic order
        (sub-directories are visited alphabetically, parents before children).
        Empty when nothing matches or base_path cannot be walked.
    """
    required_files = {'mummichog_pathway_enrichment.csv', 'enrichment.csv'}
    matching_folders = []
    for root, dirs, files in os.walk(base_path):
        # Impose a stable visiting order: os.walk otherwise follows whatever
        # order the filesystem lists entries in, which makes the printed
        # report order differ between machines.
        dirs.sort()
        if required_files.issubset(files):
            matching_folders.append(root)
    return matching_folders

def process_csv_files(folder_path, p_threshold=0.05):
    """
    Compare the significant rows of the two enrichment CSV files in a folder.

    Rows of 'mummichog_pathway_enrichment.csv' are kept when their 'P(Gamma)'
    value is below p_threshold; rows of 'enrichment.csv' are kept when their
    'P value' is below p_threshold. The first column of the filtered mummichog
    table and the second column of the filtered enrichment table are then
    extracted and intersected.

    Args:
        folder_path: Folder containing both CSV files.
        p_threshold: Exclusive upper bound applied to both p-value columns
            (default 0.05).

    Returns:
        tuple: (mummichog_list, enrichment_list, common_elements,
        percentage_common). percentage_common is
        len(common_elements) / len(enrichment_list) * 100, or 0.0 when
        enrichment_list is empty.

    Raises:
        KeyError: If 'P(Gamma)' or 'P value' is missing from its file.
    """
    # Paths to the CSV files
    mummichog_path = os.path.join(folder_path, 'mummichog_pathway_enrichment.csv')
    enrichment_path = os.path.join(folder_path, 'enrichment.csv')

    # Read CSV files
    mummichog_df = pd.read_csv(mummichog_path)
    enrichment_df = pd.read_csv(enrichment_path)

    # A single non-numeric entry makes pandas load the whole column as text,
    # and comparing text with a float raises TypeError. Coerce to numbers
    # first; unparseable entries become NaN and therefore fail the filter.
    mummichog_p = pd.to_numeric(mummichog_df['P(Gamma)'], errors='coerce')
    enrichment_p = pd.to_numeric(enrichment_df['P value'], errors='coerce')

    filtered_mummichog_df = mummichog_df[mummichog_p < p_threshold]

    # NOTE(review): dropna() has no subset, so a significant enrichment row is
    # also discarded when ANY of its other columns is empty; the mummichog
    # table is not treated this way. Kept as-is - confirm this is intended.
    filtered_enrichment_df = enrichment_df[enrichment_p < p_threshold].dropna()

    # First column of filtered mummichog, second column of filtered enrichment
    mummichog_list = filtered_mummichog_df.iloc[:, 0].tolist()
    enrichment_list = filtered_enrichment_df.iloc[:, 1].tolist()

    # Values that appear in both lists
    common_elements = set(mummichog_list) & set(enrichment_list)

    # Share of the significant enrichment entries that mummichog also reports
    if enrichment_list:
        percentage_common = (len(common_elements) / len(enrichment_list)) * 100
    else:
        percentage_common = 0.0

    return mummichog_list, enrichment_list, common_elements, percentage_common

def main(base_path):
    """
    Print an overlap summary for every matching subfolder of base_path.

    Each folder returned by find_csv_files is passed to process_csv_files,
    and the folder, the sizes of both lists, the number of shared entries and
    the shared percentage are written to standard output.
    """
    for folder in find_csv_files(base_path):
        (mummichog_list,
         enrichment_list,
         common_elements,
         percentage_common) = process_csv_files(folder)

        # One report line per argument; the leading "\n" on the first line
        # leaves a blank line between consecutive groups.
        print(
            f"\nGroup: {folder}",
            f"Number of Mummichog: {len(mummichog_list)}",
            f"Number of Enrichment: {len(enrichment_list)}",
            f"Number of Common Elements: {len(common_elements)}",
            f"Percentage of Common Elements in Enrichment (p < 0.05): {percentage_common:.2f}%",
            sep="\n",
        )


# Root folder holding the enrichment results. The machine-specific default can
# be overridden by setting the ENRICHMENT_RESULT_DIR environment variable, so
# the notebook can run elsewhere without editing this cell.
base_path = os.environ.get(
    'ENRICHMENT_RESULT_DIR',
    '/Users/bowen/Desktop/multi_omics_analysis/2408_pea 2/scripts/240924_Enrichment_Result',
)
main(base_path)


Group: /Users/bowen/Desktop/multi_omics_analysis/2408_pea 2/scripts/240924_Enrichment_Result/sMEn_vs_sME/sMEnD1_vs_sMED1
Number of Mummichog: 19
Number of Enrichment: 15
Number of Common Elements: 8
Percentage of Common Elements in Enrichment (p < 0.05): 53.33%

Group: /Users/bowen/Desktop/multi_omics_analysis/2408_pea 2/scripts/240924_Enrichment_Result/sMEn_vs_sME/sMEnD2_vs_sMED2
Number of Mummichog: 16
Number of Enrichment: 18
Number of Common Elements: 9
Percentage of Common Elements in Enrichment (p < 0.05): 50.00%

Group: /Users/bowen/Desktop/multi_omics_analysis/2408_pea 2/scripts/240924_Enrichment_Result/sMEn_vs_sME/sMEnD3_vs_sMED3
Number of Mummichog: 18
Number of Enrichment: 10
Number of Common Elements: 4
Percentage of Common Elements in Enrichment (p < 0.05): 40.00%

Group: /Users/bowen/Desktop/multi_omics_analysis/2408_pea 2/scripts/240924_Enrichment_Result/sMEn_vs_sME/sMEnD4_vs_sMED4
Number of Mummichog: 19
Number of Enrichment: 10
Number of Common Elements: 4
Percentage 