## Environment
Using Kernel: `dan-dev-py312-r433`

-----

In [1]:
import sys
import os

# ##### SET SYS PATH TO WHERE THE SOURCE CODE IS. #####
# Note: This is not required if you are using the pip-installed package.
# The checkout location is machine-specific, so it can be overridden with the
# WORMCAT3_DIR environment variable; the original path remains the default.
wormcat_dir = os.environ.get("WORMCAT3_DIR", "/Users/dan/Code/Python/wormcat3")
# Insert at position 0 so this directory is searched before the other
# sys.path entries when `wormcat3` is imported.
sys.path.insert(0, wormcat_dir)

print("Working directory:", wormcat_dir)

Working directory: /Users/dan/Code/Python/wormcat3


In [2]:
# What Annotations are available by default?
# Print the wormcat3 version, then ask AnnotationsManager for its annotation
# files; the bare last expression is what the notebook renders as cell output.

import wormcat3
from wormcat3 import AnnotationsManager
print(f"Wormcat3 {wormcat3.__version__}")
AnnotationsManager.available_annotation_files()

Wormcat3 0.1.8


['ORF_only_v2_nov-11-2021.csv',
 'ahringer_v2_nov-11-2021.csv',
 'orfeome_v2_nov-11-2021.csv',
 'whole_genome_v2_nov-11-2021.csv']

In [3]:
# What PAdjustMethod(s) are available?
# Iterating PAdjustMethod yields every member; printing the list shows each
# member together with its string value.

from wormcat3 import PAdjustMethod
print(list(PAdjustMethod))

[<PAdjustMethod.BONFERRONI: 'bonferroni'>, <PAdjustMethod.FDR: 'fdr_bh'>]


-----


In [15]:
from pathlib import Path
from wormcat3 import Wormcat, WormcatError, PAdjustMethod
from wormcat3 import constants as cs

def format_filename(filename: str) -> str:
    """Build a short, space-separated title from an underscore-delimited file name.

    The ``.xlsx`` suffix (if present) is dropped and the remainder is split on
    underscores. The first word is upper-cased, every later word is
    capitalized (first character upper, rest lower), and then only the words
    at even positions (0, 2, 4, ...) are kept.

    Example:
        ``"daf2d1_vs_expression_top_1000.xlsx"`` -> ``"DAF2D1 Expression 1000"``

    Args:
        filename: File name (not a full path), typically ending in ``.xlsx``.

    Returns:
        The retained words joined by single spaces.
    """
    # Strip the extension once and split on the original underscores.
    words = filename.removesuffix('.xlsx').split('_')

    # First word fully upper-case, the others capitalized.
    words = [words[0].upper()] + [w.capitalize() for w in words[1:]]

    # Keep every other word (indices 0, 2, 4, ...) and join with one space.
    return ' '.join(words[::2])

def list_excel_files(dir_path: str | Path) -> list[str]:
    dir_path = Path(dir_path)
    return [file.name for file in dir_path.iterdir() if file.is_file() and file.suffix == '.xlsx']

def run_batch(file_nm, input_data_path, output_analysis_path, annotation_nm, annotation_file):
    """Run one Wormcat batch analysis for a single Excel workbook.

    Args:
        file_nm: Name of the workbook inside ``input_data_path``; it is also
            passed through ``format_filename`` to build the run title.
        input_data_path: Directory that contains ``file_nm``.
        output_analysis_path: Passed to ``Wormcat`` as its working directory.
        annotation_nm: Label appended to the formatted file name in the title.
        annotation_file: Annotation file name handed to ``Wormcat``.

    Returns:
        None. The work is done by ``Wormcat.wormcat_batch``.
    """
    run_title = f"{format_filename(file_nm)} {annotation_nm}"
    workbook_path = f"{input_data_path}/{file_nm}"

    analysis = Wormcat(
        working_dir_path=output_analysis_path,
        title=run_title,
        annotation_file_name=annotation_file,
    )

    # Fixed settings for every batch: Bonferroni correction at a 0.05
    # threshold, with the gene-type constant for WormBase IDs.
    analysis.wormcat_batch(
        workbook_path,
        p_adjust_method=PAdjustMethod.BONFERRONI,
        p_adjust_threshold=0.05,
        gene_type=cs.GENE_TYPE_WORMBASE_ID,
    )
    

In [16]:
# Annotation used for this run: display label plus the annotation file name.
annotation_nm = "Whole Genome"
annotation_file = "whole_genome_v2_nov-11-2021.csv"

# Input workbooks for the "500" set and the directory handed to Wormcat
# as its working directory.
derived_data_path = Path("../derived_data/wang_lab")
input_data_path = derived_data_path / "500"
output_analysis_path = "../analysis/wang_lab/500"

# Run one batch analysis per workbook found in the input directory.
files = list_excel_files(input_data_path)
for file_nm in files:
    run_batch(
        file_nm=file_nm,
        input_data_path=input_data_path,
        output_analysis_path=output_analysis_path,
        annotation_nm=annotation_nm,
        annotation_file=annotation_file,
    )

Analysis complete. Output can be found at ../analysis/wang_lab/500/RSKS1D1_Expression_500_Whole_Genome_67063/Intestine_68395
Analysis complete. Output can be found at ../analysis/wang_lab/500/RSKS1D1_Expression_500_Whole_Genome_67063/Neuron_81937
Analysis complete. Output can be found at ../analysis/wang_lab/500/RSKS1D1_Expression_500_Whole_Genome_67063/singleton_74072
Analysis complete. Output can be found at ../analysis/wang_lab/500/RSKS1D1_Expression_500_Whole_Genome_67063/Germline_45288
Analysis complete. Output can be found at ../analysis/wang_lab/500/RSKS1D1_Expression_500_Whole_Genome_67063/Coelomocyte_12828
Analysis complete. Output can be found at ../analysis/wang_lab/500/RSKS1D1_Expression_500_Whole_Genome_67063/ARSC_15930
Analysis complete. Output can be found at ../analysis/wang_lab/500/RSKS1D1_Expression_500_Whole_Genome_67063/Vulva_uterus_10288
Analysis complete. Output can be found at ../analysis/wang_lab/500/RSKS1D1_Expression_500_Whole_Genome_67063/Gonadal_sheath_cell_

In [17]:
# Annotation used for this run: display label plus the annotation file name.
annotation_nm = "Whole Genome"
annotation_file = "whole_genome_v2_nov-11-2021.csv"

# Input workbooks for the "1000" set and the directory handed to Wormcat
# as its working directory.
derived_data_path = Path("../derived_data/wang_lab")
input_data_path = derived_data_path / "1000"
output_analysis_path = "../analysis/wang_lab/1000"

# Run one batch analysis per workbook found in the input directory.
files = list_excel_files(input_data_path)
for file_nm in files:
    run_batch(
        file_nm=file_nm,
        input_data_path=input_data_path,
        output_analysis_path=output_analysis_path,
        annotation_nm=annotation_nm,
        annotation_file=annotation_file,
    )



Analysis complete. Output can be found at ../analysis/wang_lab/1000/DAF2D1_Expression_1000_Whole_Genome_10028/Intestine_00704
Analysis complete. Output can be found at ../analysis/wang_lab/1000/DAF2D1_Expression_1000_Whole_Genome_10028/Neuron_57341
Analysis complete. Output can be found at ../analysis/wang_lab/1000/DAF2D1_Expression_1000_Whole_Genome_10028/singleton_19039
Analysis complete. Output can be found at ../analysis/wang_lab/1000/DAF2D1_Expression_1000_Whole_Genome_10028/Germline_59909
Analysis complete. Output can be found at ../analysis/wang_lab/1000/DAF2D1_Expression_1000_Whole_Genome_10028/Coelomocyte_74967
Analysis complete. Output can be found at ../analysis/wang_lab/1000/DAF2D1_Expression_1000_Whole_Genome_10028/ARSC_10426
Analysis complete. Output can be found at ../analysis/wang_lab/1000/DAF2D1_Expression_1000_Whole_Genome_10028/Vulva_uterus_87453
Analysis complete. Output can be found at ../analysis/wang_lab/1000/DAF2D1_Expression_1000_Whole_Genome_10028/Gonadal_shea