### Conda Environment

This notebook runs on the Jupyter kernel `dan-dev-py312-r433`.

-----

In [1]:
# Which annotation files are available by default?
# Print the installed wormcat3 version, then display the annotation file
# names reported by AnnotationsManager.

import wormcat3
from wormcat3 import AnnotationsManager
print(f"Wormcat3 {wormcat3.__version__}")
# Last expression in the cell, so the notebook displays its return value.
AnnotationsManager.available_annotation_files()

Wormcat3 0.1.8


['ORF_only_v2_nov-11-2021.csv',
 'ahringer_v2_nov-11-2021.csv',
 'orfeome_v2_nov-11-2021.csv',
 'whole_genome_v2_nov-11-2021.csv']

In [None]:
# Which PAdjustMethod values are available?
# Iterating over PAdjustMethod and printing the resulting list shows every
# member that can be passed as `p_adjust_method`.
# NOTE(review): presumably PAdjustMethod is an Enum — confirm.

from wormcat3 import PAdjustMethod
print(list(PAdjustMethod))

-----


In [None]:
from pathlib import Path
from wormcat3 import Wormcat, WormcatError, PAdjustMethod
from wormcat3 import constants as cs

def format_filename(filename: str) -> str:
    """Build a display title from an underscore-delimited ``.xlsx`` file name.

    The ``.xlsx`` suffix is dropped, the remainder is split on underscores,
    the first word is fully upper-cased, every later word is capitalized
    (first letter upper, rest lower), and finally only the words at even
    positions (0, 2, 4, ...) are kept and joined with single spaces.

    Example:
        ``"abc_def_ghi.xlsx"`` -> ``"ABC Ghi"``

    Args:
        filename: A file name (not a full path), normally ending in ``.xlsx``.
            A name without that suffix is processed as-is.

    Returns:
        The formatted title string; an empty string for an empty input.
    """
    # Strip the extension once and split on the original underscores.
    stem = filename.removesuffix('.xlsx')
    words = stem.split('_')

    # str.split always returns at least one element, so words[0] is safe
    # even for an empty file name.
    styled_words = [words[0].upper()] + [w.capitalize() for w in words[1:]]

    # Keep every other word (indices 0, 2, 4, ...) and join with one space.
    return ' '.join(styled_words[::2])

def list_excel_files(dir_path: str | Path) -> list[str]:
    dir_path = Path(dir_path)
    return [file.name for file in dir_path.iterdir() if file.is_file() and file.suffix == '.xlsx']

def run_batch(file_nm, input_data_path, output_analysis_path, annotation_nm, annotation_file):
    """Run one Wormcat batch analysis for a single input file.

    A ``Wormcat`` instance is created with ``output_analysis_path`` as its
    working directory, ``annotation_file`` as its annotation file name, and a
    title made of the formatted file name followed by ``annotation_nm``.
    ``wormcat_batch`` is then invoked on ``{input_data_path}/{file_nm}`` using
    ``PAdjustMethod.BONFERRONI``, a p-adjust threshold of 0.05 and the
    ``cs.GENE_TYPE_WORMBASE_ID`` gene type.

    Args:
        file_nm: Name of the input file inside ``input_data_path``.
        input_data_path: Directory that contains ``file_nm``.
        output_analysis_path: Passed to ``Wormcat`` as ``working_dir_path``.
        annotation_nm: Label appended to the run title.
        annotation_file: Passed to ``Wormcat`` as ``annotation_file_name``.
    """
    run_title = f"{format_filename(file_nm)} {annotation_nm}"
    analyzer = Wormcat(
        working_dir_path=output_analysis_path,
        title=run_title,
        annotation_file_name=annotation_file,
    )

    batch_input = f"{input_data_path}/{file_nm}"
    analyzer.wormcat_batch(
        batch_input,
        p_adjust_method=PAdjustMethod.BONFERRONI,
        p_adjust_threshold=0.05,
        gene_type=cs.GENE_TYPE_WORMBASE_ID,
    )
    

In [None]:
# Run the Wormcat batch analysis on every .xlsx file in the "500" input
# directory, writing results under the matching "500" analysis directory.
derived_data_path = Path("../derived_data/wang_lab")
input_data_path = derived_data_path.joinpath("500")
output_analysis_path = "../analysis/wang_lab/500"

# Annotation label (used in run titles) and the annotation file to load.
annotation_nm, annotation_file = "Whole Genome", "whole_genome_v2_nov-11-2021.csv"

files = list_excel_files(input_data_path)
for file_nm in files:
    run_batch(
        file_nm=file_nm,
        input_data_path=input_data_path,
        output_analysis_path=output_analysis_path,
        annotation_nm=annotation_nm,
        annotation_file=annotation_file,
    )

In [None]:
derived_data_path = Path("../derived_data/wang_lab")
input_data_path = derived_data_path / "1000"
output_analysis_path = "../analysis/wang_lab/1000"

annotation_nm = "Whole Genome"
annotation_file = "whole_genome_v2_nov-11-2021.csv"
files = list_excel_files(input_data_path)
for file_nm in files:
    run_batch(file_nm, input_data_path, output_analysis_path, annotation_nm, annotation_file)

