In [1]:
import sys
import os

# ##### PUT THE LOCAL WORMCAT3 SOURCE TREE ON THE IMPORT PATH #####
# Note: skip this step when wormcat3 was installed with pip
wormcat_dir = os.path.split(os.getcwd())[0]  # parent of the current working directory
sys.path.insert(0, wormcat_dir)

# WORMCAT_DATA_PATH lets you point Wormcat at your own annotation files
# Note: leave this environment variable unset to use the provided Wormcat Annotations
# os.environ["WORMCAT_DATA_PATH"] = f"{wormcat_dir}/wormcat3/extdata"

# Example input/output folders sit next to the source tree root.
example_data_dir = wormcat_dir + "/example_data"
example_output_dir = wormcat_dir + "/example_output"

print(f"Working directory: {wormcat_dir}")

Working directory: /Users/dan/Code/Python/wormcat3


In [None]:
import os
import pandas as pd

def process_gene_data(df: pd.DataFrame, output_dir: str, min_genes: int = 2) -> None:
    """
    Write one gene-list CSV per column of ``df``.

    For each column in the DataFrame (except 'wormbase_id'), the rows where that
    column is not NaN are selected and their 'wormbase_id' values are written to
    ``<output_dir>/<column>.csv`` under the single header 'Wormbase ID'.
    Columns with fewer than ``min_genes`` non-NaN rows produce no file.

    Args:
        df (pd.DataFrame): The input DataFrame. Must contain a 'wormbase_id' column.
        output_dir (str): Directory to save output CSV files (created if missing).
        min_genes (int): Minimum number of genes a column needs in order to be
            written. Defaults to 2.

    Raises:
        KeyError: If ``df`` has no 'wormbase_id' column (raised by pandas).
    """
    os.makedirs(output_dir, exist_ok=True)

    for col in df.columns:
        if col == 'wormbase_id':
            continue

        # Boolean-mask selection returns a new frame, so ``df`` is never modified.
        gene_ids = df.loc[df[col].notna(), ['wormbase_id']]

        # Skip gene sets that are too small (fewer than ``min_genes`` genes).
        if len(gene_ids) < min_genes:
            continue

        gene_ids = gene_ids.rename(columns={'wormbase_id': 'Wormbase ID'})

        # Spaces and '/' are replaced so the column label yields a single,
        # flat file name; str() keeps non-string labels from raising here.
        safe_name = str(col).replace(' ', '_').replace('/', '_')
        out_path = os.path.join(output_dir, f"{safe_name}.csv")
        gene_ids.to_csv(out_path, index=False, header=True)

In [None]:
import pandas as pd
# Resolve the Downloads folder from the current user's home directory instead of
# hard-coding one machine's absolute path, so the notebook also runs for other users.
base_dir = os.path.expanduser("~/Downloads")
file_nm = "genes_in_cell_type.csv"
file_path = f"{base_dir}/{file_nm}"
# NOTE(review): the CSV is presumably one 'wormbase_id' column plus one column per
# cell type, which is what process_gene_data expects -- confirm against the file.
genes_in_cell_type_df = pd.read_csv(file_path)
# Writes the per-column gene-list CSVs into ./orouke_cell_type
process_gene_data(genes_in_cell_type_df, output_dir="./orouke_cell_type")

In [2]:
# Wormcat can now easily be run from a Jupyter Notebook
from wormcat3 import Wormcat

wormcat = Wormcat(run_prefix="orouke_cell_type_out")

# Feed the batch run the directory that the process_gene_data cell wrote
# (./orouke_cell_type, relative to the notebook's working directory) instead of a
# machine-specific absolute path; abspath keeps the argument absolute as before.
orouke_path = os.path.abspath("orouke_cell_type")
results_df = wormcat.wormcat_batch(orouke_path)

rgs_value=106, total_gene_set_count=595, ac_value=6343, total_annotations_count=31389
rgs_value=86, total_gene_set_count=595, ac_value=1601, total_annotations_count=31389
rgs_value=83, total_gene_set_count=595, ac_value=244, total_annotations_count=31389
rgs_value=47, total_gene_set_count=595, ac_value=308, total_annotations_count=31389
rgs_value=47, total_gene_set_count=595, ac_value=495, total_annotations_count=31389
rgs_value=31, total_gene_set_count=595, ac_value=3200, total_annotations_count=31389
rgs_value=28, total_gene_set_count=595, ac_value=833, total_annotations_count=31389
rgs_value=27, total_gene_set_count=595, ac_value=1188, total_annotations_count=31389
rgs_value=17, total_gene_set_count=595, ac_value=371, total_annotations_count=31389
rgs_value=17, total_gene_set_count=595, ac_value=62, total_annotations_count=31389
rgs_value=15, total_gene_set_count=595, ac_value=901, total_annotations_count=31389
rgs_value=13, total_gene_set_count=595, ac_value=394, total_annotations_

In [None]:
import pandas as pd
# Resolve the Downloads folder from the current user's home directory instead of
# hard-coding one machine's absolute path, so the notebook also runs for other users.
base_dir = os.path.expanduser("~/Downloads")
file_nm = "genes_in_cell_type_group.csv"
file_path = f"{base_dir}/{file_nm}"
# NOTE(review): the CSV is presumably one 'wormbase_id' column plus one column per
# cell-type group, which is what process_gene_data expects -- confirm against the file.
genes_in_cell_type_df = pd.read_csv(file_path)
# Writes the per-column gene-list CSVs into ./orouke_cell_type_group
process_gene_data(genes_in_cell_type_df, output_dir="./orouke_cell_type_group")

In [3]:
# Wormcat can now easily be run from a Jupyter Notebook
from wormcat3 import Wormcat

wormcat = Wormcat(run_prefix="orouke_cell_type_group_out")

# Feed the batch run the directory that the process_gene_data cell wrote
# (./orouke_cell_type_group, relative to the notebook's working directory) instead
# of a machine-specific absolute path; abspath keeps the argument absolute as before.
orouke_path = os.path.abspath("orouke_cell_type_group")
results_df = wormcat.wormcat_batch(orouke_path)

rgs_value=1555, total_gene_set_count=6914, ac_value=6343, total_annotations_count=31389
rgs_value=1015, total_gene_set_count=6914, ac_value=1601, total_annotations_count=31389
rgs_value=437, total_gene_set_count=6914, ac_value=1188, total_annotations_count=31389
rgs_value=401, total_gene_set_count=6914, ac_value=3200, total_annotations_count=31389
rgs_value=393, total_gene_set_count=6914, ac_value=833, total_annotations_count=31389
rgs_value=317, total_gene_set_count=6914, ac_value=407, total_annotations_count=31389
rgs_value=284, total_gene_set_count=6914, ac_value=901, total_annotations_count=31389
rgs_value=253, total_gene_set_count=6914, ac_value=733, total_annotations_count=31389
rgs_value=204, total_gene_set_count=6914, ac_value=244, total_annotations_count=31389
rgs_value=173, total_gene_set_count=6914, ac_value=295, total_annotations_count=31389
rgs_value=172, total_gene_set_count=6914, ac_value=371, total_annotations_count=31389
rgs_value=167, total_gene_set_count=6914, ac_val