In [2]:
import os

# POLARS_MAX_THREADS is only honoured if it is set before polars is first
# imported. nanomotif.binnary presumably imports polars itself (its helpers are
# used with polars expressions below), so the variable must be set before that
# import as well, not just before `import polars`.
os.environ["POLARS_MAX_THREADS"] = "1"

import polars as pl

from nanomotif.binnary import data_processing, detect_contamination, include_contigs

In [8]:
class Args:
    """Plain attribute container passed as ``args`` to the ``data_processing`` helpers.

    Calling ``Args()`` with no arguments reproduces the original hard-coded
    configuration. Every value can be overridden by keyword for other datasets
    or cutoffs.

    Parameters
    ----------
    data_dir : str
        Directory that the input and output paths below are built from.
    mean_methylation_cutoff : float
        Stored as ``self.mean_methylation_cutoff``.
    n_motif_contig_cutoff : int
        Stored as ``self.n_motif_contig_cutoff``; used in this notebook as the
        minimum ``n_motifs`` a contig row needs to be kept.
    n_motif_bin_cutoff : int
        Stored as ``self.n_motif_bin_cutoff``.
    ambiguous_motif_percentage_cutoff : float
        Stored as ``self.ambiguous_motif_percentage_cutoff``.
    """

    def __init__(
        self,
        data_dir="../binnary/data/data/anaerobic_digestor",
        mean_methylation_cutoff=0.25,
        n_motif_contig_cutoff=10,
        n_motif_bin_cutoff=500,
        ambiguous_motif_percentage_cutoff=0.40,
    ):
        # Input tables.
        self.motifs_scored = f"{data_dir}/nanomotif/motifs-scored.tsv"
        self.bin_motifs = f"{data_dir}/nanomotif/bin-motifs.tsv"
        self.contig_bins = f"{data_dir}/mmlong2_lite/tmp/binning/contig_bin.tsv"

        # Cutoffs read by the downstream helpers.
        self.mean_methylation_cutoff = mean_methylation_cutoff
        self.n_motif_contig_cutoff = n_motif_contig_cutoff
        self.n_motif_bin_cutoff = n_motif_bin_cutoff
        self.ambiguous_motif_percentage_cutoff = ambiguous_motif_percentage_cutoff

        # Output location.
        self.out = f"{data_dir}/binnary/fecal_contamination_test_comparison_matrix.tsv"


args = Args()

In [9]:
# load_data returns three objects, unpacked here in the order
# (motifs_scored, bin_motifs, contig_bins); presumably one per input path
# configured on `args` -- confirm against data_processing.load_data.
motifs_scored, bin_motifs, contig_bins = data_processing.load_data(args)

# 

In [12]:
# Bin-level consensus built from the bin motif table.
bin_motif_binary = data_processing.prepare_bin_consensus(bin_motifs, args)

# Distinct "motif_mod" values that occur in that consensus.
motifs_in_bin_consensus = (
    bin_motif_binary
    .select("motif_mod")
    .unique()
    .get_column("motif_mod")
)

# Contig-level motif scores, prepared from the scored motifs, the consensus
# motif set and the contig-to-bin assignment.
motifs_scored_in_bins = data_processing.prepare_motifs_scored_in_bins(
    motifs_scored, motifs_in_bin_consensus, contig_bins
)

# Bin consensus recomputed from the contig-level scores.
bin_motifs_from_motifs_scored_in_bins = (
    data_processing.construct_bin_consensus_from_motifs_scored_in_bins(
        motifs_scored_in_bins, args
    )
)

In [13]:
# Drop every row whose "bin_contig" value contains the text "unbinned".
motifs_scored_in_bins_wo_unbinned = motifs_scored_in_bins.filter(
    ~pl.col("bin_contig").str.contains("unbinned")
)
    
# One entry per (bin motif state, contig motif state) case, in the order the
# matching conditions are expected to be listed; the last field is the value
# chosen for that case.
_CHOICE_BY_CASE = (
    ("bin methylated", "contig methylated", 0),
    ("bin methylated", "contig not methylated", 1),
    ("bin not methylated", "contig methylated", 1),
    ("bin not methylated", "contig not methylated", 0),
    ("bin methylated", "contig not observed", 0),
    ("bin not methylated", "contig not observed", 0),
)

choices = [value for _bin_state, _contig_state, value in _CHOICE_BY_CASE]

In [18]:
# The single bin / contig pair inspected by the cells below.
focus_bin = "mmlong2_lite.bin.1.14"
focus_contig = "mmlong2_lite.bin.1.14_contig_39961"

# Contig-level scores with at least `n_motif_contig_cutoff` motif observations,
# reduced to the columns needed for the comparison. "bin_contig" is renamed to
# "bin_compare" before the join.
motifs_scored_in_contigs = (
    motifs_scored_in_bins_wo_unbinned
    .filter(pl.col("n_motifs") >= args.n_motif_contig_cutoff)
    .select(["bin_contig", "motif_mod", "mean"])
    .rename({"bin_contig": "bin_compare"})
)

focus_bin_consensus = bin_motifs_from_motifs_scored_in_bins.filter(
    pl.col("bin") == focus_bin
)
focus_contig_scores = motifs_scored_in_contigs.filter(
    pl.col("bin_compare") == focus_contig
)

# Inner join: only motifs present on both the bin side and the contig side remain.
motif_binary_compare = focus_bin_consensus.join(
    focus_contig_scores,
    on="motif_mod",
    how="inner",
)

In [19]:
# Expressions shared by the three steps below.
bin_is_methylated = pl.col("methylation_binary") == 1
bin_is_unmethylated = pl.col("methylation_binary") == 0
contig_mean = pl.col("mean")
four_sd_below_bin_mean = pl.col("mean_methylation") - 4 * pl.col("std_methylation_bin")

# Step 1 -- threshold for motifs whose bin consensus is 1: the bin mean minus
# four standard deviations when that exceeds 0.1, else 0.1. Other rows get null.
motif_binary_compare = motif_binary_compare.with_columns([
    pl.when(bin_is_methylated)
    .then(
        pl.when(four_sd_below_bin_mean > 0.1)
        .then(four_sd_below_bin_mean)
        .otherwise(0.1)
    )
    .otherwise(pl.lit(None))
    .alias("methylation_mean_threshold")
])

# Step 2 -- binary call for motifs whose bin consensus is 1: the contig is
# called 1 when its mean reaches the threshold or is above 0.4, otherwise 0.
# Rows whose bin consensus is not 1 stay null for now.
contig_passes_threshold = (
    (contig_mean >= pl.col("methylation_mean_threshold"))
    | (contig_mean > 0.4)
)
motif_binary_compare = motif_binary_compare.with_columns([
    pl.when(bin_is_methylated & contig_passes_threshold)
    .then(1)
    .when(bin_is_methylated)
    .then(0)
    .otherwise(pl.lit(None))
    .alias("methylation_binary_compare")
])

# Step 3 -- motifs whose bin consensus is 0: fixed threshold of 0.25, and the
# contig is called 1 when its mean is at least 0.25. Other rows keep the values
# from steps 1 and 2.
# NOTE(review): 0.25 equals args.mean_methylation_cutoff in this notebook --
# confirm whether this literal is meant to follow that setting.
motif_binary_compare = motif_binary_compare.with_columns([
    pl.when(bin_is_unmethylated)
    .then(0.25)
    .otherwise(pl.col("methylation_mean_threshold"))
    .alias("methylation_mean_threshold"),

    pl.when(bin_is_unmethylated)
    .then((contig_mean >= 0.25).cast(pl.Int32))
    .otherwise(pl.col("methylation_binary_compare"))
    .alias("methylation_binary_compare")
])