## Use sourmash to make a depth file, similar to read depth but based on k-mers
- symlink the read sketches to folder
- create a file from all contigs
- sketch the contigs into k-mers (k=21, 31, 51)
- collect the sigs into sql db
- Run fastmultigather of all contigs <-> all reads
- Run mgmanysearch with the fmg result as a picklist, using snakemake


In [None]:
# Link the read sketches whose names start with ERR113518 or ERR113517 into
# the current directory. Brace expansion yields both glob patterns, so a
# single ln call covers both prefixes.
ln -s ../../../2023-swine-sra/sourmash/sig_files/sketch_reads/ERR11351{8,7}* .

In [None]:
# Request an interactive bash session on the bmm partition:
# 100 CPUs per task, 100 GB of memory, 24-hour time limit.
srun \
  --account=ctbrowngrp \
  --partition=bmm \
  --job-name=fmg_bin \
  --time=24:00:00 \
  --cpus-per-task=100 \
  --mem=100gb \
  --pty bash

In [None]:
# Retry at a scaled value of 100, threshold of 10.
# Sketch each read pair (R1 + R2) at k=31 and save it as <name>.k31.sig.gz.
# Follow-up: make a zip for each fasta, concat those, then make a mf.
#
# The loop prints one command line per read pair; `parallel -j 24` then runs
# them with up to 24 jobs at a time (assumes GNU parallel, which executes each
# input line as a command when no command is given).
for r1 in *_R1.fastq.gz; do
  # With no matching files the pattern stays literal; skip it instead of
  # emitting a bogus sourmash command.
  [ -e "$r1" ] || continue
  r2="${r1%_R1*}_R2.fastq.gz"   # mate file: same prefix with an _R2 suffix
  sample="${r1%_QC*}"           # file name with the trailing "_QC..." part removed
  # %q shell-quotes each file name so the generated line survives being
  # parsed a second time by the shell that parallel spawns.
  printf 'sourmash sketch dna -p k=31,scaled=100 %q %q --name %q -o %q\n' \
    "$r1" "$r2" "$sample" "${sample}.k31.sig.gz"
done | parallel -j 24

In [None]:
# fastmultigather with scale of 100.
# Activates the branchwater environment, then runs fastmultigather twice with
# identical options (-c 100 -k 21 -t 1000 -s 100) and the same contig zip as
# the second positional argument; only the first positional argument differs.
# Presumably -c is cores, -k the k-mer size, -t the threshold and -s the
# scaled value — confirm with `sourmash scripts fastmultigather --help`.

mamba activate branchwater
# NOTE(review): the first path has nothing before ".k21.txt" — it looks
# truncated (missing sample prefix?); confirm the intended file.
sourmash scripts fastmultigather \
../sketch_reads/.k21.txt \
../sketch_contigs/ERR11351.k21.zip \
-c 100 -k 21 -t 1000 -s 100

# Second run: a single reads zip (ERR1135178) as the first argument.
# NOTE(review): the repeated activate looks redundant if both commands run in
# the same shell session.
mamba activate branchwater
sourmash scripts fastmultigather \
../sketch_reads/ERR1135178.k21.zip \
../sketch_contigs/ERR11351.k21.zip \
-c 100 -k 21 -t 1000 -s 100

In [None]:
# Run mgmanysearch for sample ERR1135178: the contig sketch is the query, the
# read sketch is the target, and results go to ERR1135178.mgm.csv.
# Note: --queries can be given a list of query files.
sourmash scripts mgmanysearch \
  -k 21 --scaled 1000 \
  --queries sketch_contigs/ERR1135178.sig \
  --against sketch_reads/ERR1135178.sig.gz \
  -o ERR1135178.mgm.csv

In [None]:
# Run the Snakefile (as written this is a dry run only).

# Interactive session on med2: 24 CPUs per task, 30 GB of memory, 1.5 hours.
srun \
  --account=ctbrowngrp \
  --partition=med2 \
  --job-name=mgbin \
  --time=1:30:00 \
  --cpus-per-task=24 \
  --mem=30gb \
  --pty bash
mamba activate branchwater
# 24 cores, 30000 MB memory budget, rerun triggered by mtime only, keep going
# after a failed job, and --dry-run so nothing is actually executed.
snakemake \
  --cores 24 \
  --resources mem_mb=30000 \
  --rerun-triggers mtime \
  --rerun-incomplete \
  --keep-going \
  --latency-wait 1 \
  --dry-run
