Question 1b 

In [3]:
%%bash
# Question 1b: for every (mutation rate, seed) pair, create a mutated copy of
# the reference and align it back to the reference with MUMmer.
#
# Outputs per pair (in the current directory):
#   chr22_mut<rate>_s<seed>.fa               mutated FASTA
#   chr22_orig_vs_mut<rate>_s<seed>.delta    raw nucmer alignment
#   chr22_orig_vs_mut<rate>_s<seed>.1delta   output of delta-filter -1
#   chr22_orig_vs_mut<rate>_s<seed>.coords   show-coords summary
#
# The cell is safe to re-run: each stage is skipped only when its own output
# already exists and is non-empty.
set -euo pipefail

# Prepend the asn2 conda env's bin directory so the notebook shell can find
# the tools used below. Remove this line if they are already on PATH.
export PATH="$HOME/miniforge3/envs/asn2/bin:$PATH"

REF="chr22_orig.fa"

for rate in 0.01 0.05 0.10 0.15
do
  for seed in 1 2 3
  do
    mut="chr22_mut${rate}_s${seed}.fa"
    prefix="chr22_orig_vs_mut${rate}_s${seed}"
    coords="${prefix}.coords"
    need_align=0

    # introduce mutations (only when the mutated FASTA is missing or empty)
    if [ ! -s "$mut" ]
    then
      python3 sim_mutations.py -i "$REF" -o "$mut" -m "${rate}" -s "${seed}"
      # A freshly written FASTA invalidates any alignment left from before.
      need_align=1
    fi

    # align and summarize with mummer
    # Checked separately from the FASTA: a run that produced the .fa but
    # stopped before show-coords finished must still be aligned on re-run.
    if [ "$need_align" -eq 1 ] || [ ! -s "$coords" ]
    then
      nucmer --mum -g 1000 -b 1000 -p "$prefix" "$REF" "$mut"
      delta-filter -1 "${prefix}.delta" > "${prefix}.1delta"
      # Write to a temp name and rename, so an interrupted show-coords never
      # leaves a partial .coords file that would pass the check above.
      show-coords -rcl "${prefix}.1delta" > "${coords}.tmp"
      mv -- "${coords}.tmp" "$coords"
    fi
  done
done

# quick peek (as in the prompt)
head chr22_orig_vs_mut*.coords

Wrote mutated FASTA to chr22_mut0.01_s2.fa
Wrote mutated FASTA to chr22_mut0.05_s1.fa
Wrote mutated FASTA to chr22_mut0.05_s2.fa
Wrote mutated FASTA to chr22_mut0.05_s3.fa
Wrote mutated FASTA to chr22_mut0.10_s1.fa
Wrote mutated FASTA to chr22_mut0.10_s2.fa
Wrote mutated FASTA to chr22_mut0.10_s3.fa
Wrote mutated FASTA to chr22_mut0.15_s1.fa
Wrote mutated FASTA to chr22_mut0.15_s2.fa
Wrote mutated FASTA to chr22_mut0.15_s3.fa


==> chr22_orig_vs_mut0.01_s2.coords <==
/Users/xinyichen/Desktop/JHU/comp_gen/hw/hw2/chr22_orig.fa /Users/xinyichen/Desktop/JHU/comp_gen/hw/hw2/chr22_mut0.01_s2.fa
NUCMER

    [S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [% IDY]  |  [LEN R]  [LEN Q]  |  [COV R]  [COV Q]  | [TAGS]
       1  1000001  |        1  1000001  |  1000001  1000001  |    99.00  |  1000001  1000001  |   100.00   100.00  | chr22:20000000-21000000	chr22:20000000-21000000

==> chr22_orig_vs_mut0.05_s1.coords <==
/Users/xinyichen/Desktop/JHU/comp_gen/hw/hw2/chr22_orig.fa /Users/xinyichen/Desktop/JHU/comp_gen/hw/hw2/chr22_mut0.05_s1.fa
NUCMER

    [S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [% IDY]  |  [LEN R]  [LEN Q]  |  [COV R]  [COV Q]  | [TAGS]
       1  1000001  |        1  1000001  |  1000001  1000001  |    95.00  |  1000001  1000001  |   100.00   100.00  | chr22:20000000-21000000	chr22:20000000-21000000

==> chr22_orig_vs_mut0.05_s2.coords <==
/Users/xinyichen/Desktop/JHU/comp_ge

Question 1d

In [3]:
%%bash
# Question 1d: compute the k=21 Jaccard / ANI summary between the reference
# and each mutated FASTA, one result file per (rate, seed) pair:
#   jaccard_mut<rate>_s<seed>.txt
#
# Fail fast, matching the Question 1b cell: without this a failed run would
# be silently skipped over and the loop would continue.
set -euo pipefail

for rate in 0.01 0.05 0.10 0.15
do
  for seed in 1 2 3
  do
    out="jaccard_mut${rate}_s${seed}.txt"

    # -s (exists and non-empty) rather than -r: the output redirection creates
    # the file before the command runs, so a failed run leaves an empty file
    # that -r would accept as "already done".
    if [ ! -s "$out" ]
    then
      # NOTE(review): this invokes sim_mutations.py with -a/-b/-k, while the
      # mutation step calls the same script with -i/-o/-m/-s. Confirm this is
      # the intended Jaccard script.
      #
      # Write to a temp name and rename only on success so a partial result
      # is never mistaken for a finished one.
      python3 sim_mutations.py -a chr22_orig.fa -b "chr22_mut${rate}_s${seed}.fa" -k 21 \
        > "${out}.tmp" || { rm -f -- "${out}.tmp"; exit 1; }
      mv -- "${out}.tmp" "$out"
    fi
  done
done

head jaccard_mut*.txt


==> jaccard_mut0.01_s1.txt <==
a=chr22_orig.fa b=chr22_mut0.01_s1.fa, k=21jaccard=0.677640 ani_exact=0.981640 ani_approx=0.981470

==> jaccard_mut0.01_s2.txt <==
a=chr22_orig.fa b=chr22_mut0.01_s2.fa, k=21jaccard=0.676857 ani_exact=0.981586 ani_approx=0.981415

==> jaccard_mut0.01_s3.txt <==
a=chr22_orig.fa b=chr22_mut0.01_s3.fa, k=21jaccard=0.677441 ani_exact=0.981626 ani_approx=0.981456

==> jaccard_mut0.05_s1.txt <==
a=chr22_orig.fa b=chr22_mut0.05_s1.fa, k=21jaccard=0.208039 ani_exact=0.927963 ani_approx=0.925237

==> jaccard_mut0.05_s2.txt <==
a=chr22_orig.fa b=chr22_mut0.05_s2.fa, k=21jaccard=0.206381 ani_exact=0.927610 ani_approx=0.924856

==> jaccard_mut0.05_s3.txt <==
a=chr22_orig.fa b=chr22_mut0.05_s3.fa, k=21jaccard=0.206193 ani_exact=0.927569 ani_approx=0.924812

==> jaccard_mut0.10_s1.txt <==
a=chr22_orig.fa b=chr22_mut0.10_s1.fa, k=21jaccard=0.061675 ani_exact=0.875762 ani_approx=0.867339

==> jaccard_mut0.10_s2.txt <==
a=chr22_orig.fa b=chr22_mut0.10_s2.fa, k=21jaccard=0

Question 1f

In [4]:
%%bash
# Question 1f: run compute_jaccard_modimizer.py with k=21 on the reference
# versus each mutated FASTA, for each modulus in {100, 1000}. One result file
# per (rate, seed, modulus) combination:
#   modimizers_mut<rate>_s<seed>_m<mod>.txt
#
# Fail fast, matching the Question 1b cell: without this a failed run would
# be silently skipped over and the loop would continue.
set -euo pipefail

for rate in 0.01 0.05 0.10 0.15
do
  for seed in 1 2 3
  do
    for mod in 100 1000
    do
      out="modimizers_mut${rate}_s${seed}_m${mod}.txt"

      # -s (exists and non-empty) rather than -r: the output redirection
      # creates the file before the command runs, so a failed run leaves an
      # empty file that -r would accept as "already done".
      if [ ! -s "$out" ]
      then
        # Write to a temp name and rename only on success so a partial result
        # is never mistaken for a finished one.
        python3 compute_jaccard_modimizer.py \
          -a chr22_orig.fa \
          -b "chr22_mut${rate}_s${seed}.fa" \
          -k 21 -m "$mod" \
          > "${out}.tmp" || { rm -f -- "${out}.tmp"; exit 1; }
        mv -- "${out}.tmp" "$out"
      fi
    done
  done
done

head -100 modimizers_mut*.txt


==> modimizers_mut0.01_s1_m100.txt <==
a=chr22_orig.fa b=chr22_mut0.01_s1.fa k=21 m=100 jaccard=0.675067 ani_exact=0.981462 ani_approx=0.981288 n_modimizers_a=9290 n_modimizers_b=9521

==> modimizers_mut0.01_s1_m1000.txt <==
a=chr22_orig.fa b=chr22_mut0.01_s1.fa k=21 m=1000 jaccard=0.675244 ani_exact=0.981475 ani_approx=0.981301 n_modimizers_a=931 n_modimizers_b=957

==> modimizers_mut0.01_s2_m100.txt <==
a=chr22_orig.fa b=chr22_mut0.01_s2.fa k=21 m=100 jaccard=0.682275 ani_exact=0.981959 ani_approx=0.981794 n_modimizers_a=9290 n_modimizers_b=9464

==> modimizers_mut0.01_s2_m1000.txt <==
a=chr22_orig.fa b=chr22_mut0.01_s2.fa k=21 m=1000 jaccard=0.668756 ani_exact=0.981023 ani_approx=0.980841 n_modimizers_a=931 n_modimizers_b=933

==> modimizers_mut0.01_s3_m100.txt <==
a=chr22_orig.fa b=chr22_mut0.01_s3.fa k=21 m=100 jaccard=0.679921 ani_exact=0.981797 ani_approx=0.981630 n_modimizers_a=9290 n_modimizers_b=9384

==> modimizers_mut0.01_s3_m1000.txt <==
a=chr22_orig.fa b=chr22_mut0.01_s3.