In [1]:
import os
from Bio import SeqIO 
import shutil
import subprocess

In [2]:
#Define the PATH for variable parameters

GENOME_VERSION = 'v04'

BASE_OUT_PATH = '/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/%s' % GENOME_VERSION
GENOME_PATH = '/home/gamran/genome_analysis/Warrior/Richard/output/genome_%s/' % GENOME_VERSION
GENOME  = 'DK_0911_%s' % GENOME_VERSION
OUT_PATH_NUCMER = os.path.join(BASE_OUT_PATH, 'nucmer_analysis/')
OUT_PATH_ASSEMBLYTICS = os.path.join(BASE_OUT_PATH, 'Assemblytics/')
MUMMER_PATH_PREFIX = '/home/benjamin/anaconda3/bin/'
ASSEMBLYTICS_PATH = '/home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics'
py2_env = 'py27' #python 2 environment in conda 
if not os.path.isdir(BASE_OUT_PATH):
    os.mkdir(BASE_OUT_PATH)
if not os.path.isdir(OUT_PATH_NUCMER):
    os.mkdir(OUT_PATH_NUCMER)
if not os.path.isdir(OUT_PATH_ASSEMBLYTICS):
    os.mkdir(OUT_PATH_ASSEMBLYTICS)

In [3]:
P_GENOME = GENOME + '_p_ctg'
H_GENOME = GENOME + '_h_ctg'

for x in (x + '.fa' for x in [P_GENOME, H_GENOME]):
    shutil.copy2(GENOME_PATH + x, OUT_PATH_NUCMER)
    shutil.copy2(GENOME_PATH + x, OUT_PATH_ASSEMBLYTICS)

In [4]:
#define the scripts to generate
bash_script_q= GENOME+"_ph_ctg_qmapping.sh"
bash_script_g=GENOME+"_ph_ctg_gmapping.sh"
bash_script_nucmer_assemblytics = GENOME +"_nucmer_assemblytics_mapping.sh"
bash_script_assemblytics = GENOME + '_assemblytics.sh'

In [5]:
outfq = open(os.path.join(OUT_PATH_NUCMER, bash_script_q), 'w')
outfq.write('#!/bin/bash\n')
outfg = open(os.path.join(OUT_PATH_NUCMER,bash_script_g), 'w')
outfg.write('#!/bin/bash\n') #parsing out P and corresponding h contigs and writing a short nucmer script that aligns them against each other
outfna = open(os.path.join(OUT_PATH_ASSEMBLYTICS,bash_script_nucmer_assemblytics), 'w')
outfna.write('#!/bin/bash\n')

for pseq_record in SeqIO.parse(OUT_PATH_ASSEMBLYTICS+'/'+GENOME+'_p_ctg.fa', 'fasta'):
    p_acontigs = []
    p_contig = pseq_record.id.split("_")[0]+"_"+pseq_record.id.split("_")[1]
    suffix = GENOME+"_"+p_contig+"_php"
    p_file = GENOME+"_"+p_contig+'.fa'
    SeqIO.write(pseq_record, OUT_PATH_ASSEMBLYTICS+'/'+ p_file, 'fasta')
    SeqIO.write(pseq_record,OUT_PATH_NUCMER+'/'+ p_file, 'fasta')
    for aseq_record in SeqIO.parse(OUT_PATH_ASSEMBLYTICS+'/'+GENOME+'_h_ctg.fa', 'fasta'):
        if aseq_record.id.split("_")[1]  == pseq_record.id.split("_")[1]:
            p_acontigs.append(aseq_record)
    a_file = GENOME +"_"+pseq_record.id.split("_")[0]+"_"+pseq_record.id.split("_")[1]+'_h_ctgs.fa'
    #if we have alternative contigs save those too
    if p_acontigs != []:
        outfq.write('cd ' + OUT_PATH_NUCMER + '\n')
        SeqIO.write(p_acontigs, OUT_PATH_ASSEMBLYTICS+'/'+  a_file, 'fasta')
        SeqIO.write(p_acontigs, OUT_PATH_NUCMER+'/'+  a_file, 'fasta')
        outfq.write(MUMMER_PATH_PREFIX +'/nucmer '+p_file+' '+a_file+" > "+'out.delta\n')
        outfq.write(MUMMER_PATH_PREFIX +'/delta-filter -q '+'out.delta'+" > "+suffix+"_qfiltered.delta\n")
        outfq.write(MUMMER_PATH_PREFIX +'/show-coords -T '+suffix+"_qfiltered.delta > "+suffix+".qcoords\n")
        outfq.write(MUMMER_PATH_PREFIX +'/mummerplot -p '+suffix+'_qfiltered --png '+suffix+"_qfiltered.delta\n")
        outfq.write(MUMMER_PATH_PREFIX +'/mummerplot -c -p '+suffix+'_qfiltered_cov --png '+suffix+"_qfiltered.delta\n")
        #for g_file bash script
        outfg.write('cd ' + OUT_PATH_NUCMER + '\n')
        outfg.write(MUMMER_PATH_PREFIX +'/nucmer '+p_file+' '+a_file+" > "+'out.delta\n')
        outfg.write(MUMMER_PATH_PREFIX +'/delta-filter -g '+'out.delta'+" > "+suffix+"_gfiltered.delta\n")
        outfg.write(MUMMER_PATH_PREFIX +'/show-coords -T '+suffix+"_gfiltered.delta > "+suffix+".gcoords\n")
        outfg.write(MUMMER_PATH_PREFIX +'/mummerplot -p '+suffix+'_gfiltered --png '+suffix+"_gfiltered.delta\n")
        outfg.write(MUMMER_PATH_PREFIX +'/mummerplot -c -p  '+suffix+'_gfiltered_cov --png '+suffix+"_gfiltered.delta\n")
        #for nucmer assemblytics out
        outfna.write('cd ' + OUT_PATH_ASSEMBLYTICS + '\n')
        outfna.write(MUMMER_PATH_PREFIX +'/nucmer -maxmatch -l 100 -c 500 '+p_file+' '+a_file+' -prefix ' + suffix +'\n')
outfna.close()
outfq.close()
outfg.close()  

In [6]:
#run the scripts and check if there are errors

bash_script_q_stderr = subprocess.check_output('bash %s' %os.path.join(OUT_PATH_NUCMER, bash_script_q), shell=True, stderr=subprocess.STDOUT)
bash_script_g_stderr = subprocess.check_output('bash %s' %os.path.join(OUT_PATH_NUCMER, bash_script_g), shell=True, stderr=subprocess.STDOUT)
bash_script_assemblytics_stderr = subprocess.check_output('bash %s' %os.path.join(OUT_PATH_ASSEMBLYTICS, bash_script_nucmer_assemblytics), shell=True, stderr=subprocess.STDOUT)

In [7]:
#write the Assemblytics script

folder_locs = []

outfnarun = open(os.path.join(OUT_PATH_ASSEMBLYTICS, bash_script_assemblytics), 'w')
outfnarun.write('#!/bin/bash\n')
delta_files = [x for x in os.listdir(OUT_PATH_ASSEMBLYTICS) if x.endswith('delta')]
outfnarun.write('export PATH="%s":$PATH\n'% ASSEMBLYTICS_PATH)
outfnarun.write('#Assemblytics delta_file output_prefix unique_anchor_length maximum_feature_length path_to_R_scripts\n')
outfnarun.write('source activate %s\n' %py2_env)
outfnarun.write('cd %s\n' %OUT_PATH_ASSEMBLYTICS)

for delta in delta_files:
    folder_name = OUT_PATH_ASSEMBLYTICS + delta[:-6] + '_8kbp'
    
    folder_locs.append(folder_name)
    
    output_prefix = delta[:-6] + '_8kbp_50kp'
    outfnarun.write("mkdir %s\ncp %s %s\ncd %s\n" % (folder_name, delta, folder_name, folder_name))
    outfnarun.write("Assemblytics %s %s 8000 50000 %s\n" % (delta, output_prefix, ASSEMBLYTICS_PATH) )
    output_prefix = delta[:-6] + '_8kbp_10kp'
    outfnarun.write("Assemblytics %s %s 8000 10000 %s\ncd ..\n" % (delta, output_prefix, ASSEMBLYTICS_PATH) )
outfnarun.write('source deactivate\n')
outfnarun.close()

In [8]:
# bash_script_assemblytics_stderr = subprocess.check_output('bash %s' %os.path.join(OUT_PATH_ASSEMBLYTICS, bash_script_assemblytics), shell=True, stderr=subprocess.STDOUT)

!bash {os.path.join(OUT_PATH_ASSEMBLYTICS, bash_script_assemblytics)}

Input delta file: DK_0911_v04_pcontig_083_php.delta
Output prefix: DK_0911_v04_pcontig_083_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length: 50000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_083_php_8kbp_50kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_083.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_083_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 2 queries
Progress: 50%
Progress: 100%
Progress: 100%
Deciding which alignments to keep: 0 seconds for 2 queries
Reading file and recording all the entries we de

Reading file and recording all the entries we decided to keep: 0 seconds for 5114 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 61 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `g

COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Repeat_expansion, Repeat_contraction, Tandem_expansion, Tandem_contraction
[1] "No variants in plot:"
[1] "min_var= 10"
[1] "max_var= 50"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 50000"
[1] "No variants in plot:"
[1] "min_var= 10"
[1] "max_var= 50"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 50000"
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_093_

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_075_php.delta
Output prefix: DK_0911_v04_pcontig_075_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length: 50000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_075_php_8kbp_50kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_075.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_075_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 0 queries
Progress: 100%
Deciding which alignments to keep: 0 secon

WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Repeat_contraction, Tandem_expansion, Tandem_contraction
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` 

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_024_php.delta
Output prefix: DK_0911_v04_pcontig_024_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length: 50000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_024_php_8kbp_50kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_024.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_024_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 20 queries
Progress: 10%
Progress: 20%
Progress: 30%
Progress: 40%


null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_026_php.delta
Output prefix: DK_0911_v04_pcontig_026_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_026_php_8kbp_10kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_026.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_026_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 18 queries
Progress: 5%
Progress: 11%
Progress: 16%
Progress: 22%
P

Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_042.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_042_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 13 queries
Progress: 7%
Progress: 15%
Progress: 23%
Progress: 30%
Progress: 38%
Progress: 46%
Progress: 53%
Progress: 61%
Progress: 69%
Progress: 76%
Progress: 84%
Progress: 92%
Progress: 100%
Progress: 100%
Deciding which alignments to keep: 0 seconds for 13 queries
Reading file and recording all the entries we decided to keep: 0 seconds for 1615 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 22 alignments
BETWEEN,DO

Reading file and recording all the entries we decided to keep: 0 seconds for 1180 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 17 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Tandem_contraction
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer h

WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Tandem_contraction
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
7: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` inst

1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
7: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
8: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_005_php.delta
Output prefix: DK_0911_v04_pcontig_005_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_072_php.delta
Output prefix: DK_0911_v04_pcontig_072_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_072_php_8kbp_10kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_072.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_072_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 2 queries
Progress: 50%
Progress: 100%
Progress: 100%
Deciding whic

Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_095.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_095_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 2 queries
Progress: 50%
Progress: 100%
Progress: 100%
Deciding which alignments to keep: 0 seconds for 2 queries
Reading file and recording all the entries we decided to keep: 0 seconds for 150 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 2 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DO

WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
7: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
8: `geom_bar()` no longer has a `binwidth` parameter. Please use `

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_001_php.delta
Output prefix: DK_0911_v04_pcontig_001_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_001_php_8kbp_10kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_001.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_001_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 40 queries
Progress: 10%
Progress: 20%
Progress: 30%
Progress: 40%


Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_031.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_031_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 16 queries
Progress: 6%
Progress: 12%
Progress: 18%
Progress: 25%
Progress: 31%
Progress: 37%
Progress: 43%
Progress: 50%
Progress: 56%
Progress: 62%
Progress: 68%
Progress: 75%
Progress: 81%
Progress: 87%
Progress: 93%
Progress: 100%
Progress: 100%
Deciding which alignments to keep: 0 seconds for 16 queries
Reading file and recording all the entries we decided to keep: 0 seconds for 1433 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between 

Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_070.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_070_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 4 queries
Progress: 25%
Progress: 50%
Progress: 75%
Progress: 100%
Progress: 100%
Deciding which alignments to keep: 0 seconds for 4 queries
Reading file and recording all the entries we decided to keep: 0 seconds for 1053 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 11 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding varian

Reading file and recording all the entries we decided to keep: 0 seconds for 11565 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 123 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: 

The following `from` values were not present in `x`: Repeat_expansion, Repeat_contraction, Tandem_expansion, Tandem_contraction
[1] "No variants in plot:"
[1] "min_var= 10"
[1] "max_var= 50"
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 10000"
[1] "No variants in plot:"
[1] "min_var= 10"
[1] "max_var= 50"
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 10000"
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_084_php.delta
Output prefix: DK_0911_v04_pcontig_084_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length: 50000
Path to R scripts: /home/benjamin/genome

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_081_php.delta
Output prefix: DK_0911_v04_pcontig_081_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_081_php_8kbp_10kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_081.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_081_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 5 queries
Progress: 20%
Progress: 40%
Progress: 60%
Progress: 80%
P

Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_064.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_064_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 1 queries
Progress: 100%
Progress: 100%
Deciding which alignments to keep: 0 seconds for 1 queries
Reading file and recording all the entries we decided to keep: 0 seconds for 41 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 1 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Asse

UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 69 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar(

WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Repeat_expansion, Repeat_contraction, Tandem_expansion, Tandem_contraction
[1] "No variants in plot:"
[1] "min_var= 10"
[1] "max_var= 50"
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 50000"
[1] "No variants in plot:"
[1] "min_var= 10"
[1] "max_var= 50"
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 50000"
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
       

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_020_php.delta
Output prefix: DK_0911_v04_pcontig_020_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length: 50000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_020_php_8kbp_50kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_020.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_020_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 21 queries
Progress: 9%
Progress: 19%
Progress: 28%
Progress: 38%
P

Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_078.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_078_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 3 queries
Progress: 33%
Progress: 66%
Progress: 100%
Progress: 100%
Deciding which alignments to keep: 0 seconds for 3 queries
Reading file and recording all the entries we decided to keep: 0 seconds for 372 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 4 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignm

UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 15 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Repeat_expansion, Repeat_contraction
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` n

WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Repeat_contraction, Tandem_contraction
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
7: `geom_bar()` no longer has a `binwidth` parameter. Please use `ge

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_015_php.delta
Output prefix: DK_0911_v04_pcontig_015_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_015_php_8kbp_10kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_015.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_015_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 27 queries
Progress: 7%
Progress: 14%
Progress: 22%
Progress: 29%
P

Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_034.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_034_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 16 queries
Progress: 6%
Progress: 12%
Progress: 18%
Progress: 25%
Progress: 31%
Progress: 37%
Progress: 43%
Progress: 50%
Progress: 56%
Progress: 62%
Progress: 68%
Progress: 75%
Progress: 81%
Progress: 87%
Progress: 93%
Progress: 100%
Progress: 100%
Deciding which alignments to keep: 0 seconds for 16 queries
Reading file and recording all the entries we decided to keep: 0 seconds for 2521 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between 

UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 13 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Tandem_expansion, Tandem_contraction
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` n

Loaded 45 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Tandem_contraction
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: 

WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
7: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
8: `geom_bar()` no longer has a `binwidth` parameter. Please use `

The following `from` values were not present in `x`: Repeat_expansion, Repeat_contraction, Tandem_expansion, Tandem_contraction
[1] "No variants in plot:"
[1] "min_var= 10"
[1] "max_var= 50"
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 50000"
[1] "No variants in plot:"
[1] "min_var= 10"
[1] "max_var= 50"
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 50000"
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_073_php.delta
Output prefix: DK_0911_v04_pcontig_073_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_043_php.delta
Output prefix: DK_0911_v04_pcontig_043_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length: 50000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_043_php_8kbp_50kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_043.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_043_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 1 queries
Progress: 100%
Progress: 100%
Deciding which alignments t

WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Repeat_contraction, Tandem_expansion, Tandem_contraction
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 10000"
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 10000"
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom

COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
7: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
8: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_003_php.delta
Ou

1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_038_php.delta
Output prefix: DK_0911_v04_pcontig_038_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length: 50000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_038_php_8kbp_50kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrio

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_013_php.delta
Output prefix: DK_0911_v04_pcontig_013_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_013_php_8kbp_10kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_013.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_013_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 28 queries
Progress: 7%
Progress: 14%
Progress: 21%
Progress: 28%
P

Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_039.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_039_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 14 queries
Progress: 7%
Progress: 14%
Progress: 21%
Progress: 28%
Progress: 35%
Progress: 42%
Progress: 50%
Progress: 57%
Progress: 64%
Progress: 71%
Progress: 78%
Progress: 85%
Progress: 92%
Progress: 100%
Progress: 100%
Deciding which alignments to keep: 0 seconds for 14 queries
Reading file and recording all the entries we decided to keep: 0 seconds for 2069 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 26 alignme

Loaded 11 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Tandem_contraction
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: 

WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Repeat_expansion
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
7: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instea

The following `from` values were not present in `x`: Repeat_contraction, Tandem_expansion, Tandem_contraction
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
7: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
8: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_033_ph

[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 50000"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 50000"
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_082_php.delta
Output prefix: DK_0911_v04_pcontig_082_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome_assem

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_052_php.delta
Output prefix: DK_0911_v04_pcontig_052_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length: 50000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_052_php_8kbp_50kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_052.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_052_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 8 queries
Progress: 12%
Progress: 25%
Progress: 37%
Progress: 50%
P

Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_079.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_079_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 4 queries
Progress: 25%
Progress: 50%
Progress: 75%
Progress: 100%
Progress: 100%
Deciding which alignments to keep: 0 seconds for 4 queries
Reading file and recording all the entries we decided to keep: 0 seconds for 316 total lines in file
UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 4 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants

UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments.
2. Finding variants between alignments
Loaded 2 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Repeat_expansion, Repeat_contraction, Tandem_expansion, Tandem_contraction
[1] "No variants in plot:"
[1] "min_var= 10"
[1] "max_var= 50"
[1] "No variants in plot:"
[1] "min_var= 50"
[1] "max_var= 500"
[1] "No variants in plot:"
[1] "min_var= 500"
[1] "max_var= 50000"
[1] "No variants in plot:"
[1] "min_var= 10"
[1] "max_var= 50"
[1] "

COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
The following `from` values were not present in `x`: Repeat_contraction
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
7: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
8: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py compl

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_025_php.delta
Output prefix: DK_0911_v04_pcontig_025_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_025_php_8kbp_10kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_025.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_025_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 1 queries
Progress: 100%
Progress: 100%
Deciding which alignments t

Loaded 43 alignments
BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments.
3. Finding variants within alignments
WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together.
4. Combine variants between and within alignments
COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics.
1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_hist

1: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
2: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
3: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
4: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
5: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
6: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
7: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
8: `geom_bar()` no longer has a `binwidth` parameter. Please use `geom_histogram()` instead. 
null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_045_php.delta
Output prefix: DK_0911_v04_pcontig_045_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_057_php.delta
Output prefix: DK_0911_v04_pcontig_057_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_057_php_8kbp_10kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_057.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_057_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 8 queries
Progress: 12%
Progress: 25%
Progress: 37%
Progress: 50%
P

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_065_php.delta
Output prefix: DK_0911_v04_pcontig_065_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length: 50000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_065_php_8kbp_50kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_065.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_065_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 6 queries
Progress: 16%
Progress: 33%
Progress: 50%
Progress: 66%
P

SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_030_php.delta
Output prefix: DK_0911_v04_pcontig_030_php_8kbp_10kp
Unique anchor length: 8000
Maximum feature length: 10000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_030_php_8kbp_10kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_030.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_030_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 17 queries
Progress: 5%
Progress: 11%
Progress: 17%
Progress: 23%
Progress: 29%
Progress: 35%

null device 
          1 
SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully
Input delta file: DK_0911_v04_pcontig_014_php.delta
Output prefix: DK_0911_v04_pcontig_014_php_8kbp_50kp
Unique anchor length: 8000
Maximum feature length: 50000
Path to R scripts: /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/downstream_analysis_2017/scripts/Assemblytics
DK_0911_v04_pcontig_014_php_8kbp_50kp
STARTING,DONE,Starting unique anchor filtering.
1. Filter delta file
Keeping fully unique alignments even if they are below the unique anchor length of 8000 bp
header:
/home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_014.fa /home/gamran/genome_analysis/Warrior/Richard/output/nucmer_assemblytics/v04/Assemblytics/DK_0911_v04_pcontig_014_h_ctgs.fa
NUCMER
First read through the file: 0 seconds for 0 query-reference combinations
Filtering alignments of 11 queries
Progress: 9%
Progress: 18%
Progress: 27%
Progress: 36%
P

In [9]:
# CHECK IF MAPPING WENT CORRECTLY

os.chdir('/home/gamran/genome_analysis/Warrior/Richard/scripts')
%run file_counting.ipynb

def checkMapping(folder_locs, ref = 62):
    '''Checks whether the mapping process in above cells worked consistently across all files'''

    '''N.B. working reference count on 6/12/17 was:
    COMPARING ALL FOLDERS TO REF: DK_0911_v03_pcontig_018_php_8kbp
    Reference Counts: {'tab': 2, 'index': 4, 'df': 2, 'genome': 4, 'summary': 2, 'csv': 8, 'txt': 2, 'gz': 2, 'bed': 6, 'delta': 1}
    matches: 86 discrepancies: 0 
    match %: 100.0'''

    ref_folder = folder_locs[ref]
    print('COMPARING ALL FOLDERS TO REF: %s' %getFolderName(folder_locs[ref]))
    # ignore .pdf and .png, as folders are expected to have different numbers of these
    refDict = getExtensionCounts(ref_folder, ['pdf', 'png'])
    print('Reference Counts:', refDict)
    
    matches = 0
    problems = 0
    for folder_loc in folder_locs:
        # ignore .pdf and .png, as folders are expected to have different numbers of these
        discrepancies = getDiscrepancies(folder_loc, refDict)
        if discrepancies == '':
            matches += 1
        else:
            problems += 1
            print(discrepancies)

    print("matches: %i discrepancies: %i " %(matches, problems))
    print("match %:", matches/(matches + problems)*100)
    
    return

checkMapping(folder_locs)

COMPARING ALL FOLDERS TO REF: DK_0911_v04_pcontig_055_php_8kbp
Reference Counts: {'tab': 2, 'csv': 8, 'genome': 4, 'txt': 2, 'summary': 2, 'gz': 2, 'delta': 1, 'index': 4, 'bed': 6}
matches: 86 discrepancies: 0 
match %: 100.0
