forked from bcgsc/NanoSim
/
example.sh
69 lines (51 loc) · 2.58 KB
/
example.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/bin/bash
#####
# This script shows how the simulated reads in the paper were generated.
# All training datasets were downloaded from ENA and processed with poretools.
# Only the 2D pass reads were extracted; they are stored on the bcgsc ftp server.
#
# Every later step uses relative paths, so each `cd` is checked: if one fails,
# the script stops instead of downloading into the wrong directory.
#####

##### Download the source files
# -p: do not fail when the directory already exists (e.g. on a second run).
mkdir -p NanoSim
cd NanoSim || exit 1
# -nc: keep an existing master.zip instead of saving a second copy as master.zip.1.
wget -nc https://github.com/bcgsc/NanoSim/archive/master.zip
# -n: never overwrite already-extracted files, so a re-run does not stop at
# unzip's interactive "replace?" prompt.
unzip -n master.zip
# After this step you should have a folder called NanoSim that contains the
# archive master.zip and the extracted sub-folder NanoSim-master.

##### Inside NanoSim, create a working directory
mkdir -p ecoli_simulation
cd ecoli_simulation || exit 1
# 1. E. coli R7 dataset
# Origin: ftp://climb.genomics.cn/pub/10.5524/100001_101000/100102/Ecoli_R7_CombinedFasta.tgz
# wget -nc ("no clobber") skips a file that is already present, so re-running
# this section does not leave numbered duplicates (name.1, name.2, ...) behind.
# Caveat: a partial file left by an interrupted download is also kept; delete
# it by hand before re-running.
# Get the 2D reads
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_R7_2D.fasta
# Get the reference genome
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_K12_MG1655_ref.fa
# Profiling stage. read_analysis.py is invoked directly, so it must be
# executable (mode r-x or above, e.g. via `chmod +x`).
../NanoSim-master/src/read_analysis.py -i ecoli_R7_2D.fasta -r ecoli_K12_MG1655_ref.fa -o ecoli
# Simulation stage. test.fasta stands for the genome to be simulated; replace
# it with the correct path to your own file.
# Note: -c has to be the same as the -o given to read_analysis.py (here
# "ecoli"), or both have to be left at their default.
../NanoSim-master/src/simulator.py circular -r test.fasta -c ecoli
# To get the profile directly:
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_R7_profile.zip
# 2. E. coli R7.3 dataset
# Origin: http://www.ebi.ac.uk/ena/data/view/ERX708228, ERX708229, ERX708230, ERX708231
# Get the 2D reads
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_R73_2D.fasta
# Get the reference genome.
# This is the same file the R7 section downloads into this directory; -nc
# ("no clobber") skips it when already present instead of fetching it again
# and saving a duplicate as ecoli_K12_MG1655_ref.fa.1.
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_K12_MG1655_ref.fa
# To get the profile directly:
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_R73_profile.zip
# 3. E. coli UCSC phase1b dataset
# Origin: http://www.ebi.ac.uk/ena/data/view/ERP010368
# Get the 2D reads
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_UCSC_phase1b_2D.fasta
# Get the reference genome.
# The earlier E. coli sections download this same file into this directory;
# -nc ("no clobber") skips it when already present instead of saving yet
# another numbered copy (ecoli_K12_MG1655_ref.fa.1, .2, ...).
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_K12_MG1655_ref.fa
# To get the profile directly:
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_UCSC1b_profile.zip
# 4. S. cerevisiae dataset
# Origin: http://labshare.cshl.edu/shares/schatzlab/www-data/nanocorr/2015.07.07/W303_ONT_Raw_reads.fa.gz
# wget -nc ("no clobber") skips a file that is already present, so re-running
# this section does not leave numbered duplicates (name.1, name.2, ...) behind.
# Get the 2D reads
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/yeast_2D.fasta
# Get the reference genome
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/yeast_S288C_ref.fa
# To get the profile directly:
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/yeast_profile.zip