forked from jgurtowski/ectools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CA_Makefile
91 lines (59 loc) · 2.75 KB
/
CA_Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# ---------------------------------------------------------------------------
# Configuration
#
# All values can be overridden on the command line, e.g.
#   make BASENAME=other GENOME_SIZE=5000000
# ---------------------------------------------------------------------------

# Prefix shared by the gatekeeper, tig and overlap store names derived below.
BASENAME := p2

# Passed to the stats tool's -n50 option when summarising unitig lengths.
GENOME_SIZE := 4600000

# Recipes use bash process substitution, so plain /bin/sh is not sufficient.
SHELL := /bin/bash

# Every file rule writes its target through a shell redirection, so a recipe
# that exits non-zero would otherwise leave a truncated file that looks up to
# date. Ask make to delete such targets instead. Note this only triggers on
# the recipe's exit status, which for a pipeline is that of its last command.
.DELETE_ON_ERROR:

# Directory searched by the getsc / getrawsc helper targets.
CORRECTION_ROOT := /seq/schatz/a.thaliana/xiwang/james_workspace/nucmer-round-sim

# Site-specific tool locations.
TIGLEN_BIN := /sonas-hs/schatz/hpc/home/mschatz/build/bin/tig_length.pl
PBTOOLS_BIN := /sonas-hs/schatz/hpc/home/gurtowsk/workspace/pbtools
CA_BIN := /sonas-hs/schatz/hpc/home/gurtowsk/sources/wgs.svn/Linux-amd64/bin
AMOS_BIN := /sonas-hs/schatz/hpc/home/gurtowsk/sources/amos/bin
MUMMER_BIN := /sonas-hs/schatz/hpc/home/gurtowsk/sources/MUMmer

# Derived values are kept recursive (=) so they always follow the current
# value of the variable they are built from.
JOIN_BIN = $(PBTOOLS_BIN)/join.py
STATS_BIN = $(AMOS_BIN)/stats
GATEKEP_BIN = $(CA_BIN)/gatekeeper
TIGSTOR_BIN = $(CA_BIN)/tigStore
OVLSTOR_BIN = $(CA_BIN)/overlapStore

# Store paths, relative to the directory make is run in.
GTKSTOR = $(BASENAME).gkpStore
TIGSTOR = $(BASENAME).tigStore
OVLSTOR = $(BASENAME).ovlStore
# Aggregate goal: builds unitigs.layout.clean.names and unitigs.layout.stats
# (and, through their prerequisites, everything those two files need).
# It names no file of its own, hence the .PHONY declaration.
.PHONY: all
all: unitigs.layout.clean.names \
     unitigs.layout.stats
# Run tigStore with `-U -d layout` against the gatekeeper and tig stores and
# capture its standard output as the target file.
# No prerequisites are declared, so make only runs this when the file is
# missing; changes to the stores themselves do not trigger a rebuild.
unitigs.layout:
	$(TIGSTOR_BIN) -g $(GTKSTOR) -t $(TIGSTOR) 1 -U -d layout > $@
# Feed the layout through tig_length.pl and pipe the result into the stats
# tool (`-f 2 -n50 $(GENOME_SIZE)`); the stats output becomes the target.
unitigs.layout.stats: unitigs.layout
	$(TIGLEN_BIN) $< | $(STATS_BIN) -f 2 -n50 $(GENOME_SIZE) > $@
# Build a two-column, tab-separated index from `gatekeeper -dumpfragments`.
# Only lines containing "fragmentIdent" are kept. For each of them the third
# whitespace-separated field is split on ',' and written as
# <second part> TAB <first part>. A final "NONE<TAB>NONE" row is always
# appended after the last input line.
# No prerequisites are declared, so this is only built when the file is missing.
ReadIndex.txt:
	$(GATEKEP_BIN) -dumpfragments $(GTKSTOR) \
	  | grep "fragmentIdent" \
	  | awk '{ split($$3,a,","); print a[2]"\t"a[1]}END{print "NONE\tNONE"}' \
	  > $@
# Reduce the layout dump to "unitig" header lines and trimmed "FRG" lines:
#   * FRG lines have their first four fields blanked, then are printed;
#   * unitig lines are printed unchanged;
#   * every other line is dropped.
# sed then strips the leading whitespace left by the blanked fields and uniq
# collapses adjacent duplicate lines.
#
# ReadIndex.txt is not read by this recipe; it is listed as a prerequisite so
# that the index exists before anything depending on this file is built.
unitigs.layout.clean: unitigs.layout ReadIndex.txt
	awk '{if($$1=="FRG"){$$1=$$2=$$3=$$4=""; print $$0}else if($$1=="unitig"){print}}' $< \
	  | sed "s/^\s*//" \
	  | uniq > $@
# Run join.py on ReadIndex.txt and the cleaned layout, writing its standard
# output to the target. The trailing `true` is passed through to join.py as
# its third argument (meaning defined by join.py -- not visible here).
#
# ReadIndex.txt is read by the recipe, so it is declared as a prerequisite
# here rather than relying on an upstream rule happening to build it first.
unitigs.layout.clean.names: unitigs.layout.clean ReadIndex.txt
	python $(JOIN_BIN) ReadIndex.txt $< true > $@
# Flatten the cleaned layout into a two-column table. Each "unitig" line sets
# the current value (its second field); every other line emits
# <first field> TAB <current value>.
utg_frg.db: unitigs.layout.clean
	awk '{ if($$1=="unitig"){p = $$2}else{print $$1"\t"p}}' $< > $@
# Dump the overlap store with `overlapStore -d5 -d3 -d` and keep the text
# output. No prerequisites are declared, so it is only built when missing.
frg.overlaps:
	$(OVLSTOR_BIN) -d5 -d3 -d $(OVLSTOR) > $@
# Paste three streams side by side into one table:
#   1. join.py applied to utg_frg.db and column 1 of frg.overlaps,
#   2. join.py applied to utg_frg.db and column 2 of frg.overlaps,
#   3. columns 1 and 2 of frg.overlaps joined with '-'.
# Uses bash process substitution, which is why SHELL must be bash.
frg.utg.overlaps: frg.overlaps utg_frg.db
	paste \
	  <(python $(JOIN_BIN) utg_frg.db <(awk '{print $$1}' $<)) \
	  <(python $(JOIN_BIN) utg_frg.db <(awk '{print $$2}' $<)) \
	  <(awk '{print $$1"-"$$2}' $<) \
	  > $@
# Numerically sort the overlap table by its first column, then its second.
frg.utg.overlaps.sorted: frg.utg.overlaps
	sort -n -k 1,1 -k 2,2 $< > $@
# Interactive helper: run `overlapStore -p` for one id against the overlap and
# gatekeeper stores, with OBTCHIMERA as the final argument.
#
# Usage: make povl id=<value>
#
# `id` must be supplied on the command line. Without it the store path would
# silently shift into the -p argument position, so the recipe aborts early
# with a clear message instead.
.PHONY: povl
povl:
	@$(if $(strip $(id)),:,$(error povl requires an id -- usage: make povl id=<value>))
	$(OVLSTOR_BIN) -p $(id) $(OVLSTOR) $(GTKSTOR) OBTCHIMERA
# Interactive helper: print the lines for one read from its
# <idx>.delta.pre.r.sc file under $(CORRECTION_ROOT).
#
# Usage: make getsc read=<name>
#
# Steps (all in one shell so the variables persist):
#   rn  - the read name with everything from "_corrected" onwards removed;
#   idx - second column of the line(s) in $(CORRECTION_ROOT)/ReadIndex.txt
#         that match rn;
#   then grep rn in $(CORRECTION_ROOT)/<idx>.delta.pre.r.sc.
#
# `read` must be supplied: with an empty value rn is empty, `grep ""` matches
# every index line and the final path is garbage, so abort early instead.
.PHONY: getsc
getsc:
	@$(if $(strip $(read)),:,$(error getsc requires a read name -- usage: make getsc read=<name>))
	rn=$$(echo $(read) | awk '{split($$1,a,"_corrected"); print a[1]}') ; \
	idx=$$(grep "$$rn" $(CORRECTION_ROOT)/ReadIndex.txt | awk '{print $$2}'); \
	grep "$$rn" $(CORRECTION_ROOT)/$${idx}.delta.pre.r.sc
# Interactive helper: run MUMmer's show-coords (-rclo) on the <idx>.delta file
# for one read under $(CORRECTION_ROOT) and keep only the lines that mention
# the read.
#
# Usage: make getrawsc read=<name>
#
# Steps (all in one shell so the variables persist):
#   rn  - the read name with everything from "_corrected" onwards removed;
#   idx - second column of the line(s) in $(CORRECTION_ROOT)/ReadIndex.txt
#         that match rn;
#   then show-coords on $(CORRECTION_ROOT)/<idx>.delta, filtered by rn.
#
# `read` must be supplied: with an empty value rn is empty, `grep ""` matches
# every index line and the delta path is garbage, so abort early instead.
.PHONY: getrawsc
getrawsc:
	@$(if $(strip $(read)),:,$(error getrawsc requires a read name -- usage: make getrawsc read=<name>))
	rn=$$(echo $(read) | awk '{split($$1,a,"_corrected"); print a[1]}') ; \
	idx=$$(grep "$$rn" $(CORRECTION_ROOT)/ReadIndex.txt | awk '{print $$2}'); \
	$(MUMMER_BIN)/show-coords -rclo $(CORRECTION_ROOT)/$${idx}.delta | grep "$$rn"
# Remove the layout-derived products: the layout dump, its cleaned and named
# variants, the stats file and the read index.
# utg_frg.db and the frg.* overlap files are not removed by this target.
#
# Declared phony so that a stray file named "clean" in the working directory
# cannot make this target look up to date and turn `make clean` into a no-op.
.PHONY: clean
clean:
	rm -f unitigs.layout.clean.names unitigs.layout.clean \
	  unitigs.layout.stats ReadIndex.txt unitigs.layout