-
Notifications
You must be signed in to change notification settings - Fork 2
/
phylo_jackknife.py
executable file
·86 lines (66 loc) · 2.32 KB
/
phylo_jackknife.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#! /usr/bin/env python
# phylo_jackknife.py by Marek Borowiec
# 15 December 2014
# This program draws and concatenates random samples
# from a pool of single gene alignments
# for a phylogenetic jackknife analysis
import glob, shutil, random, os, subprocess
def complete(text, state):
    """Readline completer that offers filesystem paths beginning with ``text``.

    readline calls the completer with state = 0, 1, 2, ... and stops at the
    first non-string result; the ``None`` appended to the glob matches is
    that terminator.
    """
    return (glob.glob(text + '*') + [None])[state]


# Enable tab-completion of paths at the prompts when readline is available;
# without it the script still works, just without completion.
try:
    import readline
except ImportError:
    print("Module readline not available.")
else:
    # Not referenced below; kept so importing this script behaves as before.
    import rlcompleter
    # Only tab, newline and ';' split words, so '/', '.' and '-' stay part of
    # the path fragment handed to complete().
    readline.set_completer_delims('\t\n;')
    readline.parse_and_bind("tab: complete")
    readline.set_completer(complete)
prompt = "> "


def _ask(question):
    """Print ``question`` and return the reply without surrounding whitespace."""
    print(question)
    return raw_input(prompt).strip()


def _ask_count(question):
    """Ask for a positive whole number and return it as the string typed.

    The answer stays a string because the rest of the script both converts
    it with int() and concatenates it into directory and file names.

    Raises:
        ValueError: if the reply is not an integer or is smaller than 1.
    """
    reply = _ask(question)
    try:
        value = int(reply)
    except ValueError:
        raise ValueError("Expected a whole number, got %r" % reply)
    if value < 1:
        raise ValueError("Expected a number of at least 1, got %r" % reply)
    return reply


# Directory holding the single-locus alignments; outputs are written here too.
dirPath = _ask("""What is the path to your working directory?
It should contain only single-locus alignments in FASTA or NEXUS format.
example: /home/user/Data/AlignedFiles""")

# Despite its name this is the phyutility command prefix, not a directory:
# the input files and the -out option are appended to it later.
phyutDir = "java -jar " + _ask("""What is the path to your phyutility.jar file?
example: /home/user/Phylo-Software/Phyutility/phyutility.jar""") + " -concat -in "

lociNo = _ask_count("""How many loci do you want to use for your replicate?
example: 20""")

replicatesNo = _ask_count("""How many jackknife replicates do you want to perform?
example: 200""")

# Lower-cased so that "AA" or "Aa" is recognised by the later == 'aa' check;
# any other answer is still treated as DNA.
seqType = _ask("""Are your sequences amino acid or DNA?
(aa/dna)""").lower()
# Pool of candidate loci: every regular file directly inside dirPath
# (sub-directories, including earlier Sample_* folders, are skipped).
filenames = [f for f in os.listdir(dirPath)
             if os.path.isfile(os.path.join(dirPath, f))]

# Convert the answers once rather than on every iteration.
sampleSize = int(lociNo)
replicateCount = int(replicatesNo)

# random.sample would raise ValueError here anyway; fail up front with a
# message that says what is wrong and where.
if sampleSize > len(filenames):
    raise ValueError(
        "Cannot draw %d loci per replicate: only %d files found in %s"
        % (sampleSize, len(filenames), dirPath))

# One temporary directory per replicate, each holding a random subset of loci
# drawn without replacement.
directories = []
for i in range(1, replicateCount + 1):
    randomFiles = random.sample(filenames, sampleSize)
    # The trailing slash matters: the concatenation step appends '*' directly
    # to this string to build its shell glob.
    destDirectory = os.path.join(dirPath, 'Sample_' + lociNo + '_' + str(i)) + "/"
    print(destDirectory)
    # Raises OSError if the directory is left over from a previous run.
    os.mkdir(destDirectory)
    directories.append(destDirectory)
    for fname in randomFiles:
        shutil.copyfile(os.path.join(dirPath, fname),
                        os.path.join(destDirectory, fname))
# Concatenate the alignments of each sample directory into a single NEXUS
# file named after the sample and written directly under dirPath.
for counter, directory in enumerate(directories, 1):
    call_string = (phyutDir + directory + '* '
                   + '-out ' + dirPath + '/Sample_' + lociNo + '_' + str(counter) + '.nex')
    print(call_string)
    # shell=True so that the shell expands the '*' glob into the input files.
    # NOTE(review): paths are not quoted, so spaces in dirPath or in the
    # phyutility path will split the command.
    exitCode = subprocess.call(call_string, shell=True)
    if exitCode != 0:
        # Report the failure instead of continuing silently; the sample
        # directory is removed later whether or not this step succeeded.
        print("Warning: phyutility exited with status " + str(exitCode)
              + " for " + directory)
# For amino-acid data, rewrite the DATATYPE declaration in the concatenated
# NEXUS files from DNA to AA.
if seqType == 'aa':
    call_string = "sed -i 's/DATATYPE = DNA/DATATYPE = AA/g' Sample*nex"
    print(call_string)
    # The .nex files are written under dirPath, so expand the Sample*nex glob
    # there rather than in whatever directory the script was started from.
    # NOTE(review): 'sed -i' without a suffix argument is GNU sed syntax;
    # BSD/macOS sed expects an argument after -i.
    subprocess.call(call_string, shell=True, cwd=dirPath)
# Delete the per-replicate sample directories and the copied alignments.
for directory in directories:
    # Build the whole message inside the call; with the closing parenthesis
    # after `directory` this only printed correctly as a Python 2 print
    # statement and raised TypeError when print is a function.
    print("Removing directory " + directory + " and its contents...")
    shutil.rmtree(directory)