0
"""An even more elaborate DNA sequence simulation script with
sequence identity calculation (not overall, just between neighbours)."""
0
def simulate_sequence(length):
    '''Simulate a random DNA sequence.

    length -- number of nucleotides to generate; zero or a negative
              value yields an empty string.

    Returns a string of `length` characters, each drawn independently
    with random.choice from 'A', 'C', 'G' and 'T'.
    '''
    dna = ['A', 'C', 'G', 'T']
    #randomly picking from the nucleotide list; join avoids rebuilding
    #the string on every iteration
    return ''.join(random.choice(dna) for _ in range(length))
0
def nucleotide_percentage(sequence):
    '''Print the nucleotide counts of a sequence on one line.

    sequence -- string whose 'A', 'C', 'G' and 'T' characters are counted.

    Despite the name, absolute counts (not percentages) are printed, in
    the form "<n> As  <n> Cs  <n> Gs  <n> Ts ". Nothing is returned.
    '''
    #counting the nucleotides
    parts = [str(sequence.count(base)) + ' ' + base + 's '
             for base in ('A', 'C', 'G', 'T')]
    #a single print call with one argument behaves the same on Python 2
    #and Python 3; joining with ' ' reproduces the separator that the
    #chained "print ...," statements used to emit between the fields
    print(' '.join(parts))
0
def sequence_identity(seqset):
    '''Calculate sequence identities between neighbouring sequences.

    seqset -- ordered collection of sequences (strings).

    Returns a list with len(seqset) - 1 floats (empty for fewer than two
    sequences). Entry x is the number of positions at which seqset[x]
    and seqset[x + 1] carry the same nucleotide, divided by the length
    of seqset[x]. Positions beyond the end of the shorter sequence never
    count as identical, and an empty seqset[x] gives 0.0.

    The pair of indices being compared is printed for every comparison.
    '''
    iden = []
    #iterates through the sequences in the set -1
    #and calculates sequence identities
    for x in range(len(seqset) - 1):
        current = seqset[x]
        following = seqset[x + 1]
        print(str(x) + ' ' + str(x + 1))
        #zip stops at the shorter sequence, so neighbours of different
        #length cannot run past the end of either one
        count = sum(1 for a, b in zip(current, following) if a == b)
        #float() forces true division on Python 2 as well
        iden.append(float(count) / len(current) if current else 0.0)
    return iden
0
#command line: SETSIZE MINLENGTH MAXLENGTH
#  SETSIZE   -- number of sequences to simulate
#  MINLENGTH -- smallest sequence length (inclusive)
#  MAXLENGTH -- largest sequence length (inclusive)
if len(sys.argv) < 4:
    #exit with a readable message instead of an IndexError traceback
    sys.exit('usage: ' + sys.argv[0] + ' SETSIZE MINLENGTH MAXLENGTH')

setsize = int(sys.argv[1])
minlength = int(sys.argv[2])
maxlength = int(sys.argv[3])

#generates simulated sequence sets
sequenceset = []
for i in range(setsize):
    rlength = random.randint(minlength, maxlength)
    sequenceset.append(simulate_sequence(rlength))

#calculate sequence identities
identity = sequence_identity(sequenceset)

for i in range(len(sequenceset)):
    #the last sequence has no successor, so there is no identity for it
    if i < len(sequenceset) - 1:
        print('sequence identity to next sequence : ' + str(identity[i]))
    nucleotide_percentage(sequenceset[i])
Comments
No one has commented yet.