In [3]:
from Bio import SeqIO as io
import numpy as np
import pandas as pd 
import os 

def readSeqs(infile):
    print("Parsing Consensus Sequences...")
    parseFA=io.parse(infile,'fasta')
    depth=[float(k.description.split(" ")[2].split("coverage:")[1]) for k in parseFA]

    #parse and read length
    parseFA=io.parse(infile,'fasta')
    length=[int(k.description.split(" ")[3].split("length:")[1]) for k in parseFA]

    #parse and read all entries
    parseFA=io.parse(infile,'fasta')
    elements=[k for k in parseFA]
    return(elements,length,depth)

def selectSeqs(start,end,elements,length,depth,depthMin=1,lengthMin=1):
    print("Filtering Alignment...")
#    filtered=[s for s in elements if sum([s.seq[i]=="-" for i in range(start,end)])==0]
    filtered=[elements[x] for x in range(len(elements)) if ((depth[x]>depthMin) and (sum([elements[x].seq[i]=="-" for i in range(start,end)])<3))]
    print("Trimming Alignment...")
    trimmedSeqs=[filtered[i].seq[start:end] for i in range(len(filtered))]
    for i in range(len(filtered)):
        filtered[i].seq=trimmedSeqs[i]
    print("Total: "+str(len(filtered)))
    return(filtered)

def genotypeSummary(filtered,reference=""):
    if reference=="":
        clists=[[c for c in s.seq] for s in filtered]#for column in filtered alignment...
        tclists=np.transpose(clists)#transpose columns 
        #print(tclists)
        variantSites=[i for i in range(len(tclists)) if len(np.unique(tclists[i],return_counts=True)[0])>1]# sites with variation
        maxChars=[]
        for site in range(len(tclists)):
            maxChars=maxChars+[np.unique(tclists[site],return_counts=True)[0][np.argmax(np.unique(tclists[site],return_counts=True)[1])]]
        print("".join(maxChars))
        output=[]    
        for s in filtered:
            s.dbxrefs="_".join([str(i+1)+s.seq[i] for i in variantSites if s.seq[i]!=maxChars[i]])
            #print([s.seq[(i/3-i%3):(i%3+3)] for i in variantSites if s.seq[i]!=maxChars[i]])
            #s.translation="_".join([str(i)+s.seq[i] for i in variantSites if s.seq[i]!=maxChars[i]])
            #print(s.dbxrefs)
            output=output+[s]
        return(output)
    else:
        clists=[[c for c in s.seq] for s in filtered]#for column in filtered alignment...
        tclists=np.transpose(clists)#transpose columns 
        variantSites=[i for i in range(len(tclists)) if len(np.unique(tclists[i],return_counts=True)[0])>1]# sites with variation
        output=[]  
        for s in filtered:
            s.dbxrefs="_".join([str(i+1)+s.seq[i] for i in variantSites if s.seq[i]!=reference[i]])
            #print([s.seq[(i/3-i%3):(i%3+3)] for i in variantSites if s.seq[i]!=maxChars[i]])
            #s.translation="_".join([str(i)+s.seq[i] for i in variantSites if s.seq[i]!=maxChars[i]])
            #print(s.dbxrefs)
            output=output+[s]
        return(output)
        

In [2]:
infile="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/CVB3_new_allConsensus.fasta"
start=742
end=7297
elements,length,depth = readSeqs(infile)
filtered = selectSeqs(start,end,elements,length,depth,depthMin=25,lengthMin=1)
output=genotypeSummary(filtered)
io.write(output,handle="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/filtConsensus_CVB3ORF.fasta",format="fasta")
infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
infoFrame.to_csv("/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/filtConsensus_CVB3ORF.csv")

Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 404
ATGGGAGCTCAAGTATCAACGCAAAAGACTGGGGCACATGAGACCAGGCTGAATGCTAGCGGCAATTCCATCATTCACTACACAAATATTAATTATTACAAGGATGCCGCATCCAACTCAGCCAATCGGCAGGATTTCACTCAAGACCCGGGCAAGTTCACAGAACCAGTGAAAGATATCATGATTAAATCACTACCAGCTCTCAACTCCCCCACAGTAGAGGAGTGCGGATACAGTGACAGGGCGAGATCAATCACATTAGGTAACTCCACCATAACGACTCAGGAATGCGCCAACGTGGTGGTGGGCTATGGAGTATGGCCAGATTATCTAAAGGATAGTGAGGCAACAGCAGAGGACCAACCGACCCAACCAGACGTTGCCACATGTAGGTTCTATACCCTTGACTCTGTGCAATGGCAGAAAACCTCACCAGGATGGTGGTGGAAGCTGCCCGATGCTTTGTCGAACTTAGGACTGTTTGGGCAGAACATGCAGTACCACTACTTAGGCCGAACTGGGTATACCGTACATGTGCAGTGCAATGCATCTAAGTTCCACCAAGGATGCTTGCTAGTAGTGTGTGTACCGGAAGCTGAGATGGGTTGCGCAACGCTAGACAACACCCCATCCAGTGCAGAATTGCTGGGGGGCGATAGCGCAAAGGAGTTTGCGGACAAACCGGTCGCATCCGGGTCCAACAAGTTGGTACAGAGGGTGGTGTATAATGCAGGCATGGGGGTGGGTGTTGGAAACCTCACCATTTTCCCCCACCAATGGATCAACCTACGCACCAATAATAGTGCTACAATTGTGATGCCATACACCAACAGTGTACCTATGGATAACATGTTTAGGCATAACAACGTCACCCTAATGGTTATCCCATTTGTACCGCTAGATTACTGCCCTG

In [3]:
infile="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/EVA71_allConsensus.fasta"
start=745
end=7324
elements,length,depth = readSeqs(infile)
filtered = selectSeqs(start,end,elements,length,depth,depthMin=25,lengthMin=1)
output=genotypeSummary(filtered)
io.write(output,handle="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/filtConsensus_EVA71ORF.fasta",format="fasta")
infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
infoFrame.to_csv("/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/filtConsensus_EVA71ORF.csv")

Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 289
ATGGGCTCACAAGTGTCCACACAACGCTCCGGTTCACACGAAAACTCTAACTCAGCTACCGAGGGTTCCACTATAAACTATACTACCATTAATTACTATAAAGATTCCTATGCCGCCACAGCAGGTAAGCAGAGCCTTAAGCAGGACCCAGACAAGTTTGCAAATCCTGTCAAAGACATCTTCACTGAAATGGCAGCGCCATTAAAATCTCCATCTGCTGAGGCATGTGGTTACAGCGATCGGGTGGCACAATTAACTATTGGCAATTCTACCATCACTACGCAAGAAGCAGCAAACATCATAGTTGGCTATGGTGAGTGGCCTTCCTACTGTTCGGACTCTGATGCTACTGCAGTGGACAAACCAACGCGCCCAGATGTTTCGGTGAATAGGTTTTACACATTGGACACAAAATTGTGGGAGAAATCATCCAAGGGGTGGTACTGGAAATTCCCGGATGTGTTAACTGAAACCGGGGTCTTTGGTCAAAATGCACAGTTCCACTACCTCTATCGGTCAGGGTTCTGCATTCACGTGCAGTGCAATGCCAGTAAGTTCCACCAAGGAGCACTCCTAGTCGCTGTCCTCCCAGAGTATGTCATTGGGACAGTGGCAGGTGGCACAGGGACGGAGGATAGCCACCCCCCTTATAAGCAGACTCAACCCGGTGCTGATGGCTTCGAATTGCAACACCCGTACGTGCTTGATGCTGGCATTCCAATATCACAATTAACAGTGTGCCCACATCAGTGGATTAATTTGAGGACCAACAATTGTGCCACAATAATAGTGCCGTACATAAACGCACTACCCTTTGATTCTGCCTTGAACCATTGTAACTTTGGTCTGCTGGTTGTGCCTATTAGCCCGTTAGATTATGACCAAGGTGCGACGCCAGTGATCCCCATTACTA

In [4]:
infile="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/EVD68_allConsensus.fasta"
start=698
end=7262
elements,length,depth = readSeqs(infile)
filtered = selectSeqs(start,end,elements,length,depth,depthMin=25,lengthMin=1)
output=genotypeSummary(filtered)
io.write(output,handle="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/filtConsensus_EVd68ORF.fasta",format="fasta")
infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
infoFrame.to_csv("/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/filtConsensus_EVd68ORF.csv")

Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 255
ATGGGAGCTCAGGTTACTAGACAACAAACTGGCACTCATGAAAATGCCAACATTGCCACAAATGGATCTCATATCACATACAATCAGATAAACTTTTACAAGGATAGCTATGCGGCTTCAGCCAGCAAGCAGGATTTTTCACAGGACCCATCAAAATTCACTGAACCAGTAGTGGAAGGTTTAAAAGCAGGGGCGCCAGTTTTGAAATCTCCTAGTGCTGAGGCATGTGGCTACAGTGATAGAGTATTACAGCTCAAATTAGGAAATTCAGCTATTGTCACCCAGGAAGCAGCGAACTACTGCTGCGCTTATGGTGAATGGCCCAATTACTTACCAGACCATGAAGCAGTAGCCATTGATAAACCTACACAACCAGAAACTGCTACAGATAGATTCTACACTTTGAAATCAGTCAAATGGGAAACTGGAAGCACAGGATGGTGGTGGAAACTACCCGATGCACTGAATAATATAGGCATGTTTGGACAGAATGTGCAGCATCACTACCTATATAGATCTGGTTTCTTGATTCATGTGCAGTGTAATGCCACAAAATTCCATCAAGGTGCCTTATTAGTGGTAGCAATTCCAGAACATCAGAGGGGAGCGCACAACACCAACACTAGCCCAGGGTTTGATGATATAATGAAAGGTGAAGAAGGAGGGACCTTCAATCATCCATATGTCCTTGATGATGGAACATCATTGGCTTGTGCGACGATATTTCCACATCAGTGGATAAATCTGAGAACCAACAATTCAGCAACAATTGTTCTTCCCTGGATGAATGCTGCTCCAATGGATTTCCCACTTAGACATAATCAGTGGACGCTAGCAATAATACCAGTGGTGCCATTAGGCACGCGTACAACATCAAGTATGGTCCCAATAACAGTTTCAATCGCTCCAATGT

In [5]:
#41423 - passage
infile="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/EVA71_3h_P1_allConsensus.fasta"
start=745
end=7324
elements,length,depth = readSeqs(infile)
filtered = selectSeqs(start,end,elements,length,depth,depthMin=25,lengthMin=1)
output=genotypeSummary(filtered)
io.write(output,handle="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/filtConsensus_EVA71ORF_3h_P1.fasta",format="fasta")
infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
infoFrame.to_csv("/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/filtConsensus_EVA71ORF_3h_P1.csv")

Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 0



In [6]:
#41423 - passage
infile="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/EVA71_3h_P5_allConsensus.fasta"
start=745
end=7324
elements,length,depth = readSeqs(infile)
filtered = selectSeqs(start,end,elements,length,depth,depthMin=25,lengthMin=1)
output=genotypeSummary(filtered)
io.write(output,handle="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/filtConsensus_EVA71ORF_3h_P5.fasta",format="fasta")
infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
infoFrame.to_csv("/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive/filtConsensus_EVA71ORF_3h_P5.csv")

Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 10
ATGGGCTCACAAGTGTCCACACAACGCTCCGGTTCACACGAAAACTCTAACTCAGCTACCGAGGGTTCCACTATAAACTATACTACCATTAATTACTATAAAGATTCCTATGCCGCCACAGCAGGTAAGCAGAGCCTTAAGCAGGACCCAGACAAGTTTGCAAATCCTGTCAAAGACATCTTCACTGAAATGGCAGCGCCATTAAAATCTCCATCTGCTGAGGCATGTGGTTACAGCGATCGGGTGGCACAATTAACTATTGGCAATTCTACCATCACTACGCAAGAAGCAGCAAACATCATAGTTGGCTATGGTGAGTGGCCTTCCTACTGTTCGGACTCTGATGCTACTGCAGTGGACAAACCAACGCGCCCAGATGTTTCGGTGAATAGGTTTTACACATTGGACACAAAATTGTGGGAGAAATCATCCAAGGGGTGGTACTGGAAATTCCCGGATGTGTTAACTGAAACCGGGGTCTTTGGTCAAAATGCACAGTTCCACTACCTCTATCGGTCAGGGTTCTGCATTCACGTGCAGTGCAATGCTAGTAAGTTCCACCAAGGAGCACTCCTAGTCGCTGTCCTCCCAGAGTATGTCATTGGGACAGTGGCAGGTGGCACAGGGACGGAGGATAGCCACCCCCCTTATAAGCAGACTCAACCCGGTGCTGATGGCTTCGAATTGCAACACCCGTACGTGCTTGATGCTGGCATTCCAATATCACAATTAACAGTGTGCCCACATCAGTGGATTAATTTGAGGACCAACAATTGTGCCACAATAATAGTGCCGTACATAAACGCACTACCCTTTGATTCTGCCTTGAACCATTGTAACTTTGGTCTGCTGGTTGTGCCTATTAGCCCGTTAGATTATGACCAAGGTGCGACGCCAGTGATCCCCATTACTAT

In [7]:
#41423 - passage 1
infile="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/EVA71_6h_P1_allConsensus.fasta"
start=745
end=7324
elements,length,depth = readSeqs(infile)
filtered = selectSeqs(start,end,elements,length,depth,depthMin=25,lengthMin=1)
output=genotypeSummary(filtered)
io.write(output,handle="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/filtConsensus_EVA71ORF_6h_P1.fasta",format="fasta")
infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
infoFrame.to_csv("/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/filtConsensus_EVA71ORF_6h_P1.csv")

Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 348
ATGGGCTCACAAGTGTCCACACAACGCTCCGGTTCACACGAAAACTCTAACTCAGCTACCGAGGGTTCCACTATAAACTATACTACCATTAATTACTATAAAGATTCCTATGCCGCCACAGCAGGTAAGCAGAGCCTTAAGCAGGACCCAGACAAGTTTGCAAATCCTGTCAAAGACATCTTCACTGAAATGGCAGCGCCATTAAAATCTCCATCTGCTGAGGCATGTGGTTACAGCGATCGGGTGGCACAATTAACTATTGGCAATTCTACCATCACTACGCAAGAAGCAGCAAACATCATAGTTGGCTATGGTGAGTGGCCTTCCTACTGTTCGGACTCTGATGCTACTGCAGTGGACAAACCAACGCGCCCAGATGTTTCGGTGAATAGGTTTTACACATTGGACACAAAATTGTGGGAGAAATCATCCAAGGGGTGGTACTGGAAATTCCCGGATGTGTTAACTGAAACCGGGGTCTTTGGTCAAAATGCACAGTTCCACTACCTCTATCGGTCAGGGTTCTGCATTCACGTGCAGTGCAATGCCAGTAAGTTCCACCAAGGAGCACTCCTAGTCGCTGTCCTCCCAGAGTATGTCATTGGGACAGTGGCAGGTGGCACAGGGACGGAGGATAGCCACCCCCCTTATAAGCAGACTCAACCCGGTGCTGATGGCTTCGAATTGCAACACCCGTACGTGCTTGATGCTGGCATTCCAATATCACAATTAACAGTGTGCCCACATCAGTGGATTAATTTGAGGACCAACAATTGTGCCACAATAATAGTGCCGTACATAAACGCACTACCCTTTGATTCTGCCTTGAACCATTGTAACTTTGGTCTGCTGGTTGTGCCTATTAGCCCGTTAGATTATGACCAAGGTGCGACGCCAGTGATCCCCATTACTA

In [8]:
#41423 - passage 3
infile="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/EVA71_6h_P3_allConsensus.fasta"
start=745
end=7324
elements,length,depth = readSeqs(infile)
filtered = selectSeqs(start,end,elements,length,depth,depthMin=25,lengthMin=1)
output=genotypeSummary(filtered,"ATGGGCTCACAAGTGTCCACACAACGCTCCGGTTCACACGAAAACTCTAACTCAGCTACCGAGGGTTCCACTATAAACTATACTACCATTAATTACTATAAAGATTCCTATGCCGCCACAGCAGGTAAGCAGAGCCTTAAGCAGGACCCAGACAAGTTTGCAAATCCTGTCAAAGACATCTTCACTGAAATGGCAGCGCCATTAAAATCTCCATCTGCTGAGGCATGTGGTTACAGCGATCGGGTGGCACAATTAACTATTGGCAATTCTACCATCACTACGCAAGAAGCAGCAAACATCATAGTTGGCTATGGTGAGTGGCCTTCCTACTGTTCGGACTCTGATGCTACTGCAGTGGACAAACCAACGCGCCCAGATGTTTCGGTGAATAGGTTTTACACATTGGACACAAAATTGTGGGAGAAATCATCCAAGGGGTGGTACTGGAAATTCCCGGATGTGTTAACTGAAACCGGGGTCTTTGGTCAAAATGCACAGTTCCACTACCTCTATCGGTCAGGGTTCTGCATTCACGTGCAGTGCAATGCCAGTAAGTTCCACCAAGGAGCACTCCTAGTCGCTGTCCTCCCAGAGTATGTCATTGGGACAGTGGCAGGTGGCACAGGGACGGAGGATAGCCACCCCCCTTATAAGCAGACTCAACCCGGTGCTGATGGCTTCGAATTGCAACACCCGTACGTGCTTGATGCTGGCATTCCAATATCACAATTAACAGTGTGCCCACATCAGTGGATTAATTTGAGGACCAACAATTGTGCCACAATAATAGTGCCGTACATAAACGCACTACCCTTTGATTCTGCCTTGAACCATTGTAACTTTGGTCTGCTGGTTGTGCCTATTAGCCCGTTAGATTATGACCAAGGTGCGACGCCAGTGATCCCCATTACTATCACTTTGGCCCCAATGTGTTCTGAATTTGCAGGCCTTAGACAAGCAGTTACGCAAGGGTTTCCTACTGAGCTGAAACCTGGCACAAACCAATTTTTAACCACTGACGATGGCGTCTCAGCACCCATTCTGCCAAACTTTCACCCCACCCCGTGTATCCATATACCCGGTGAAGTTAGAAACTTGCTAGAGCTATGCCAGGTGGAGACCATTTTAGAGGTCAACAATGTACCTACGAATGCCACTAGCTTAATGGAGAGACTGCGCTTCCCGGTCTCAGCTCAAGCCGGGAAAGGTGAGCTATGTGCAGTGTTCAGAGCTGACCCTGGACGAAGTGGGCCATGGCAGTCCACCTTGTTGGGCCAGTTGTGCGGGTACTACACCCAATGGTCAGGATCACTGGAAGTCACCTTCATGTTCACCGGGTCCTTTATGGCTACCGGCAAGATGCTCATAGCATACACACCACCAGGAGGCCCCTTACCCAAGGACCGGGCGACCGCCATGTTGGGCACGCACGTCATCTGGGACTTTGGGCTGCAATCGTCTGTCACCCTTGTAATACCATGGATCAGCAACACTCATTACAGAGCGCACGCTCGAGATGGTGTGTTCGACTACTACACTACAGGTTTGGTTAGCATATGGTACCAGACGAATTATGTGGTTCCAATTGGGGCACCCAATACAGCCTATATAATAGCATTGGCGGCAGCCCAGAAGAACTTCACCATGAAGTTGTGTAAGGATGCTAGTGATATCCTACAGACAGGCACTATCCAGGGAGATAGGGTGGCAGATGTGATTGAGAGTTCTATAGGGGACAGTGTGAGCAGAGCCCTCACCCGAGCTCTACCGGCACCTACCGGCCAAGACACACAGGTAAGCAGCCACCGATTAGATACTGGTAAAGTTCCAGCACTCCAAGCCGCTGAAATTGGAGCATCATCAAATGCTAGTGATGAGAGTATGATTGAGACACGGTGTGTTCTTAATTCACATAGTACAGCTGAGACCACTCTTGATAGCTTCTTCAGCAGAGCAGGATTAGTTGGAGAGATAGACCTCCCTCTTGAAGGCACAACCAACCCGAATGGGTACGCAAACTGGGACATAGACATAACAGGTTACGCGCAAATGCGTAGAAAGGTGGAGCTGTTCACCTACATGCGTTTTGACGCAGAGTTCACCTTTGTTGCATGCACCCCTACCGGGCAAGTTGTCCCGCAATTGCTCCAATACATGTTTGTACCACCCGGAGCCCCCAAGCCAGACTCCAGAGAATCTCTCGCATGGCAAACTGCCACTAATCCCTCAGTTTTTGTGAAGCTGTCAGACCCCCCAGCACAGGTTTCTGTTCCATTCATGTCACCTGCGAGCGCCTATCAATGGTTTTATGACGGGTATCCCACATTCGGTGAACACAAACAGGAGAAAGACCTTGAATACGGGGCATGCCCAAACAACATGATGGGTACGTTCTCAGTGCGGACTGTAGGCACCTCGAAGTCCAAGTACCCATTGGTGATCAGGATTTACATGAGGATGAAGCACGTCAGGGCGTGGATACCTCGCCCAATGCGTAACCAGAACTATCTATTCAAAGCCAACCCAAATTATGCTGGTAATTCTATTAAACCAACTGGTGCCAGTCGCACAGCAATCACCACCCTCGGGAAATTTGGACAGCAGTCCGGAGCTATCTACGTGGGCAACTTTAGAGTGGTTAACCGCCATCTTGCTACTCATAATGACTGGGCAAACCTTGTTTGGGAAGACAGCTCCCGCGACTTGCTCGTATCATCTACCACTGCTCAAGGTTGTGACACGATTGCTCGTTGCAATTGCCAGACAGGAGTGTATTATTGTAACTCAATGAGAAAACACTATCCGGTCAGTTTCTCGAAACCCAGTTTGATCTTCGTGGAGGCCAGCGAGTATTATCCAGCTAGATACCAGTCACATCTCATGCTTGCAGTGGGTCATTCGGAACCAGGGGATTGCGGTGGCATTCTTAGATGCCAACATGGCGTCGTAGGGATAGTTTCCACCGGGGGAAACGGCCTGGTGGGGTTCGCCGATGTGAGGGATCTCCTGTGGTTGGATGATGAAGCCATGGAGCAGGGCGTGTCTGATTACATTAAAGGGCTTGGAGATGCTTTTGGCATGGGGTTTACAGACGCAGTGTCAAGAGAAGTTGAAGCACTGAAAAGTCACTTGATCGGCTCAGAGGGTGCCGTGGAGAAGATTCTAAAGAACTTAGTTAAACTCATCTCTGCGCTCGTCATCGTCATCAGGAGTGATTATGACATGGTCACATTGACGGCAACACTTGCCCTGATCGGGTGCCACGGGAGCCCTTGGGCCTGGGTTAAGTCGAAGACAGCATCAATCTTGGGCATACCGATGGCTCAGAAGCAGAGTGCCTCTTGGTTAAAGAAGTTCAACGATGCGGCGAGTGCCGCGAAGGGGCTTGAGTGGATCTCCAACAAAATCAGTAAATTTATCGATTGGCTCAAGGAGAAAATCATACCGGCTGCTAAAGAGAAAGTCGAGTTTCTAAACAATCTAAAGCAACTCCCCTTATTGGAGAACCAAATTTCTAATCTCGAACAGTCAGCAGCTTCGCAGGAGGACCTTGAGGCGATGTTTGGCAACGTGTCTTATCTGGCCCACTTCTGCCGCAAATTCCAACCCCTCTATGCCACGGAAGCAAAGAGGGTGTACGCCCTAGAAAAGAGAATGAATAATTACATGCAGTTCAAGAGCAAACACCGTATTGAACCTGTATGCCTAATCATCAGAGGCTCGCCTGGTACTGGGAAGTCCTTGGCAACAGGGATTATTGCTAGAGCCATAGCAGACAAGTACCACTCCAGTGTGTATTCCTTACCTCCAGACCCAGACCACTTTGACGGATACAAACAACAGATCGTCACTGTTATGGACGACCTATGCCAAAACCCAGACGGGAAAGACATGTCACTATTTTGTCAGATGGTCTCCACAGTGGATTTTATACCGCCTATGGCATCTCTGGAGGAGAAGGGAGTCTCATTCACCTCCAAGTTTGTGATTGCCTCCACTAACGCCAGTAACATCATAGTGCCAACAGTCTCGGATTCAGATGCCATTCGTCGCCGGTTCTTTATGGACTGCGATATTGAGGTGACCGATTCCTATAAGACAGAGCTGGGCAGACTTGATGCAGGGAGAGCAGCCAGGCTGTGCTCTGAGAACAACACTGCAAACTTTAAACGGTGCAGTCCATTAGTCTGTGGGAAAGCAATCCAGCTTAGGGATAGGAAGTCCAAGGTGAGATACAGTGTGGACACGGTAGTGAGTGAACTTATCAGGGAGTATAACAACAGATCAGTTATTGGGAACACCATTGAAGCTCTTTTCCAAGGACCCCCTAAATTTAGACCAATAAGGATTAGCTTAGAGGAGAAGCCCGCACCTGATGCTATTAGTGACTTATTAGCTAGTGTTGATAGTGAAGAGGTTCGCCAATACTGTAGAGATCAGGGATGGATTGTACCTGATTCTCCCACCAACGTTGAGCGCCACTTGAATAGAGCTGTCTTGATTATGCAGTCTGTAGCCACCGTGGTAGCAGTTGTGTCCCTTGTTTACGTCATCTACAAGTTGTTCGCCGGTTTTCAAGGAGCATATTCCGGCGCCCCCAAGCAAACACTCAAGAAACCAGTGCTGCGCACGGCAACTGTGCAGGGGCCGAGCTTGGACTTCGCCCTATCTCTACTTAGGAGGAACATTAGGCAGGTCCAAACCGACCAGGGCCACTTTACAATGTTAGGAGTGCGAGACCGCTTGGCTGTGCTCCCCAGACACTCCCAACCAGGAAAGACCATCTGGGTTGAACACAAATTAGTGAAGATCGTAGATGCTGTGGAGTTAGTAGACGAACAAGGGGTTAACTTAGAGCTCACACTGGTAACGCTTGATACTAACGAAAAATTTAGAGACATCACAAGATTCATACCAGAAACAATTAGTCCTGCTAGTGATGCCACTTTAGTTATAAATACTGAACATATGCCCAGTATGTTTGTGCCAGTTGGAGATGTGGTCCAGTATGGGTTTTTGAACCTTAGTGGTAAGCCCACTCACAGGACTATGATGTACAATTTCCCAACAAAAGCAGGACAGTGTGGTGGTGTTGTGACTGCCGTGGGTAAAGTGATTGGGATCCACATTGGTGGCAACGGTAGGCAAGGTTTCTGCGCTGCCCTGAAGAGGGGATACTTTTGCAGTGAACAAGGTGAGATCCAATGGATGAAGCCCAACAAAGAAACTGGCAGGTTGAACATCAACGGACCTACTCGCACTAAGCTTGAACCAAGTGTCTTTCACGATGTGTTCGAAGGCACTAAAGAGCCAGCAGTGCTGACTAGTAAAGACCCAAGGCTGGAAGTTGACTTTGAACAGGCTCTTTTTTCAAAATACGTGGGGAACACGCTTCATGAACCCGACGAGTTTGTCAAGGAGGCGGCCTTACATTATGCCAACCAACTCAAGCAGTTAGATATCAAGACCACCAAGATGAGCATGGAGGATGCATGTTACGGCACAGAGAACCTGGAAGCTATAGATCTTCACACAAGTGCAGGATATCCATACAGTGCACTAGGCATCAAGAAAAAGGACATTTTGGATCCAACAACTCGCGATGTCAGCAAGATGAAATTCTACATGGACAAGTATGGGTTGGATCTACCGTACTCTACTTATGTTAAAGATGAACTTAGGGCCATCGACAAGATCAAGAAAGGGAAGTCTCGTCTCATAGAAGCGAGCAGTCTAAATGACTCAGTGTACTTGAGAATGACATTTGGGCACCTTTATGAAGCTTTCCACGCCAATCCAGGTACAATCACTGGTTCAGCTGTTGGGTGTAACCCAGATGTGTTCTGGAGCAAGTTACCAATTCTACTTCCAGGATCGCTTTTCGCGTTTGACTACTCGGGGTATGACGCTAGTCTCAGCCCAGTGTGGTTCAGGGCGCTGGAGATAGTCCTGCGGGAAATTGGATACTCCGAAGACGCAGTGTCTCTCATAGAAGGGATCAATCACACCCATCATGTGTACCGCAATAAAACTTATTGTGTTCTTGGGGGAATGCCCTCAGGTTGCTCAGGCACCTCCATTTTCAACTCGATGATCAACAATATCATTATTAGAACACTCCTGATTAAAACATTCAAAGGGATAGATCTAGATGAACTGAACATGGTGGCCTACGGGGATGATGTGTTGGCTAGTTACCCCTTCCCAATTGACTGTCTGGAGTTGGCAAGAACAGGCAAGGAGTATGGTCTAACTATGACCCCTGCCGACAAGTCACCCTGCTTTAATGAGGTTACATGGGAGAATGCCACTTTCTTGAAGAGAGGATTCTTGCCTGATCATCAATTCCCGTTTCTCATCCACCCTACGATGCCAATGAGGGAGATTCACGAATCCATTCGTTGGACCAAAGATGCACGAAGTACTCAAGATCACGTGCGCTCCCTCTGCTTATTAGCATGGCACAACGGGAAAGAGGAGTATGAAAAATTTGTGAGTGCAATCAGATCAGTTCCAATTGGAAAAGCATTGGCTATACCAAATTATGAGAATCTGAGAAGAAATTGGCTCGAATTGTTT")
io.write(output,handle="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/filtConsensus_EVA71ORF_6h_P3.fasta",format="fasta")
infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
infoFrame.to_csv("/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/filtConsensus_EVA71ORF_6h_P3.csv")
print("done")

Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 220
done


In [4]:
#41423 - passage 5
infile="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/EVA71_6h_P5_allConsensus.fasta"
start=745
end=7324
elements,length,depth = readSeqs(infile)
filtered = selectSeqs(start,end,elements,length,depth,depthMin=25,lengthMin=1)
output=genotypeSummary(filtered,"ATGGGCTCACAAGTGTCCACACAACGCTCCGGTTCACACGAAAACTCTAACTCAGCTACCGAGGGTTCCACTATAAACTATACTACCATTAATTACTATAAAGATTCCTATGCCGCCACAGCAGGTAAGCAGAGCCTTAAGCAGGACCCAGACAAGTTTGCAAATCCTGTCAAAGACATCTTCACTGAAATGGCAGCGCCATTAAAATCTCCATCTGCTGAGGCATGTGGTTACAGCGATCGGGTGGCACAATTAACTATTGGCAATTCTACCATCACTACGCAAGAAGCAGCAAACATCATAGTTGGCTATGGTGAGTGGCCTTCCTACTGTTCGGACTCTGATGCTACTGCAGTGGACAAACCAACGCGCCCAGATGTTTCGGTGAATAGGTTTTACACATTGGACACAAAATTGTGGGAGAAATCATCCAAGGGGTGGTACTGGAAATTCCCGGATGTGTTAACTGAAACCGGGGTCTTTGGTCAAAATGCACAGTTCCACTACCTCTATCGGTCAGGGTTCTGCATTCACGTGCAGTGCAATGCCAGTAAGTTCCACCAAGGAGCACTCCTAGTCGCTGTCCTCCCAGAGTATGTCATTGGGACAGTGGCAGGTGGCACAGGGACGGAGGATAGCCACCCCCCTTATAAGCAGACTCAACCCGGTGCTGATGGCTTCGAATTGCAACACCCGTACGTGCTTGATGCTGGCATTCCAATATCACAATTAACAGTGTGCCCACATCAGTGGATTAATTTGAGGACCAACAATTGTGCCACAATAATAGTGCCGTACATAAACGCACTACCCTTTGATTCTGCCTTGAACCATTGTAACTTTGGTCTGCTGGTTGTGCCTATTAGCCCGTTAGATTATGACCAAGGTGCGACGCCAGTGATCCCCATTACTATCACTTTGGCCCCAATGTGTTCTGAATTTGCAGGCCTTAGACAAGCAGTTACGCAAGGGTTTCCTACTGAGCTGAAACCTGGCACAAACCAATTTTTAACCACTGACGATGGCGTCTCAGCACCCATTCTGCCAAACTTTCACCCCACCCCGTGTATCCATATACCCGGTGAAGTTAGAAACTTGCTAGAGCTATGCCAGGTGGAGACCATTTTAGAGGTCAACAATGTACCTACGAATGCCACTAGCTTAATGGAGAGACTGCGCTTCCCGGTCTCAGCTCAAGCCGGGAAAGGTGAGCTATGTGCAGTGTTCAGAGCTGACCCTGGACGAAGTGGGCCATGGCAGTCCACCTTGTTGGGCCAGTTGTGCGGGTACTACACCCAATGGTCAGGATCACTGGAAGTCACCTTCATGTTCACCGGGTCCTTTATGGCTACCGGCAAGATGCTCATAGCATACACACCACCAGGAGGCCCCTTACCCAAGGACCGGGCGACCGCCATGTTGGGCACGCACGTCATCTGGGACTTTGGGCTGCAATCGTCTGTCACCCTTGTAATACCATGGATCAGCAACACTCATTACAGAGCGCACGCTCGAGATGGTGTGTTCGACTACTACACTACAGGTTTGGTTAGCATATGGTACCAGACGAATTATGTGGTTCCAATTGGGGCACCCAATACAGCCTATATAATAGCATTGGCGGCAGCCCAGAAGAACTTCACCATGAAGTTGTGTAAGGATGCTAGTGATATCCTACAGACAGGCACTATCCAGGGAGATAGGGTGGCAGATGTGATTGAGAGTTCTATAGGGGACAGTGTGAGCAGAGCCCTCACCCGAGCTCTACCGGCACCTACCGGCCAAGACACACAGGTAAGCAGCCACCGATTAGATACTGGTAAAGTTCCAGCACTCCAAGCCGCTGAAATTGGAGCATCATCAAATGCTAGTGATGAGAGTATGATTGAGACACGGTGTGTTCTTAATTCACATAGTACAGCTGAGACCACTCTTGATAGCTTCTTCAGCAGAGCAGGATTAGTTGGAGAGATAGACCTCCCTCTTGAAGGCACAACCAACCCGAATGGGTACGCAAACTGGGACATAGACATAACAGGTTACGCGCAAATGCGTAGAAAGGTGGAGCTGTTCACCTACATGCGTTTTGACGCAGAGTTCACCTTTGTTGCATGCACCCCTACCGGGCAAGTTGTCCCGCAATTGCTCCAATACATGTTTGTACCACCCGGAGCCCCCAAGCCAGACTCCAGAGAATCTCTCGCATGGCAAACTGCCACTAATCCCTCAGTTTTTGTGAAGCTGTCAGACCCCCCAGCACAGGTTTCTGTTCCATTCATGTCACCTGCGAGCGCCTATCAATGGTTTTATGACGGGTATCCCACATTCGGTGAACACAAACAGGAGAAAGACCTTGAATACGGGGCATGCCCAAACAACATGATGGGTACGTTCTCAGTGCGGACTGTAGGCACCTCGAAGTCCAAGTACCCATTGGTGATCAGGATTTACATGAGGATGAAGCACGTCAGGGCGTGGATACCTCGCCCAATGCGTAACCAGAACTATCTATTCAAAGCCAACCCAAATTATGCTGGTAATTCTATTAAACCAACTGGTGCCAGTCGCACAGCAATCACCACCCTCGGGAAATTTGGACAGCAGTCCGGAGCTATCTACGTGGGCAACTTTAGAGTGGTTAACCGCCATCTTGCTACTCATAATGACTGGGCAAACCTTGTTTGGGAAGACAGCTCCCGCGACTTGCTCGTATCATCTACCACTGCTCAAGGTTGTGACACGATTGCTCGTTGCAATTGCCAGACAGGAGTGTATTATTGTAACTCAATGAGAAAACACTATCCGGTCAGTTTCTCGAAACCCAGTTTGATCTTCGTGGAGGCCAGCGAGTATTATCCAGCTAGATACCAGTCACATCTCATGCTTGCAGTGGGTCATTCGGAACCAGGGGATTGCGGTGGCATTCTTAGATGCCAACATGGCGTCGTAGGGATAGTTTCCACCGGGGGAAACGGCCTGGTGGGGTTCGCCGATGTGAGGGATCTCCTGTGGTTGGATGATGAAGCCATGGAGCAGGGCGTGTCTGATTACATTAAAGGGCTTGGAGATGCTTTTGGCATGGGGTTTACAGACGCAGTGTCAAGAGAAGTTGAAGCACTGAAAAGTCACTTGATCGGCTCAGAGGGTGCCGTGGAGAAGATTCTAAAGAACTTAGTTAAACTCATCTCTGCGCTCGTCATCGTCATCAGGAGTGATTATGACATGGTCACATTGACGGCAACACTTGCCCTGATCGGGTGCCACGGGAGCCCTTGGGCCTGGGTTAAGTCGAAGACAGCATCAATCTTGGGCATACCGATGGCTCAGAAGCAGAGTGCCTCTTGGTTAAAGAAGTTCAACGATGCGGCGAGTGCCGCGAAGGGGCTTGAGTGGATCTCCAACAAAATCAGTAAATTTATCGATTGGCTCAAGGAGAAAATCATACCGGCTGCTAAAGAGAAAGTCGAGTTTCTAAACAATCTAAAGCAACTCCCCTTATTGGAGAACCAAATTTCTAATCTCGAACAGTCAGCAGCTTCGCAGGAGGACCTTGAGGCGATGTTTGGCAACGTGTCTTATCTGGCCCACTTCTGCCGCAAATTCCAACCCCTCTATGCCACGGAAGCAAAGAGGGTGTACGCCCTAGAAAAGAGAATGAATAATTACATGCAGTTCAAGAGCAAACACCGTATTGAACCTGTATGCCTAATCATCAGAGGCTCGCCTGGTACTGGGAAGTCCTTGGCAACAGGGATTATTGCTAGAGCCATAGCAGACAAGTACCACTCCAGTGTGTATTCCTTACCTCCAGACCCAGACCACTTTGACGGATACAAACAACAGATCGTCACTGTTATGGACGACCTATGCCAAAACCCAGACGGGAAAGACATGTCACTATTTTGTCAGATGGTCTCCACAGTGGATTTTATACCGCCTATGGCATCTCTGGAGGAGAAGGGAGTCTCATTCACCTCCAAGTTTGTGATTGCCTCCACTAACGCCAGTAACATCATAGTGCCAACAGTCTCGGATTCAGATGCCATTCGTCGCCGGTTCTTTATGGACTGCGATATTGAGGTGACCGATTCCTATAAGACAGAGCTGGGCAGACTTGATGCAGGGAGAGCAGCCAGGCTGTGCTCTGAGAACAACACTGCAAACTTTAAACGGTGCAGTCCATTAGTCTGTGGGAAAGCAATCCAGCTTAGGGATAGGAAGTCCAAGGTGAGATACAGTGTGGACACGGTAGTGAGTGAACTTATCAGGGAGTATAACAACAGATCAGTTATTGGGAACACCATTGAAGCTCTTTTCCAAGGACCCCCTAAATTTAGACCAATAAGGATTAGCTTAGAGGAGAAGCCCGCACCTGATGCTATTAGTGACTTATTAGCTAGTGTTGATAGTGAAGAGGTTCGCCAATACTGTAGAGATCAGGGATGGATTGTACCTGATTCTCCCACCAACGTTGAGCGCCACTTGAATAGAGCTGTCTTGATTATGCAGTCTGTAGCCACCGTGGTAGCAGTTGTGTCCCTTGTTTACGTCATCTACAAGTTGTTCGCCGGTTTTCAAGGAGCATATTCCGGCGCCCCCAAGCAAACACTCAAGAAACCAGTGCTGCGCACGGCAACTGTGCAGGGGCCGAGCTTGGACTTCGCCCTATCTCTACTTAGGAGGAACATTAGGCAGGTCCAAACCGACCAGGGCCACTTTACAATGTTAGGAGTGCGAGACCGCTTGGCTGTGCTCCCCAGACACTCCCAACCAGGAAAGACCATCTGGGTTGAACACAAATTAGTGAAGATCGTAGATGCTGTGGAGTTAGTAGACGAACAAGGGGTTAACTTAGAGCTCACACTGGTAACGCTTGATACTAACGAAAAATTTAGAGACATCACAAGATTCATACCAGAAACAATTAGTCCTGCTAGTGATGCCACTTTAGTTATAAATACTGAACATATGCCCAGTATGTTTGTGCCAGTTGGAGATGTGGTCCAGTATGGGTTTTTGAACCTTAGTGGTAAGCCCACTCACAGGACTATGATGTACAATTTCCCAACAAAAGCAGGACAGTGTGGTGGTGTTGTGACTGCCGTGGGTAAAGTGATTGGGATCCACATTGGTGGCAACGGTAGGCAAGGTTTCTGCGCTGCCCTGAAGAGGGGATACTTTTGCAGTGAACAAGGTGAGATCCAATGGATGAAGCCCAACAAAGAAACTGGCAGGTTGAACATCAACGGACCTACTCGCACTAAGCTTGAACCAAGTGTCTTTCACGATGTGTTCGAAGGCACTAAAGAGCCAGCAGTGCTGACTAGTAAAGACCCAAGGCTGGAAGTTGACTTTGAACAGGCTCTTTTTTCAAAATACGTGGGGAACACGCTTCATGAACCCGACGAGTTTGTCAAGGAGGCGGCCTTACATTATGCCAACCAACTCAAGCAGTTAGATATCAAGACCACCAAGATGAGCATGGAGGATGCATGTTACGGCACAGAGAACCTGGAAGCTATAGATCTTCACACAAGTGCAGGATATCCATACAGTGCACTAGGCATCAAGAAAAAGGACATTTTGGATCCAACAACTCGCGATGTCAGCAAGATGAAATTCTACATGGACAAGTATGGGTTGGATCTACCGTACTCTACTTATGTTAAAGATGAACTTAGGGCCATCGACAAGATCAAGAAAGGGAAGTCTCGTCTCATAGAAGCGAGCAGTCTAAATGACTCAGTGTACTTGAGAATGACATTTGGGCACCTTTATGAAGCTTTCCACGCCAATCCAGGTACAATCACTGGTTCAGCTGTTGGGTGTAACCCAGATGTGTTCTGGAGCAAGTTACCAATTCTACTTCCAGGATCGCTTTTCGCGTTTGACTACTCGGGGTATGACGCTAGTCTCAGCCCAGTGTGGTTCAGGGCGCTGGAGATAGTCCTGCGGGAAATTGGATACTCCGAAGACGCAGTGTCTCTCATAGAAGGGATCAATCACACCCATCATGTGTACCGCAATAAAACTTATTGTGTTCTTGGGGGAATGCCCTCAGGTTGCTCAGGCACCTCCATTTTCAACTCGATGATCAACAATATCATTATTAGAACACTCCTGATTAAAACATTCAAAGGGATAGATCTAGATGAACTGAACATGGTGGCCTACGGGGATGATGTGTTGGCTAGTTACCCCTTCCCAATTGACTGTCTGGAGTTGGCAAGAACAGGCAAGGAGTATGGTCTAACTATGACCCCTGCCGACAAGTCACCCTGCTTTAATGAGGTTACATGGGAGAATGCCACTTTCTTGAAGAGAGGATTCTTGCCTGATCATCAATTCCCGTTTCTCATCCACCCTACGATGCCAATGAGGGAGATTCACGAATCCATTCGTTGGACCAAAGATGCACGAAGTACTCAAGATCACGTGCGCTCCCTCTGCTTATTAGCATGGCACAACGGGAAAGAGGAGTATGAAAAATTTGTGAGTGCAATCAGATCAGTTCCAATTGGAAAAGCATTGGCTATACCAAATTATGAGAATCTGAGAAGAAATTGGCTCGAATTGTTT")
io.write(output,handle="/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/filtConsensus_EVA71ORF_6h_P5.fasta",format="fasta")
infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
infoFrame.to_csv("/hpcdata/lvd_qve/Projects/PacBio_virus-inclusive_EVA71Passage/filtConsensus_EVA71ORF_6h_P5.csv")
print("done")

Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 497
done


In [None]:
#41423 - passage
#! cat /hpcdata/lvd_qve/Sequencing_Data/QVEU_Seq_0091_NextSeq_NDAS_VP4DMS_scRNAseq-1stRun/230823_VH01023_29_AAC2YKCHV/Analysis/1/Data/fastq/VP4_SubLib1_RD_ATCC_S1_CBC/pUC19*BC.fasta > /hpcdata/lvd_qve/Sequencing_Data/QVEU_Seq_0091_NextSeq_NDAS_VP4DMS_scRNAseq-1stRun/230823_VH01023_29_AAC2YKCHV/Analysis/1/Data/fastq/VP4_SubLib1_RD_ATCC_S1_CBC/allConsensus.fasta
infile="/hpcdata/lvd_qve/Sequencing_Data/QVEU_Seq_0091_NextSeq_NDAS_VP4DMS_scRNAseq-1stRun/230823_VH01023_29_AAC2YKCHV/Analysis/1/Data/fastq/VP4_SubLib2_RD_ATCC_S2.fasta"
start=745
end=952
elements,length,depth = readSeqs(infile)
filtered = selectSeqs(start,end,elements,length,depth,depthMin=1,lengthMin=1)
output=genotypeSummary(filtered,"ATGGGCTCACAAGTGTCCACACAACGCTCCGGTTCACACGAAAACTCTAACTCAGCTACCGAGGGTTCCACTATAAACTATACTACCATTAATTACTATAAAGATTCCTATGCCGCCACAGCAGGTAAGCAGAGCCTTAAGCAGGACCCAGACAAGTTTGCAAATCCTGTCAAAGACATCTTCACTGAAATGGCAGCGCCATTAAAATCTCCATCTGCTGAGGCATGTGGTTACAGCGATCGGGTGGCACAATTAACTATTGGCAATTCTACCATCACTACGCAAGAAGCAGCAAACATCATAGTTGGCTATGGTGAGTGGCCTTCCTACTGTTCGGACTCTGATGCTACTGCAGTGGACAAACCAACGCGCCCAGATGTTTCGGTGAATAGGTTTTACACATTGGACACAAAATTGTGGGAGAAATCATCCAAGGGGTGGTACTGGAAATTCCCGGATGTGTTAACTGAAACCGGGGTCTTTGGTCAAAATGCACAGTTCCACTACCTCTATCGGTCAGGGTTCTGCATTCACGTGCAGTGCAATGCCAGTAAGTTCCACCAAGGAGCACTCCTAGTCGCTGTCCTCCCAGAGTATGTCATTGGGACAGTGGCAGGTGGCACAGGGACGGAGGATAGCCACCCCCCTTATAAGCAGACTCAACCCGGTGCTGATGGCTTCGAATTGCAACACCCGTACGTGCTTGATGCTGGCATTCCAATATCACAATTAACAGTGTGCCCACATCAGTGGATTAATTTGAGGACCAACAATTGTGCCACAATAATAGTGCCGTACATAAACGCACTACCCTTTGATTCTGCCTTGAACCATTGTAACTTTGGTCTGCTGGTTGTGCCTATTAGCCCGTTAGATTATGACCAAGGTGCGACGCCAGTGATCCCCATTACTATCACTTTGGCCCCAATGTGTTCTGAATTTGCAGGCCTTAGACAAGCAGTTACGCAAGGGTTTCCTACTGAGCTGAAACCTGGCACAAACCAATTTTTAACCACTGACGATGGCGTCTCAGCACCCATTCTGCCAAACTTTCACCCCACCCCGTGTATCCATATACCCGGTGAAGTTAGAAACTTGCTAGAGCTATGCCAGGTGGAGACCATTTTAGAGGTCAACAATGTACCTACGAATGCCACTAGCTTAATGGAGAGACTGCGCTTCCCGGTCTCAGCTCAAGCCGGGAAAGGTGAGCTATGTGCAGTGTTCAGAGCTGACCCTGGACGAAGTGGGCCATGGCAGTCCACCTTGTTGGGCCAGTTGTGCGGGTACTACACCCAATGGTCAGGATCACTGGAAGTCACCTTCATGTTCACCGGGTCCTTTATGGCTACCGGCAAGATGCTCATAGCATACACACCACCAGGAGGCCCCTTACCCAAGGACCGGGCGACCGCCATGTTGGGCACGCACGTCATCTGGGACTTTGGGCTGCAATCGTCTGTCACCCTTGTAATACCATGGATCAGCAACACTCATTACAGAGCGCACGCTCGAGATGGTGTGTTCGACTACTACACTACAGGTTTGGTTAGCATATGGTACCAGACGAATTATGTGGTTCCAATTGGGGCACCCAATACAGCCTATATAATAGCATTGGCGGCAGCCCAGAAGAACTTCACCATGAAGTTGTGTAAGGATGCTAGTGATATCCTACAGACAGGCACTATCCAGGGAGATAGGGTGGCAGATGTGATTGAGAGTTCTATAGGGGACAGTGTGAGCAGAGCCCTCACCCGAGCTCTACCGGCACCTACCGGCCAAGACACACAGGTAAGCAGCCACCGATTAGATACTGGTAAAGTTCCAGCACTCCAAGCCGCTGAAATTGGAGCATCATCAAATGCTAGTGATGAGAGTATGATTGAGACACGGTGTGTTCTTAATTCACATAGTACAGCTGAGACCACTCTTGATAGCTTCTTCAGCAGAGCAGGATTAGTTGGAGAGATAGACCTCCCTCTTGAAGGCACAACCAACCCGAATGGGTACGCAAACTGGGACATAGACATAACAGGTTACGCGCAAATGCGTAGAAAGGTGGAGCTGTTCACCTACATGCGTTTTGACGCAGAGTTCACCTTTGTTGCATGCACCCCTACCGGGCAAGTTGTCCCGCAATTGCTCCAATACATGTTTGTACCACCCGGAGCCCCCAAGCCAGACTCCAGAGAATCTCTCGCATGGCAAACTGCCACTAATCCCTCAGTTTTTGTGAAGCTGTCAGACCCCCCAGCACAGGTTTCTGTTCCATTCATGTCACCTGCGAGCGCCTATCAATGGTTTTATGACGGGTATCCCACATTCGGTGAACACAAACAGGAGAAAGACCTTGAATACGGGGCATGCCCAAACAACATGATGGGTACGTTCTCAGTGCGGACTGTAGGCACCTCGAAGTCCAAGTACCCATTGGTGATCAGGATTTACATGAGGATGAAGCACGTCAGGGCGTGGATACCTCGCCCAATGCGTAACCAGAACTATCTATTCAAAGCCAACCCAAATTATGCTGGTAATTCTATTAAACCAACTGGTGCCAGTCGCACAGCAATCACCACCCTCGGGAAATTTGGACAGCAGTCCGGAGCTATCTACGTGGGCAACTTTAGAGTGGTTAACCGCCATCTTGCTACTCATAATGACTGGGCAAACCTTGTTTGGGAAGACAGCTCCCGCGACTTGCTCGTATCATCTACCACTGCTCAAGGTTGTGACACGATTGCTCGTTGCAATTGCCAGACAGGAGTGTATTATTGTAACTCAATGAGAAAACACTATCCGGTCAGTTTCTCGAAACCCAGTTTGATCTTCGTGGAGGCCAGCGAGTATTATCCAGCTAGATACCAGTCACATCTCATGCTTGCAGTGGGTCATTCGGAACCAGGGGATTGCGGTGGCATTCTTAGATGCCAACATGGCGTCGTAGGGATAGTTTCCACCGGGGGAAACGGCCTGGTGGGGTTCGCCGATGTGAGGGATCTCCTGTGGTTGGATGATGAAGCCATGGAGCAGGGCGTGTCTGATTACATTAAAGGGCTTGGAGATGCTTTTGGCATGGGGTTTACAGACGCAGTGTCAAGAGAAGTTGAAGCACTGAAAAGTCACTTGATCGGCTCAGAGGGTGCCGTGGAGAAGATTCTAAAGAACTTAGTTAAACTCATCTCTGCGCTCGTCATCGTCATCAGGAGTGATTATGACATGGTCACATTGACGGCAACACTTGCCCTGATCGGGTGCCACGGGAGCCCTTGGGCCTGGGTTAAGTCGAAGACAGCATCAATCTTGGGCATACCGATGGCTCAGAAGCAGAGTGCCTCTTGGTTAAAGAAGTTCAACGATGCGGCGAGTGCCGCGAAGGGGCTTGAGTGGATCTCCAACAAAATCAGTAAATTTATCGATTGGCTCAAGGAGAAAATCATACCGGCTGCTAAAGAGAAAGTCGAGTTTCTAAACAATCTAAAGCAACTCCCCTTATTGGAGAACCAAATTTCTAATCTCGAACAGTCAGCAGCTTCGCAGGAGGACCTTGAGGCGATGTTTGGCAACGTGTCTTATCTGGCCCACTTCTGCCGCAAATTCCAACCCCTCTATGCCACGGAAGCAAAGAGGGTGTACGCCCTAGAAAAGAGAATGAATAATTACATGCAGTTCAAGAGCAAACACCGTATTGAACCTGTATGCCTAATCATCAGAGGCTCGCCTGGTACTGGGAAGTCCTTGGCAACAGGGATTATTGCTAGAGCCATAGCAGACAAGTACCACTCCAGTGTGTATTCCTTACCTCCAGACCCAGACCACTTTGACGGATACAAACAACAGATCGTCACTGTTATGGACGACCTATGCCAAAACCCAGACGGGAAAGACATGTCACTATTTTGTCAGATGGTCTCCACAGTGGATTTTATACCGCCTATGGCATCTCTGGAGGAGAAGGGAGTCTCATTCACCTCCAAGTTTGTGATTGCCTCCACTAACGCCAGTAACATCATAGTGCCAACAGTCTCGGATTCAGATGCCATTCGTCGCCGGTTCTTTATGGACTGCGATATTGAGGTGACCGATTCCTATAAGACAGAGCTGGGCAGACTTGATGCAGGGAGAGCAGCCAGGCTGTGCTCTGAGAACAACACTGCAAACTTTAAACGGTGCAGTCCATTAGTCTGTGGGAAAGCAATCCAGCTTAGGGATAGGAAGTCCAAGGTGAGATACAGTGTGGACACGGTAGTGAGTGAACTTATCAGGGAGTATAACAACAGATCAGTTATTGGGAACACCATTGAAGCTCTTTTCCAAGGACCCCCTAAATTTAGACCAATAAGGATTAGCTTAGAGGAGAAGCCCGCACCTGATGCTATTAGTGACTTATTAGCTAGTGTTGATAGTGAAGAGGTTCGCCAATACTGTAGAGATCAGGGATGGATTGTACCTGATTCTCCCACCAACGTTGAGCGCCACTTGAATAGAGCTGTCTTGATTATGCAGTCTGTAGCCACCGTGGTAGCAGTTGTGTCCCTTGTTTACGTCATCTACAAGTTGTTCGCCGGTTTTCAAGGAGCATATTCCGGCGCCCCCAAGCAAACACTCAAGAAACCAGTGCTGCGCACGGCAACTGTGCAGGGGCCGAGCTTGGACTTCGCCCTATCTCTACTTAGGAGGAACATTAGGCAGGTCCAAACCGACCAGGGCCACTTTACAATGTTAGGAGTGCGAGACCGCTTGGCTGTGCTCCCCAGACACTCCCAACCAGGAAAGACCATCTGGGTTGAACACAAATTAGTGAAGATCGTAGATGCTGTGGAGTTAGTAGACGAACAAGGGGTTAACTTAGAGCTCACACTGGTAACGCTTGATACTAACGAAAAATTTAGAGACATCACAAGATTCATACCAGAAACAATTAGTCCTGCTAGTGATGCCACTTTAGTTATAAATACTGAACATATGCCCAGTATGTTTGTGCCAGTTGGAGATGTGGTCCAGTATGGGTTTTTGAACCTTAGTGGTAAGCCCACTCACAGGACTATGATGTACAATTTCCCAACAAAAGCAGGACAGTGTGGTGGTGTTGTGACTGCCGTGGGTAAAGTGATTGGGATCCACATTGGTGGCAACGGTAGGCAAGGTTTCTGCGCTGCCCTGAAGAGGGGATACTTTTGCAGTGAACAAGGTGAGATCCAATGGATGAAGCCCAACAAAGAAACTGGCAGGTTGAACATCAACGGACCTACTCGCACTAAGCTTGAACCAAGTGTCTTTCACGATGTGTTCGAAGGCACTAAAGAGCCAGCAGTGCTGACTAGTAAAGACCCAAGGCTGGAAGTTGACTTTGAACAGGCTCTTTTTTCAAAATACGTGGGGAACACGCTTCATGAACCCGACGAGTTTGTCAAGGAGGCGGCCTTACATTATGCCAACCAACTCAAGCAGTTAGATATCAAGACCACCAAGATGAGCATGGAGGATGCATGTTACGGCACAGAGAACCTGGAAGCTATAGATCTTCACACAAGTGCAGGATATCCATACAGTGCACTAGGCATCAAGAAAAAGGACATTTTGGATCCAACAACTCGCGATGTCAGCAAGATGAAATTCTACATGGACAAGTATGGGTTGGATCTACCGTACTCTACTTATGTTAAAGATGAACTTAGGGCCATCGACAAGATCAAGAAAGGGAAGTCTCGTCTCATAGAAGCGAGCAGTCTAAATGACTCAGTGTACTTGAGAATGACATTTGGGCACCTTTATGAAGCTTTCCACGCCAATCCAGGTACAATCACTGGTTCAGCTGTTGGGTGTAACCCAGATGTGTTCTGGAGCAAGTTACCAATTCTACTTCCAGGATCGCTTTTCGCGTTTGACTACTCGGGGTATGACGCTAGTCTCAGCCCAGTGTGGTTCAGGGCGCTGGAGATAGTCCTGCGGGAAATTGGATACTCCGAAGACGCAGTGTCTCTCATAGAAGGGATCAATCACACCCATCATGTGTACCGCAATAAAACTTATTGTGTTCTTGGGGGAATGCCCTCAGGTTGCTCAGGCACCTCCATTTTCAACTCGATGATCAACAATATCATTATTAGAACACTCCTGATTAAAACATTCAAAGGGATAGATCTAGATGAACTGAACATGGTGGCCTACGGGGATGATGTGTTGGCTAGTTACCCCTTCCCAATTGACTGTCTGGAGTTGGCAAGAACAGGCAAGGAGTATGGTCTAACTATGACCCCTGCCGACAAGTCACCCTGCTTTAATGAGGTTACATGGGAGAATGCCACTTTCTTGAAGAGAGGATTCTTGCCTGATCATCAATTCCCGTTTCTCATCCACCCTACGATGCCAATGAGGGAGATTCACGAATCCATTCGTTGGACCAAAGATGCACGAAGTACTCAAGATCACGTGCGCTCCCTCTGCTTATTAGCATGGCACAACGGGAAAGAGGAGTATGAAAAATTTGTGAGTGCAATCAGATCAGTTCCAATTGGAAAAGCATTGGCTATACCAAATTATGAGAATCTGAGAAGAAATTGGCTCGAATTGTTT")
io.write(output,handle="/hpcdata/lvd_qve/Projects/vp4dms/PTD_analysis/VP4_sr_anchovy/filtConsensus_VP4_SubLib2_RD_ATCC_S2_CBC.fasta",format="fasta")
infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
infoFrame.to_csv("/hpcdata/lvd_qve/Projects//vp4dms/PTD_analysis/VP4_sr_anchovy/filtConsensus_VP4_SubLib2_RD_ATCC_S2.csv")

In [26]:
#41423 - passage
#! cat /hpcdata/lvd_qve/Sequencing_Data/QVEU_Seq_0091_NextSeq_NDAS_VP4DMS_scRNAseq-1stRun/230823_VH01023_29_AAC2YKCHV/Analysis/1/Data/fastq/VP4_SubLib1_RD_ATCC_S1_CBC/pUC19*BC.fasta > /hpcdata/lvd_qve/Sequencing_Data/QVEU_Seq_0091_NextSeq_NDAS_VP4DMS_scRNAseq-1stRun/230823_VH01023_29_AAC2YKCHV/Analysis/1/Data/fastq/VP4_SubLib1_RD_ATCC_S1_CBC/allConsensus.fasta
infile="/hpcdata/lvd_qve/Sequencing_Data/QVEU_Seq_0091_NextSeq_NDAS_VP4DMS_scRNAseq-1stRun/230823_VH01023_29_AAC2YKCHV/Analysis/1/Data/fastq/VP4_SubLib1_RD_Andino_S5.fasta"
start=745
end=952
elements,length,depth = readSeqs(infile)
filtered = selectSeqs(start,end,elements,length,depth,depthMin=1,lengthMin=1)
output=genotypeSummary(filtered,"ATGGGCTCACAAGTGTCCACACAACGCTCCGGTTCACACGAAAACTCTAACTCAGCTACCGAGGGTTCCACTATAAACTATACTACCATTAATTACTATAAAGATTCCTATGCCGCCACAGCAGGTAAGCAGAGCCTTAAGCAGGACCCAGACAAGTTTGCAAATCCTGTCAAAGACATCTTCACTGAAATGGCAGCGCCATTAAAATCTCCATCTGCTGAGGCATGTGGTTACAGCGATCGGGTGGCACAATTAACTATTGGCAATTCTACCATCACTACGCAAGAAGCAGCAAACATCATAGTTGGCTATGGTGAGTGGCCTTCCTACTGTTCGGACTCTGATGCTACTGCAGTGGACAAACCAACGCGCCCAGATGTTTCGGTGAATAGGTTTTACACATTGGACACAAAATTGTGGGAGAAATCATCCAAGGGGTGGTACTGGAAATTCCCGGATGTGTTAACTGAAACCGGGGTCTTTGGTCAAAATGCACAGTTCCACTACCTCTATCGGTCAGGGTTCTGCATTCACGTGCAGTGCAATGCCAGTAAGTTCCACCAAGGAGCACTCCTAGTCGCTGTCCTCCCAGAGTATGTCATTGGGACAGTGGCAGGTGGCACAGGGACGGAGGATAGCCACCCCCCTTATAAGCAGACTCAACCCGGTGCTGATGGCTTCGAATTGCAACACCCGTACGTGCTTGATGCTGGCATTCCAATATCACAATTAACAGTGTGCCCACATCAGTGGATTAATTTGAGGACCAACAATTGTGCCACAATAATAGTGCCGTACATAAACGCACTACCCTTTGATTCTGCCTTGAACCATTGTAACTTTGGTCTGCTGGTTGTGCCTATTAGCCCGTTAGATTATGACCAAGGTGCGACGCCAGTGATCCCCATTACTATCACTTTGGCCCCAATGTGTTCTGAATTTGCAGGCCTTAGACAAGCAGTTACGCAAGGGTTTCCTACTGAGCTGAAACCTGGCACAAACCAATTTTTAACCACTGACGATGGCGTCTCAGCACCCATTCTGCCAAACTTTCACCCCACCCCGTGTATCCATATACCCGGTGAAGTTAGAAACTTGCTAGAGCTATGCCAGGTGGAGACCATTTTAGAGGTCAACAATGTACCTACGAATGCCACTAGCTTAATGGAGAGACTGCGCTTCCCGGTCTCAGCTCAAGCCGGGAAAGGTGAGCTATGTGCAGTGTTCAGAGCTGACCCTGGACGAAGTGGGCCATGGCAGTCCACCTTGTTGGGCCAGTTGTGCGGGTACTACACCCAATGGTCAGGATCACTGGAAGTCACCTTCATGTTCACCGGGTCCTTTATGGCTACCGGCAAGATGCTCATAGCATACACACCACCAGGAGGCCCCTTACCCAAGGACCGGGCGACCGCCATGTTGGGCACGCACGTCATCTGGGACTTTGGGCTGCAATCGTCTGTCACCCTTGTAATACCATGGATCAGCAACACTCATTACAGAGCGCACGCTCGAGATGGTGTGTTCGACTACTACACTACAGGTTTGGTTAGCATATGGTACCAGACGAATTATGTGGTTCCAATTGGGGCACCCAATACAGCCTATATAATAGCATTGGCGGCAGCCCAGAAGAACTTCACCATGAAGTTGTGTAAGGATGCTAGTGATATCCTACAGACAGGCACTATCCAGGGAGATAGGGTGGCAGATGTGATTGAGAGTTCTATAGGGGACAGTGTGAGCAGAGCCCTCACCCGAGCTCTACCGGCACCTACCGGCCAAGACACACAGGTAAGCAGCCACCGATTAGATACTGGTAAAGTTCCAGCACTCCAAGCCGCTGAAATTGGAGCATCATCAAATGCTAGTGATGAGAGTATGATTGAGACACGGTGTGTTCTTAATTCACATAGTACAGCTGAGACCACTCTTGATAGCTTCTTCAGCAGAGCAGGATTAGTTGGAGAGATAGACCTCCCTCTTGAAGGCACAACCAACCCGAATGGGTACGCAAACTGGGACATAGACATAACAGGTTACGCGCAAATGCGTAGAAAGGTGGAGCTGTTCACCTACATGCGTTTTGACGCAGAGTTCACCTTTGTTGCATGCACCCCTACCGGGCAAGTTGTCCCGCAATTGCTCCAATACATGTTTGTACCACCCGGAGCCCCCAAGCCAGACTCCAGAGAATCTCTCGCATGGCAAACTGCCACTAATCCCTCAGTTTTTGTGAAGCTGTCAGACCCCCCAGCACAGGTTTCTGTTCCATTCATGTCACCTGCGAGCGCCTATCAATGGTTTTATGACGGGTATCCCACATTCGGTGAACACAAACAGGAGAAAGACCTTGAATACGGGGCATGCCCAAACAACATGATGGGTACGTTCTCAGTGCGGACTGTAGGCACCTCGAAGTCCAAGTACCCATTGGTGATCAGGATTTACATGAGGATGAAGCACGTCAGGGCGTGGATACCTCGCCCAATGCGTAACCAGAACTATCTATTCAAAGCCAACCCAAATTATGCTGGTAATTCTATTAAACCAACTGGTGCCAGTCGCACAGCAATCACCACCCTCGGGAAATTTGGACAGCAGTCCGGAGCTATCTACGTGGGCAACTTTAGAGTGGTTAACCGCCATCTTGCTACTCATAATGACTGGGCAAACCTTGTTTGGGAAGACAGCTCCCGCGACTTGCTCGTATCATCTACCACTGCTCAAGGTTGTGACACGATTGCTCGTTGCAATTGCCAGACAGGAGTGTATTATTGTAACTCAATGAGAAAACACTATCCGGTCAGTTTCTCGAAACCCAGTTTGATCTTCGTGGAGGCCAGCGAGTATTATCCAGCTAGATACCAGTCACATCTCATGCTTGCAGTGGGTCATTCGGAACCAGGGGATTGCGGTGGCATTCTTAGATGCCAACATGGCGTCGTAGGGATAGTTTCCACCGGGGGAAACGGCCTGGTGGGGTTCGCCGATGTGAGGGATCTCCTGTGGTTGGATGATGAAGCCATGGAGCAGGGCGTGTCTGATTACATTAAAGGGCTTGGAGATGCTTTTGGCATGGGGTTTACAGACGCAGTGTCAAGAGAAGTTGAAGCACTGAAAAGTCACTTGATCGGCTCAGAGGGTGCCGTGGAGAAGATTCTAAAGAACTTAGTTAAACTCATCTCTGCGCTCGTCATCGTCATCAGGAGTGATTATGACATGGTCACATTGACGGCAACACTTGCCCTGATCGGGTGCCACGGGAGCCCTTGGGCCTGGGTTAAGTCGAAGACAGCATCAATCTTGGGCATACCGATGGCTCAGAAGCAGAGTGCCTCTTGGTTAAAGAAGTTCAACGATGCGGCGAGTGCCGCGAAGGGGCTTGAGTGGATCTCCAACAAAATCAGTAAATTTATCGATTGGCTCAAGGAGAAAATCATACCGGCTGCTAAAGAGAAAGTCGAGTTTCTAAACAATCTAAAGCAACTCCCCTTATTGGAGAACCAAATTTCTAATCTCGAACAGTCAGCAGCTTCGCAGGAGGACCTTGAGGCGATGTTTGGCAACGTGTCTTATCTGGCCCACTTCTGCCGCAAATTCCAACCCCTCTATGCCACGGAAGCAAAGAGGGTGTACGCCCTAGAAAAGAGAATGAATAATTACATGCAGTTCAAGAGCAAACACCGTATTGAACCTGTATGCCTAATCATCAGAGGCTCGCCTGGTACTGGGAAGTCCTTGGCAACAGGGATTATTGCTAGAGCCATAGCAGACAAGTACCACTCCAGTGTGTATTCCTTACCTCCAGACCCAGACCACTTTGACGGATACAAACAACAGATCGTCACTGTTATGGACGACCTATGCCAAAACCCAGACGGGAAAGACATGTCACTATTTTGTCAGATGGTCTCCACAGTGGATTTTATACCGCCTATGGCATCTCTGGAGGAGAAGGGAGTCTCATTCACCTCCAAGTTTGTGATTGCCTCCACTAACGCCAGTAACATCATAGTGCCAACAGTCTCGGATTCAGATGCCATTCGTCGCCGGTTCTTTATGGACTGCGATATTGAGGTGACCGATTCCTATAAGACAGAGCTGGGCAGACTTGATGCAGGGAGAGCAGCCAGGCTGTGCTCTGAGAACAACACTGCAAACTTTAAACGGTGCAGTCCATTAGTCTGTGGGAAAGCAATCCAGCTTAGGGATAGGAAGTCCAAGGTGAGATACAGTGTGGACACGGTAGTGAGTGAACTTATCAGGGAGTATAACAACAGATCAGTTATTGGGAACACCATTGAAGCTCTTTTCCAAGGACCCCCTAAATTTAGACCAATAAGGATTAGCTTAGAGGAGAAGCCCGCACCTGATGCTATTAGTGACTTATTAGCTAGTGTTGATAGTGAAGAGGTTCGCCAATACTGTAGAGATCAGGGATGGATTGTACCTGATTCTCCCACCAACGTTGAGCGCCACTTGAATAGAGCTGTCTTGATTATGCAGTCTGTAGCCACCGTGGTAGCAGTTGTGTCCCTTGTTTACGTCATCTACAAGTTGTTCGCCGGTTTTCAAGGAGCATATTCCGGCGCCCCCAAGCAAACACTCAAGAAACCAGTGCTGCGCACGGCAACTGTGCAGGGGCCGAGCTTGGACTTCGCCCTATCTCTACTTAGGAGGAACATTAGGCAGGTCCAAACCGACCAGGGCCACTTTACAATGTTAGGAGTGCGAGACCGCTTGGCTGTGCTCCCCAGACACTCCCAACCAGGAAAGACCATCTGGGTTGAACACAAATTAGTGAAGATCGTAGATGCTGTGGAGTTAGTAGACGAACAAGGGGTTAACTTAGAGCTCACACTGGTAACGCTTGATACTAACGAAAAATTTAGAGACATCACAAGATTCATACCAGAAACAATTAGTCCTGCTAGTGATGCCACTTTAGTTATAAATACTGAACATATGCCCAGTATGTTTGTGCCAGTTGGAGATGTGGTCCAGTATGGGTTTTTGAACCTTAGTGGTAAGCCCACTCACAGGACTATGATGTACAATTTCCCAACAAAAGCAGGACAGTGTGGTGGTGTTGTGACTGCCGTGGGTAAAGTGATTGGGATCCACATTGGTGGCAACGGTAGGCAAGGTTTCTGCGCTGCCCTGAAGAGGGGATACTTTTGCAGTGAACAAGGTGAGATCCAATGGATGAAGCCCAACAAAGAAACTGGCAGGTTGAACATCAACGGACCTACTCGCACTAAGCTTGAACCAAGTGTCTTTCACGATGTGTTCGAAGGCACTAAAGAGCCAGCAGTGCTGACTAGTAAAGACCCAAGGCTGGAAGTTGACTTTGAACAGGCTCTTTTTTCAAAATACGTGGGGAACACGCTTCATGAACCCGACGAGTTTGTCAAGGAGGCGGCCTTACATTATGCCAACCAACTCAAGCAGTTAGATATCAAGACCACCAAGATGAGCATGGAGGATGCATGTTACGGCACAGAGAACCTGGAAGCTATAGATCTTCACACAAGTGCAGGATATCCATACAGTGCACTAGGCATCAAGAAAAAGGACATTTTGGATCCAACAACTCGCGATGTCAGCAAGATGAAATTCTACATGGACAAGTATGGGTTGGATCTACCGTACTCTACTTATGTTAAAGATGAACTTAGGGCCATCGACAAGATCAAGAAAGGGAAGTCTCGTCTCATAGAAGCGAGCAGTCTAAATGACTCAGTGTACTTGAGAATGACATTTGGGCACCTTTATGAAGCTTTCCACGCCAATCCAGGTACAATCACTGGTTCAGCTGTTGGGTGTAACCCAGATGTGTTCTGGAGCAAGTTACCAATTCTACTTCCAGGATCGCTTTTCGCGTTTGACTACTCGGGGTATGACGCTAGTCTCAGCCCAGTGTGGTTCAGGGCGCTGGAGATAGTCCTGCGGGAAATTGGATACTCCGAAGACGCAGTGTCTCTCATAGAAGGGATCAATCACACCCATCATGTGTACCGCAATAAAACTTATTGTGTTCTTGGGGGAATGCCCTCAGGTTGCTCAGGCACCTCCATTTTCAACTCGATGATCAACAATATCATTATTAGAACACTCCTGATTAAAACATTCAAAGGGATAGATCTAGATGAACTGAACATGGTGGCCTACGGGGATGATGTGTTGGCTAGTTACCCCTTCCCAATTGACTGTCTGGAGTTGGCAAGAACAGGCAAGGAGTATGGTCTAACTATGACCCCTGCCGACAAGTCACCCTGCTTTAATGAGGTTACATGGGAGAATGCCACTTTCTTGAAGAGAGGATTCTTGCCTGATCATCAATTCCCGTTTCTCATCCACCCTACGATGCCAATGAGGGAGATTCACGAATCCATTCGTTGGACCAAAGATGCACGAAGTACTCAAGATCACGTGCGCTCCCTCTGCTTATTAGCATGGCACAACGGGAAAGAGGAGTATGAAAAATTTGTGAGTGCAATCAGATCAGTTCCAATTGGAAAAGCATTGGCTATACCAAATTATGAGAATCTGAGAAGAAATTGGCTCGAATTGTTT")
io.write(output,handle="/hpcdata/lvd_qve/Projects/vp4dms/PTD_analysis/VP4_sr_anchovy/filtConsensus_VP4_SubLib1_RD_Andino_S5.fasta",format="fasta")
infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
infoFrame.to_csv("/hpcdata/lvd_qve/Projects//vp4dms/PTD_analysis/VP4_sr_anchovy/filtConsensus_VP4_SubLib1_RD_Andino_S5.csv")

Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 0


In [41]:
#41423 - passage!
#! cat /hpcdata/lvd_qve/Sequencing_Data/QVEU_Seq_0091_NextSeq_NDAS_VP4DMS_scRNAseq-1stRun/230823_VH01023_29_AAC2YKCHV/Analysis/1/Data/fastq/VP4_SubLib1_RD_ATCC_S1_CBC/pUC19*BC.fasta > /hpcdata/lvd_qve/Sequencing_Data/QVEU_Seq_0091_NextSeq_NDAS_VP4DMS_scRNAseq-1stRun/230823_VH01023_29_AAC2YKCHV/Analysis/1/Data/fastq/VP4_SubLib1_RD_ATCC_S1_CBC/allConsensus.fasta
indir="/hpcdata/lvd_qve/Sequencing_Data/QVEU_Seq_0091_NextSeq_NDAS_VP4DMS_scRNAseq-1stRun/230823_VH01023_29_AAC2YKCHV/Analysis/1/Data/fastq/"
flist=[i for i in os.listdir(indir) if i.endswith(".fasta")]
for i in flist:
    start=745
    end=952
    try:
        print(i)
        elements,length,depth = readSeqs(indir+i)
        filtered = selectSeqs(start,end,elements,length,depth,depthMin=1,lengthMin=1)
        output=genotypeSummary(filtered,"ATGGGCTCACAAGTGTCCACACAACGCTCCGGTTCACACGAAAACTCTAACTCAGCTACCGAGGGTTCCACTATAAACTATACTACCATTAATTACTATAAAGATTCCTATGCCGCCACAGCAGGTAAGCAGAGCCTTAAGCAGGACCCAGACAAGTTTGCAAATCCTGTCAAAGACATCTTCACTGAAATGGCAGCGCCATTAAAATCTCCATCTGCTGAGGCATGTGGTTACAGCGATCGGGTGGCACAATTAACTATTGGCAATTCTACCATCACTACGCAAGAAGCAGCAAACATCATAGTTGGCTATGGTGAGTGGCCTTCCTACTGTTCGGACTCTGATGCTACTGCAGTGGACAAACCAACGCGCCCAGATGTTTCGGTGAATAGGTTTTACACATTGGACACAAAATTGTGGGAGAAATCATCCAAGGGGTGGTACTGGAAATTCCCGGATGTGTTAACTGAAACCGGGGTCTTTGGTCAAAATGCACAGTTCCACTACCTCTATCGGTCAGGGTTCTGCATTCACGTGCAGTGCAATGCCAGTAAGTTCCACCAAGGAGCACTCCTAGTCGCTGTCCTCCCAGAGTATGTCATTGGGACAGTGGCAGGTGGCACAGGGACGGAGGATAGCCACCCCCCTTATAAGCAGACTCAACCCGGTGCTGATGGCTTCGAATTGCAACACCCGTACGTGCTTGATGCTGGCATTCCAATATCACAATTAACAGTGTGCCCACATCAGTGGATTAATTTGAGGACCAACAATTGTGCCACAATAATAGTGCCGTACATAAACGCACTACCCTTTGATTCTGCCTTGAACCATTGTAACTTTGGTCTGCTGGTTGTGCCTATTAGCCCGTTAGATTATGACCAAGGTGCGACGCCAGTGATCCCCATTACTATCACTTTGGCCCCAATGTGTTCTGAATTTGCAGGCCTTAGACAAGCAGTTACGCAAGGGTTTCCTACTGAGCTGAAACCTGGCACAAACCAATTTTTAACCACTGACGATGGCGTCTCAGCACCCATTCTGCCAAACTTTCACCCCACCCCGTGTATCCATATACCCGGTGAAGTTAGAAACTTGCTAGAGCTATGCCAGGTGGAGACCATTTTAGAGGTCAACAATGTACCTACGAATGCCACTAGCTTAATGGAGAGACTGCGCTTCCCGGTCTCAGCTCAAGCCGGGAAAGGTGAGCTATGTGCAGTGTTCAGAGCTGACCCTGGACGAAGTGGGCCATGGCAGTCCACCTTGTTGGGCCAGTTGTGCGGGTACTACACCCAATGGTCAGGATCACTGGAAGTCACCTTCATGTTCACCGGGTCCTTTATGGCTACCGGCAAGATGCTCATAGCATACACACCACCAGGAGGCCCCTTACCCAAGGACCGGGCGACCGCCATGTTGGGCACGCACGTCATCTGGGACTTTGGGCTGCAATCGTCTGTCACCCTTGTAATACCATGGATCAGCAACACTCATTACAGAGCGCACGCTCGAGATGGTGTGTTCGACTACTACACTACAGGTTTGGTTAGCATATGGTACCAGACGAATTATGTGGTTCCAATTGGGGCACCCAATACAGCCTATATAATAGCATTGGCGGCAGCCCAGAAGAACTTCACCATGAAGTTGTGTAAGGATGCTAGTGATATCCTACAGACAGGCACTATCCAGGGAGATAGGGTGGCAGATGTGATTGAGAGTTCTATAGGGGACAGTGTGAGCAGAGCCCTCACCCGAGCTCTACCGGCACCTACCGGCCAAGACACACAGGTAAGCAGCCACCGATTAGATACTGGTAAAGTTCCAGCACTCCAAGCCGCTGAAATTGGAGCATCATCAAATGCTAGTGATGAGAGTATGATTGAGACACGGTGTGTTCTTAATTCACATAGTACAGCTGAGACCACTCTTGATAGCTTCTTCAGCAGAGCAGGATTAGTTGGAGAGATAGACCTCCCTCTTGAAGGCACAACCAACCCGAATGGGTACGCAAACTGGGACATAGACATAACAGGTTACGCGCAAATGCGTAGAAAGGTGGAGCTGTTCACCTACATGCGTTTTGACGCAGAGTTCACCTTTGTTGCATGCACCCCTACCGGGCAAGTTGTCCCGCAATTGCTCCAATACATGTTTGTACCACCCGGAGCCCCCAAGCCAGACTCCAGAGAATCTCTCGCATGGCAAACTGCCACTAATCCCTCAGTTTTTGTGAAGCTGTCAGACCCCCCAGCACAGGTTTCTGTTCCATTCATGTCACCTGCGAGCGCCTATCAATGGTTTTATGACGGGTATCCCACATTCGGTGAACACAAACAGGAGAAAGACCTTGAATACGGGGCATGCCCAAACAACATGATGGGTACGTTCTCAGTGCGGACTGTAGGCACCTCGAAGTCCAAGTACCCATTGGTGATCAGGATTTACATGAGGATGAAGCACGTCAGGGCGTGGATACCTCGCCCAATGCGTAACCAGAACTATCTATTCAAAGCCAACCCAAATTATGCTGGTAATTCTATTAAACCAACTGGTGCCAGTCGCACAGCAATCACCACCCTCGGGAAATTTGGACAGCAGTCCGGAGCTATCTACGTGGGCAACTTTAGAGTGGTTAACCGCCATCTTGCTACTCATAATGACTGGGCAAACCTTGTTTGGGAAGACAGCTCCCGCGACTTGCTCGTATCATCTACCACTGCTCAAGGTTGTGACACGATTGCTCGTTGCAATTGCCAGACAGGAGTGTATTATTGTAACTCAATGAGAAAACACTATCCGGTCAGTTTCTCGAAACCCAGTTTGATCTTCGTGGAGGCCAGCGAGTATTATCCAGCTAGATACCAGTCACATCTCATGCTTGCAGTGGGTCATTCGGAACCAGGGGATTGCGGTGGCATTCTTAGATGCCAACATGGCGTCGTAGGGATAGTTTCCACCGGGGGAAACGGCCTGGTGGGGTTCGCCGATGTGAGGGATCTCCTGTGGTTGGATGATGAAGCCATGGAGCAGGGCGTGTCTGATTACATTAAAGGGCTTGGAGATGCTTTTGGCATGGGGTTTACAGACGCAGTGTCAAGAGAAGTTGAAGCACTGAAAAGTCACTTGATCGGCTCAGAGGGTGCCGTGGAGAAGATTCTAAAGAACTTAGTTAAACTCATCTCTGCGCTCGTCATCGTCATCAGGAGTGATTATGACATGGTCACATTGACGGCAACACTTGCCCTGATCGGGTGCCACGGGAGCCCTTGGGCCTGGGTTAAGTCGAAGACAGCATCAATCTTGGGCATACCGATGGCTCAGAAGCAGAGTGCCTCTTGGTTAAAGAAGTTCAACGATGCGGCGAGTGCCGCGAAGGGGCTTGAGTGGATCTCCAACAAAATCAGTAAATTTATCGATTGGCTCAAGGAGAAAATCATACCGGCTGCTAAAGAGAAAGTCGAGTTTCTAAACAATCTAAAGCAACTCCCCTTATTGGAGAACCAAATTTCTAATCTCGAACAGTCAGCAGCTTCGCAGGAGGACCTTGAGGCGATGTTTGGCAACGTGTCTTATCTGGCCCACTTCTGCCGCAAATTCCAACCCCTCTATGCCACGGAAGCAAAGAGGGTGTACGCCCTAGAAAAGAGAATGAATAATTACATGCAGTTCAAGAGCAAACACCGTATTGAACCTGTATGCCTAATCATCAGAGGCTCGCCTGGTACTGGGAAGTCCTTGGCAACAGGGATTATTGCTAGAGCCATAGCAGACAAGTACCACTCCAGTGTGTATTCCTTACCTCCAGACCCAGACCACTTTGACGGATACAAACAACAGATCGTCACTGTTATGGACGACCTATGCCAAAACCCAGACGGGAAAGACATGTCACTATTTTGTCAGATGGTCTCCACAGTGGATTTTATACCGCCTATGGCATCTCTGGAGGAGAAGGGAGTCTCATTCACCTCCAAGTTTGTGATTGCCTCCACTAACGCCAGTAACATCATAGTGCCAACAGTCTCGGATTCAGATGCCATTCGTCGCCGGTTCTTTATGGACTGCGATATTGAGGTGACCGATTCCTATAAGACAGAGCTGGGCAGACTTGATGCAGGGAGAGCAGCCAGGCTGTGCTCTGAGAACAACACTGCAAACTTTAAACGGTGCAGTCCATTAGTCTGTGGGAAAGCAATCCAGCTTAGGGATAGGAAGTCCAAGGTGAGATACAGTGTGGACACGGTAGTGAGTGAACTTATCAGGGAGTATAACAACAGATCAGTTATTGGGAACACCATTGAAGCTCTTTTCCAAGGACCCCCTAAATTTAGACCAATAAGGATTAGCTTAGAGGAGAAGCCCGCACCTGATGCTATTAGTGACTTATTAGCTAGTGTTGATAGTGAAGAGGTTCGCCAATACTGTAGAGATCAGGGATGGATTGTACCTGATTCTCCCACCAACGTTGAGCGCCACTTGAATAGAGCTGTCTTGATTATGCAGTCTGTAGCCACCGTGGTAGCAGTTGTGTCCCTTGTTTACGTCATCTACAAGTTGTTCGCCGGTTTTCAAGGAGCATATTCCGGCGCCCCCAAGCAAACACTCAAGAAACCAGTGCTGCGCACGGCAACTGTGCAGGGGCCGAGCTTGGACTTCGCCCTATCTCTACTTAGGAGGAACATTAGGCAGGTCCAAACCGACCAGGGCCACTTTACAATGTTAGGAGTGCGAGACCGCTTGGCTGTGCTCCCCAGACACTCCCAACCAGGAAAGACCATCTGGGTTGAACACAAATTAGTGAAGATCGTAGATGCTGTGGAGTTAGTAGACGAACAAGGGGTTAACTTAGAGCTCACACTGGTAACGCTTGATACTAACGAAAAATTTAGAGACATCACAAGATTCATACCAGAAACAATTAGTCCTGCTAGTGATGCCACTTTAGTTATAAATACTGAACATATGCCCAGTATGTTTGTGCCAGTTGGAGATGTGGTCCAGTATGGGTTTTTGAACCTTAGTGGTAAGCCCACTCACAGGACTATGATGTACAATTTCCCAACAAAAGCAGGACAGTGTGGTGGTGTTGTGACTGCCGTGGGTAAAGTGATTGGGATCCACATTGGTGGCAACGGTAGGCAAGGTTTCTGCGCTGCCCTGAAGAGGGGATACTTTTGCAGTGAACAAGGTGAGATCCAATGGATGAAGCCCAACAAAGAAACTGGCAGGTTGAACATCAACGGACCTACTCGCACTAAGCTTGAACCAAGTGTCTTTCACGATGTGTTCGAAGGCACTAAAGAGCCAGCAGTGCTGACTAGTAAAGACCCAAGGCTGGAAGTTGACTTTGAACAGGCTCTTTTTTCAAAATACGTGGGGAACACGCTTCATGAACCCGACGAGTTTGTCAAGGAGGCGGCCTTACATTATGCCAACCAACTCAAGCAGTTAGATATCAAGACCACCAAGATGAGCATGGAGGATGCATGTTACGGCACAGAGAACCTGGAAGCTATAGATCTTCACACAAGTGCAGGATATCCATACAGTGCACTAGGCATCAAGAAAAAGGACATTTTGGATCCAACAACTCGCGATGTCAGCAAGATGAAATTCTACATGGACAAGTATGGGTTGGATCTACCGTACTCTACTTATGTTAAAGATGAACTTAGGGCCATCGACAAGATCAAGAAAGGGAAGTCTCGTCTCATAGAAGCGAGCAGTCTAAATGACTCAGTGTACTTGAGAATGACATTTGGGCACCTTTATGAAGCTTTCCACGCCAATCCAGGTACAATCACTGGTTCAGCTGTTGGGTGTAACCCAGATGTGTTCTGGAGCAAGTTACCAATTCTACTTCCAGGATCGCTTTTCGCGTTTGACTACTCGGGGTATGACGCTAGTCTCAGCCCAGTGTGGTTCAGGGCGCTGGAGATAGTCCTGCGGGAAATTGGATACTCCGAAGACGCAGTGTCTCTCATAGAAGGGATCAATCACACCCATCATGTGTACCGCAATAAAACTTATTGTGTTCTTGGGGGAATGCCCTCAGGTTGCTCAGGCACCTCCATTTTCAACTCGATGATCAACAATATCATTATTAGAACACTCCTGATTAAAACATTCAAAGGGATAGATCTAGATGAACTGAACATGGTGGCCTACGGGGATGATGTGTTGGCTAGTTACCCCTTCCCAATTGACTGTCTGGAGTTGGCAAGAACAGGCAAGGAGTATGGTCTAACTATGACCCCTGCCGACAAGTCACCCTGCTTTAATGAGGTTACATGGGAGAATGCCACTTTCTTGAAGAGAGGATTCTTGCCTGATCATCAATTCCCGTTTCTCATCCACCCTACGATGCCAATGAGGGAGATTCACGAATCCATTCGTTGGACCAAAGATGCACGAAGTACTCAAGATCACGTGCGCTCCCTCTGCTTATTAGCATGGCACAACGGGAAAGAGGAGTATGAAAAATTTGTGAGTGCAATCAGATCAGTTCCAATTGGAAAAGCATTGGCTATACCAAATTATGAGAATCTGAGAAGAAATTGGCTCGAATTGTTT")
        io.write(output,handle=indir+"allConsensus"+".fasta",format="fasta")
        infoFrame=pd.DataFrame([[outputs.id,outputs.dbxrefs,"".join(outputs.seq),outputs.description] for outputs in output],columns=["CBC_ID","genotype","sequence","description"])
        infoFrame.to_csv(indir+i.replace(".fasta",".csv"))
    except:
        pass
    

VP4_SubLib1_RD_ATCC_S1.fasta
Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 870
allConsensus.fasta
Parsing Consensus Sequences...
Filtering Alignment...
VP4_SubLib2_RD_Andino_S6.fasta
Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 0
VP4_SubLib1_RD_Andino_S5.fasta
Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 0
VP4_SubLib3_RD_Andino_S7.fasta
Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 473
VP4_SubLib3_RD_ATCC_S3.fasta
Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 321
VP4_SubLib4_RD_ATCC_S4.fasta
Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 403
VP4_SubLib2_RD_ATCC_S2.fasta
Parsing Consensus Sequences...
Filtering Alignment...
Trimming Alignment...
Total: 397


In [None]:
!tree "/hpcdata/lvd_qve/Projects/"