In [80]:
import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation
import glob
import urllib
import xml.etree.ElementTree as ET
import os
import os.path
from Bio.Alphabet import generic_dna

fastaList=list(SeqIO.parse("./All_Parts_iGEM.txt", "fasta"))
fastaDict={ele.id:ele for ele in fastaList}

In [78]:
with open("./pSB1C3.gbk",'r') as file_handle:
    record_dict = SeqIO.to_dict(SeqIO.parse(file_handle, 'gb'))
    pSB1C3 = record_dict[list(record_dict.keys())[0]]
with open("./pSB1A2.gbk",'r') as file_handle:
    record_dict = SeqIO.to_dict(SeqIO.parse(file_handle, 'gb'))
    pSB1A2 = record_dict[list(record_dict.keys())[0]]

In [30]:
plates={}
parts=[]
for infile_loc in glob.glob("./Plate_Layouts/*.csv"):
    plateNum = infile_loc.split()[-2]
    plate = pd.read_csv(infile_loc,encoding = "ISO-8859-1")
    plateParts=plate["  Part "].values.tolist()
    parts.extend([ele.strip() for ele in plateParts])
    plates[plateNum]=plateParts         
parts = list(set(parts))

In [149]:
strains=pd.read_csv("igem2019_strain_metadata.csv",index_col=0)
strains['accession']=strains['accession'].str.strip()

measuredParts = {str(ele).strip() for ele in set(list(strains['accession']))}
allParts={ele.split("BBa_")[-1] for ele in parts}

In [119]:
allPartsSeqs={}
c=0
for curPart in measuredParts&allParts:
    print(f"{c}: {curPart}")
    c+=1
    igemURL = f'http://parts.igem.org/cgi/xml/part.cgi?part={curPart}'
    tree = ET.parse(urllib.request.urlopen(igemURL))
    root = tree.getroot()
    for seq in root.iter('seq_data'):
        #record = seq.text + gbkFile
        sequence=seq.text
        sequence=sequence.replace("\n", "")
        sequence=Seq(sequence,generic_dna)

    allPartsSeqs[curPart]=sequence

0: K629005
1: K314101
2: S05060
3: I0466
4: I13504
5: K733014
6: K530025
7: K576003
8: I759016
9: K519010
10: J52022
11: R1062
12: K1061003
13: J23102
14: K817000
15: K517002
16: K747012
17: K763002
18: K523006
19: K641009
20: K592016
21: K398326
22: K747009
23: K876010
24: K523000
25: K617003
26: K747016
27: K909007
28: K731520
29: K914011
30: K861090
31: K654058
32: B1006
33: K1166002
34: B0012
35: I759020
36: K808025
37: K817022
38: K1092001
39: K606017
40: K876057
41: J23110
42: K747023
43: J23118
44: K540001
45: R0040
46: K525998
47: J23105
48: B0034
49: K624003
50: K777117
51: R0082
52: J23108
53: K775002
54: K398331
55: K553003
56: K115001
57: K731722
58: K648028
59: K1033910
60: K808003
61: K779503
62: K936013
63: I759019
64: K525710
65: K575020
66: K747019
67: I712088
68: K346002
69: K542010
70: I13600
71: K539421
72: C0050
73: J61100
74: K1150012
75: K608004
76: C0160
77: K914001
78: K808012
79: K747020
80: J23103
81: K314100
82: K530015
83: K808032
84: B0033
85: K564013
86: 

In [155]:
notAnno=[]
sequenceRecords=[]
for strain in strains.index:
    curPart = strains.loc[strain]['accession']
    if curPart in allPartsSeqs.keys():
        curBackbone=strains[strains['accession'] == curPart]['vector'].values[0]
        if curBackbone == "pSB1C3" or curBackbone == "pSB1A2":
            if curBackbone == "pSB1C3":
                bb=pSB1C3
            if curBackbone == "pSB1A2":
                bb=pSB1A2

            record = bb + allPartsSeqs[curPart].lower()
            featLoc=FeatureLocation(len(bb),len(bb)+len(allPartsSeqs[curPart]),1)
            record.features.append(SeqFeature(featLoc, type='BioBrick (accession)',qualifiers={"label": curPart}))
            record.id = curPart
            #BBnum=strains[strains['accession'] == curPart]['strain'].values
            record.name = strain
            sequenceRecords.append(record)
        else:
            notAnno.append((curPart,curBackbone))
notAnno=pd.DataFrame(notAnno,columns=["accession","backbone"]).set_index('accession')

In [142]:
notAnno=[]
sequenceRecords=[]
for curPart in allPartsSeqs.keys():
    curBackbone=strains[strains['accession'] == curPart]['vector'].values[0]
    if curBackbone == "pSB1C3" or curBackbone == "pSB1A2":
        if curBackbone == "pSB1C3":
            bb=pSB1C3
        if curBackbone == "pSB1A2":
            bb=pSB1A2

        record = bb + allPartsSeqs[curPart].lower()
        featLoc=FeatureLocation(len(bb),len(bb)+len(allPartsSeqs[curPart]),1)
        record.features.append(SeqFeature(featLoc, type='BioBrick (accession)',qualifiers={"label": curPart}))
        record.id = curPart
        BBnum=strains[strains['accession'] == curPart]['strain'].values
        if len(BBnum) > 1:
            print("error")
            break
        record.name = BBnum[0]
        sequenceRecords.append(record)
    else:
        notAnno.append((curPart,curBackbone))
notAnno=pd.DataFrame(notAnno,columns=["accession","backbone"]).set_index('accession')

error


In [158]:
for curPlas in sequenceRecords:
    SeqIO.write(curPlas,f'./reference_files/{curPlas.name}.gbk', 'gb')