In [1]:
from Bio.SeqRecord import SeqRecord
help(SeqRecord)

Help on class SeqRecord in module Bio.SeqRecord:

class SeqRecord(builtins.object)
 |  SeqRecord(seq: Union[ForwardRef('Seq'), ForwardRef('MutableSeq'), NoneType], id: Optional[str] = '<unknown id>', name: str = '<unknown name>', description: str = '<unknown description>', dbxrefs: Optional[list[str]] = None, features: Optional[list['SeqFeature']] = None, annotations: Optional[dict[str, Union[str, int]]] = None, letter_annotations: Optional[dict[str, collections.abc.Sequence[Any]]] = None) -> None
 |
 |  A SeqRecord object holds a sequence and information about it.
 |
 |  Main attributes:
 |   - id          - Identifier such as a locus tag (string)
 |   - seq         - The sequence itself (Seq object or similar)
 |
 |  Additional attributes:
 |   - name        - Sequence name, e.g. gene name (string)
 |   - description - Additional text (string)
 |   - dbxrefs     - List of database cross references (list of strings)
 |   - features    - Any (sub)features defined (list of SeqFeature ob

In [2]:
from Bio.Seq import Seq
simple_seq = Seq("GATC")
from Bio.SeqRecord import SeqRecord
simple_seq_r = SeqRecord(simple_seq)
print(simple_seq)
print(simple_seq_r)

GATC
ID: <unknown id>
Name: <unknown name>
Description: <unknown description>
Number of features: 0
Seq('GATC')


In [3]:
print(simple_seq_r.id)

<unknown id>


In [4]:
simple_seq_r.id = "AC12345"
simple_seq_r.description = "Made up sequence I wish I could write a paper about"
print(simple_seq_r.description)

Made up sequence I wish I could write a paper about


In [5]:
simple_seq_r.seq

Seq('GATC')

In [6]:
from Bio.Seq import Seq
simple_seq = Seq("GATC")
from Bio.SeqRecord import SeqRecord
simple_seq_r = SeqRecord(simple_seq, id="AC12345")
print(simple_seq_r)

ID: AC12345
Name: <unknown name>
Description: <unknown description>
Number of features: 0
Seq('GATC')


In [7]:
simple_seq_r.annotations["evidence"] = "None. I just made it up."
print(simple_seq_r.annotations)

{'evidence': 'None. I just made it up.'}


In [8]:
print(simple_seq_r.annotations["evidence"])

None. I just made it up.


In [9]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Ensure simple_seq_r is defined
simple_seq = Seq("GATC")
simple_seq_r = SeqRecord(simple_seq, id="AC12345")

# Add letter annotations
simple_seq_r.letter_annotations["phred_quality"] = [40, 40, 38, 30]
print(simple_seq_r.letter_annotations)
print(simple_seq_r.letter_annotations["phred_quality"])

{'phred_quality': [40, 40, 38, 30]}
[40, 40, 38, 30]


In [10]:
from Bio import SeqIO
record = SeqIO.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/NC_005816.fna", "fasta")
record

FileNotFoundError: [Errno 2] No such file or directory: '/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/NC_005816.fna'

In [None]:
record.seq

Seq('TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGG...CTG')

In [None]:
record.id

'gi|45478711|ref|NC_005816.1|'

In [None]:
record.name

'gi|45478711|ref|NC_005816.1|'

In [None]:
record.description

'gi|45478711|ref|NC_005816.1| Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence'

In [None]:
record.dbxrefs

[]

In [None]:
record.annotations

{}

In [None]:
record.annotations

{}

In [None]:
record.features

[]

In [None]:
from Bio import SeqIO
record = SeqIO.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/NC_005816.gb", "genbank")
record

SeqRecord(seq=Seq('TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGG...CTG'), id='NC_005816.1', name='NC_005816', description='Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence', dbxrefs=['Project:58037'])

In [None]:
record.seq

Seq('TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGG...CTG')

In [None]:
record.annotations

{'molecule_type': 'DNA',
 'topology': 'circular',
 'data_file_division': 'BCT',
 'date': '21-JUL-2008',
 'accessions': ['NC_005816'],
 'sequence_version': 1,
 'gi': '45478711',
 'keywords': [''],
 'source': 'Yersinia pestis biovar Microtus str. 91001',
 'organism': 'Yersinia pestis biovar Microtus str. 91001',
 'taxonomy': ['Bacteria',
  'Proteobacteria',
  'Gammaproteobacteria',
  'Enterobacteriales',
  'Enterobacteriaceae',
  'Yersinia'],
 'references': [Reference(title='Genetics of metabolic variations between Yersinia pestis biovars and the proposal of a new biovar, microtus', ...),
  Reference(title='Complete genome sequence of Yersinia pestis strain 91001, an isolate avirulent to humans', ...),
  Reference(title='Direct Submission', ...),
  Reference(title='Direct Submission', ...)],
 'comment': 'PROVISIONAL REFSEQ: This record has not yet been subject to final\nNCBI review. The reference sequence was derived from AE017046.\nCOMPLETENESS: full length.'}

In [None]:
record.description

'Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence'

In [None]:
record.id

'NC_005816.1'

In [None]:
record.name

'NC_005816'

In [None]:
record.letter_annotations

{}

In [None]:
len(record.annotations)

13

In [None]:
len(record.letter_annotations)

0

In [None]:
record.dbxrefs

['Project:58037']

In [None]:
len(record.features)

41

In [None]:
from Bio import SeqFeature
start_pos = SeqFeature.AfterPosition(5)
end_pos = SeqFeature.BetweenPosition(9, left=8, right=9)
my_location = SeqFeature.SimpleLocation(start_pos, end_pos)
print(my_location)

[>5:(8^9)]


In [None]:
my_location.start

AfterPosition(5)

In [None]:
print(my_location.start)

>5


In [None]:
my_location.end

BetweenPosition(9, left=8, right=9)

In [None]:
print(my_location.end)

(8^9)


In [None]:
int(my_location.start)

5

In [None]:
int(my_location.end)

9

In [None]:
exact_location = SeqFeature.SimpleLocation(5, 9)
print(exact_location)

[5:9]


In [None]:
print(exact_location.start)
print(exact_location.end)

5
9


In [None]:
print(int(exact_location.start))
print(int(exact_location.end))

5
9


In [None]:
from Bio import SeqIO
my_snp = 4350
record = SeqIO.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/NC_005816.gb", "genbank")
for feature in record.features:
    if my_snp in feature:
        print("%s %s" % (feature.type, feature.qualifiers.get("db_xref")))


source ['taxon:229193']
gene ['GeneID:2767712']
CDS ['GI:45478716', 'GeneID:2767712']


In [None]:
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, SimpleLocation
seq = Seq("ACCGAGACGGCAAAGGCTAGCATAGGTATGAGACTTCCTTCCTGCCAGTGCTGAGGAACTGGGAGCCTAC")
feature = SeqFeature(SimpleLocation(5, 18, strand=-1), type="gene")
print(feature)

type: gene
location: [5:18](-)
qualifiers:



In [None]:
feature_seq = seq[feature.location.start : feature.location.end].reverse_complement()
print(feature_seq)

AGCCTTTGCCGTC


In [None]:
feature_seq = feature.extract(seq)
print(feature_seq)

AGCCTTTGCCGTC


In [None]:
print(len(feature_seq))
print(len(feature))

13
13


In [None]:
print(len(feature.location))

13


In [None]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
record1 = SeqRecord(Seq("ACGT"), id="test")
record2 = SeqRecord(Seq("ACGT"), id="test")
print(record1)
print(record2)
print(record)

ID: test
Name: <unknown name>
Description: <unknown description>
Number of features: 0
Seq('ACGT')
ID: test
Name: <unknown name>
Description: <unknown description>
Number of features: 0
Seq('ACGT')
ID: NC_005816.1
Name: NC_005816
Description: Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence
Database cross-references: Project:58037
Number of features: 41
/molecule_type=DNA
/topology=circular
/data_file_division=BCT
/date=21-JUL-2008
/accessions=['NC_005816']
/sequence_version=1
/gi=45478711
/keywords=['']
/source=Yersinia pestis biovar Microtus str. 91001
/organism=Yersinia pestis biovar Microtus str. 91001
/taxonomy=['Bacteria', 'Proteobacteria', 'Gammaproteobacteria', 'Enterobacteriales', 'Enterobacteriaceae', 'Yersinia']
/references=[Reference(title='Genetics of metabolic variations between Yersinia pestis biovars and the proposal of a new biovar, microtus', ...), Reference(title='Complete genome sequence of Yersinia pestis strain 91001, an isolate avirulen

In [None]:
# Compare specific attributes of interest
record.seq == record1.seq and record.id == record1.id

False

In [None]:
# Compare specific attributes of interest
record1.seq == record2.seq and record1.id == record2.id

True

In [None]:
# Compare specific attributes of interest
record.seq == record2.seq and record.id == record2.id

False

In [None]:
# Compare specific attributes of interest
record1.seq == record2.seq and record1.id == record2.id

True

In [None]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
record = SeqRecord(
    Seq(
        "MMYQQGCFAGGTVLRLAKDLAENNRGARVLVVCSEITAVTFRGPSETHLDSMVGQALFGD"
        "GAGAVIVGSDPDLSVERPLYELVWTGATLLPDSEGAIDGHLREVGLTFHLLKDVPGLISK"
        "NIEKSLKEAFTPLGISDWNSTFWIAHPGGPAILDQVEAKLGLKEEKMRATREVLSEYGNM"
        "SSAC"
    ),
    id="gi|14150838|gb|AAK54648.1|AF376133_1",
    description="chalcone synthase [Cucumis sativus]",
)
print(record.format("fasta"))

>gi|14150838|gb|AAK54648.1|AF376133_1 chalcone synthase [Cucumis sativus]
MMYQQGCFAGGTVLRLAKDLAENNRGARVLVVCSEITAVTFRGPSETHLDSMVGQALFGD
GAGAVIVGSDPDLSVERPLYELVWTGATLLPDSEGAIDGHLREVGLTFHLLKDVPGLISK
NIEKSLKEAFTPLGISDWNSTFWIAHPGGPAILDQVEAKLGLKEEKMRATREVLSEYGNM
SSAC



In [None]:
from Bio import SeqIO
record = SeqIO.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/NC_005816.gb", "genbank")
record

SeqRecord(seq=Seq('TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGG...CTG'), id='NC_005816.1', name='NC_005816', description='Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence', dbxrefs=['Project:58037'])

In [None]:
len(record)

9609

In [None]:
len(record.features)

41

In [None]:
print(record.features[19])

type: misc_feature
location: [3497:3626](+)
qualifiers:
    Key: locus_tag, Value: ['YP_pPCP04']
    Key: note, Value: ['ProfileScan match to entry PS50323 ARG_RICH, E-value 8.981']



In [None]:
print(record.features[18])

type: CDS
location: [3485:3857](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478715', 'GeneID:2767720']
    Key: locus_tag, Value: ['YP_pPCP04']
    Key: note, Value: ['Best Blastp hit = gi|321919|pir||JQ1541 hypothetical 16.9K protein - Salmonella typhi murium plasmid NTP16.']
    Key: product, Value: ['hypothetical protein']
    Key: protein_id, Value: ['NP_995570.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MSKKRRPQKRPRRRRFFHRLRPPDEHHKNRRSSQRWRNPTGLKDTRRFPPEAPSCALLFRPCRLPDTSPPFSLREAWRFLIAHAVGISVRCRSFAPSWAVCTNPPFSPTTAPYPVTIVLSPTR']



In [None]:
print(record.features[17])

type: gene
location: [3485:3857](+)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767720']
    Key: locus_tag, Value: ['YP_pPCP04']



In [None]:
sub_record = record[4300:4800]
sub_record
print(sub_record)

ID: NC_005816.1
Name: NC_005816
Description: Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence
Number of features: 2
/molecule_type=DNA
Seq('ATAAATAGATTATTCCAAATAATTTATTTATGTAAGAACAGGATGGGAGGGGGA...TTA')


In [None]:
print(sub_record)

ID: NC_005816.1
Name: NC_005816
Description: Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence
Number of features: 2
/molecule_type=DNA
Seq('ATAAATAGATTATTCCAAATAATTTATTTATGTAAGAACAGGATGGGAGGGGGA...TTA')


In [None]:
print(sub_record.features)

[SeqFeature(SimpleLocation(ExactPosition(42), ExactPosition(480), strand=1), type='gene', qualifiers=...), SeqFeature(SimpleLocation(ExactPosition(42), ExactPosition(480), strand=1), type='CDS', qualifiers=...)]


In [None]:
print(sub_record.features[1])

type: CDS
location: [42:480](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478716', 'GeneID:2767712']
    Key: gene, Value: ['pim']
    Key: locus_tag, Value: ['YP_pPCP05']
    Key: note, Value: ['similar to many previously sequenced pesticin immunity protein entries of Yersinia pestis plasmid pPCP, e.g. gi| 16082683|,ref|NP_395230.1| (NC_003132) , gi|1200166|emb|CAA90861.1| (Z54145 ) , gi|1488655| emb|CAA63439.1| (X92856) , gi|2996219|gb|AAC62543.1| (AF053945) , and gi|5763814|emb|CAB531 67.1| (AL109969)']
    Key: product, Value: ['pesticin immunity protein']
    Key: protein_id, Value: ['NP_995571.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MGGGMISKLFCLALIFLSSSGLAEKNTYTAKDILQNLELNTFGNSLSHGIYGKQTTFKQTEFTNIKSNTKKHIALINKDNSWMISLKILGIKRDEYTVCFEDFSLIRPPTYVAIHPLLIKKVKSGNFIVVKEIKKSIPGCTVYYH']



In [None]:
sub_record.annotations

{'molecule_type': 'DNA'}

In [None]:
sub_record.dbxrefs

[]

In [None]:
sub_record.annotations["topology"] = "linear"
print(sub_record.annotations["topology"])

linear


In [None]:
sub_record.id

'NC_005816.1'

In [None]:
sub_record.description

'Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence'

In [None]:
sub_record.name

'NC_005816'

In [None]:
sub_record.description = (
    "Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, partial"
)
print(sub_record.format("genbank")[:200] + "...")

LOCUS       NC_005816                500 bp    DNA     linear   UNK 01-JAN-1980
DEFINITION  Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, partial.
ACCESSION   NC_005816
VERSION     NC_0058...


In [None]:
from Bio import SeqIO

# Create a sample FASTQ file for testing
with open("example.fastq", "w") as f:
	f.write(
		"@SEQ_ID\n"
		"GATTTGGGGTTTCCCAGTCACGAC\n"
		"+\n"
		"IIIIIIIIIIIIIIIIIIIIIIII\n"
	)

# Parse the FASTQ file
record = next(SeqIO.parse("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/example.fastq", "fastq"))
print(len(record))

24


In [None]:
print(record.seq)

GATTTGGGGTTTCCCAGTCACGAC


In [None]:
print(record.letter_annotations["phred_quality"])

[40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40]


In [None]:
left = record[:20]
print(left.seq)

GATTTGGGGTTTCCCAGTCA


In [None]:
print(left.letter_annotations["phred_quality"])

[40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40]


In [None]:
right = record[21:]
print(right.seq)

GAC


In [None]:
print(right.letter_annotations["phred_quality"])

[40, 40, 40]


In [None]:
edited = left + right
len(edited)

23

In [None]:
print(edited.seq)

GATTTGGGGTTTCCCAGTCAGAC


In [None]:
print(edited.letter_annotations["phred_quality"])

[40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40]


In [None]:
edited = record[:20] + record[21:]
print(edited)

ID: SEQ_ID
Name: SEQ_ID
Description: SEQ_ID
Number of features: 0
Per letter annotation for: phred_quality
Seq('GATTTGGGGTTTCCCAGTCAGAC')


In [None]:
from Bio import SeqIO
record = SeqIO.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/NC_005816.gb", "genbank")
record
print(record)

ID: NC_005816.1
Name: NC_005816
Description: Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence
Database cross-references: Project:58037
Number of features: 41
/molecule_type=DNA
/topology=circular
/data_file_division=BCT
/date=21-JUL-2008
/accessions=['NC_005816']
/sequence_version=1
/gi=45478711
/keywords=['']
/source=Yersinia pestis biovar Microtus str. 91001
/organism=Yersinia pestis biovar Microtus str. 91001
/taxonomy=['Bacteria', 'Proteobacteria', 'Gammaproteobacteria', 'Enterobacteriales', 'Enterobacteriaceae', 'Yersinia']
/references=[Reference(title='Genetics of metabolic variations between Yersinia pestis biovars and the proposal of a new biovar, microtus', ...), Reference(title='Complete genome sequence of Yersinia pestis strain 91001, an isolate avirulent to humans', ...), Reference(title='Direct Submission', ...), Reference(title='Direct Submission', ...)]
/comment=PROVISIONAL REFSEQ: This record has not yet been subject to final
NCBI review. The 

In [None]:
print(len(record))

9609


In [None]:
print(len(record.features))

41


In [None]:
print(record.dbxrefs)

['Project:58037']


In [None]:
print(record.annotations.keys)

<built-in method keys of dict object at 0x116105cc0>


In [None]:
shifted = record[2000:] + record[:2000]
shifted
print(shifted)

ID: NC_005816.1
Name: NC_005816
Description: Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence
Number of features: 40
/molecule_type=DNA
Seq('GATACGCAGTCATATTTTTTACACAATTCTCTAATCCCGACAAGGTCGTAGGTC...GGA')


In [None]:
print(len(shifted))

9609


In [None]:
print(len(shifted.features))

40


In [None]:
print(shifted.dbxrefs)

[]


In [None]:
shifted.annotations = record.annotations.copy()
print(shifted.dbxrefs)
print(shifted.annotations)
print(shifted.annotations.copy())
# shifted.dbxrefs = record.dbxrefs[:]


[]
{'molecule_type': 'DNA', 'topology': 'circular', 'data_file_division': 'BCT', 'date': '21-JUL-2008', 'accessions': ['NC_005816'], 'sequence_version': 1, 'gi': '45478711', 'keywords': [''], 'source': 'Yersinia pestis biovar Microtus str. 91001', 'organism': 'Yersinia pestis biovar Microtus str. 91001', 'taxonomy': ['Bacteria', 'Proteobacteria', 'Gammaproteobacteria', 'Enterobacteriales', 'Enterobacteriaceae', 'Yersinia'], 'references': [Reference(title='Genetics of metabolic variations between Yersinia pestis biovars and the proposal of a new biovar, microtus', ...), Reference(title='Complete genome sequence of Yersinia pestis strain 91001, an isolate avirulent to humans', ...), Reference(title='Direct Submission', ...), Reference(title='Direct Submission', ...)], 'comment': 'PROVISIONAL REFSEQ: This record has not yet been subject to final\nNCBI review. The reference sequence was derived from AE017046.\nCOMPLETENESS: full length.'}
{'molecule_type': 'DNA', 'topology': 'circular', 'd

In [None]:
shifted.annotations.keys

<function dict.keys>

In [None]:
from Bio import SeqIO
rec = SeqIO.read("/Users/macbookairm2/Library/Mobile Documents/com~apple~CloudDocs/Biopython(own)/NC_005816.gb", "genbank")
print(rec.id, len(rec), len(rec.features), len(rec.dbxrefs), len(rec.annotations))
print(rec)

NC_005816.1 9609 41 1 13
ID: NC_005816.1
Name: NC_005816
Description: Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence
Database cross-references: Project:58037
Number of features: 41
/molecule_type=DNA
/topology=circular
/data_file_division=BCT
/date=21-JUL-2008
/accessions=['NC_005816']
/sequence_version=1
/gi=45478711
/keywords=['']
/source=Yersinia pestis biovar Microtus str. 91001
/organism=Yersinia pestis biovar Microtus str. 91001
/taxonomy=['Bacteria', 'Proteobacteria', 'Gammaproteobacteria', 'Enterobacteriales', 'Enterobacteriaceae', 'Yersinia']
/references=[Reference(title='Genetics of metabolic variations between Yersinia pestis biovars and the proposal of a new biovar, microtus', ...), Reference(title='Complete genome sequence of Yersinia pestis strain 91001, an isolate avirulent to humans', ...), Reference(title='Direct Submission', ...), Reference(title='Direct Submission', ...)]
/comment=PROVISIONAL REFSEQ: This record has not yet been subject t

In [None]:
rc = rec.reverse_complement(id="TESTING")
print(rc.id, len(rc), len(rc.features), len(rc.dbxrefs), len(rc.annotations))
print(rc)
print(len(rc))
print(len(rc.features))
print(len(rc.dbxrefs))
print(len(rc.annotations))

TESTING 9609 41 0 0
ID: TESTING
Name: <unknown name>
Description: <unknown description>
Number of features: 41
Seq('CAGGGGTCGGGGTACGCATTCCCTCATGCGTCAATATTATCTGGCATTGCGATG...ACA')
9609
41
0
0
