In [1]:
from Bio.Seq import Seq

In [4]:
# Example DNA sequence (27 bases) that the GC/AT content, melting-temperature
# and molecular-weight cells below all operate on.
MAIN_SEQ = Seq('CGATCGATCCCGATCAGTAGGATCGAT')

Base Theory : 

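GC content, or guanine-cytosine content, is the percentage of guanine (G) and cytosine (C) bases in a DNA or RNA molecule. The remaining bases are adenine (A) and thymine (T) in DNA, and adenine (A) and uracil (U) in RNA. The code below reports GC content as a fraction between 0 and 1 (for example 0.52) rather than as a percentage.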

GC content is important because it can affect the stability of DNA and RNA, the folding of proteins, and the ability of DNA to be transcribed into RNA.


In biology, the melting point, or melting temperature (Tm), of DNA or RNA is the temperature at which half of the double-stranded molecules have separated into single strands. The Tm is determined by the length of the molecule, the composition of its bases, and the concentration of ions in the solution.

In [12]:
# GC Content -> Calculate the fraction (0.0-1.0) of G and C bases in the DNA / RNA.

# Deprecated method, kept for reference only.
# NOTE(review): per the Biopython docs, Bio.SeqUtils.GC reported a percentage
# (0-100) and was replaced by gc_fraction, which reports a fraction (0-1).

# from Bio.SeqUtils import GC

# gc_main_seq = GC(MAIN_SEQ)
# print(f'GC Main Sequence : {gc_main_seq}')

from Bio.SeqUtils import gc_fraction 
# Library result for MAIN_SEQ; the recorded run printed 0.5185185185185185 (14 of 27 bases).
gc_main_seq = gc_fraction(MAIN_SEQ) 
print(f'GC Main Sequence : {gc_main_seq}')

# Manual Methods
def gc_content(input):
    """Return the fraction (0.0-1.0) of G and C bases in a sequence.

    Args:
        input: Any sequence-like object that supports ``len()`` and
            ``.count(str)``, e.g. a ``str`` or a Biopython ``Seq``.

    Returns:
        float: (number of G + number of C) / total length. Upper- and
        lower-case letters are both counted, so soft-masked sequences
        give the same result as their upper-case form. An empty sequence
        yields 0.0 instead of raising ZeroDivisionError.
    """
    length = len(input)
    # Guard the division: an empty sequence contains no G/C at all.
    if length == 0:
        return 0.0
    # Count both cases explicitly rather than upper-casing the input, so the
    # function works unchanged for str and Seq without copying or converting.
    gc_total = sum(input.count(base) for base in ('G', 'C', 'g', 'c'))
    return gc_total / length

# Cross-check: the hand-written count should print the same value as gc_fraction.
print(f'GC Manual Methods : {gc_content(MAIN_SEQ)}')

GC Main Sequence : 0.5185185185185185
GC Manual Methods : 0.5185185185185185


In [17]:
# AT Content -> Calculate the fraction (0.0-1.0) of A and T bases in the DNA

def at_content(input):
    """Return the fraction (0.0-1.0) of A and T bases in a DNA sequence.

    Args:
        input: Any sequence-like object that supports ``len()`` and
            ``.count(str)``, e.g. a ``str`` or a Biopython ``Seq``.

    Returns:
        float: (number of A + number of T) / total length. Upper- and
        lower-case letters are both counted. An empty sequence yields 0.0
        instead of raising ZeroDivisionError. Only A and T are counted;
        U (RNA) is not.
    """
    length = len(input)
    # Guard the division: an empty sequence contains no A/T at all.
    if length == 0:
        return 0.0
    # Count both cases explicitly so str and Seq inputs are handled alike.
    at_total = sum(input.count(base) for base in ('A', 'T', 'a', 't'))
    return at_total / length

# Direct A/T count for the example sequence (13 of its 27 bases are A or T).
print(f'Manual AT Content : {at_content(MAIN_SEQ)}')

# AT Content using GC Content
def at_content_using_gc(input):
    """Return the AT fraction of a sequence as the complement of its GC fraction.

    Args:
        input: Sequence-like object accepted by ``gc_content`` that also
            supports ``len()``.

    Returns:
        float: ``1 - gc_content(input)``, or 0.0 for an empty sequence.

    Note:
        The subtraction treats every symbol that is not G or C as A/T, so the
        result equals a direct A/T count only when the sequence contains
        nothing but A, C, G and T (no N or other ambiguity codes).
    """
    # gc_content cannot give a meaningful value for an empty sequence, and the
    # complement of "no GC" would otherwise be reported as 100% AT.
    if len(input) == 0:
        return 0.0
    return 1 - gc_content(input)

# The recorded output differs from the direct count only in the last digits
# (0.4814814814814815 vs 0.48148148148148145): floating-point rounding of 1 - GC.
print(f'AT Content with GC Content : {at_content_using_gc(MAIN_SEQ)}')

Manual AT Content : 0.48148148148148145
AT Content with GC Content : 0.4814814814814815


In [20]:
from Bio.SeqUtils import MeltingTemp

# Melting temperature (Tm) -> affected by GC content.
# The higher the GC fraction, the more stable the DNA duplex (higher Tm).

# Three methods to calculate the melting temperature:

# Tm_Wallace (rule of thumb; not very accurate because it only counts bases)
# Tm_GC (empirical formula based on the GC content)
# Tm_NN (based on nearest-neighbor thermodynamics)

# All three are called with the sequence only, so Biopython's default
# parameters apply (no salt or strand concentrations are passed here).
# NOTE(review): results are presumably in degrees Celsius -- confirm in the docs.

# Tm_Wallace
# The recorded 82.0 equals 4 * (G + C) + 2 * (A + T) = 4 * 14 + 2 * 13 for MAIN_SEQ.
# NOTE(review): Biopython documents this rule for primers of about 14-20 nt;
# MAIN_SEQ is 27 nt, which presumably explains why this value is far above the
# other two estimates.
wallace_temp = MeltingTemp.Tm_Wallace(MAIN_SEQ)
print(f'Wallace Melting Point : {wallace_temp}')

# Tm_GC
gc_temp = MeltingTemp.Tm_GC(MAIN_SEQ)
print(f'GC Melting Point : {gc_temp}')

# Tm_NN
nn_temp = MeltingTemp.Tm_NN(MAIN_SEQ)
print(f'NN Melting Point : {nn_temp}')



Wallace Melting Point : 82.0
GC Melting Point : 58.939939109014944
NN Melting Point : 59.28973077206433


Base Theory : 

Molecular weight is the mass of a single molecule of a substance. It is measured in atomic mass units (amu), also called daltons (Da).

In [22]:
# Molecular Weight

from Bio.SeqUtils import molecular_weight
# Only the sequence is passed, so Biopython's defaults apply.
# NOTE(review): per the Biopython docs the defaults are single-stranded, linear
# DNA with average (not monoisotopic) masses and an assumed 5' monophosphate --
# confirm for the installed version.
weight = molecular_weight(MAIN_SEQ)
print(f'Molecular Weight : {weight}')

Molecular Weight : 8364.3339
