In [3]:
from alefuncs import *

In [81]:
## Draft implementation
from collections import Counter

def xna_calc(sequence, t='dsDNA', p=0):
    '''str => dict
    Return basic "biomath" calculations based on the input sequence.
    Arguments:
        t (type) :'ssDNA' or 'dsDNA'
        p (phosphates): 0,1,2
        #in case if ssDNA having 3'P, you should pass 2 i.e., 2 phospates present in 1 dsDNA molecule 
    '''
    r = {}
    
    #check inputs
    c = Counter(sequence.upper())
    for k in c.keys():
        if k in 'ACGNT':
            pass
        else:
            raise ValueError(f'Wrong sequence passed: "sequence" contains invalid characters, only "ATCGN" are allowed.')
    if t not in ['ssDNA','dsDNA']:
        raise ValueError(f'Wrong DNA type passed: "t" can be "ssDNA" or "dsDNA". "{t}" was passed instead.')
    if not 0 <= p <= 2:
        raise ValueError(f'Wrong number of 5\'-phosphates passed: "p" must be an integer from 0 to 4. {p} was passed instead.')
    
    
    ##Calculate:
    
    #length
    r['len'] = len(sequence)
    

    #molecular weight
    #still unsure about what is the best method to do this
    
    #s = 'ACTGACTGACTATATTCGCGATCGATGCGCTAGCTCGTACGC'
    #bioinformatics.org : 25986.8  Da
    #Thermo             : 25854.8  Da 
    #Promega            : 27720.0  Da 
    #MolBioTools        : 25828.77 Da
    #This function      : 25828.86 Da #Similar to OligoCalc implementation
    
    #DNA Molecular Weight (typically for synthesized DNA oligonucleotides.
    #The OligoCalc DNA MW calculations assume that there is not a 5' monophosphate)
    #Anhydrous Molecular Weight = (An x 313.21) + (Tn x 304.2) + (Cn x 289.18) + (Gn x 329.21) - 61.96
    #An, Tn, Cn, and Gn are the number of each respective nucleotide within the polynucleotide.
    #The subtraction of 61.96 gm/mole from the oligonucleotide molecular weight takes into account the removal
    #of HPO2 (63.98) and the addition of two hydrogens (2.02).
    #Alternatively, you could think of this of the removal of a phosphate and the addition of a hydroxyl,
    #since this formula calculates the molecular weight of 5' and 3' hydroxylated oligonucleotides.
    #Please note: this calculation works well for synthesized oligonucleotides.
    #If you would like an accurate MW for restriction enzyme cut DNA, please use:
    #Molecular Weight = (An x 313.21) + (Tn x 304.2) + (Cn x 289.18) + (Gn x 329.21) - 61.96 + 79.0
    #The addition of 79.0 gm/mole to the oligonucleotide molecular weight takes into account the 5' monophosphate
    #left by most restriction enzymes.
    #No phosphate is present at the 5' end of strands made by primer extension,
    #so no adjustment to the OligoCalc DNA MW calculation is necessary for primer extensions.
    #That means that for ssDNA, you need to add 79.0 to the value calculated by OligoCalc
    #to get the weight with a 5' monophosphate.
    #Finally, if you need to calculate the molecular weight of phosphorylated dsDNA,
    #don't forget to adjust both strands. You can automatically perform either addition
    #by selecting the Phosphorylated option from the 5' modification select list.
    #Please note that the chemical modifications are only valid for DNA and may not be valid for RNA
    #due to differences in the linkage chemistry, and also due to the lack of the 5' phosphates
    #from synthetic RNA molecules. RNA Molecular Weight (for instance from an RNA transcript).
    #The OligoCalc RNA MW calculations assume that there is a 5' triphosphate on the molecule)
    #Molecular Weight = (An x 329.21) + (Un x 306.17) + (Cn x 305.18) + (Gn x 345.21) + 159.0
    #An, Un, Cn, and Gn are the number of each respective nucleotide within the polynucleotide.
    #Addition of 159.0 gm/mole to the molecular weight takes into account the 5' triphosphate.
    
    if t == 'ssDNA':
        mw = ((c['A']*313.21)+(c['T']*304.2)+(c['C']*289.18)+(c['G']*329.21)+(c['N']*303.7)-61.96)+(p*79.0)
        
    elif t =='dsDNA':
        mw_F = ((c['A']*313.21)+(c['T']*304.2)+(c['C']*289.18)+(c['G']*329.21)+(c['N']*303.7)-61.96)+(p*79.0)
        d = Counter(complement(sequence.upper())) #complement sequence
        mw_R = ((d['A']*313.21)+(d['T']*304.2)+(d['C']*289.18)+(d['G']*329.21)+(d['N']*303.7)-61.96)+(p*79.0)
        mw = mw_F + mw_R
    elif t == 'ssRNA':
        pass
    elif t == 'dsRNA':
        pass
    else:
        return ValueError(f'Nucleic acid type not understood: "{t}"')
        
    r['MW in Daltons'] = mw
    
    #in ng
    r['MW in ng'] = mw * 1.6605402e-15
    
    #molecules in 1ng
    r['molecules per ng'] = 1/r['MW in ng']
    
    #ng for 10e10 molecules
    r['ng per billion molecules'] = (10**9)/r['molecules per ng'] #(1 billions)
    
    #moles per ng
    r['moles per ng'] = (r['MW in ng'] * mw)
    return r

In [76]:
S1_WT  = 'AGGCTGGGGCACAGCAGGCCAGTGTGCAGGGTGGCAAGTGGCTCCTGACCTGGAGTCTTCCAGTGTGATGATGGTGAGGATGGGCCTCCGGTTCATGCCGCCCATGCAGGAACTGTTACACATGTAGTTGTAGTGGATGGTGGTACAGTCAGAGCCAACCTAGGAGATAACACAGGCCCAAGATGAGGCCAGTGCGCCTT'
S1_MUT = 'AGGCTGGGGCACAGCAGGCCAGTGTGCAGGGTGGCAAGTGGCTCCTGACCTGGAGTCTTCCAGTGTGAAGATGGTGAGGATGGGCCTCCGGTTCATGCCGCCCATGCAGGAACTGTTACACATGTAGTTGTCGTGGATGGTGGTACAGTCAGAGCCAACCTAGGAGATAACACAGGCCCAAGATGAGGCCAGTGCGCCTT'
s = 'ACTGACTGACTATATTCGCGATCGATGCGCTAGCTCGTACGC'

In [80]:
xna_calc(S1_MUT,'ssDNA',p=0)

{'MW in Daltons': 62194.229999999996,
 'MW in ng': 1.0327601912304599e-10,
 'len': 200,
 'molecules per ng': 9682789949.606516,
 'moles per ng': 6.42317248682312e-06,
 'ng per billion molecules': 0.10327601912304599}

In [59]:
xna_calc(S1_MUT,'ssDNA',p=0)

{'MW in Daltons': 62256.0,
 'MW in ng': 1.033785906912e-10,
 'len': 200,
 'molecules per ng': 9673182748.12895,
 'moles per ng': 6.435937542071347e-06,
 'ng per billion molecules': 0.1033785906912}