# DNA To Music
#### Author: Frank Escalante
#### Updated: 08/10/2024

In [134]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as img
import cv2
import random
import os

# Functions

In [135]:
def DNA_nmbrs(bases, seq_length):
    """Enumerate every distinct sequence of ``seq_length`` bases in random order.

    Sequences are drawn at random from ``bases`` until each possible
    combination has been produced exactly once; every new sequence is
    numbered in the order it was first drawn, starting at 0.

    Args:
        bases: Sequence of single-character bases to draw from, e.g. ``'ATGC'``.
        seq_length: Number of bases in each generated sequence (>= 0).

    Returns:
        A ``(total, base_combos)`` tuple where ``base_combos`` is a list of
        ``(index, sequence)`` tuples and ``total`` is its length.

    Raises:
        ValueError: If ``seq_length`` is negative (the target count would be
            fractional and could never be reached).
    """
    if seq_length < 0:
        raise ValueError("seq_length must be non-negative")

    # Count distinct bases so repeated characters in `bases` cannot make the
    # target size unreachable.
    target = len(set(bases)) ** seq_length

    # Membership is tracked on the bare sequences; the stored entries are
    # (index, sequence) tuples, so testing against that list never matches.
    seen = set()
    base_combos = []
    while len(base_combos) != target:
        nucleotide = ''.join(random.choice(bases) for _ in range(seq_length))
        if nucleotide not in seen:
            seen.add(nucleotide)
            base_combos.append((len(base_combos), nucleotide))
    return len(base_combos), base_combos
                
        
def fib_seq(N, seq=None):
    """Return the start of the Fibonacci sequence as a new list.

    Args:
        N: Requested length. ``N == 0`` yields ``[0]`` and ``N == 1`` yields
            ``[0, 1]``; for ``N >= 2`` the first ``N`` terms are returned.
        seq: Fallback value returned unchanged when ``N`` matches none of the
            cases above (e.g. a negative ``N``). Defaults to a fresh empty
            list on every call.

    Returns:
        A list of Fibonacci numbers, or the fallback described above.
    """
    if N == 0:
        return [0]
    if N == 1:
        return [0, 1]
    if N > 1:
        fib = [0, 1]
        for _ in range(2, N):
            fib.append(fib[-1] + fib[-2])
        return fib
    # A None default avoids handing the same shared list object to every
    # caller that falls through to this point.
    return [] if seq is None else seq
   
        
def mkDNA(length, alphabet):
    """Build a random string of ``length`` symbols.

    Args:
        length: Number of symbols to generate.
        alphabet: ``'STD'`` selects the four bases ``'ATGC'``; any other
            value is used directly as the indexable pool of symbols.

    Returns:
        The generated symbols concatenated into one string.
    """
    bits = 'ATGC' if alphabet == 'STD' else alphabet

    # Pick a random position in the pool, then look the symbol up by index.
    positions = list(range(len(bits)))
    return ''.join(bits[random.choice(positions)] for _ in range(0, length))

def music_notes(alphabet):
    """Number the notes of an alphabet.

    Args:
        alphabet: ``'STD'`` for the 12-note table, ``'Expanded'`` for the
            16-entry table (which adds ``'R'``, ``'E#'``, ``'B#'`` and
            ``'X'``), or any indexable sequence of custom symbols.

    Returns:
        A list of ``(index, note)`` tuples in alphabet order, starting at 0.
    """
    if alphabet == 'STD':
        alphabet = ['C','C#','D','D#','E','F','F#','G','G#','A','A#','B']
    elif alphabet == 'Expanded':
        alphabet = ['R','C','C#','D','D#','E','E#','F','F#','G','G#','A','A#','B','B#','X']
    return [(position, alphabet[position]) for position in range(len(alphabet))]

def quartenary(nmbr):
    """Encode a number as three base-4 digits and the matching codon.

    Digits map to letters as 0 -> A, 1 -> C, 2 -> G, 3 -> T, most significant
    digit first.

    Args:
        nmbr: Integer to encode. Values above 63 produce a leading digit
            with no letter and raise ``KeyError``.

    Returns:
        A ``(codon, digits)`` tuple: the three-letter string and the list of
        three base-4 digits it was built from.
    """
    letters = {0: "A", 1: "C", 2: "G", 3: "T"}

    if nmbr <= 3:
        # Single-digit case (also taken by negative input).
        memory = [0, 0, nmbr % 4]
    else:
        sixteens, rest = divmod(nmbr, 16)
        fours, ones = divmod(rest, 4)
        memory = [sixteens, fours, ones]

    codon = ''.join(letters[digit] for digit in memory)
    return codon, memory

def mltplr(numbr, points, multiplier):
    """Pair a number with its multiple on a modular table of codons/notes.

    ``numbr`` and ``numbr * multiplier`` are both reduced modulo
    ``len(points)``; a residue of 0 is replaced by ``len(points)``. The
    entries whose first field equals those two residues supply the notes.
    The codon comes from the entry whose first field equals the absolute
    difference of the two residues, falling back to the last entry of
    ``points`` when no entry matches (or the matched codon is empty).

    Args:
        numbr: Number to look up.
        points: Sequence of ``(index, codon, note)`` entries.
        multiplier: Factor applied to ``numbr`` before reduction.

    Returns:
        ``(codon, (note_for_numbr, note_for_multiple))``.
    """
    modulus = len(points)
    original = numbr % modulus or modulus
    new = (numbr * multiplier) % modulus or modulus

    # When several entries share an index the last one wins.
    for entry in points:
        key = entry[0]
        if key == original:
            index1, music_note1 = key, entry[2]
        if key == new:
            index2, music_note2 = key, entry[2]

    gap = abs(index2 - index1) % modulus
    seq = ''
    for entry in points:
        if entry[0] == gap:
            seq = entry[1]
    if seq == '':
        seq = points[modulus - 1][1]
    return (seq, (music_note1, music_note2))
    
def DNAmusic(seq, points):
    """Translate a nucleotide sequence into codon numbers and notes.

    The sequence is read as consecutive, non-overlapping triplets starting at
    position 0 (``seq[0:3]``, ``seq[3:6]``, ...). A trailing fragment shorter
    than three symbols is ignored, and triplets with no entry in ``points``
    are skipped.

    Args:
        seq: Nucleotide sequence (string or sequence of single characters).
        points: Iterable of ``(number, codon, note)`` entries.

    Returns:
        A ``(number_seq, music_seq)`` tuple of parallel lists holding, for
        every matched codon, the entry's number and its note. If several
        entries share a codon, each of them contributes in table order.
    """
    # Index the table once so each triplet is a dictionary lookup rather than
    # a scan over every entry.
    by_codon = {}
    for entry in points:
        by_codon.setdefault(entry[1], []).append(entry)

    number_seq = []
    music_seq = []
    # Step through whole triplets only; starting at 0 keeps the reading frame
    # aligned with the first nucleotide.
    for start in range(0, len(seq) - 2, 3):
        codon = ''.join(seq[start:start + 3])
        for entry in by_codon.get(codon, ()):
            number_seq.append(entry[0])
            music_seq.append(entry[2])
    return number_seq, music_seq

def music_nlyzr(music, nmbr_seq):
    """Count how often each note and each number occurs.

    Args:
        music: Iterable of notes.
        nmbr_seq: Iterable of numbers.

    Returns:
        A ``(nmbrs, notes)`` tuple of dictionaries mapping each distinct
        number / note to its number of occurrences, with keys in order of
        first appearance.
    """
    # Tally in a single pass instead of re-counting the whole sequence for
    # every element.
    notes = {}
    for note in music:
        notes[note] = notes.get(note, 0) + 1

    nmbrs = {}
    for number in nmbr_seq:
        nmbrs[number] = nmbrs.get(number, 0) + 1

    return nmbrs, notes

### Test Run

In [136]:
# Build the 64 (index, codon) pairs by encoding 0..63 with quartenary().
codons = []
for i in range(0,64):
    codons.append((i,quartenary(i)[0]))

# 'Expanded' note table as (index, note) tuples.
notes = music_notes('Expanded')

# Lookup table of (number, codon, note) triples. Numbers are 1-based
# (count+1); the note index wraps back to 0 after every 16 codons.
cdn_nmbr_nte = []
count = 0
countr = 0
while count != len(codons):
    code = (count+1, codons[count][1], notes[countr][1])
    cdn_nmbr_nte.append(code)
    count += 1
    countr += 1
    if countr == 16:
         countr = 0
            
# NOTE: `notes` is rebound to an empty list here, discarding the table above.
notes = []
# Concatenate the codon returned by mltplr() for each i in 0..63 (multiplier 38).
DNAseq = ''
for i in range(0,64):
    output = mltplr(i,cdn_nmbr_nte,38)
    DNAseq += output[0]

# GFP nucleotide sequence (source: Works Cited, entry 1).
GFP = 'ATGCGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGA\
TGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACAAACGGAAAACTTACCCTTAAATTTATT\
TGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTGACTTATGGTGTTCAATGC\
TTTGCGAGATACCCAGATCATATGAAACAGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTAC\
AGGAAAGAACTATATCTTTCAAAGATGACGGGACCTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGA\
TACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAA\
TTGGAATACAACTTTAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTA\
ACTTCAAAATTAGACACAACGTTGAAGATGGAAGCGTTCAACTAGCAGACCATTATCAACAAAATACTCC\
AATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCACACAATCTGCCCTTTCGAAAGAT\
CCCAACGAAAAGAGAGACCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGATTACACATGGCATGG\
ATGAACTATACAAAAGGCCTGCAGCAAACGACGAAAACTACGCTGCATCAGTT' 

# RFP nucleotide sequence (source: Works Cited, entry 2).
RFP = 'ATGGTGAGCAAGGGCGAGGAGGATAACATGGCCATCATCAAGGAGTTCATGCGCTTCAAGGTGCACATGG\
AGGGCTCCGTGAACGGCCACGAGTTCGAGATCGAGGGCGAGGGCGAGGGCCGCCCCTACGAGGGCACCCA\
GACCGCCAAGCTGAAGGTGACCAAGGGTGGCCCCCTGCCCTTCGCCTGGGACATCCTGTCCCCTCAGTTC\
ATGTACGGCTCCAAGGCCTACGTGAAGCACCCCGCCGACATCCCCGACTACTTGAAGCTGTCCTTCCCCG\
AGGGCTTCAAGTGGGAGCGCGTGATGAACTTCGAGGACGGCGGCGTGGTGACCGTGACCCAGGACTCCTC\
CCTGCAGGACGGCGAGTTCATCTACAAGGTGAAGCTGCGCGGCACCAACTTCCCCTCCGACGGCCCCGTA\
ATGCAGAAGAAGACCATGGGCTGGGAGGCCTCCTCCGAGCGGATGTACCCCGAGGACGGCGCCCTGAAGG\
GCGAGATCAAGCAGAGGCTGAAGCTGAAGGACGGCGGCCACTACGACGCTGAGGTCAAGACCACCTACAA\
GGCCAAGAAGCCCGTGCAGCTGCCCGGCGCCTACAACGTCAACATCAAGTTGGACATCACCTCCCACAAC\
GAGGACTACACCATCGTGGAACAGTACGAACGCGCCGAGGGCCGCCACTCCACCGGCGGCATGGACGAGC\
TGTACAAGTAA'

# DNAmusic() returns (number_seq, music_seq); music_nlyzr() takes the notes
# first and the numbers second, hence the [1], [0] argument order.
music_trans1 = DNAmusic(GFP,cdn_nmbr_nte)
GFP_analysis = music_nlyzr(music_trans1[1],music_trans1[0])

music_trans2 = DNAmusic(RFP,cdn_nmbr_nte)
RFP_analysis = music_nlyzr(music_trans2[1],music_trans2[0])


In [154]:
# Report each gene's length in nucleotides and in codons; len(...)/3 is true
# division, so the second figure prints as a float.
print("The gene for GFP is approximately: " + str(len(GFP)) +" nucleotides long\
 or: " + str(len(GFP)/3) + " Amino Acids Long")
print("The gene for RFP is approximately: " + str(len(RFP)) +" nucleotides long\
 or: " + str(len(RFP)/3) + " Amino Acids Long")

# Generate a random nucleotide sequence 16,800 nucleotides (16.8 kbp) long
syn_DNA = mkDNA(16800,'STD')

# Translate the synthetic sequence and tally its numbers and notes; the
# translation tuple (number_seq, music_seq) is printed in full.
music_trans_synDNA = DNAmusic(syn_DNA,cdn_nmbr_nte)
synDNA_analysis = music_nlyzr(music_trans_synDNA[1],music_trans_synDNA[0])
print(music_trans_synDNA)
#print(synDNA_analysis)

The gene for GFP is approximately: 753 nucleotides long or: 251.0 Amino Acids Long
The gene for RFP is approximately: 711 nucleotides long or: 237.0 Amino Acids Long
([9, 50, 34, 50, 57, 43, 39, 3, 28, 46, 17, 56, 43, 30, 63, 54, 49, 42, 37, 32, 2, 13, 18, 23, 62, 17, 28, 23, 56, 53, 15, 25, 13, 42, 31, 16, 63, 37, 34, 5, 33, 48, 59, 63, 37, 45, 30, 58, 8, 43, 23, 22, 60, 59, 5, 14, 52, 28, 27, 25, 5, 56, 23, 43, 35, 18, 33, 28, 55, 39, 3, 10, 7, 42, 43, 57, 1, 4, 26, 47, 10, 14, 64, 62, 57, 24, 33, 41, 54, 5, 58, 18, 32, 18, 25, 37, 54, 43, 28, 12, 35, 60, 36, 35, 49, 34, 42, 50, 64, 32, 11, 32, 60, 21, 62, 30, 48, 31, 48, 52, 55, 5, 22, 64, 2, 33, 7, 60, 47, 32, 28, 63, 20, 53, 4, 15, 15, 60, 50, 62, 13, 41, 10, 48, 39, 10, 10, 53, 61, 2, 14, 15, 34, 1, 2, 22, 30, 60, 8, 20, 32, 31, 35, 21, 28, 43, 44, 64, 15, 4, 5, 57, 10, 55, 4, 45, 24, 39, 6, 4, 26, 29, 20, 12, 7, 42, 51, 3, 6, 53, 42, 14, 42, 46, 52, 62, 47, 52, 39, 58, 12, 31, 15, 9, 58, 12, 2, 21, 51, 29, 14, 53, 10, 17, 50, 36

### Works Cited
1) GFP Sequence

“Aequorea Victoria Isolate SGFP-206 Green Fluorescent Protein (GFP) 
          Gene - Nucleotide - NCBI.” National Center for Biotechnology 
          Information, U.S. National Library of Medicine, 
          www.ncbi.nlm.nih.gov/nuccore/JX472995.1?report=fasta. 
          Accessed 10 Aug. 2024. 
          
2) RFP Sequence

“Synthetic Construct Monomeric Red Fluorescent Protein Gene, Complete 
          CDS - Nucleotide - NCBI.” National Center for Biotechnology 
          Information, U.S. National Library of Medicine, 
          www.ncbi.nlm.nih.gov/nuccore/AY678264.1?report=fasta. 
          Accessed 10 Aug. 2024. 