# CS5002 Final Project Group 3

# 1. Sonification of DNA Algorithm

In [2]:
import soundfile as sf
from playsound import playsound
import numpy as np
import matplotlib.pyplot as plt

In [91]:
def nucleotides_to_sound(nucleotides):
    """
    Map a DNA sequence to the note frequencies of an Fm7 chord.

    Each nucleotide is assigned one note of the chord:
    Thymine (T) - F3 (174.61 Hz), Guanine (G) - Ab3 (207.65 Hz),
    Cytosine (C) - C4 (261.63 Hz), Adenine (A) - Eb4 (311.13 Hz).
    The lookup is case-insensitive.

    Args:
    - nucleotides (str): sequence of nucleotides in DNA

    Returns:
    - note_freqs (list of float): List of frequencies of the notes in Hz,
      one per nucleotide, in sequence order. Empty for an empty sequence.

    Raises:
    - KeyError: if the sequence contains any symbol other than A, T, C or G.
    """

    # Define the mapping of nucleotides to note frequencies
    note_mapping = {'A': 311.13, 'T': 174.61, 'C': 261.63, 'G': 207.65}
    # Alternative mapping, one octave higher (F4, Ab4, C5, Eb5):
    # note_mapping = {'A': 622.25, 'T': 349.23, 'C': 523.25, 'G': 415.30}

    # Normalise case once; a string can be iterated directly, no list() needed
    sequence = nucleotides.upper()

    # Fail early with a message naming the offending symbols instead of a
    # bare KeyError('N') raised from the middle of the lookup.
    unknown = sorted(set(sequence).difference(note_mapping))
    if unknown:
        raise KeyError(
            f"Unsupported nucleotide symbol(s) {unknown}; "
            "expected only A, T, C or G"
        )

    return [note_mapping[nuc] for nuc in sequence]



The F minor seventh (Fm7) chord consists of four notes: the root note F, the minor third A♭, the perfect fifth C, and the minor seventh E♭. In theory any four notes could be used, but this specific chord was chosen for its pleasing and harmonious sound. Fm7 is often associated with moderate tragedy, light sadness, a touch of mysticism, and a sense of spaciousness.

In [25]:
# Example usage: convert a guanine-rich test sequence into its list of note
# frequencies (in Hz) and print the result.
nucleotides = "GGGGGGGGGGGGGGGGTAAGGGGGG"
note_freqs = nucleotides_to_sound(nucleotides)
print(note_freqs)

[523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25, 349.23, 293.66, 293.66, 523.25, 523.25, 523.25, 523.25, 523.25, 523.25]


In [71]:
def generate_and_play_notes(note_freqs, duration, filename):
    """
    Generates sine wave notes at the specified frequencies, saves them to a WAV file, and plays the sequence.

    The notes are rendered one after another as mono audio at 44100 samples
    per second. A frequency of 0 produces silence for that note's duration.

    Args:
        note_freqs (list of float): List of frequencies of the notes in Hz.
        duration (float): The duration of each note in seconds.
        filename (str): The filename to save the WAV file. The ".wav"
            extension is added when the name does not already end with it.
    """
    # Accept names with or without the extension so that "tune.wav" is not
    # written as "tune.wav.wav".
    if not filename.lower().endswith(".wav"):
        filename = filename + ".wav"

    # Sampling rate (the number of samples (data points) captured per second)
    sampling_rate = 44100

    # Every note has the same length, so the time axis is computed once
    # instead of once per note.
    t = np.linspace(0, duration, int(sampling_rate * duration), endpoint=False)

    # Generate the sine wave for each note
    notes = [np.sin(2 * np.pi * freq * t) for freq in note_freqs]

    # Join all notes in a single pass; np.append inside the loop would copy
    # the whole buffer again for every note. np.concatenate rejects an empty
    # list, so an empty sequence falls back to an empty sample array.
    audio_samples = np.concatenate(notes) if notes else np.array([])

    # Reshape the audio samples to 2D array (1 channel)
    audio_samples = audio_samples.reshape(-1, 1)

    # Write the audio samples to a WAV file
    sf.write(filename, audio_samples, sampling_rate)

    # Play the generated audio
    playsound(filename)



In [33]:
# Example usage:
# A4, C5, a 0 Hz entry (sin(0) = 0, i.e. a silent rest), then E5
note_freqs = [440, 523.25, 0, 659.25]
# Pass the base name only: generate_and_play_notes supplies the ".wav"
# extension itself, so the file is saved as "sequence.wav".
generate_and_play_notes(note_freqs, 0.25, "sequence")

In [72]:
def play_dna(nucleotides, filename, duration=0.25):
    """
    Sonify a DNA sequence: translate it to notes, save the audio and play it.

    The sequence is first translated into note frequencies by
    nucleotides_to_sound; those frequencies are then handed to
    generate_and_play_notes, which renders them as sine waves, writes
    them to a WAV file and plays the result.

    Args:
        nucleotides (str): Sequence of nucleotides in DNA.
        filename (str): The filename to save the WAV file (without extension).
        duration (float, optional): The duration of each note in seconds. Defaults to 0.25.
    """
    # Map the sequence to frequencies and pass them straight to the synthesizer
    generate_and_play_notes(
        nucleotides_to_sound(nucleotides),
        duration,
        filename,
    )

# 2. Practice examples

Telomeres serve as caps that protect important information on chromosomes from damage during cell division. However, they shorten with each division, limiting the potential number of divisions a cell can undergo. This has led to a theory suggesting that the maximum potential lifespan of a human, even with all the benefits of medicine, is around 120 years. Beyond this point, chromosomes may lack sufficient protection at their ends.

In [84]:
# Sonify three consecutive repeats of the telomeric TTAGGG motif
# (0.25 s per nucleotide, the default duration of play_dna).
play_dna("TTAGGGTTAGGGTTAGGG","Telomeric_DNA")

In [86]:
# Mutated telomeric repeat (TTAGGG -> TTTGGG), three repeats.
# Per the authors' note, this variant inhibits proliferation of LOX melanoma
# and UM-UC-3 bladder cancer cells.
play_dna("TTTGGGTTTGGGTTTGGG","mutated_Telomeric_DNA")

Insulin is a crucial hormone in our body that helps control blood sugar levels. Genetic engineers were able to isolate (cut out) the gene for insulin from human DNA and insert it into the genome of bacteria (E. coli), allowing the bacteria to synthesize insulin that can be used for medical purposes.

In [90]:
# Sonify a fragment of the insulin gene sequence
# (0.25 s per nucleotide, the default duration of play_dna).
play_dna("AGCCCTCCAGGACAGGCTGCATCAGAAGAGGCCATCAAGCAGGTCTGTTCCAAGGGCCTTT", "part_insulin")