# Rosalind - Bioinformatics Stronghold

In [4]:
from itertools import product

import numpy as np
import pandas as pd

In [5]:
def open_FASTA_file(file_name):
    """Parse the FASTA file ``file_name + ".txt"`` into a dictionary.

    Args:
        file_name: Path of the file *without* its ``.txt`` extension.

    Returns:
        dict mapping each header line (leading ``>`` kept, trailing newline
        removed) to the concatenation of the sequence lines that follow it.
        A header that appears twice restarts its sequence.

    Raises:
        ValueError: If sequence data appears before the first header line.
    """
    file_items = {}
    ID = None
    # The context manager closes the handle even if parsing raises.
    with open(file_name + ".txt") as file:
        for line in file:
            line = line.strip("\n")
            if not line:
                # Blank lines contribute nothing to a sequence.
                continue
            if line[0] == ">":
                ID = line
                file_items[ID] = ""
            elif ID is None:
                raise ValueError(
                    "FASTA sequence data found before the first '>' header"
                )
            else:
                file_items[ID] += line

    return file_items

## LCSM - Finding a Shared Motif

In [46]:
# Load the records of "sample.txt" and keep only the sequences; the FASTA
# headers are not needed for the shared-motif search.
motifs = list(open_FASTA_file("sample").values())

def shortest_motif(motifs):
    """Return the first motif in ``motifs`` that has the minimum length.

    Raises:
        ValueError: If ``motifs`` is empty.
    """
    # min() keeps the first of several equally short candidates.
    return min(motifs, key=len)

def create_frames(reference, size):
    """Return the distinct substrings of ``reference`` that are ``size`` long.

    Args:
        reference: String to slide the window over.
        size: Window length.

    Returns:
        list of unique windows, ordered by first occurrence in ``reference``.
        Empty when ``size`` exceeds ``len(reference)``.
    """
    windows = (
        reference[start:start + size]
        for start in range(len(reference))
        if start + size <= len(reference)
    )
    # dict.fromkeys de-duplicates while keeping insertion order, so the
    # result is reproducible between runs (a set's string order is not).
    return list(dict.fromkeys(windows))

def find_longest_common_substring(motifs, size):
    """Find a longest substring, at most ``size`` long, shared by all motifs.

    Candidate substrings are taken from the shortest motif, longest first,
    and the first candidate contained in every motif is returned.

    Args:
        motifs: Sequence of strings to search.
        size: Largest substring length to try.

    Returns:
        The shared substring, or ``-1`` when ``size`` is not positive or no
        substring of length >= 1 is common to all motifs.
    """
    if size <= 0:
        return -1

    # The reference never changes, so look it up once.
    reference_motif = shortest_motif(motifs=motifs)

    # Iterate instead of recursing once per length: long sequences would
    # otherwise exhaust the interpreter's recursion limit. Lengths above the
    # reference length produce no frames, so start from the reference length.
    for length in range(min(size, len(reference_motif)), 0, -1):
        for frame in create_frames(reference_motif, length):
            if all(frame in motif for motif in motifs):
                return frame

    return -1

#size = len(shortest_motif(motifs))
#print(find_longest_common_substring(motifs, size=size))


TTGGGTAGAGTTGGTCTAGCGGAAAATAAATCGAGTCACGAGACCGATAAGTCTTGACGCCGGGGGAACCCACTCCCGAGTA


## REVP - Locating Restriction Sites

In [16]:
def find_complimentary_DNA(template):
    """Return the base-wise complement of ``template`` (A<->T, G<->C).

    The result is not reversed; characters other than A, T, G and C are
    copied through unchanged.
    """
    complement_of = str.maketrans("ATGC", "TACG")
    return template.translate(complement_of)

def is_palindrome(sequence):
    """Return True when ``sequence`` reversed equals its complement."""
    backwards = sequence[::-1]
    complement = find_complimentary_DNA(sequence)
    return backwards == complement

def find_palindromes(size):
    """Return every reverse-palindromic DNA string of length ``size``.

    A string is reverse-palindromic when it equals its own reverse
    complement. Such a string is fully determined by its first half, so the
    strings are built directly from the 4**(size/2) possible halves rather
    than by filtering all 4**size candidates. The output order is the same
    as the filtered enumeration (``product`` order over "ATGC").

    Args:
        size: Length of the strings to generate.

    Returns:
        list of str. Empty for odd ``size``: the middle base would have to
        be its own complement, which no base is.

    Raises:
        ValueError: If ``size`` is negative.
    """
    if size < 0:
        raise ValueError("size must be non-negative")
    if size % 2:
        return []

    halves = ("".join(bases) for bases in product("ATGC", repeat=size // 2))
    # Second half = reverse complement of the first half.
    return [half + find_complimentary_DNA(half)[::-1] for half in halves]

def create_palindromes (start_size, end_size):
    """Collect the palindromes of sizes start_size, start_size + 2, ... up to
    and including ``end_size``, in increasing size order."""
    return [
        palindrome
        for length in range(start_size, end_size + 1, 2)
        for palindrome in find_palindromes(length)
    ]

def save_palindromes_to_txt(palindromes):
    """Write ``palindromes`` to ``palindromes.txt`` as one comma-separated line.

    The file is created in (or overwritten in) the current working directory.

    Args:
        palindromes: Iterable of strings to store.
    """
    joined = ",".join(palindromes)
    # The context manager closes the handle even if the write fails.
    with open("palindromes.txt", "w") as file:
        file.write(joined)

#save_palindromes_to_txt(create_palindromes(4, 12))

def load_palindromes(file_name):
    """Read a comma-separated list of strings from ``file_name + ".txt"``.

    Args:
        file_name: Path of the file *without* its ``.txt`` extension.

    Returns:
        list of str obtained by splitting the whole file content on ",".
        An empty file therefore yields ``[""]``.
    """
    # The context manager guarantees the handle is closed after reading.
    with open(file_name + ".txt") as file:
        content = file.read()

    return content.split(",")

def find_restriction_sites(sequence, restriction_sites):
    """Print ``<1-based position> <site length>`` for every occurrence of
    each site in ``sequence``.

    Output is grouped by site, in the order the sites are given; overlapping
    occurrences are all reported.
    """
    for site in restriction_sites:
        site_length = len(site)
        position = sequence.find(site)
        while position >= 0:
            print(position + 1, site_length)
            # Resume one character further on so overlapping hits are found.
            position = sequence.find(site, position + 1)

#palindromes = load_palindromes("palindromes")
#sequence = open_FASTA_file("rosalind_revp")
#sequence = list(sequence.values())[0]
#find_restriction_sites(sequence=sequence, restriction_sites=palindromes)




365 4
502 4
839 4
275 4
663 4
701 4
911 4
152 4
796 4
177 4
230 4
833 4
872 4
700 4
93 4
504 4
252 4
294 4
456 4
257 4
378 4
451 4
595 4
670 4
846 4
26 4
147 4
482 4
627 4
235 4
370 4
437 4
444 4
591 4
756 4
866 4
105 4
527 4
737 4
764 4
1 4
164 4
357 4
409 4
411 4
645 4
827 4
154 4
215 4
583 4
57 4
358 4
410 4
515 4
644 4
767 4
455 6
669 6
436 6
104 6
845 6
626 6
755 6
251 6
293 6
763 6
409 6
871 6
454 8
668 8
435 8
453 10
667 10
452 12


## PRTM - Calculating Protein Mass

In [9]:
# Location of the mass table (monoisotopic amino-acid masses, per the file
# name); each line is split on whitespace below.
mass_table_file = r"C:\Users\vente\OneDrive\Documents\Code\bioinformatics_challenges\databases\monoisotopic_mass_table_amino_acids.txt"
# Read the table into a list of per-line field lists; the context manager
# closes the file once it has been read.
with open(mass_table_file) as mass_table_handle:
    mass_table_file = [line.split() for line in mass_table_handle.read().split("\n")]

# Build the lookup eagerly: in Python 3 map() is lazy, so using it only for
# its side effect would never run and the table would stay empty. Lines with
# fewer than two fields (e.g. a trailing blank line) are skipped. Values are
# kept as the strings read from the file.
mass_table = {row[0]: row[1] for row in mass_table_file if len(row) >= 2}
mass_table_file[0][1]


'71.03711'