# In [222]:  (Jupyter notebook cell prompt)
#SynMap2GCcontent
##Step 1 Importing FASTA files

####Import library
import os
from itertools import islice
from itertools import chain, repeat
import numpy as np
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Bar, Scatter, Figure, Layout
init_notebook_mode(connected=True)
# import sys
# from multiprocessing import Process


####Global variables
header = []                                                            #One entry per FASTA record: its lowercased header line (starts with ">")
nucleotides = []                                                       #One entry per FASTA record: its complete lowercased sequence
window_size = 3                                                        #Number of consecutive characters in each sliding window


####File input/path
#my_file = open(os.path.expanduser('Plasmodium_chabaudi_chabaudi_strain_AS.faa'))
with open(os.path.expanduser('Plasmodium__test.faa')) as my_file:      #The with-block closes the file as soon as it has been read
    file_contents = my_file.read()
#my_file = open(os.path.expanduser('Plasmodium_chabaudi_chabaudi_TEST2.faa'))
file_contents_lower = file_contents.lower()                            #Lowercases all strings in file
file_contents_lower_split = file_contents_lower.split()                #Whole file split into words; no longer used for parsing, kept so the name stays defined


##Step 2 Separating the file from a single string into two lists
#The file is parsed line by line instead of word by word: a line starting with
#the symbol ">" opens a new record, and every following line up to the next
#">" line belongs to that record's sequence. Splitting on whitespace would turn
#each line of a multi-line sequence (and each extra word of a header line) into
#a separate "sequence" and break the pairing between the two lists.
record_name = None                                                     #Header line of the record being read (None before the first header)
record_parts = []                                                      #Sequence pieces collected for the current record
for line in file_contents_lower.splitlines():
    line = line.strip()
    if line.startswith(">"):
        #A new header ends the previous record; records without any sequence
        #data are dropped so that header[i] always describes nucleotides[i]
        if record_name is not None and record_parts:
            header.append(record_name)
            nucleotides.append("".join(record_parts))
        record_name = line
        record_parts = []
    elif line and record_name is not None:
        record_parts.extend(line.split())                              #split() also removes whitespace inside a sequence line
if record_name is not None and record_parts:                           #Store the last record of the file
    header.append(record_name)
    nucleotides.append("".join(record_parts))


def window(seq, window_size):
    """Yield every run of ``window_size`` consecutive items of ``seq``.

    The window starts on the first ``window_size`` items and then advances
    one item at a time until the input is exhausted.

    Args:
        seq: any iterable (for example a string of nucleotides).
        window_size: number of items in each window.

    Yields:
        A new list holding the items of the current window, in input order.
        Nothing is yielded when ``seq`` has fewer than ``window_size`` items.
    """
    # Iterate the input directly; copying it into a list first only costs
    # memory and time without changing what is produced.
    items = iter(seq)
    current = tuple(islice(items, window_size))
    if len(current) == window_size:
        yield list(current)
    for item in items:
        # Drop the oldest item and append the newest one.
        current = current[1:] + (item,)
        yield list(current)


def AT_counter(seq):
    """Return the AT percentage of every sliding window over ``seq``.

    Windows come from ``window(seq, window_size)``, using the module-level
    ``window_size``. Only lowercase 'a' and 't' are counted.

    Args:
        seq: the sequence to scan.

    Returns:
        A numpy array with one percentage (0-100) per window, in window
        order. The array is empty when ``seq`` yields no window at all.
    """
    frag_list_at = []
    for fragment in window(seq, window_size):
        count_at = fragment.count('a') + fragment.count('t')
        # float() is applied to the divisor so the division is a true
        # division even where "/" on two ints truncates (Python 2).
        frag_list_at.append((count_at / float(len(fragment))) * 100)
    # Convert once after the loop: converting inside it rebuilt the whole
    # array on every window and left the result undefined when there were
    # no windows.
    return np.asarray(frag_list_at)


def GC_counter(seq):
    """Return the GC percentage of every sliding window over ``seq``.

    Windows come from ``window(seq, window_size)``, using the module-level
    ``window_size``. Only lowercase 'g' and 'c' are counted.

    Args:
        seq: the sequence to scan.

    Returns:
        A numpy array with one percentage (0-100) per window, in window
        order. The array is empty when ``seq`` yields no window at all.
    """
    frag_list_gc = []
    for fragment in window(seq, window_size):
        count_gc = fragment.count('g') + fragment.count('c')
        # float() is applied to the divisor so the division is a true
        # division even where "/" on two ints truncates (Python 2).
        frag_list_gc.append((count_gc / float(len(fragment))) * 100)
    # Convert once after the loop: converting inside it rebuilt the whole
    # array on every window and left the result undefined when there were
    # no windows.
    return np.asarray(frag_list_gc)


def X_counter(seq):
    """Return the percentage of 'x' characters in every sliding window.

    Windows come from ``window(seq, window_size)``, using the module-level
    ``window_size``. Only lowercase 'x' is counted.

    Args:
        seq: the sequence to scan.

    Returns:
        A numpy array with one percentage (0-100) per window, in window
        order. The array is empty when ``seq`` yields no window at all.
    """
    frag_list_x = []
    for fragment in window(seq, window_size):
        count_x = fragment.count('x')
        # float() is applied to the divisor so the division is a true
        # division even where "/" on two ints truncates (Python 2).
        frag_list_x.append((count_x / float(len(fragment))) * 100)
    # Convert once after the loop: converting inside it rebuilt the whole
    # array on every window and left the result undefined when there were
    # no windows.
    return np.asarray(frag_list_x)


def N_counter(seq):
    """Return the percentage of 'n' characters in every sliding window.

    Windows come from ``window(seq, window_size)``, using the module-level
    ``window_size``. Only lowercase 'n' is counted.

    Args:
        seq: the sequence to scan.

    Returns:
        A numpy array with one percentage (0-100) per window, in window
        order. The array is empty when ``seq`` yields no window at all.
    """
    frag_list_n = []
    for fragment in window(seq, window_size):
        count_n = fragment.count('n')
        # float() is applied to the divisor so the division is a true
        # division even where "/" on two ints truncates (Python 2).
        frag_list_n.append((count_n / float(len(fragment))) * 100)
    # Convert once after the loop: converting inside it rebuilt the whole
    # array on every window and left the result undefined when there were
    # no windows.
    return np.asarray(frag_list_n)


def positioner(seq):
    """Return the 1-based positions of ``seq`` as a numpy array.

    Args:
        seq: any object supporting ``len()``.

    Returns:
        A numpy integer array ``[1, 2, ..., len(seq)]``; empty when ``seq``
        is empty.
    """
    # arange builds the whole range in one step and also covers the empty
    # case, which a loop that assigns the result inside its body cannot.
    return np.arange(1, len(seq) + 1)



def plotly(nucleo):
    """Plot sliding-window AT, GC, N and X percentages for every sequence.

    One figure is drawn with ``iplot`` per sequence. Sequences are paired by
    position with the entries of the module-level ``header`` list, which
    supply the figure titles.

    NOTE: this function has the same name as the ``plotly`` package that the
    file imports from.

    Args:
        nucleo: iterable of sequences, in the same order as ``header``.

    Returns:
        None.
    """
    # (legend label, counter function, line colour) for each trace, in the
    # order the traces are added to the figure.
    trace_specs = [
        ('AT content', AT_counter, 'rgb(3,141,243)'),   # Blue
        ('GC content', GC_counter, 'rgb(64,182,77)'),   # Green
        ('N content', N_counter, 'rgb(243,145,3)'),     # Orange
        ('X content', X_counter, 'rgb(171,3,243)'),     # Purple
    ]

    # Iterate the argument rather than the global list, so the function
    # plots what the caller passes in.
    for seq, name in zip(nucleo, header):
        percentages = [counter(seq) for _label, counter, _colour in trace_specs]

        # The counters return one value per window while positioner returns
        # one value per nucleotide; trim the positions so x and y have the
        # same length.
        pos_arr = positioner(seq)[:len(percentages[0])]

        data = [
            Scatter(y=values, x=pos_arr, name=label, line=dict(color=colour))
            for (label, _counter, colour), values in zip(trace_specs, percentages)
        ]

        layout = dict(
            title='Sliding window for ' + name,
            xaxis=dict(title='Window iteration', rangeslider=dict()),
            yaxis=dict(title='Percentage (%)'),
        )
        fig = dict(data=data, layout=layout)
        iplot(fig)

    return

    
# Draw the sliding-window figures for the sequences collected in `nucleotides`.
plotly(nucleotides)