In [25]:
import numpy as np  
import matplotlib.pyplot as plt  
import scipy
from scipy.stats import norm
from collections import Counter
from ipywidgets import interactive, IntSlider

In [19]:
def read_sequence(filename):
    """Return the text of ``filename`` as one string with newlines removed.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    str
        The file contents with every ``"\\n"`` stripped out; all other
        characters (including spaces) are kept as they are.
    """
    with open(filename, 'r') as handle:
        raw_text = handle.read()
    # Splitting on the newline and re-joining drops every line break.
    return "".join(raw_text.split("\n"))

In [20]:
def kmer_frequency(sequence, length):
    """Count how often each k-mer (substring of size ``length``) occurs.

    Every window ``sequence[i:i + length]`` is tallied once, so overlapping
    occurrences are counted (``"AAAA"`` contains ``"AA"`` three times).
    The previous implementation used ``str.count``, which only counts
    non-overlapping matches and rescans the whole sequence for every window.

    Parameters
    ----------
    sequence : str
        The text to scan.
    length : int
        Size of the k-mers; expected to be a positive integer.

    Returns
    -------
    dict
        Maps each distinct k-mer to its number of occurrences, keyed in
        order of first appearance. Empty when ``length`` is larger than
        ``len(sequence)``.
    """
    window_count = len(sequence) - length + 1
    # A single pass over the sliding windows; range() is empty when the
    # requested size does not fit into the sequence.
    tallies = Counter(
        sequence[start:start + length] for start in range(window_count)
    )
    return dict(tallies)

In [36]:
def bar_interactive(length):
    """Draw a bar chart of the ten most frequent k-mers of size ``length``.

    The sequence is re-read from the notebook-level ``filename`` on every
    call, its k-mers are counted with ``kmer_frequency`` and the ten highest
    counts are plotted as blue bars on a new 10x5 figure.

    Parameters
    ----------
    length : int
        Size of the k-mers to count.
    """
    kmer_counts = kmer_frequency(read_sequence(filename), length)
    ranked = Counter(kmer_counts).most_common(10)
    fragments = [fragment for fragment, _ in ranked]
    heights = [count for _, count in ranked]

    # One figure with a single axes holding the top-10 bars.
    figure, axes = plt.subplots(figsize=(10, 5))
    axes.bar(fragments, heights, color="b")
    axes.set_xlabel("Fragments", fontsize=15)
    axes.set_ylabel("Counts", fontsize=15)

In [37]:
def hist_interactive(length):
    """Plot the distribution of k-mer counts with a fitted normal curve.

    The sequence is re-read from the notebook-level ``filename`` on every
    call and its k-mers of size ``length`` are counted with
    ``kmer_frequency``. The counts are shown as a density-normalised
    histogram (100 bins) on a new 5x5 figure. A normal distribution is
    fitted to the counts; its mean and standard deviation go into the title
    and its density is overlaid on the histogram.

    Parameters
    ----------
    length : int
        Size of the k-mers to count.
    """
    sequence = read_sequence(filename)
    # Materialise the counts once; they feed both the histogram and the fit.
    counts = list(kmer_frequency(sequence, length).values())

    fig2 = plt.figure(figsize=(5, 5))
    ax1 = fig2.add_subplot(1, 1, 1)
    ax1.set_xlabel("Counts", fontsize=15)
    ax1.set_ylabel("Proportion", fontsize=15)

    if not counts:
        # No k-mer of this size exists, so there is nothing to bin or fit.
        ax1.set_title("No k-mers of this length", fontsize=20)
        return

    _, bins, _ = ax1.hist(counts, bins=100, density=True, alpha=0.5, label="bins")

    mu, sigma = scipy.stats.norm.fit(counts)
    if sigma > 0:
        best_fit_line = scipy.stats.norm.pdf(bins, mu, sigma)
        ax1.plot(bins, best_fit_line, label='trendline')
    # When every count is identical the fitted SD is 0 and the normal
    # density is undefined, so the curve is skipped in that case.

    # The artists carry labels; a legend is needed for them to be displayed.
    ax1.legend()
    ax1.set_title(f"Mean = {mu:.2f}, SD = {sigma:.2f}", fontsize=20)

In [38]:
%matplotlib inline
filename = '/Users/apple/Desktop/eve.txt'
sequence = read_sequence(filename)
length = 5

# create interactive sliders for kmer sequence
length_widget = IntSlider(min = 1, max = len(sequence), value = length)

# adjust settings to prevent continous recalculation and update of plot while user drags widget
for item in [ length_widget ]:
    item.continuous_update = False

interactive(bar_interactive, length = length_widget)

interactive(children=(IntSlider(value=5, continuous_update=False, description='length', max=1406, min=1), Outp…

In [39]:
%matplotlib inline
# create interactive sliders for kmer sequence
length_widget = IntSlider(min = 1, max = len(sequence), value = length)

# adjust settings to prevent continous recalculation and update of plot while user drags widget
for item in [ length_widget ]:
    item.continuous_update = False

interactive(hist_interactive, length = length_widget)

interactive(children=(IntSlider(value=5, continuous_update=False, description='length', max=1406, min=1), Outp…