In [5]:
#import widget functionality
from __future__ import print_function
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import FloatSlider
from ipywidgets import IntSlider


#Load libraries
import pandas as pd
import numpy as np
from IPython.display import display
import matplotlib.pyplot as plt
import math as math
import line_profiler
%load_ext line_profiler


The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [6]:
#Creates array to hold the frequency of each barcode in the library
def my_data(barcode_amount, broadness, exponentiality, all_unique):
    """Build the barcode abundance table.

    Parameters
    ----------
    barcode_amount : int
        Number of barcodes (rows) in the table.
    broadness : number
        Divisor inside the exponent; larger values flatten the curve.
    exponentiality : number
        Multiplier applied to every abundance.
    all_unique : number
        Numerator inside the exponent; 0 makes every abundance equal to
        ``exponentiality``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(barcode_amount, 2)``. Column 0 holds the
        1-based barcode index, column 1 holds
        ``exponentiality * exp(all_unique / broadness * index)``.
    """
    table = np.zeros((barcode_amount, 2))

    # Column 0: barcode indices 1..barcode_amount
    table[:, 0] = np.arange(1, barcode_amount + 1)

    #Needs tweaking so that the steepness can be modified to arrive later in the curve
    # Column 1: abundance of each barcode, derived from its index
    table[:, 1] = [
        exponentiality * math.exp(all_unique / broadness * barcode_index)
        for barcode_index in table[:, 0]
    ]

    return table

#Calculates the probability of picking each barcode
def probability_distribution(barcode_distribution):
    """Normalise barcode abundances into pick probabilities.

    Parameters
    ----------
    barcode_distribution : array-like, shape (n_barcodes, 2)
        Column 1 holds the abundance of each barcode (column 0 is ignored
        here).

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``n_barcodes``; entry ``i`` is
        ``abundance[i] / sum(abundance)``. If the abundances sum to zero the
        entries are NaN (numpy emits a RuntimeWarning), as the division is
        not guarded.
    """
    abundances = np.asarray(barcode_distribution, dtype=float)[:, 1]

    # The total is the same for every barcode, so compute it once instead of
    # re-summing the whole column per row (which made this quadratic).
    total_abundance = abundances.sum()

    return abundances / total_abundance

#Calculates the fraction of uniquely labelled cells, given the barcode probability distribution and number of infected cells
def fraction_unique(barcode_probability, cell_num):
    """Return the expected fraction of cells carrying a barcode no other cell has.

    Parameters
    ----------
    barcode_probability : numpy.ndarray
        Probability of picking each barcode.
    cell_num : int
        Number of infected cells.

    Returns
    -------
    float
        ``1 - sum(p * (1 - (1 - p)**(cell_num - 1)))``.
    """
    other_cells = cell_num - 1
    # Per barcode: chance that at least one of the other cells drew it too
    # (treating the draws as independent)
    shared_chance = 1 - (1 - barcode_probability)**other_cells
    # Weight by the chance of drawing that barcode in the first place
    non_unique = sum(barcode_probability * shared_chance)

    return 1 - non_unique

In [7]:
#Defines barcode distribution with editable parameters, outputting visualisation

def generate_data(n_bins, stride, barcode_amount, broadness, exponentiality, max_cells, all_unique):
    """Build a barcode library from the given parameters and draw three panels.

    Panels, left to right: barcode abundance against barcode index, cumulative
    histogram of the abundances, and fraction of uniquely labelled cells
    against the number of infected starter cells.

    Parameters
    ----------
    n_bins : int
        Number of bins for the cumulative histogram.
    stride : int
        Step used both to subsample the barcode table for the first panel and
        to pick the cell counts evaluated in the third panel.
    barcode_amount, broadness, exponentiality, all_unique
        Forwarded unchanged to ``my_data``.
    max_cells : int
        Exclusive upper bound on the number of infected cells evaluated.

    Side effects
    ------------
    Stores the generated table in the module-level name
    ``barcode_distribution`` and draws on a new matplotlib figure.
    """
    # Build the library once and reuse it for every panel
    global barcode_distribution
    barcode_distribution = my_data(barcode_amount, broadness, exponentiality, all_unique)
    barcode_probability = probability_distribution(barcode_distribution)

    #Create subplots
    fig, (ax_library, ax_cumulative, ax_unique) = plt.subplots(1, 3, figsize=(30, 7))

    #plot barcode library
    ax_library.plot(barcode_distribution[::stride, 0],
                    barcode_distribution[::stride, 1], c='b', linewidth=4.0)
    ax_library.set_title('Barcode distribution', fontsize=20, pad=20)
    ax_library.set_xlabel('Barcode index', fontsize=16, labelpad=10)
    ax_library.set_ylabel('Barcode abundance', fontsize=16, labelpad=10)
    ax_library.set_xlim(0)
    ax_library.grid(True)

    #plot cumulative fraction
    n, bins, patches = ax_cumulative.hist(barcode_distribution[:, 1], n_bins, density=True,
                                          histtype='step', cumulative=True, label='Empirical',
                                          color='b', linewidth=4.0)
    #Remove rightmost vertical line from histogram CDF
    patches[0].set_xy(patches[0].get_xy()[:-1])

    ax_cumulative.set_title('Cumulative barcode distribution', fontsize=20, pad=20)
    ax_cumulative.set_xlabel('Barcode abundance group', fontsize=16, labelpad=10)
    ax_cumulative.set_ylabel('Cumulative fraction', fontsize=16, labelpad=10)
    ax_cumulative.set_xlim(0)
    ax_cumulative.set_ylim(0, 1)
    ax_cumulative.grid(True)

    #plot_unique_barcoded_cells
    # Evaluate the unique fraction at exactly the cell counts that are plotted,
    # so every y value belongs to its x value and both arrays always have the
    # same length (also when fewer than `stride` cell counts are available).
    strided_cells = np.arange(1, max_cells, stride)
    strided_array = np.array([fraction_unique(barcode_probability, int(cell_count))
                              for cell_count in strided_cells])

    ax_unique.plot(strided_cells, strided_array, c='b', linewidth=4.0)
    ax_unique.set_title('Uniquely labeled cells', fontsize=20, pad=20)
    ax_unique.set_xlabel('Number of infected starter cells', fontsize=16, labelpad=10)
    ax_unique.set_ylabel('Fraction of uniquely labeled cells', fontsize=16, labelpad=10)
    ax_unique.set_xlim(0)
    ax_unique.set_ylim(0, 1)
    ax_unique.grid(True)

#Takes about 1 sec per thousand barcodes + 1 sec per thousand cells
# Let slider descriptions take their natural width instead of being truncated
style = {'description_width': 'initial'}

# Manual mode: graphs are only redrawn when the "Plot graphs" button is pressed
interact_plot = interact.options(manual=True, manual_name="Plot graphs")

# One control per generate_data parameter, keyed by parameter name
slider_controls = dict(
    max_cells=IntSlider(description='Number of cells', style=style,
                        min=2, max=100000, step=2, value=5000),
    barcode_amount=IntSlider(description='Number of barcodes', style=style,
                             min=10, max=10000, step=50, value=2500, continuous_update=False),
    n_bins=IntSlider(description='Number of bins', style=style,
                     min=1, max=1000, value=500),
    stride=IntSlider(description='Stride', style=style,
                     min=2, max=1000, step=2, value=100),
    broadness=IntSlider(description='Skew constant', style=style,
                        min=1, max=1000, value=500, continuous_update=False),
    all_unique=IntSlider(description='Barcode skew', style=style,
                         min=0, max=1, value=1, continuous_update=False),
    exponentiality=FloatSlider(description='Exp', style=style,
                               min=0, max=3, step=1e-2, value=1, continuous_update=False),
)
interact_plot(generate_data, **slider_controls)

interactive(children=(IntSlider(value=500, description='Number of bins', max=1000, min=1, style=SliderStyle(de…

<function __main__.generate_data(n_bins, stride, barcode_amount, broadness, exponentiality, max_cells, all_unique)>

In [None]:
# Line-by-line profiling of plot_unique_barcoded_cells, using the line_profiler
# extension loaded in the first cell.
# NOTE(review): barcode_functions is not defined in this notebook; presumably a
# local module next to it - confirm it is importable before running this cell.
from barcode_functions import plot_unique_barcoded_cells
# -f names the function to profile; the trailing statement is what gets executed
%lprun -f plot_unique_barcoded_cells plot_unique_barcoded_cells()