# Lab 6

In [1]:
import numpy as np
import pandas as pd 
import seaborn as sns 
import scipy as sci
import matplotlib as mp
import matplotlib.pyplot as plt 

from scipy import stats
from textwrap import wrap

%matplotlib inline
%config InlineBackend.figure_format = 'pdf'                                      

## Import data

In [2]:
# All three workbooks sit in the same folder; spell the folder out once so it
# can be repointed in a single place.
DATA_DIR = ('/Users/kev/Downloads/BCMB 301A Lab Techniques/'
            'Lab 6 - Study of reversible inhibition using beta-galactosidase/Data')

# The header row differs per workbook; Daniel's sheet additionally uses its
# second column (index_col=1) as the row index.
KS = pd.read_excel(DATA_DIR + '/kevin.xlsx', header=1)

DI = pd.read_excel(DATA_DIR + '/Daniel.xlsx', header=13, index_col=1)

TC = pd.read_excel(DATA_DIR + '/Tyler H.xlsx', header=1)

## Clean up and analyze data

In [3]:
def abs_ave(D, x0=1, xf=9, dx=10, y0=0, yf=4, n=9, Nanx=[], Nany=[], 
            Inh="Name IDIOT!!!", Unit="Units DUMBASS!!!", 
            ONPG = [10, 20, 40, 80], I=[0, 10, 20, 40], C=1, V=200, time=4):
    """Turn stacked plate readings into average absorbance-change tables.

    ``D`` holds ``n`` readings stacked vertically: reading ``t`` occupies rows
    ``x0 + dx*t`` up to (not including) ``xf + dx*t`` and columns ``y0`` up to
    (not including) ``yf``.

    Parameters
    ----------
    D : pandas.DataFrame
        Raw sheet containing the stacked readings.
    x0, xf, dx : int
        First row, end row (exclusive) and row stride between readings.
    y0, yf : int
        First column and end column (exclusive) of each reading.
    n : int
        Number of readings.
    Nanx, Nany : sequence of int
        Row / column positions (paired element-wise) of wells in the per-well
        table that are discarded by setting them to NaN.  The default lists
        are never mutated.
    Inh, Unit : str
        Inhibitor name and unit used in the column labels.  The defaults are
        placeholder reminders to pass real values.
    ONPG, I : sequence of numbers
        Substrate and inhibitor amounts; converted to ``ONPG*4/V`` and
        ``I*C/V`` respectively.
    C, V, time : number
        Scale factors used in the conversions above and as the final
        multiplier of both returned tables.
        NOTE(review): presumably stock concentration, total volume and the
        interval between readings -- confirm against the lab protocol.

    Returns
    -------
    tuple
        ``(rates, per_well, S_c, I_c)`` where ``rates`` is the pair-averaged
        table (index = substrate values ``S_c``, columns = inhibitor labels)
        times ``time``; ``per_well`` is the per-well table times ``time``;
        ``S_c`` and ``I_c`` are numpy arrays of the converted amounts.

    Raises
    ------
    ValueError
        If ``Nanx`` and ``Nany`` differ in length.
    """
    # The two lists are consumed pair-wise; a mismatch used to surface as a
    # bare IndexError (Nany too short) or silently drop entries (Nany too long).
    if len(Nanx) != len(Nany):
        raise ValueError(
            "Nanx and Nany must have the same length (got {0} and {1})".format(
                len(Nanx), len(Nany)))

    # 2D data size
    n_rows = xf - x0
    n_cols = yf - y0

    # Turn into 3d array
    # Axis 1 = Substrate
    # Axis 2 = Inhibitor
    # Axis 3 = Time
    readings = np.zeros((n_rows, n_cols, n))
    for t in range(n):
        readings[:, :, t] = np.array(D.iloc[x0 + dx*t:xf + dx*t, y0:yf])

    # Average change between consecutive readings, ignoring NaN steps
    Ave = pd.DataFrame(np.nanmean(np.diff(readings, axis=2), axis=2))

    # Discard the wells flagged by the caller
    for row, col in zip(Nanx, Nany):
        Ave.iloc[row, col] = np.nan

    # Average adjacent row pairs (rows 2k and 2k+1); with an odd row count the
    # last row is left out, exactly as int(n_rows/2) implies.
    n_pairs = int(n_rows/2)
    paired = np.zeros((n_pairs, n_cols))
    for k in range(n_pairs):
        paired[k, :] = np.nanmean(Ave.iloc[2*k:2*k + 2, :].values, axis=0)

    # Label columns by inhibitor value and rows by substrate value
    I_c = [i*C/V for i in I]
    S_c = [s*4/V for s in ONPG]  # NOTE(review): meaning of the factor 4 is not visible here -- confirm
    labels = [r'{0}: {1:#.3g} {2}'.format(Inh, i, Unit) for i in I_c]
    Abs = pd.DataFrame(paired, index=S_c, columns=labels)

    return Abs*time, Ave*time, np.array(S_c), np.array(I_c)

In [4]:
# One call per workbook.  Nanx/Nany list the (row, column) wells to discard;
# the remaining keywords set the inhibitor label, its unit and the scale
# factor C, plus the sheet layout where it differs from the defaults.
KS_abs, KS_ave, KS_Sc, KS_Ic = abs_ave(
    D=KS,
    Nanx=[0, 7, 4], Nany=[0, 0, 1],
    Inh='[Sucrose]', Unit='moles/l',
)
TC_abs, TC_ave, TC_Sc, TC_Ic = abs_ave(
    D=TC,
    Nanx=[1, 1, 3], Nany=[0, 1, 1],
    Inh='[Calcium]', C=200, Unit='g/l',
)
DI_abs, DI_ave, DI_Sc, DI_Ic = abs_ave(
    D=DI,
    Nanx=[3, 4], Nany=[3, 3],
    x0=1, xf=9, y0=1, yf=5, dx=12,
    Inh='[IPTG]', C=2, Unit='g/l',
)

In [5]:
# KS_abs

In [6]:
# TC_abs

In [7]:
# DI_abs

## Functions to generate graphs

In [8]:
# Default colour list for the plotting helpers below (one xkcd green per series).
P = ['xkcd:' + shade for shade in ('lightish green',
                                    'medium green',
                                    'jungle green',
                                    'deep green')]

# Michaelis–Menten rate law, used as the model function for the curve fit
def enz_func(x, a, b):
    """Return ``(a*x)/(b + x)``.

    ``a`` is the value approached as ``x`` grows large and ``b`` is the ``x``
    at which the result equals ``a/2`` (plotted elsewhere in this file as
    V_Max and K_M).  Works element-wise on numpy arrays.
    """
    numerator = a*x
    denominator = b + x
    return numerator/denominator

# Curve fit 
def curve_fit(f, x, D, Colour=P, append=False):
    """Fit ``f`` to every column of ``D`` and draw each fitted curve.

    Parameters
    ----------
    f : callable
        Model ``f(x, *params)`` passed to ``scipy.optimize.curve_fit``.
    x : sequence of numbers
        Independent values, one per row of ``D``.
    D : pandas.DataFrame
        One data series per column.
    Colour : sequence
        Line colours; reused cyclically if there are more series than colours.
    append : bool
        Unused; kept so existing callers that pass it keep working.

    Returns
    -------
    tuple
        ``(Popt, Rerr)``: the fitted parameter arrays and the R^2 value of
        each fit, in column order.
    """
    X = np.array(x)
    # Curves are drawn from 0 up to the largest x value
    xrange = np.linspace(0, np.max(X), 200)
    Popt = []
    Rerr = []
    # Iterate over COLUMNS (axis 1): each D.iloc[:, i] is one series.  The
    # previous loop bound used the row count, which is only right when D is
    # square.
    for i in range(np.shape(D)[1]):
        Y = np.array(D.iloc[:, i])
        popt, pcov = sci.optimize.curve_fit(f, X, Y, method='dogbox')
        Popt.append(popt)
        plt.plot(xrange, f(xrange, *popt), c=Colour[i % len(Colour)])

        # Get R^2 values 
        Res = Y - f(X, *popt)
        ss_res = np.sum(Res**2)
        ss_tot = np.sum((Y - np.mean(Y))**2)
        Rerr.append(1 - (ss_res/ss_tot))
    return Popt, Rerr

# linear fit 
def lin_fit(A, x_val, res=100, Colour=P, show_xint=False, cus_xmin=False):
    """Fit and draw a straight line through every column of ``A``.

    Parameters
    ----------
    A : pandas.DataFrame
        One data series per column.
    x_val : sequence of numbers
        Independent values, one per row of ``A``.
    res : int
        Number of points used to draw each line.
    Colour : sequence
        Line colours; reused cyclically if there are more series than colours.
    show_xint : bool
        When true, extend each line to the left of the data: to its own
        x-intercept, or to ``cus_xmin`` when that is given.
    cus_xmin : number or False
        Custom left end of the drawn lines.  ``False`` (or ``None``) means
        "not given"; ``0`` is a valid value.

    Returns
    -------
    tuple
        ``(xInts, yInts, R2)``: x-intercepts, y-intercepts and r^2 values,
        in column order.
    """
    xInts = []
    yInts = []
    R2 = []
    xmax = np.max(x_val)
    for i in range(np.shape(A)[1]):
        slope, intercept, r_value, p_value, std_err = stats.linregress(x_val, A.iloc[:, i])
        x_int = -intercept/slope

        if not show_xint:
            xmin = np.min(x_val)
        elif cus_xmin is False or cus_xmin is None:
            # Identity test on purpose: `cus_xmin == False` is also true for
            # 0, which made a custom left edge of 0 impossible.
            xmin = x_int
        else:
            xmin = cus_xmin

        # `res` used to be ignored in favour of a hard-coded 100
        xrange = np.linspace(xmin, xmax, res)
        y = slope*xrange + intercept
        xInts.append(x_int)
        yInts.append(intercept)
        R2.append(r_value**2)
        plt.plot(xrange, y, color=Colour[i % len(Colour)])
    return xInts, yInts, R2
    
# Pairwise intersections of straight lines given by their axis intercepts
def solv_LinEq(xInts, yInts):
    """Return the intersection point of every pair of lines.

    Line ``k`` crosses the x-axis at ``xInts[k]`` and the y-axis at
    ``yInts[k]``.  Pairs are visited in the order (0,1), (0,2), ..., (1,2), ...
    and each row of the returned array is one ``[x, y]`` intersection.  With
    fewer than two lines the result is an empty array.
    """
    rhs = np.array(yInts)
    gradient = -rhs/np.array(xInts)
    # Each line is written as  -gradient*x + 1*y = y-intercept
    coeffs = np.array([-gradient, np.zeros_like(gradient)+1]).T
    n_lines = len(yInts)
    crossings = [
        np.linalg.solve(np.array([coeffs[first], coeffs[second]]),
                        np.array([rhs[first], rhs[second]]))
        for first in range(0, n_lines-1)
        for second in range(first+1, n_lines)
    ]
    return np.array(crossings)

# Normal plot 
def plot_norm(D, f, xname='[ONPG] (mg/ml)', yname=r'$v_0$ (min$^{-1}$)', 
              plot_fit=False, Colour=P, append=False, plot_max=False):
    """Scatter-plot every column of ``D`` against its index on a new figure.

    Parameters
    ----------
    D : pandas.DataFrame
        One data series per column; the index supplies the x values.
    f : callable or None
        Model function handed to ``curve_fit``; only used when ``plot_fit``
        is true.
    xname, yname : str
        Axis labels.
    plot_fit : bool
        Fit ``f`` to each series and draw the fitted curves.
    Colour : sequence
        Colours for the scatter points, fitted curves and guide lines.
    append : bool
        Forwarded to ``curve_fit``.
    plot_max : bool
        Draw a dashed horizontal line at each first fitted parameter and a
        dotted vertical line at each second fitted parameter (up to half the
        first), with legend entries.  Requires ``plot_fit=True``.

    Raises
    ------
    ValueError
        If ``plot_max`` is requested without ``plot_fit``.
    """
    # The guide lines are built from the fitted parameters; without a fit the
    # old code failed later with an unbound-variable error.
    if plot_max and not plot_fit:
        raise ValueError("plot_max=True requires plot_fit=True")

    fig, ax = plt.subplots(1, 1, figsize=(10, 5))
    sns.scatterplot(data=D,
                    markers=['o'] * np.shape(D)[1],  # one marker per series
                    palette=Colour,
                    ax=ax)
    x_vals = list(D.index.values)

    # Plot curve fit 
    if plot_fit:
        Popt, Rerr = curve_fit(f, x=x_vals, D=D, Colour=Colour, append=append)

    # Plot ylimit as x goes to infty
    if plot_max:
        for i in range(0, len(Popt)):
            ax.hlines(Popt[i][0], 0, np.max(x_vals), 
                      color=Colour[i], ls='--', 
                      label=r'$V_{{Max}}$={0:#.3G} min$^{{-1}}$'.format(Popt[i][0]))
        for j in range(0, len(Popt)):
            ax.vlines(Popt[j][1], ymin=0, ymax=0.5*Popt[j][0], 
                      ls=':', color=Colour[j],
                      label=r'$K_M={0:#.3G}$ mg/ml;  R$^2$={1:#.3G}'.format(Popt[j][1], Rerr[j]))
        tickspace = np.arange(0, np.max(x_vals)+0.2, 0.2)
        plt.xticks(tickspace, np.around(tickspace, decimals=1))

    ax.legend(fontsize=9, ncol=3)
    ax.set_xlabel(xname, fontsize=12)
    ax.set_ylabel(yname, fontsize=12)
    ax.grid()
    
# Lineweaver-Burk plot
def plot_LB(D, xname=r'$[ONPG]^{-1}$ (ml/mg)', 
            yname=r'$v_0^{-1}$ (min)', Colour=P, show_xint=False):
    """Draw a double-reciprocal (Lineweaver-Burk) plot of ``D``.

    Both the index and the values of ``D`` are inverted, the points are
    scattered with ``plot_norm`` and a straight line is fitted to each column
    with ``lin_fit``.  The legend reports, per column, ``1/y-intercept`` as
    V_Max, ``-1/x-intercept`` as K_M, and R^2.

    Parameters
    ----------
    D : pandas.DataFrame
        One data series per column; the index supplies the x values.
    xname, yname : str
        Axis labels.
    Colour : sequence
        Colours for the points and fitted lines.
    show_xint : bool
        Forwarded to ``lin_fit`` (extend the lines to their x-intercepts).

    Returns
    -------
    pandas.DataFrame
        The reciprocal table (index = 1/x, values = 1/y).
    """
    recip_x = [1/i for i in D.index.values]
    D = 1/D          # new frame, so the caller's table is left untouched
    D.index = recip_x

    # Plot it.  Arguments are passed by keyword: positionally, xname used to
    # land in plot_norm's `f` slot and yname in its `xname` slot.
    plot_norm(D, None, xname=xname, yname=yname, plot_fit=False, Colour=Colour)
    xInts, yInts, R2 = lin_fit(A=D, x_val=recip_x, Colour=Colour, show_xint=show_xint)

    # Legend name
    D_col_name = list(D.columns.values)
    V_max = [1/i for i in yInts]
    K_m = [-1/i for i in xInts]
    legend_label = [r'{0}; $V_{{Max}}=${1:#.3G} min$^{{-1}}$; $K_M=${2:#.3G} mg/ml; R$^2$={3:#.3G}'.format(D_col_name[i], 
                    V_max[i], K_m[i], R2[i]) for i in range(0, len(K_m))]

    plt.legend(legend_label, fontsize=9.5)
    plt.xlabel(xname, fontsize=12)
    plt.ylabel(yname, fontsize=12)
    return D

# DIXON!!!!!
def plot_DX(A, Colour=P, S_con=None, I_con=None, Alt=False, show_xint=False, 
            cus_xmin=False, show_int=False, var_lim=5, int_cut=[0, 10**10],
            Sub='[ONPG]', xname='INHIBITOR!!!', yname=r'$v_0^{-1}$ (min)', 
            units='UNITS SAVES LIVES!!!'):
    """Draw a Dixon plot (reciprocal of ``A`` against inhibitor value).

    Parameters
    ----------
    A : pandas.DataFrame
        Table whose rows correspond to ``S_con`` and columns to ``I_con``.
    Colour : sequence
        Colours for the points and fitted lines.
    S_con, I_con : sequence of numbers
        Substrate and inhibitor values (required despite the ``None``
        defaults).
    Alt : bool
        Plot ``S_con / A`` instead of ``1 / A`` (the alternative Dixon plot).
    show_xint, cus_xmin
        Forwarded to ``lin_fit`` to control how far left the lines extend.
    show_int : bool
        Average the pairwise intersections of the fitted lines and, if the
        spread of their x values is at most ``var_lim``, mark the mean and
        report ``-x`` as K_i (K_ies when ``Alt``); otherwise add a
        "does not exist" legend entry.
    var_lim : number
        Largest accepted standard deviation of the intersection x values.
    int_cut : sequence of two int
        Slice bounds applied to the intersection array before averaging.
        The default list is never mutated.
    Sub, xname, yname, units : str
        Text used for series labels, axis labels and the reported constant.

    Returns
    -------
    tuple
        ``(table, Ints)``: the plotted table (index = ``I_con``, one column
        per substrate value) and the array of pairwise line intersections.

    Raises
    ------
    ValueError
        If ``S_con`` or ``I_con`` is not supplied.
    """
    if S_con is None or I_con is None:
        raise ValueError("plot_DX requires both S_con and I_con")

    # Alternative Dixon plot 
    if Alt:
        D3 = 1/A.T * S_con
        yname = r'$[S] \cdot v_0^{-1}$ (min $\cdot$ g/l)'
        # Raw string: '\c' is not a valid escape sequence in a normal literal
        Alt_Eq = r'[S] $\cdot$ '
    else:
        D3 = 1/A.T
        Alt_Eq = ''

    # Naming columns (one per substrate value) and rows (inhibitor values)
    D3.columns = [r'{0}: {1:#.3G} g/l'.format(Sub, i) for i in list(S_con)]
    D3.index = list(I_con)

    plt.figure(figsize=(10, 5))
    sns.scatterplot(data=D3,
                    markers=['o'] * np.shape(D3)[1],  # one marker per series
                    palette=Colour)

    # Finding linear regression
    xInts, yInts, R2 = lin_fit(A=D3, x_val=I_con, Colour=Colour,
                               show_xint=show_xint, cus_xmin=cus_xmin)

    Slope = -np.array(yInts)/np.array(xInts)
    D_col_name = list(D3.columns.values)
    # The regression variable is the inhibitor value, hence [I] in the equation
    legend_label = [r'{0}; {4}$v_0^{{-1}}$ = {1:#.3G}[I] + {2:#.3G}; R$^2=${3:#.3G}'.format(D_col_name[i], 
                    Slope[i], yInts[i], R2[i], Alt_Eq) for i in range(0, len(xInts))]

    # Find system of linear equations interception points  
    Ints = solv_LinEq(xInts, yInts)

    # To show interception or not to show 
    if show_int:
        selected = Ints[int_cut[0]: int_cut[1]]
        Kave = np.mean(selected, axis=0)
        Kstd = np.var(selected, axis=0)**0.5

        # Limit the acceptable standard deviation
        if Kstd[0] <= var_lim:
            plt.errorbar(Kave[0], Kave[1], xerr=Kstd[0], color='xkcd:very dark green', 
                         capsize=4, zorder=10)
            plt.plot([Kave[0], Kave[0]], [0, Kave[1]], 
                     linestyle=':', color='xkcd:very dark green', zorder=10)
            if Alt:
                K_name = r'$K_{{ies}}$ = {0:#.3G} $\pm$ {1:#.3G} {2}'.format(-Kave[0], Kstd[0], units)
            else:
                K_name = r'$K_i$ = {0:#.3G} $\pm$ {1:#.3G} {2}'.format(-Kave[0], Kstd[0], units)
        else:
            # Also reached when the spread is NaN, which previously matched
            # neither comparison and left the legend without an entry.
            # The zero-width line only provides a handle for that entry.
            plt.plot(list(I_con), D3.iloc[:, 1], linewidth=0)
            if Alt:
                K_name = r'$K_{ies}$ does not exist'
            else:
                K_name = r'$K_{i}$ does not exist'
        legend_label.append(K_name)

    plt.xlabel('{0} ({1})'.format(xname, units), fontsize=12)
    plt.ylabel(yname, fontsize=12)
    plt.legend(legend_label, fontsize=9.5)
    plt.grid()
    return D3, Ints

In [9]:
# KS_LB

In [10]:
# Plot it out
# ===========
# Twelve figures in total, three datasets (KS, TC, DI) per plot type.

# Direct plots with the enz_func fit and its V_Max / K_M guide lines.
# Note: V_max is inconsistent between graphs
plot_norm(KS_abs, enz_func, plot_fit=True, plot_max=True)
plot_norm(TC_abs, enz_func, plot_fit=True, plot_max=True)
plot_norm(DI_abs, enz_func, plot_fit=True, plot_max=True)

# Lineweaver-Burk plots; each call returns the reciprocal table it plotted.
KS_LB = plot_LB(KS_abs, show_xint=True)
TC_LB = plot_LB(TC_abs, show_xint=True)
DI_LB = plot_LB(DI_abs, show_xint=True)

# Dixon plots (1/v0 against inhibitor); var_lim, cus_xmin and int_cut are tuned per dataset.
KS_DX1, KS_Int1 = plot_DX(KS_abs, S_con=KS_Sc, I_con=KS_Ic, xname='[Sucrose]', units='mmoles/ml',
                          show_int=True, var_lim=0.01) 
TC_DX1, TC_Int1 = plot_DX(TC_abs, S_con=TC_Sc, I_con=TC_Ic, xname='[Calcium]', units='mg/ml',
                          show_int=True, show_xint=True, cus_xmin = -5, int_cut=[1, -1]) 
DI_DX1, DI_Int1 = plot_DX(DI_abs, S_con=DI_Sc, I_con=DI_Ic, xname='[IPTG]',units='mg/ml',
                          show_int=True, show_xint=True, cus_xmin = -0.1) 

# Alternative Dixon plots (Alt=True: [S]/v0 against inhibitor).
KS_DX2, KS_Int2 = plot_DX(KS_abs, S_con=KS_Sc, I_con=KS_Ic, xname='[Sucrose]', units='mmoles/ml',
                          show_int=True, Alt=True, show_xint=True, cus_xmin=-0.4)
TC_DX2, TC_Int2 = plot_DX(TC_abs, S_con=TC_Sc, I_con=TC_Ic, xname='[Calcium]', units='mg/ml',
                          show_int=True, Alt=True, show_xint=True, cus_xmin=-60, int_cut=[1, 100], var_lim=50) 
DI_DX2, DI_Int2 = plot_DX(DI_abs, S_con=DI_Sc, I_con=DI_Ic, xname='[IPTG]', units='mg/ml',
                          show_int=True, Alt=True, show_xint=True)

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>

<Figure size 720x360 with 1 Axes>