This notebook extracts tabulated data from CSV files (e.g. gel quantification data) and plots results in a format that's consistent with the other Python plots in the paper.

In [None]:
# load modules

# debugging helpers (currently enabled; comment out when not needed)
%load_ext autoreload
%autoreload 2
%matplotlib inline
from IPython.core.debugger import set_trace

import os, sys, inspect
import matplotlib
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from scipy import stats

import seaborn as sns


In [None]:
# Set up plot export and plotting styles

# Plotting and figure saving params
save_figs = True
save_dir = '../reports/figures/Plots_From_csv'
    
# create save figure dir and set up figure/font sizes
if save_figs:
    %matplotlib
    matplotlib.rcParams['figure.figsize'] = 1.6, 1.4
    save_dir_pdf = os.path.join(save_dir, 'pdf')
    if not os.path.exists(save_dir_pdf):
        os.makedirs(save_dir_pdf)
    
    # Set up fonts
    matplotlib.rc("font", family="Arial")

    matplotlib.rcParams['pdf.fonttype'] = 42 # Make fonts editable
    matplotlib.rcParams['axes.linewidth']= 0.5
    matplotlib.rcParams['lines.linewidth'] = 0.5

    SMALL_SIZE = 5
    MEDIUM_SIZE = 6
    BIGGER_SIZE = 7

    plt.rc('font', size=MEDIUM_SIZE)          # controls default text sizes
    plt.rc('axes', titlesize=MEDIUM_SIZE)     # fontsize of the axes title
    plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
    plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
    plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
    plt.rc('legend', fontsize=MEDIUM_SIZE)    # legend fontsize
    plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title
    
    
else:
    %matplotlib inline
    matplotlib.rcParams['figure.figsize'] = 6, 4



In [None]:
# Load XBP1 splicing data
# The raw table is read from a CSV file inside the gel quantification folder.
data_dir = '../data/processed/Gel_quantification'
xbp1_spliced_filename = 'XBP1_splicing_rawData_01.csv'
xbp1s_full_file = os.path.join(data_dir, xbp1_spliced_filename)

# Parse the CSV into a DataFrame, then confirm on stdout that it was read
xbp1s_raw = pd.read_csv(filepath_or_buffer=xbp1s_full_file)
print('Loaded')

In [None]:
# Create a dataframe with one row per sample
#
# For every sample, the 'spliced' and 'unspliced' band intensities on each gel
# are background-corrected with that gel's 'bkgnd' value, converted to ratios,
# and the per-gel ratios are summarised by their mean and standard deviation.

sample_IDs = xbp1s_raw['Sample'].unique()
gels = xbp1s_raw['File_name'].unique()

# Collect one record per sample and build the DataFrame once at the end.
# (The previous version wrote results with `samples.at[bool_mask, col]`, but
# `.at` only accepts a single scalar row label, not a boolean mask.)
sample_records = []
for sample_ID in sample_IDs:
    sample_data = xbp1s_raw[xbp1s_raw['Sample'] == sample_ID]

    i_spliced_bc = []
    i_unspliced_bc = []
    for gel in gels:
        gel_data = sample_data[sample_data['File_name'] == gel]

        # .item() raises ValueError unless exactly one row matches, so each
        # sample needs exactly one row per band on every gel.
        s = gel_data.loc[gel_data['Band'] == 'spliced', 'Mean'].item()
        u = gel_data.loc[gel_data['Band'] == 'unspliced', 'Mean'].item()
        b = gel_data.loc[gel_data['Band'] == 'bkgnd', 'Mean'].item()

        # apply background correction
        i_spliced_bc.append(s - b)
        i_unspliced_bc.append(u - b)

    # calculate spliced/unspliced ratios and stats for this sample
    i_s = np.asarray(i_spliced_bc)
    i_u = np.asarray(i_unspliced_bc)
    spliced_frac = np.divide(i_s, i_s + i_u)   # spliced / (spliced + unspliced)
    s_over_u = np.divide(i_s, i_u)             # spliced / unspliced

    # np.std uses ddof=0 by default, i.e. the population standard deviation.
    sample_records.append({
        'sample_ID': sample_ID,
        'spliced_frac': np.mean(spliced_frac),
        'spliced_frac_std': np.std(spliced_frac),
        's_over_u': np.mean(s_over_u),
        's_over_u_std': np.std(s_over_u),
    })

# Passing `columns` keeps the expected schema even if there are no samples.
samples = pd.DataFrame(
    sample_records,
    columns=['sample_ID', 'spliced_frac', 'spliced_frac_std',
             's_over_u', 's_over_u_std'],
)


# Plot summary: mean spliced fraction per sample, std as error bars
data_to_plot = samples

x = data_to_plot['sample_ID']
y = data_to_plot['spliced_frac']
yerr = data_to_plot['spliced_frac_std']

ax = sns.barplot(x=x, y=y, yerr=yerr,  color='steelblue')
plt.show()


In [None]:
# Plot the spliced/unspliced ratio for a subset of samples
# (rows of `samples` whose ID is NOT listed in `excluded_samples`).

# Alternative exclusion lists, kept for reference:
#excluded_samples = []
#excluded_samples = ['BT1-2C1', 'BT1-2A1', 'BT1-2A2', 'BT1-2A3', 'BT1-2A4', 'BT1-2B1', 'BT1-2B2',
# 'BT1-2B3', 'BT1-2B4', 'BT1-3A1', 'BT1-3A2', 'BT1-3A3', 'BT1-3A4', 'BT1-3B1',
# 'BT1-3B2', 'BT1-3B3', 'BT1-3B4', 'BT1-2C2', 'BT1-3C1', 'BT1-3C2', 'BT1-3C3',
# 'BT1-3C4']

excluded_samples = [
    'BT1-2C1',
    'BT1-3A1', 'BT1-3A2', 'BT1-3A3', 'BT1-3A4',
    'BT1-3B1', 'BT1-3B2', 'BT1-3B3', 'BT1-3B4',
    'BT1-2C2',
    'BT1-3C1', 'BT1-3C2', 'BT1-3C3', 'BT1-3C4',
]

# Boolean mask of the rows to keep
sample_rows_plot = ~samples['sample_ID'].isin(excluded_samples)
data_to_plot = samples.loc[sample_rows_plot]

x = data_to_plot['sample_ID']
y = data_to_plot['s_over_u']
yerr = data_to_plot['s_over_u_std']

fig, ax1 = plt.subplots()

# Draw on the axes created above explicitly rather than relying on the
# implicit "current axes".
ax1 = sns.barplot(x=x, y=y, color='steelblue', yerr=yerr, ax=ax1)
ax1.set_ylabel('XBP1s/XBP1u')

# Lay out the figure only after bars and labels exist, so that their extents
# are taken into account.
fig.tight_layout(pad=2)

if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'XBP1s_XBP1u_fraction.pdf')
    fig.savefig(fig_filename_pdf)

# Show the figure whether or not it was saved.
plt.show()


In [None]:
# Plot the spliced/unspliced ratio for a second subset of samples
# (rows of `samples` whose ID is NOT listed in `excluded_samples`).

# Alternative exclusion lists, kept for reference:
#excluded_samples = []
#excluded_samples = ['BT1-2C1', 'BT1-2A1', 'BT1-2A2', 'BT1-2A3', 'BT1-2A4', 'BT1-2B1', 'BT1-2B2',
# 'BT1-2B3', 'BT1-2B4', 'BT1-3A1', 'BT1-3A2', 'BT1-3A3', 'BT1-3A4', 'BT1-3B1',
# 'BT1-3B2', 'BT1-3B3', 'BT1-3B4', 'BT1-2C2', 'BT1-3C1', 'BT1-3C2', 'BT1-3C3',
# 'BT1-3C4']

excluded_samples = [
    'BT1-2C1',
    'BT1-2A1', 'BT1-2A2', 'BT1-2A3', 'BT1-2A4',
    'BT1-2B1', 'BT1-2B2', 'BT1-2B3', 'BT1-2B4',
    'BT1-3B1', 'BT1-3B2', 'BT1-3B3', 'BT1-3B4',
    'BT1-2C2',
    'BT1-3C1', 'BT1-3C2', 'BT1-3C3', 'BT1-3C4',
]

# Boolean mask of the rows to keep
sample_rows_plot = ~samples['sample_ID'].isin(excluded_samples)
data_to_plot = samples.loc[sample_rows_plot]

x = data_to_plot['sample_ID']
y = data_to_plot['s_over_u']
yerr = data_to_plot['s_over_u_std']

fig, ax2 = plt.subplots()

# Draw on the axes created above explicitly rather than relying on the
# implicit "current axes".
ax2 = sns.barplot(x=x, y=y, color='steelblue', yerr=yerr, ax=ax2)
ax2.set_ylabel('XBP1s/XBP1u')

# Lay out the figure only after bars and labels exist, so that their extents
# are taken into account.
fig.tight_layout(pad=2)

if save_figs:
    fig_filename_pdf = os.path.join(save_dir_pdf, 'XBP1s_XBP1u_fraction_WT_only.pdf')
    fig.savefig(fig_filename_pdf)

# Show the figure whether or not it was saved.
plt.show()
