In [3]:
# Colab setup ------------------
# On Google Colab, install the course packages and read data from the remote
# bucket; everywhere else, read data from the local ../data/ directory.
import os
import subprocess
import sys

if "google.colab" in sys.modules:
    # Run pip through the kernel's own interpreter so the packages are
    # installed into the environment this notebook is actually running in.
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", "--upgrade",
         "iqplot", "bebi103", "watermark"],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Report the failure instead of silently discarding pip's output;
        # later imports would otherwise fail with no hint as to why.
        print("WARNING: pip install failed:\n" + result.stderr, file=sys.stderr)
    data_path = "https://s3.amazonaws.com/bebi103.caltech.edu/data/"
else:
    data_path = "../data/"
# ------------------------------

Importing required packages. Sophia Vera

In [4]:
# Third-party packages used for data handling and plotting.
import pandas as pd
# NOTE(review): numpy is not referenced in any cell visible here — confirm it is still needed.
import numpy as np
import iqplot
import bokeh.io

# Configure Bokeh so that plots shown later are displayed in the notebook.
bokeh.io.output_notebook()

This loads in the data frame from the file. Sophia Vera

In [5]:
# Read the measurements from the first sheet of the Excel workbook.
excel_file = os.path.join(data_path, "wang_cool_gal4.xlsx")
df = pd.read_excel(excel_file, sheet_name="Sheet1")

# Split every original header into a (driver, temperature) pair: the first two
# characters, upper-cased, are used as the driver and the next two as the temperature.
header_pairs = [(name[:2].upper(), name[2:4]) for name in df.columns]

# Swap the flat headers for a two-level column index with named levels.
df.columns = pd.MultiIndex.from_tuples(
    header_pairs,
    names=("Pmyo-2::GAL4 Driver", "Temperature (°C)"),
)

This replaces the NaNs with blank cells in a display copy of the table. Sophia Vera

In [6]:
# Display-only copy of the table in which missing values are shown as blank
# cells instead of NaN; `df` itself is not modified.
datatable = df.fillna(value="")
datatable

Pmyo-2::GAL4 Driver,SC,SC,SC,SK,SK,SK,M3,M3,M3
Temperature (°C),15,20,25,15,20,25,15,20,25
0,1298.547,2737.135,16499.3,8978.031,14251.97,16593.96,20722.62,11703.01,17073.8
1,6798.146,12185.36,12672.19,11221.13,13616.94,6910.673,10373.52,17025.58,15379.37
2,10686.91,1532.901,15832.53,9460.2,17474.9,20934.66,21707.12,17499.83,16991.49
3,2018.864,2357.752,15065.03,9049.072,16042.57,18042.75,21750.27,15657.5,13393.35
4,1551.396,1888.596,16960.67,14132.81,12867.4,23189.56,20820.26,11383.33,7709.297
5,1682.306,5757.599,13609.09,13359.27,16677.19,20526.22,7395.207,8404.338,13274.08
6,5801.691,3796.529,15602.73,13160.89,13344.14,19782.51,22510.41,10261.6,13168.06
7,3485.557,4758.537,12163.93,11233.41,18016.01,21371.72,11121.24,10938.8,14285.94
8,5701.043,11756.49,13334.83,8949.01,13734.49,18062.91,31089.68,19347.14,12071.33
9,2584.733,1526.974,14128.31,12566.75,15299.76,19314.24,24028.75,14456.7,12538.9


This melts the data into a tidy (long-format) data frame, which is easier to work with, and drops the missing values. Sophia Vera

In [7]:
# Reshape from wide to long so each row holds a single measurement, then
# discard the rows whose measurement is missing.
df = pd.melt(df, value_name="Fluorescence Intensity (a.u.)").dropna()
df

Unnamed: 0,Pmyo-2::GAL4 Driver,Temperature (°C),Fluorescence Intensity (a.u.)
0,SC,15,1298.547
1,SC,15,6798.146
2,SC,15,10686.910
3,SC,15,2018.864
4,SC,15,1551.396
...,...,...,...
261,M3,25,14544.240
262,M3,25,12140.310
263,M3,25,11876.020
264,M3,25,15535.850


This simply sends the data into separate box-and-whisker plots based on the multi-index headers. Juan Arvelo Sophia Vera

In [8]:
# Positional column lookup on the tidy frame: the first two columns are the
# categorical variables and the third is the plotted quantity.
driver_col, temp_col, intensity_col = df.columns[:3]

# Strip plot overlaid on box plots, one category per (driver, temperature)
# pair, colored by driver, with the measured quantity on the y-axis.
p = iqplot.stripbox(
    data=df,
    q=intensity_col,
    cats=[driver_col, temp_col],
    color_column=driver_col,
    q_axis="y",
    spread="jitter",
    frame_height=400,
    frame_width=500,
    show_legend=True,
    legend_location="top_right",
    x_axis_label=temp_col,
    # Title spelling corrected ("Flourescence" -> "Fluorescence").
    title="Fluorescence at Different Temperatures by Driver",
)

bokeh.io.show(p)

b. Based on just the plots, we can see that SK, the cool Gal4, matches the normal Gal4, SC, in the 25 °C regime. In the lower 15 °C and 20 °C regimes, SK performs similarly to how it does in the 25 °C regime, while SC sees a massive drop in expression. So I think the cool Gal4 system is far better suited for the lower temperatures. Juan Arvelo



In [None]:
# Record the Python/IPython versions and the versions of the listed packages
# so the environment used for this analysis is documented.
%load_ext watermark
%watermark -v -p pandas,bokeh,iqplot,jupyterlab

Python implementation: CPython
Python version       : 3.9.11
IPython version      : 8.4.0

pandas    : 1.4.4
bokeh     : 2.4.3
iqplot    : 0.3.2
jupyterlab: 3.4.4

