In [None]:
#this is where we import the tools we will use

import pandas as pd #pandas is a data management tool
import numpy as np #numpy makes manipulating numbers easier

import seaborn as sns #seaborn makes plotting data very convenient!
import matplotlib.pyplot as plt #matplotlib is the base Python plotting package that seaborn is built on top of


#this is a formatting option so our figures are displayed in the jupyter notebook
%matplotlib inline

In [None]:
#load sample data from the data folder (the path is relative, so it depends on where the notebook is run from)
#note: an integer sheet_name is a zero-based position, so sheet_name=1 reads the SECOND sheet of the workbook

dat = pd.read_excel("data/sample_data.xlsx", sheet_name=1)

In [None]:
#display the data (a bare variable name on the last line of a cell is shown as the cell's output)

dat

In [None]:
#looks overwhelming! how do we manipulate this data?
#let's try to visualize it as a heatmap
#NOTE(review): `___` looks like a blank to fill in before running; be aware that in IPython
#`_`, `__` and `___` also hold the last three cell outputs, so a blank left unfilled may fail in a confusing way

sns.heatmap(___)

In [None]:
#that doesn't work... What does the error say?
#let's try to remove that column
#iloc[:, k:] keeps every row and only the columns from position k onwards
#NOTE(review): `_` looks like a blank for that starting column position - fill it in before running

dat.iloc[:, _:]

In [None]:
#let's try to visualize it again, using the same column slice as in the previous step
#NOTE(review): `_` looks like the same blank as before - fill it in before running

sns.heatmap(dat.iloc[:, _:])
plt.show()

In [None]:
#looks strange but we are getting somewhere.
#On the x-axis there are all 100 wells of the bioscreen plate.
#On the y-axis there are all of the timepoints.

#I want to be able to visualize the bioscreen plate at a single timepoint, let us say at 16 hours

dat

In [None]:
#we see that 16 hours corresponds to row 32, since the bioscreen read the plate every 30 minutes
#(two reads per hour, with the first read counted as row 0).
#Let's pull that row out.
#NOTE(review): `__` looks like a blank for the row number - fill it in before running

dat.iloc[__, :]

In [None]:
#let's also reformat this data so it looks like a bioscreen plate

#first copy only the values (not including the time or well labels) to a new variable "timepoint"
#the `1:` skips column 0 and keeps every column after it
#NOTE(review): `__` looks like the same row blank as in the previous step
timepoint = dat.iloc[__, 1:]

timepoint

In [None]:
#notice the datatype is object, we must convert it to float (numerical)
#after this line timepoint is a plain numpy array of floats, no longer a pandas object

timepoint = np.array(timepoint, dtype=float)

In [None]:
#make the data look like a bioscreen plate (10x10)
#NOTE(review): the two `__` blanks are presumably the number of rows and the number of columns - confirm
timepoint = timepoint.reshape(__, __)

timepoint

In [None]:
#let's try to plot it now
#NOTE(review): `________` looks like a blank for the thing to plot (presumably the reshaped array) - confirm

sns.heatmap(________)
plt.show()

In [None]:
#looks like a bioscreen plate!
#one problem is that the plate is the wrong way round. We can solve this by transposing the matrix
#NOTE(review): `________` looks like a blank for the transposed array (for a numpy array, `.T` gives the transpose)

sns.heatmap(________)
plt.show()

In [None]:
#we can do this all in one block of code now that we figured it out. It's nice to include comments
#NOTE(review): every run of underscores below looks like a blank to be filled with the values worked out above

#get data (one row of readings, skipping column 0)
timepoint = dat.iloc[__, 1:]

#reformat data (object -> float numpy array, then into the plate layout)
timepoint = np.array(timepoint, dtype=float)
timepoint = timepoint.reshape(__, __)

#plot data
sns.heatmap(________)
plt.show()

In [None]:
#how do you get a different timepoint? let's say hour 20?
#(remember the plate was read every 30 minutes, so there are two rows per hour)
#NOTE(review): every run of underscores below looks like a blank to fill in before running

#get data
timepoint = dat.iloc[__, 1:]

#reformat data
timepoint = np.array(timepoint, dtype=float)
timepoint = timepoint.reshape(__, __)

#plot data
sns.heatmap(________)
plt.show()

In [None]:
#notice we only change one value. Let's make this a variable t, representing the number of hours
#NOTE(review): the row blank below presumably has to be written in terms of t (two rows per hour) - confirm

t = 20

#get data
timepoint = dat.iloc[__, 1:]

#reformat data
timepoint = np.array(timepoint, dtype=float)
timepoint = timepoint.reshape(__, __)

#plot data
sns.heatmap(________)
plt.show()

In [1]:
#now let's write a loop to show us the data every 5 hours
#each pass through the loop sets t to the next hour in the list

for t in [5, 10, 15, 20]:
    print('hour ' + str(t)) #print the hour so we can tell the outputs apart
    
    #____ (blank: presumably the get / reformat / plot steps from above go here, indented inside the loop)

hour 5
hour 10
hour 15
hour 20


In [None]:
#let's make it a little nicer by adding labels and scaling the data the same in each figure.

for t in [5, 10, 15, 20]:
    print('hour ' + str(t))
    
    #get data
    #_____
    #format data
    #_____
    
    #add labels
    #NOTE(review): .index / .columns are pandas DataFrame attributes and six labels are given per axis,
    #so the "format data" blank presumably has to leave timepoint as a 6x6 DataFrame rather than the
    #10x10 numpy array built in the earlier cells - confirm. With the blanks left empty, these two lines
    #act on whatever `timepoint` was left over from a previous cell.
    timepoint.index = ['LB', 'A', 'G', 'T', 'C', 'U'] #labels for the y axis
    timepoint.columns = ['WT', 'WT', 'WT', '15Y', '15Y', '15Y'] #labels for the x axis
    
    #plot data
    #____

In [None]:
#we're not done yet!
#we are happy with what the data looks like, so let's work on making growth curves
#let's remind ourselves of what the data looks like

dat

In [None]:
#let's get the columns that contain our data

#first, make a list of the wells we are interested in.
#written as one pair per line: the well numbers go up by 10 from one pair to the next
wells = [
    1, 4,
    11, 14,
    21, 24,
    31, 34,
    41, 44,
    51, 54,
]

wells

In [None]:
#notice I did this experiment in triplicate: each well listed here is the first of three consecutive wells
wells = [1, 4, 11, 14, 21, 24, 31, 34, 41, 44, 51, 54]

#trick: make a temporary nested list holding each starting well plus the next two
wells = [list(range(first, first + 3)) for first in wells]

print(wells)
print()

#flatten the nested list back into one flat list of well numbers
wells = [well for triplicate in wells for well in triplicate]

print(wells)

In [None]:
#add the 'Well-' before each number so the entries can be used as column labels when selecting from dat
#entries that already start with 'Well-' are left alone, so running this cell a second time
#does not turn 'Well-1' into 'Well-Well-1'
wells = [w if str(w).startswith('Well-') else 'Well-' + str(w) for w in wells]

print(wells)

In [None]:
#now we add the labels - your experiment may differ.
#only one label per starting well is needed here; the triplicates are filled in afterwards with the same trick
#the i-th label belongs to the i-th entry of: wells = [1, 4, 11, 14, 21, 24, 31, 34, 41, 44, 51, 54]

#first part of each label: the two values alternate from one starting well to the next
labels1 = ['WT', '15Y'] * 6

#second part of each label: each value is used for two starting wells in a row
labels2 = [medium for medium in ['LB', 'A', 'G', 'T', 'C', 'U'] for _ in range(2)]

In [None]:
#trick to fill in triplicates: join each labels1/labels2 pair into one label ('WT LB', '15Y LB', ...)
#and repeat it three times in a row, once per replicate well
#this is done in a single comprehension so that no temporary variable is needed
#(in particular the hour variable `t` used in the earlier cells is not overwritten)
labels = [w + ' ' + y for w, y in zip(labels1, labels2) for _ in range(3)]

print(labels)

In [None]:
#get the wells of interest: keep every row, but only the columns named in the previously created 'wells' list
df = dat.loc[:, wells]

#convert each of the selected columns to numbers
df = df.apply(pd.to_numeric)

df

In [None]:
#add labels: replace the column names with the entries of 'labels', position by position
#note: each label appears three times in 'labels' (once per replicate), so the column names are no longer unique
#run this before the 'Time' column is added, while df still has exactly one column per label
df.columns = labels

df

In [None]:
#add a time variable: one reading every half hour,
#so row 0 is 0 h, row 1 is 0.5 h, row 2 is 1 h, and so on
df['Time'] = np.arange(len(df)) / 2

df

In [None]:
#now for plotting! let's convert the dataframe to something seaborn can work with using melt:
#'Time' is kept as the identifier column, and every other column is stacked into two new columns,
#'variable' (the column label) and 'value' (the reading)
#value_vars is left out on purpose: melt then uses every column that is not an id column, which is
#exactly the label columns, and we avoid passing the repeated triplicate labels as a column list
m = pd.melt(df, id_vars=['Time'])

m

In [None]:
#we can plot this as is: time on the x axis, readings on the y axis, one colour per label
#create the figure and axes explicitly, then let seaborn draw onto those axes
fig, ax = plt.subplots()
sns.lineplot(data=m, x='Time', y='value', hue='variable', ax=ax)

ax.set_xlabel('Hours')
ax.set_ylabel('OD600')

In [None]:
#or we can improve this plot by splitting the variable into strain and condition
#each label is two words separated by a space, so split once on whitespace and reuse the pieces
label_parts = m['variable'].str.split()
m['Strain'] = label_parts.str[0] #first word of the label
m['Media'] = label_parts.str[1] #second word of the label

m

In [None]:
#colour the lines by 'Media' and vary the line style by 'Strain'
fig, ax = plt.subplots()
sns.lineplot(data=m, x='Time', y='value', hue='Media', style='Strain', ax=ax)

ax.set_xlabel('Hours')
ax.set_ylabel('OD600')

In [None]:
#we can add some improvements: a larger figure, a named colour palette and despined axes
fig, ax = plt.subplots(figsize=(10, 6))

sns.lineplot(data=m, x='Time', y='value', hue='Media', style='Strain', palette='deep', ax=ax)
sns.despine(ax=ax)

ax.set_xlabel('Hours')
ax.set_ylabel('OD600')