In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import yaml
import matplotlib as mpl
from scipy import stats

In [None]:
# Load the shared plotting configuration and apply its "rcParams" section to matplotlib.
# NOTE(review): yaml.full_load accepts more than plain scalars/lists/dicts; if this file
# only holds plain values, yaml.safe_load would be the safer choice - confirm before switching.
with open("../data/resources/rcParams.yaml") as f:
    rcParamsDict = yaml.full_load(f)

# The document is fully parsed above, so the file does not need to stay open while applying it.
for k, value in rcParamsDict["rcParams"].items():
    print("{} {}".format(k, value))
    plt.rcParams[k] = value

# Echo the remaining top-level entries in file order (iterating a set printed them in arbitrary order).
for k1 in rcParamsDict:
    if k1 != "rcParams":
        print("{} {}".format(k1, rcParamsDict[k1]))

In [None]:
# Growth-curve quantification table; every later cell works on this frame.
growth_curves_csv = '../../data/csv/organoidMultiplexing_growthCurves_quant.csv'
df = pd.read_csv(growth_curves_csv)

In [None]:
# Drop MIX7 and take an explicit copy, so the column assignment below writes to an
# independent frame instead of a filtered view (avoids SettingWithCopyWarning).
df = df[df.Line != 'MIX7'].copy()

# Identity mapping for every line, then override the two lines that are renamed.
donor_map_names = {name: name for name in df['Line'].unique()}
donor_map_names['CHD2WT'] = 'UCSFi001-A'
donor_map_names['CHD8WT'] = 'H9'
df['Line'] = df['Line'].map(donor_map_names)

In [None]:
# Colour used for each line in every plot below.
line_palette = {
    'CTL01A': '#DBB807',
    'CTL08A': '#0FB248',
    'CTL04E': '#FF0054',
    'CTL02A': '#7B00FF',
    'H9': '#72190E',
    'H1': '#994F88',
    'CTL05A': '#1965B0',
    'CTL07C': '#437DBF',
    'CTL06F': '#CAE0AB',
    'CTL09A': '#FFFF00',
    'KTD8.2': '#E65518',
    'UCSFi001-A': '#7BAFDE',
}
# All MIX lines share a single colour (there is no MIX7 entry).
line_palette.update(
    dict.fromkeys(['MIX1', 'MIX2', 'MIX3', 'MIX4', 'MIX5', 'MIX6', 'MIX8'], '#EF6F6C')
)

# Lineplot area not normalized

In [None]:
# All lines on one set of axes, raw area over time; the legend is anchored at the axes' top-right corner.
ax = sns.lineplot(data=df, x='Day', y='Area(microns)', hue='Line', palette=line_palette)
ax.legend(bbox_to_anchor=(1, 1))

In [None]:
# One panel per line: that line is drawn in its palette colour, every other line in translucent grey.
fig, axs = plt.subplots(4, 5, figsize = (20,15), gridspec_kw = {'hspace': 0.9})
axs = axs.flatten()

all_lines = df['Line'].unique()

for l, ax in zip(all_lines, axs):

    palette = {line: '#d3d3d350' for line in all_lines}
    palette[l] = line_palette[l]

    # The full frame is plotted in every panel; the per-line subset that used to be
    # computed here was never used and has been removed.
    sns.lineplot(data=df, x='Day', y='Area(microns)', hue='Line', palette=palette, ax = ax, legend = None)
    ax.set_title(l)

plt.show()

In [None]:
# plotting according to order in CENSUS Seq day 5
order = [
    "CTL02A",
    "H1",
    "KTD8.2",
    "CTL05A",
    "CTL07C",
    "CTL04E",
    "CTL08A",
    "CTL06F",
    "UCSFi001-A",
    "H9",
    "CTL01A",
    "CTL09A",
]

# Size the grid from the number of lines. The previous fixed 2x5 grid had only 10 axes,
# so zip() silently dropped the last two lines of `order`.
n_cols = 6
n_rows = -(-len(order) // n_cols)  # ceiling division
fig, axs = plt.subplots(n_rows, n_cols, figsize = (24,7.5), gridspec_kw = {'hspace': 0.4, 'wspace': 0.4})
axs = axs.flatten()

all_lines = df['Line'].unique()

for l, ax in zip(order, axs):

    # Highlight the current line; every other line is drawn in translucent grey.
    palette = {line: '#d3d3d350' for line in all_lines}
    palette[l] = line_palette[l]

    sns.lineplot(data=df, x='Day', y='Area(microns)', hue='Line', palette=palette, ax = ax, legend = None)
    ax.set_title(l)

# Hide any axes left over when len(order) is not a multiple of n_cols.
for unused_ax in axs[len(order):]:
    unused_ax.set_visible(False)

plt.show()

## Plot only organoids generated from Mix of hPSC

In [None]:
# Restrict to the MIX lines and draw one panel per MIX line, in alphabetical order.
fig, axs = plt.subplots(2, 4, figsize=(25, 7.5), gridspec_kw={'hspace': 0.4, 'wspace': 0.3})
axs = axs.ravel()

only_mix = df[df.Line.str.startswith('MIX')]
only_mix_ordered = sorted(only_mix.Line.unique().tolist())

for mix_name, panel in zip(only_mix_ordered, axs):
    # Current MIX line in colour, the remaining MIX lines in translucent grey.
    palette = dict.fromkeys(only_mix_ordered, '#d3d3d350')
    palette[mix_name] = line_palette[mix_name]

    sns.lineplot(data=only_mix, x='Day', y='Area(microns)', hue='Line',
                 palette=palette, ax=panel, legend=None)
    panel.set_title(mix_name)

plt.show()

# Lineplots normalized area
In the following plots, the area of each organoid is normalized to the area it had at day 0.

In [None]:
# All lines on one set of axes, normalized area over time; the legend is anchored at the axes' top-right corner.
ax = sns.lineplot(data=df, x='Day', y='AreaNorm', hue='Line', palette=line_palette)
ax.legend(bbox_to_anchor=(1, 1))

In [None]:
# One facet per line (wrapped every 5 columns), each coloured with the line's palette entry.
facet_options = dict(col="Line", col_wrap=5, hue='Line', palette=line_palette)
g = sns.FacetGrid(df, **facet_options)
g.map_dataframe(sns.lineplot, x="Day", y='AreaNorm')

In [None]:
# One panel per line on normalized area: that line in colour, every other line in translucent grey.
fig, axs = plt.subplots(4, 5, figsize = (20,15), gridspec_kw = {'hspace': 0.4})
axs = axs.flatten()

all_lines = df['Line'].unique()

for l, ax in zip(all_lines, axs):

    palette = {line: '#d3d3d350' for line in all_lines}
    palette[l] = line_palette[l]

    # The full frame is plotted in every panel; the unused per-line subset has been removed.
    sns.lineplot(data=df, x='Day', y='AreaNorm', hue='Line', palette=palette, ax = ax, legend = None)

    ax.set_title(l)

plt.show()

In [None]:
# NOTE(review): this import sits mid-notebook; the other imports are in the first cell.
import warnings

# Suppresses every warning category for all cells executed after this one.
warnings.filterwarnings('ignore')

In [None]:
# Panel order for the saved figure of the individual (non-MIX) lines.
order = [
    "CTL02A",
    "H1",
    "KTD8.2",
    "CTL07C",
    "CTL05A",
    "CTL08A",
    "CTL04E",
    "CTL06F",
    "CTL09A",
    "H9",
    "UCSFi001-A",
    "CTL01A",
]

fig, axs = plt.subplots(3, 4, figsize = (20,12))
axs = axs.flatten()

all_lines = df['Line'].unique()

for l, ax in zip(order, axs):

    # Highlight the current line; every other line is drawn in translucent grey.
    palette = {line: '#d3d3d350' for line in all_lines}
    palette[l] = line_palette[l]

    sns.lineplot(data=df, x='Day', y='AreaNorm', hue='Line', palette=palette, ax = ax, legend = None)

    ax.set_title(l, fontsize = 30)
    ax.set_ylabel('Area Normalized', fontsize = 20)
    ax.set_xlabel('Day', fontsize = 20)
    # Resize tick labels without replacing them: set_*ticklabels(get_*ticklabels())
    # freezes the current label text and triggers a FixedFormatter warning.
    ax.tick_params(axis='both', labelsize=15)

plt.tight_layout()
plt.savefig('./figures/CBO_GC_pureLines.svg', dpi = 300, bbox_inches = 'tight')
plt.show()

## Plot only organoids generated from Mix of hPSC

In [None]:
# Saved figure: one panel per MIX line (alphabetical order) on normalized area.
fig, axs = plt.subplots(2, 4, figsize = (25,10))
axs = axs.flatten()

only_mix = df[df.Line.str.startswith('MIX')]
only_mix_ordered = only_mix.Line.unique().tolist()
only_mix_ordered.sort()

for l, ax in zip(only_mix_ordered, axs):

    # Current MIX line in colour, the remaining MIX lines in translucent grey.
    palette = {line: '#d3d3d350' for line in only_mix_ordered}
    palette[l] = line_palette[l]

    sns.lineplot(data=only_mix, x='Day', y='AreaNorm', hue='Line', palette=palette, ax = ax, legend = None)

    ax.set_title(l, fontsize = 30)
    ax.set_ylabel('Area Normalized', fontsize = 20)
    ax.set_xlabel('Day', fontsize = 20)
    # Resize tick labels without replacing them: set_*ticklabels(get_*ticklabels())
    # freezes the current label text and triggers a FixedFormatter warning.
    ax.tick_params(axis='both', labelsize=15)

plt.tight_layout()
plt.savefig('./figures/CBO_GC_MIX.svg', bbox_inches = 'tight')
plt.show()

## Plot only organoids not generated from Mix of hPSC

In [None]:
# One panel per non-MIX line (alphabetical order) on normalized area.
fig, axs = plt.subplots(3, 4, figsize = (25,15))
axs = axs.flatten()

# This subset used to be stored in `only_mix`, which was misleading (it holds the
# non-MIX rows) and overwrote the real MIX subset; it now has its own name.
pure_lines = df[~df.Line.str.startswith('MIX')]
pure_lines_ordered = sorted(pure_lines.Line.unique().tolist())

for l, ax in zip(pure_lines_ordered, axs):

    # Current line in colour, the remaining non-MIX lines in translucent grey.
    palette = {line: '#d3d3d350' for line in pure_lines_ordered}
    palette[l] = line_palette[l]

    sns.lineplot(data=pure_lines, x='Day', y='AreaNorm', hue='Line', palette=palette, ax = ax, legend = None)

    ax.set_title(l)

plt.tight_layout()
plt.show()

# Discrete derivative and cumulative

For each replicate of each line, I'm computing here:
* the discrete derivative of the area (`AreaDeriv`)
* the discrete derivative of the normalized area (`AreaNormDeriv`)
* the cumulative sum of the discrete derivative of the area (`AreaCumul`)
* the cumulative sum of the discrete derivative of the normalized area (`AreaNormCumul`)
* the cumulative sum of the area (`cumulativeArea`)

In [None]:
# Index by FileName so the per-row results below can be assigned back to `df` by label.
df.index = df.FileName

# Kept for backward compatibility; nothing in the visible notebook reads it.
smoothed = pd.Series(dtype=float)

# Put the rows in chronological order, then run every operation within each LineRep
# (one replicate of one line). This replaces the per-replicate loop that grew Series
# with the private `Series._append` and assigned columns on filtered slices.
chronological = df.sort_values(by='Day')
replicate_key = chronological['LineRep']
per_replicate = chronological.groupby('LineRep')

# Time step between consecutive measurements of the same replicate.
day_step = per_replicate['Day'].diff()

# Discrete derivative of the normalized area, and its running sum within each replicate.
# The first time point of each replicate has no predecessor, so its derivative is NaN.
derivative = per_replicate['AreaNorm'].diff() / day_step
cumulative = derivative.groupby(replicate_key).cumsum()

# Same two quantities for the non-normalized `area` column.
derivative_not_norm = per_replicate['area'].diff() / day_step
cumulative_not_norm = derivative_not_norm.groupby(replicate_key).cumsum()

# Running sum of the area itself within each replicate.
cumulative_area = per_replicate['area'].cumsum()

In [None]:
# Attach the per-replicate series to `df`; pandas aligns each one on the index.
derived_columns = {
    'AreaNormDeriv': derivative,
    'AreaNormCumul': cumulative,
    'AreaDeriv': derivative_not_norm,
    'AreaCumul': cumulative_not_norm,
    'cumulativeArea': cumulative_area,
}
for column_name, column_values in derived_columns.items():
    df[column_name] = column_values

## Derivative of the area 

In [None]:

# One panel per line on the discrete derivative of the area.
fig, axs = plt.subplots(4, 5, figsize = (20,15), gridspec_kw = {'hspace': 0.4})
axs = axs.flatten()

all_lines = df['Line'].unique()

for l, ax in zip(all_lines, axs):

    # Current line in colour, every other line in translucent grey.
    palette = {line: '#d3d3d350' for line in all_lines}
    palette[l] = line_palette[l]

    # legend=None already suppresses the legend; the extra ax.legend('') call that
    # followed created an empty legend on every panel and has been removed.
    # NOTE(review): seaborn applies `markers` only together with `style`; confirm
    # whether point markers were intended here.
    sns.lineplot(data=df, x='Day', y='AreaDeriv', hue='Line', palette=palette, ax = ax, markers='.', legend = None)

    ax.set_title(l)

plt.tight_layout()
plt.show()

## Derivative of the normalized area 

In [None]:
# One panel per line on the discrete derivative of the normalized area.
fig, axs = plt.subplots(4, 5, figsize=(20, 15), gridspec_kw={'hspace': 0.4})
axs = axs.ravel()

line_names = df['Line'].unique()

for line_name, panel in zip(line_names, axs):
    # Grey out everything, then restore the colour of the line this panel is about.
    palette = dict.fromkeys(line_names, '#d3d3d350')
    palette[line_name] = line_palette[line_name]

    sns.lineplot(data=df, x='Day', y='AreaNormDeriv', hue='Line',
                 palette=palette, ax=panel, markers='.', legend=None)
    panel.set_title(line_name)

plt.tight_layout()
plt.show()

## Cumulative measure of the area

In [None]:
# One panel per line on the cumulative sum of the area.
fig, axs = plt.subplots(4, 5, figsize=(20, 15), gridspec_kw={'hspace': 0.4})
axs = axs.ravel()

line_names = df['Line'].unique()

for line_name, panel in zip(line_names, axs):
    # Grey out everything, then restore the colour of the line this panel is about.
    palette = dict.fromkeys(line_names, '#d3d3d350')
    palette[line_name] = line_palette[line_name]

    sns.lineplot(data=df, x='Day', y='cumulativeArea', hue='Line',
                 palette=palette, ax=panel, markers='.', legend=None)
    panel.set_title(line_name)

plt.tight_layout()
plt.show()

In [None]:
# Panel order for the cumulative-area plot (10 lines on a 2x5 grid).
order = [
    'H1',
    'CTL07C',
    'CTL05A',
    'CTL04E',
    'CTL08A',
    'CTL06F',
    'UCSFi001-A',
    'H9',
    'CTL01A',
    'CTL09A',
]

fig, axs = plt.subplots(2, 5, figsize = (20,7.5), gridspec_kw = {'hspace': 0.4})
axs = axs.flatten()

all_lines = df['Line'].unique()

for l, ax in zip(order, axs):

    # Current line in colour, every other line in translucent grey.
    palette = {line: '#d3d3d350' for line in all_lines}
    palette[l] = line_palette[l]

    # The full frame is plotted in every panel; the unused per-line subset has been removed.
    sns.lineplot(data=df, x='Day', y='cumulativeArea', hue='Line', palette=palette, ax = ax, legend = None)

    ax.set_title(l)

plt.tight_layout()
plt.show()

## Cumulative sum of the discrete derivative of the area

In [None]:

# One panel per line on the cumulative sum of the discrete derivative of the area.
fig, axs = plt.subplots(4, 5, figsize = (20,15), gridspec_kw = {'hspace': 0.8})
axs = axs.flatten()

all_lines = df['Line'].unique()

for l, ax in zip(all_lines, axs):

    # Current line in colour, every other line in translucent grey.
    palette = {line: '#d3d3d350' for line in all_lines}
    palette[l] = line_palette[l]

    # Suppress the legend at the source with legend=None. Previously seaborn built a
    # full legend on every panel and ax.legend('') then replaced it with an empty one.
    sns.lineplot(data=df, x='Day', y='AreaCumul', hue='Line', palette=palette, ax = ax, markers='.', legend = None)

    ax.set_title(l)

plt.tight_layout()
plt.show()

In [None]:
# only MIX: cumulative sum of the area derivative, one panel per MIX line
fig, axs = plt.subplots(2, 4, figsize=(20, 7.5), gridspec_kw={'hspace': 0.4})
axs = axs.ravel()

only_mix = df[df.Line.str.startswith('MIX')]
mix_names = only_mix['Line'].unique()

for mix_name, panel in zip(mix_names, axs):
    # Current MIX line in colour, the remaining MIX lines in translucent grey.
    palette = dict.fromkeys(mix_names, '#d3d3d350')
    palette[mix_name] = line_palette[mix_name]

    sns.lineplot(data=only_mix, x='Day', y='AreaCumul', hue='Line',
                 palette=palette, ax=panel, legend=None)
    panel.set_title(mix_name)

plt.show()

## Cumulative sum of the discrete derivative of the normalized area

In [None]:

# One panel per line on the cumulative sum of the normalized-area derivative.
fig, axs = plt.subplots(4, 5, figsize=(20, 15), gridspec_kw={'hspace': 0.8})
axs = axs.ravel()

line_names = df['Line'].unique()

for line_name, panel in zip(line_names, axs):
    # Grey out everything, then restore the colour of the line this panel is about.
    palette = dict.fromkeys(line_names, '#d3d3d350')
    palette[line_name] = line_palette[line_name]

    sns.lineplot(data=df, x='Day', y='AreaNormCumul', hue='Line',
                 palette=palette, ax=panel, markers='.', legend=None)
    panel.set_title(line_name)

plt.show()

In [None]:
# only MIX: cumulative sum of the normalized-area derivative, one panel per MIX line
fig, axs = plt.subplots(2, 4, figsize=(20, 7.5), gridspec_kw={'hspace': 0.4})
axs = axs.ravel()

only_mix = df[df.Line.str.startswith('MIX')]
mix_names = only_mix['Line'].unique()

for mix_name, panel in zip(mix_names, axs):
    # Current MIX line in colour, the remaining MIX lines in translucent grey.
    palette = dict.fromkeys(mix_names, '#d3d3d350')
    palette[mix_name] = line_palette[mix_name]

    sns.lineplot(data=only_mix, x='Day', y='AreaNormCumul', hue='Line',
                 palette=palette, ax=panel, legend=None)
    panel.set_title(mix_name)

plt.show()

In [None]:
# Per-line panels of AreaNormCumul with a tighter vertical spacing (hspace 0.4).
fig, axs = plt.subplots(4, 5, figsize=(20, 15), gridspec_kw={'hspace': 0.4})
axs = axs.ravel()

line_names = df['Line'].unique()

for line_name, panel in zip(line_names, axs):
    # Grey out everything, then restore the colour of the line this panel is about.
    palette = dict.fromkeys(line_names, '#d3d3d350')
    palette[line_name] = line_palette[line_name]

    sns.lineplot(data=df, x='Day', y='AreaNormCumul', hue='Line',
                 palette=palette, ax=panel, markers='.', legend=None)
    panel.set_title(line_name)

plt.show()

# Fit linear models
Here I fit a linear model between two time points to estimate a value at unmeasured time points that can be matched to CENSUS-Seq data (e.g. day 5).
Remember that the linear model will have the following formula:

`y = ax + b`

## 2 - 6 days on discrete derivative of the Normalized Area

We will have as outputs:
* `a`, that is the slope of the linear model. Here will correspond to the __discrete acceleration__ between day 2 and 6
* if we input 5 as `x`, we can get the __discrete derivative (approximating the velocity)__ of the area at day 5

In [None]:
# Per-line linear fit of AreaNormDeriv against Day over days 2, 4 and 6.
slopes = {}
day_5_value = {}

day_2_6 = df[df.Day.isin([2, 4, 6])]

for l in day_2_6.Line.unique():
    sub = day_2_6[day_2_6.Line == l]

    # Run the regression once and read both coefficients from the same result
    # (it used to be computed twice, once for the slope and once for the intercept).
    fit = stats.linregress(sub['Day'], sub['AreaNormDeriv'])
    slopes[l] = {'slope': fit[0], 'intercept': fit[1]}

    # Evaluate the fitted line at day 5.
    day_5_value[l] = slopes[l]['slope']*5 + slopes[l]['intercept']

Order by the fitted __discrete derivative__ of the normalized area at day 5:

In [None]:
# `day_5_value` holds the fitted AreaNormDeriv at day 5 at this point, so the column
# is named after the derivative (it was previously mislabelled `area_day_5`).
day_5_value_df = pd.DataFrame(day_5_value.values(), day_5_value.keys())
day_5_value_df.columns = ['deriv_day_5']
# Show the individual lines only, in ascending order.
day_5_value_df.sort_values(by = 'deriv_day_5').drop([i for i in day_5_value_df.index if i.startswith('MIX')])

# Fit linear model 2 - 6 days - Normalized Area

We will have as outputs:
* `a`, that is the slope of the linear model. Here will correspond to the __discrete derivative (approximating the velocity)__ between day 2 and 6
* if we input 5 as `x`, we can get the __area__ at day 5

In [None]:
# Per-line linear fit of AreaNorm against Day over days 2, 4 and 6.
slopes = {}
day_5_value = {}

day_2_6 = df[df.Day.isin([2, 4, 6])]

for l in day_2_6.Line.unique():
    sub = day_2_6[day_2_6.Line == l]

    # Run the regression once and read both coefficients from the same result
    # (it used to be computed twice, once for the slope and once for the intercept).
    fit = stats.linregress(sub['Day'], sub['AreaNorm'])
    slopes[l] = {'slope': fit[0], 'intercept': fit[1]}

    # Evaluate the fitted line at day 5.
    day_5_value[l] = slopes[l]['slope']*5 + slopes[l]['intercept']

Order by __area__ at day 5:

In [None]:
# One row per line with the fitted normalized area at day 5.
day_5_value_df = pd.DataFrame(
    {'area_day_5': list(day_5_value.values())},
    index=list(day_5_value.keys()),
)
# Show the individual lines only, in ascending order.
mix_labels = [label for label in day_5_value_df.index if label.startswith('MIX')]
day_5_value_df.sort_values(by='area_day_5').drop(mix_labels)

Order by __slope__ (discrete __velocity__):

In [None]:
# Rows = lines, columns = slope / intercept of the day 2-6 fit.
slopes_26_df = pd.DataFrame.from_dict(slopes).T
# Show the individual lines only, ordered by slope.
mix_labels = [label for label in day_5_value_df.index if label.startswith('MIX')]
slopes_26_df.sort_values(by='slope').drop(mix_labels)

In [None]:
# Export the day 2-6 fit of the individual lines, ordered by slope.
mix_labels = [label for label in day_5_value_df.index if label.startswith('MIX')]
slopes_26_sorted = slopes_26_df.sort_values(by='slope').drop(mix_labels)
slopes_26_sorted.to_csv('../../data/csv/CBO_day26_fit.csv')

In [None]:
# From here on, floats are displayed with 8 decimals (this is a notebook-wide display option).
pd.set_option('display.float_format', '{:.8f}'.format)
mix_labels = [label for label in day_5_value_df.index if label.startswith('MIX')]
slopes_26_df.sort_values(by='slope').drop(mix_labels).round(8)

# Fit linear model 0 - 2 days - Normalized Area

We will have as outputs:
* `a`, that is the slope of the linear model. Here will correspond to the __discrete derivative (approximating the velocity)__ between day 0 and 2

In [None]:
# Per-line linear fit of AreaNorm against Day over days 0 and 2.
slopes_02 = {}

day_02 = df[df.Day.isin([0, 2])]

for l in day_02.Line.unique():
    sub = day_02[day_02.Line == l]

    # Run the regression once and read both coefficients from the same result
    # (it used to be computed twice, once for the slope and once for the intercept).
    fit = stats.linregress(sub['Day'], sub['AreaNorm'])
    slopes_02[l] = {'slope': fit[0], 'intercept': fit[1]}

In [None]:
# Rows = lines, columns = slope / intercept of the day 0-2 fit.
slopes_02_df = pd.DataFrame.from_dict(slopes_02).T
# Pick the MIX rows from this frame's own index. The labels were previously taken from
# `day_5_value_df` (the day 2-6 fit), which raises KeyError or leaves MIX rows in place
# whenever the two fits do not cover exactly the same lines.
slopes_02_df.sort_values(by = 'slope').drop([i for i in slopes_02_df.index if i.startswith('MIX')])

In [None]:
# Export the day 0-2 fit of the individual lines, ordered by slope. The MIX rows are
# picked from this frame's own index rather than from `day_5_value_df` (the day 2-6 fit),
# so the drop cannot fail or miss rows when the two fits cover different lines.
slopes_02_df.sort_values(by = 'slope').drop([i for i in slopes_02_df.index if i.startswith('MIX')]).to_csv('../../data/csv/CBO_day02_fit.csv')

### Differences in slope
We can compute the difference between the slopes to test whether the change in the __rate of growth__ between days 0–2 and days 2–6 is informative of a specific growth capacity.


In [None]:
# Side-by-side slopes of the two fits, aligned on the line name.
differenceSlopes = pd.concat(
    [
        slopes_26_df['slope'].rename('slope2-6'),
        slopes_02_df['slope'].rename('slope0-2'),
    ],
    axis=1,
)

In [None]:
# Day 2-6 slope minus day 0-2 slope, per line, in ascending order.
results = differenceSlopes['slope2-6'].sub(differenceSlopes['slope0-2'])
results.sort_values()

In [None]:
# Sum of the two slopes, per line, in ascending order (this rebinds `results`).
results = differenceSlopes['slope2-6'].add(differenceSlopes['slope0-2'])
results.sort_values()