# Data preparation

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import session_info

## Data load

In [2]:
# Read the B16F10 tumor measurements and keep only rows that have a
# recorded tumor volume.
df = (
    pd.read_csv("../../data/tumor/B16F10.csv")
    .dropna(subset=["tumor_volume"])
    .copy()
)

In [3]:
# Untreated rows are kept only from these experiments; TST rows are kept
# regardless of which experiment they belong to.
target_experiments = ['Hosoi_2014_Fig1a', 'Hirano_2015_Fig1a']

B16F10 = df.loc[
    df['Condition'].eq('TST')
    | (
        df['Condition'].eq('Untreated')
        & df['Experiment'].isin(target_experiments)
    )
]

In [4]:
# Keep only measurements taken before day 13.
B16F10 = B16F10.loc[B16F10["day"].lt(13)]

In [5]:
# Preview the first rows of the filtered tumor table.
B16F10.head()

Unnamed: 0,Experiment,Condition,id,day,mouse_cage,mouse_id,L (tumor 1),S (tumor 1),H (tumor 1),volume (tumor 1),L (tumor 2),S (tumor 2),H (tumor 2),volume (tumor 2),tumor_volume,remarks
0,Hosoi_2014_Fig1a,Untreated,1,-1,3,2,4.8,3.8,0.8,7.640353,,,,,7.640353,
1,Hosoi_2014_Fig1a,Untreated,2,-1,4,1,5.4,4.4,1.4,17.41699,,,,,17.41699,
2,Hosoi_2014_Fig1a,Untreated,3,-1,4,2,5.2,4.5,1.1,13.477432,,,,,13.477432,
3,Hosoi_2014_Fig1a,Untreated,4,-1,4,6,6.3,4.1,0.9,12.172101,,,,,12.172101,
4,Hosoi_2014_Fig1a,Untreated,5,-1,11,3,5.6,5.1,1.9,28.412564,,,,,28.412564,


In [6]:
def tumor_volume(B16F10, condition):
    """Summarize tumor volume per day for one experimental condition.

    B16F10: DataFrame with 'Condition', 'day' and 'tumor_volume' columns.
    condition: value of the 'Condition' column to select (e.g. "Untreated").
    return: DataFrame[day, mean, std], one row per day found for that
        condition; 'std' is whatever pandas' groupby 'std' aggregation yields.
    """
    is_condition = B16F10['Condition'] == condition
    volume_by_day = B16F10[is_condition].groupby('day')['tumor_volume']
    summary = volume_by_day.agg(['mean', 'std'])
    return summary.reset_index()

In [7]:
# Per-day mean and std of tumor volume for the Untreated group.
tumor_volume(B16F10, 'Untreated')

Unnamed: 0,day,mean,std
0,-1,14.049883,5.741081
1,1,53.485929,21.887297
2,3,147.816333,53.061026
3,5,344.260854,120.806616
4,7,701.644665,199.108553
5,9,1262.894172,322.208266
6,11,2096.360954,371.230847


In [8]:
# Per-day mean and std of tumor volume for the TST group.
tumor_volume(B16F10, 'TST')

Unnamed: 0,day,mean,std
0,-1,25.005876,7.669906
1,1,95.960256,38.276162
2,3,186.367038,69.466151
3,5,166.122307,89.339344
4,7,220.996475,117.66282
5,9,430.94557,261.74742
6,11,797.126653,478.913216


In [9]:
im_path = "../../data/immune/B16_immune_cells.csv"
B16IM = pd.read_csv(im_path)
# 1 g = 1e3 mg, so cells per gram divided by 1e3 gives cells per milligram.
B16IM["cells/mg"] = B16IM["cells/g"]/1e3
# Drop rows without a density value and renumber the index from 0.
B16IM = B16IM.dropna(subset=['cells/mg']).reset_index(drop=True)

# Mean and std of cell density for every (day, cell_type, Condition) group.
# The previous empty-DataFrame placeholder was overwritten immediately, so
# the aggregate is assigned directly.
im_ave = (
    B16IM
    .groupby(['day','cell_type','Condition'])['cells/mg']
    .agg(['mean','std'])
    .reset_index()
)

In [10]:
def im_data(condition, cell_type):
    """Select the rows of the global `im_ave` summary for one group.

    condition: value matched against the "Condition" column.
    cell_type: value matched against the "cell_type" column.
    return: the matching rows of `im_ave`, with their original index labels.
    """
    same_cell_type = im_ave["cell_type"] == cell_type
    same_condition = im_ave["Condition"] == condition
    return im_ave[same_cell_type & same_condition]

In [11]:
g = 25.4
def immune_cell_count(condition, cell_type, g):
    """
    Estimate immune cell numbers per day from tumor volume and cell density.

    condition: Experiment condition (eg. "Untreated")
    cell_type: Cell type (eg. "CD45")
    g: multiplicative factor, labelled "density" by the original author
       (units are not shown in this notebook -- TODO confirm)
    return: Dataframe[day, N_mean, N_std]

    The per-day tumor volume summary and the per-day immune density summary
    are joined on 'day' with pandas' default merge, so only days present in
    both tables appear in the result. N_mean is g * V_mean * I_mean; N_std
    uses the variance of a product, treating volume and density as
    independent.
    """
    volume = tumor_volume(B16F10, condition).rename(
        columns={'mean': 'V_mean', 'std': 'V_std'}
    )
    density = im_data(condition, cell_type).rename(
        columns={'mean': 'I_mean', 'std': 'I_std'}
    )
    merged = volume.merge(density[['day', 'I_mean', 'I_std']], on='day')

    v_mean, v_var = merged['V_mean'], merged['V_std']**2
    i_mean, i_var = merged['I_mean'], merged['I_std']**2

    # Var(V*I) = I_mean^2 * Var(V) + V_mean^2 * Var(I) + Var(V) * Var(I)
    n_var = (g**2) * (
        (i_mean**2) * v_var +
        (v_mean**2) * i_var +
        v_var * i_var
    )

    return pd.DataFrame({
        'day': merged['day'],
        'N_mean': g * v_mean * i_mean,
        'N_std': np.sqrt(n_var),
    })

In [12]:
# ===== Untreated =====
# Summarize the Untreated tumor volume once and reuse it for all three arrays
# instead of re-running the same groupby for each one.
unt_tv = tumor_volume(B16F10, 'Untreated')
unt_v_days = unt_tv['day']  # kept as a pandas Series, as before
# Mean/std volume scaled by 1e6 (presumably a volume -> cell-number
# conversion; TODO confirm the factor).
unt_v_data = unt_tv['mean'].to_numpy()*1e6
unt_v_std = unt_tv['std'].to_numpy()*1e6

unt_mdsc = immune_cell_count("Untreated", "CD11bGr1", g)
# Hard-coded time axis: a day -1 baseline followed by the merged days in
# `unt_mdsc` (assumed to be 1, 3, 5, 7 -- TODO confirm against the data).
unt_mdsc_days = np.array([-1, 1, 3, 5, 7])
# Baseline estimate: day -1 mean volume times g times one density value.
# NOTE(review): this takes the second row ([1]) of the Untreated density
# table, whereas the TST cell takes the first row ([0]) -- confirm that the
# difference is intentional.
unt_baseline_density = im_data('Untreated','CD11bGr1')['mean'].to_numpy()[1]
unt_mdsc_data = np.concatenate(([(unt_v_data[0]/1e6)*g*unt_baseline_density],
                                unt_mdsc['N_mean'].to_numpy()))
# The baseline point is given zero spread.
unt_mdsc_std = np.concatenate(([0],unt_mdsc['N_std'].to_numpy()))

In [13]:
# ===== TST therapy =====
tst_mdsc = immune_cell_count("TST", "CD11bGr1", g)
tst_ctl = immune_cell_count("TST", "CTL", g)

# Summarize the TST tumor volume once and reuse it for all three arrays.
tst_tv = tumor_volume(B16F10, 'TST')
v_days = tst_tv['day'].to_numpy()
# Mean/std volume scaled by 1e6 (presumably a volume -> cell-number
# conversion; TODO confirm the factor).
v_data = tst_tv['mean'].to_numpy()*1e6
v_std = tst_tv['std'].to_numpy()*1e6

# Reuse the CTL estimate computed above rather than recomputing it per column.
ctl_days = tst_ctl['day'].to_numpy()
ctl_data = tst_ctl['N_mean'].to_numpy()
ctl_std = tst_ctl['N_std'].to_numpy()

# Hard-coded time axis: a day -1 baseline followed by the merged days in
# `tst_mdsc` (assumed to be 1, 3, 5, 7 -- TODO confirm against the data).
mdsc_days = np.array([-1, 1, 3, 5, 7])
# Baseline estimate: day -1 mean volume times g times the first density row.
tst_baseline_density = im_data('TST','CD11bGr1')['mean'].to_numpy()[0]
mdsc_data = np.concatenate(([(v_data[0]/1e6)*g*tst_baseline_density],
                            tst_mdsc['N_mean'].to_numpy()))
# The baseline point is given zero spread.
mdsc_std = np.concatenate(([0],tst_mdsc['N_std'].to_numpy()))

In [14]:
# plt.figure(figsize=(4, 3))
# plt.errorbar(unt_v_days, unt_v_data, yerr=unt_v_std, fmt='o', capsize=3, markersize=3, color='black', label='Tumor')
# plt.errorbar(unt_mdsc['day'], unt_mdsc['N_mean'], yerr=unt_mdsc['N_std'], fmt='o', capsize=3, markersize=3, color='tab:green',label='MDSC')
# plt.xlabel('Days', fontsize=12)
# plt.ylabel('Cell Number', fontsize=12)
# #plt.title('Untreated')
# plt.yscale('log')
# plt.legend()
# plt.tight_layout()

In [15]:
# Inspect the Untreated mean tumor values after the 1e6 scaling.
unt_v_data

array([1.40498830e+07, 5.34859291e+07, 1.47816333e+08, 3.44260854e+08,
       7.01644665e+08, 1.26289417e+09, 2.09636095e+09])

In [16]:
# Inspect the estimated mean CD11bGr1 cell numbers for the Untreated group.
unt_mdsc['N_mean']

0    1.775118e+06
1    4.148788e+06
2    2.338563e+07
3    2.774412e+07
Name: N_mean, dtype: float64

In [17]:
# plt.figure(figsize=(4, 3))
# plt.errorbar(v_days, v_data, yerr=v_std, fmt='o', capsize=3, markersize=3, color='black', label='Tumor')
# plt.errorbar(ctl_days, ctl_data, yerr=ctl_std, fmt='o', capsize=3, markersize=3, color='tab:blue',label='CTL')
# plt.errorbar(mdsc_days, mdsc_data, yerr=mdsc_std, fmt='o', capsize=3, markersize=3, color='tab:green',label='MDSC')
# plt.xlabel('Days', fontsize=12)
# plt.ylabel('Cell Number', fontsize=12)
# #plt.title('CTL therapy')
# plt.yscale('log')
# plt.legend()
# plt.tight_layout()

In [18]:
# Inspect the TST measurement days and the scaled mean tumor values.
v_days, v_data

(array([-1,  1,  3,  5,  7,  9, 11]),
 array([2.50058763e+07, 9.59602556e+07, 1.86367038e+08, 1.66122307e+08,
        2.20996475e+08, 4.30945570e+08, 7.97126653e+08]))

In [19]:
# Inspect the TST CD11bGr1 time points and values; the first entry is the
# day -1 baseline estimate prepended to the merged values.
mdsc_days, mdsc_data

(array([-1,  1,  3,  5,  7]),
 array([  560780.93287041,  2152001.43167239, 40245898.01813048,
        28556129.17303456, 20503074.78313035]))