In [38]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [39]:
"""
first, we need to import all the data in ov_datasets_v7.csv,
which gathers all tumor volume data from McCart et al. 2021, digitized with WebPlotDigitizer

data structure:
1. the first 20 columns are the tumor volume under vvDD condition
   specifying x (time) and y (tumor volume) for each two columns, totalling 10 samples
   the first data row (row 0) can be ignored
   rows 1-5 hold the 5 time points for all 10 samples
2. the next 20 columns are the tumor volume under normal/PBS condition with similar structure
3. the last four columns are the mean and stde data of the tumor volume under vvDD condition and normal/PBS condition
   each two columns specify one condition, respectively the x and y
   each three rows specify one time point: mean, stde+, and stde-
   again, the first row can be ignored

"""

df = pd.read_csv('/Users/yuhongliu/Documents/OV/data/ov_datasets_v7.csv')

# Labels shared by every frame built in this cell
time_point_index = [0, 1, 2, 3, 4]
mean_stde_columns = ['mean', 'stde_plus', 'stde_minus']

# vvDD condition: rows 1-5, taking the Y column (odd positions 1, 3, ..., 19) of each (x, y) pair
tumor_vol_vvDD = df.iloc[1:6, 1:20:2].values
tumor_vol_vvDD_df = pd.DataFrame(
    tumor_vol_vvDD,
    index=time_point_index,
    columns=[f'vvDD_sample_{n}' for n in range(1, 11)],
)

# normal/PBS condition: same layout, Y columns at positions 21, 23, ..., 39
tumor_vol_pbs = df.iloc[1:6, 21:40:2].values
tumor_vol_pbs_df = pd.DataFrame(
    tumor_vol_pbs,
    index=time_point_index,
    columns=[f'pbs_sample_{n}' for n in range(1, 11)],
)

# Digitized summary values, kept for comparison with the per-sample frames above:
# third-from-last column for vvDD, last column for PBS, skipping row 0
vvDD_mean_stde_data = df.iloc[1:, -3].values
pbs_mean_stde_data = df.iloc[1:, -1].values

# Every three consecutive values become one row: (mean, stde+, stde-)
vvDD_mean_stde_df = pd.DataFrame(
    vvDD_mean_stde_data.reshape(-1, 3),
    index=time_point_index,
    columns=mean_stde_columns,
)
pbs_mean_stde_df = pd.DataFrame(
    pbs_mean_stde_data.reshape(-1, 3),
    index=time_point_index,
    columns=mean_stde_columns,
)

# Coerce all four frames to numeric; entries that cannot be parsed become NaN
tumor_vol_vvDD_df, tumor_vol_pbs_df, vvDD_mean_stde_df, pbs_mean_stde_df = [
    frame.apply(pd.to_numeric, errors='coerce')
    for frame in (tumor_vol_vvDD_df, tumor_vol_pbs_df, vvDD_mean_stde_df, pbs_mean_stde_df)
]


In [None]:
# Display the per-sample vvDD frame (rows: time-point index, columns: samples)
tumor_vol_vvDD_df

In [None]:
# plot the tumor volume data for vvDD and PBS condition
plt.figure(figsize=(10, 6))

# The frames are indexed 0-4 while the x axis is ticked in days 3-7;
# shift the x values so every point sits on its tick.
day_offset = 3

# One translucent line per sample; only the first line of each condition
# carries a label so the legend shows a single entry per condition.
for frame, colour, legend_label in (
    (tumor_vol_vvDD_df, 'blue', 'vvDD'),
    (tumor_vol_pbs_df, 'red', 'PBS'),
):
    for i in range(frame.shape[1]):
        plt.plot(frame.index + day_offset, frame.iloc[:, i],
                 marker='o', linestyle='-', color=colour, alpha=0.3,
                 label=legend_label if i == 0 else "")

plt.xlabel('Time (days)')
# Raw string: '\m' is not a valid Python escape sequence
plt.ylabel(r'Tumor Volume ($\mu m^3$)')
plt.title('Tumor Volume vs Time for vvDD and PBS Conditions')
plt.legend()
plt.grid(False)
plt.xticks([3, 4, 5, 6, 7])
plt.show()

In [None]:
# plot the mean and stde data for vvDD and PBS condition

# Mean and standard error of the mean across samples (columns) at each time point
vvDD_mean = tumor_vol_vvDD_df.mean(axis=1)
vvDD_se = tumor_vol_vvDD_df.sem(axis=1)
pbs_mean = tumor_vol_pbs_df.mean(axis=1)
pbs_se = tumor_vol_pbs_df.sem(axis=1)

# The frames are indexed 0-4 while the x axis is ticked in days 3-7;
# shift the x values so every point sits on its tick.
day_offset = 3

# Plot both provided and calculated data
plt.figure(figsize=(12, 6))

# Provided (digitized) summary: asymmetric bars given as [lower, upper] distances from the mean
plt.errorbar(vvDD_mean_stde_df.index + day_offset, vvDD_mean_stde_df['mean'],
             yerr=[vvDD_mean_stde_df['mean'] - vvDD_mean_stde_df['stde_minus'],
                   vvDD_mean_stde_df['stde_plus'] - vvDD_mean_stde_df['mean']],
             fmt='o-', color='blue', ecolor='lightblue', capsize=5, label='vvDD (provided)')
plt.errorbar(pbs_mean_stde_df.index + day_offset, pbs_mean_stde_df['mean'],
             yerr=[pbs_mean_stde_df['mean'] - pbs_mean_stde_df['stde_minus'],
                   pbs_mean_stde_df['stde_plus'] - pbs_mean_stde_df['mean']],
             fmt='o-', color='red', ecolor='lightcoral', capsize=5, label='PBS (provided)')

# Summary recomputed from the individual samples (symmetric standard-error bars)
plt.errorbar(vvDD_mean.index + day_offset, vvDD_mean, yerr=vvDD_se,
             fmt='s--', color='darkblue', ecolor='blue', capsize=5, label='vvDD (calculated)')
plt.errorbar(pbs_mean.index + day_offset, pbs_mean, yerr=pbs_se,
             fmt='s--', color='darkred', ecolor='red', capsize=5, label='PBS (calculated)')

plt.xlabel('Time (days)')
plt.ylabel(r'Tumor Volume ($\mu m^3$)')
plt.title('Mean Tumor Volume with Standard Error vs Time')
plt.legend()
plt.grid(False)
plt.xticks([3, 4, 5, 6, 7])
plt.show()



In [30]:
# rescale the tumor volume to tumor cell number
scaled = True

if scaled:
    # 7.5 um is the average diameter of an OV cell; its cube is used as the per-cell volume
    conversion_fac = 7.5 ** 3
    # NOTE(review): both frames are overwritten in place, so running this cell twice
    # divides twice -- re-run the loading cell before re-running this one.
    tumor_vol_vvDD_df = tumor_vol_vvDD_df.div(conversion_fac)
    tumor_vol_pbs_df = tumor_vol_pbs_df.div(conversion_fac)

In [31]:
# make the measurement.tsv file
def create_rows(df, condition, observable_id=None):
    """Flatten a (time x sample) frame into measurement-table records.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds the time points and whose columns hold the
        individual samples.
    condition : str
        Value written to ``simulationConditionId`` for every record.
    observable_id : str, optional
        Value written to ``observableId``. When omitted, it falls back to
        ``'tumor_num'`` if the notebook-level flag ``scaled`` is truthy and
        ``'tumor_vol'`` otherwise, so ``scaled`` must be defined in that case.

    Returns
    -------
    list of dict
        One record per (time, sample) cell, ordered by time first and by
        column second. The optional observableParameters / noiseParameters
        columns are not emitted.
    """
    if observable_id is None:
        # Only touch the notebook global when the caller did not choose an id
        observable_id = 'tumor_num' if scaled else 'tumor_vol'

    return [
        {
            'observableId': observable_id,
            'simulationConditionId': condition,
            'measurement': df.loc[time, col],
            'time': time,
        }
        for time in df.index
        for col in df.columns
    ]

# Measurement records for both conditions: vvDD first, then the PBS control ('ctrl')
vvDD_rows = create_rows(tumor_vol_vvDD_df, 'vvDD')
pbs_rows = create_rows(tumor_vol_pbs_df, 'ctrl')
all_rows = [*vvDD_rows, *pbs_rows]

# Column order of the exported table; the optional observableParameters and
# noiseParameters columns are currently not written.
column_order = ['observableId', 'simulationConditionId', 'measurement', 'time']

# Build the final dataframe with the columns in the order above
measurements_df = pd.DataFrame(all_rows)[column_order]

# Save to TSV file; the file name records whether the data were rescaled
measurements_path = (
    'petab_files/measurements_noscaling_newnoise_scaled.tsv'
    if scaled
    else 'petab_files/measurements_noscaling_newnoise.tsv'
)
measurements_df.to_csv(measurements_path, sep='\t', index=False)

In [None]:
# Coefficient of variation (std / mean across samples) at each time point, per condition
vvDD_cv = tumor_vol_vvDD_df.std(axis=1).div(tumor_vol_vvDD_df.mean(axis=1))
pbs_cv = tumor_vol_pbs_df.std(axis=1).div(tumor_vol_pbs_df.mean(axis=1))

# Display the results
print("Coefficient of Variation for vvDD condition:", vvDD_cv, sep="\n")
print("\nCoefficient of Variation for PBS condition:", pbs_cv, sep="\n")

In [None]:
# Standard deviation across the vvDD samples at each time point
# (in cell-number units if the rescaling cell ran with scaled=True beforehand)
tumor_vol_vvDD_df.std(axis=1)

In [None]:
# Mean across the vvDD samples at each time point
# (in cell-number units if the rescaling cell ran with scaled=True beforehand)
tumor_vol_vvDD_df.mean(axis=1)

In [35]:
# 1. Create the model file using antimony
# this is being done in model_creation.py

# 2. Create the parameter table
# One entry per parameter: (parameterId, parameterName, lowerBound, upperBound).
# Every parameter is on the log10 scale, has nominal value 1 and is estimated.
# NOTE(review): the nominal value 1 lies outside the bounds of kappa, psi and
# sigma_add -- confirm that this is acceptable for the tools reading the table.
parameter_specs = [
    ('rho', 'rho', 1e-2, 1e1),
    ('kappa', 'kappa', 1e2, 1e5),
    ('psi', 'psi', 1e-13, 1e-9),
    ('beta', 'beta', 1e0, 1e4),
    ('alpha', 'alpha', 1e-8, 1e8),
    ('delta', 'delta', 1e-2, 1e2),
    ('sigma_add', 'additive_noise', 1e1, 1e5),
    ('sigma_mul', 'multiplicative_noise', 1e-4, 1e2),
]
parameter_df = pd.DataFrame(
    [
        {
            'parameterId': param_id,
            'parameterName': param_name,
            'parameterScale': 'log10',
            'lowerBound': lower,
            'upperBound': upper,
            'nominalValue': 1,
            'estimate': 1,
            'parameterType': '',
        }
        for param_id, param_name, lower, upper in parameter_specs
    ]
)
# export the parameter table to tsv file
parameter_df.to_csv('petab_files/parameters_no_scaling_new_noise_formula.tsv', sep='\t', index=False)

# 3. Create the observable table
# The observable id/name follow the `scaled` flag set in the rescaling cell,
# matching the ids written by the measurement export.
observable_id = 'tumor_num' if scaled else 'tumor_vol'
observable_name = 'tumor_number' if scaled else 'tumor_volume'

observable_df = pd.DataFrame({
    'observableId': [observable_id],
    'observableName': [observable_name],
    'observableFormula': ['C_u + C_i'],
    # Additive and multiplicative noise terms combined in quadrature
    'noiseFormula': ['sqrt(sigma_add^2 + (sigma_mul * (C_u + C_i))^2)'],
    'noiseDistribution': ['normal'],
    'observableTransformation': ['lin']
})

# export the observable table to tsv file
# Only use the "_scaled" file name when the data were actually rescaled, so an
# unscaled run does not overwrite the scaled table with a 'tumor_vol' observable.
observable_path = (
    'petab_files/observables_no_scaling_new_noise_formula_scaled.tsv'
    if scaled
    else 'petab_files/observables_no_scaling_new_noise_formula.tsv'
)
observable_df.to_csv(observable_path, sep='\t', index=False)

# 4. Create the measurement table
# this is being done above

# 5. Create the condition table
# Two conditions: 'vvDD' with a virus injection of 3e9 and 'ctrl' with none.
condition_ids = ['vvDD', 'ctrl']
condition_df = pd.DataFrame({
    'conditionId': condition_ids,
    'conditionName': condition_ids,
    'virus_injection': [3e9, 0],
})
# export the condition table to tsv file
condition_df.to_csv('petab_files/conditions.tsv', sep='\t', index=False)


In [None]:
# TODO: adjust the rho limit and refit the model
# TODO: delete the scaling parameter
    # TODO: adjust the corresponding parameter bounds
# TODO: adjust the noise formula
    # TODO: adjust the additive noise bounds
# TODO: refit the models
# TODO: scale the data to tumor cell number
# TODO: check the issue with psi and noise parameters

# TODO: do for all models

# DONE: correct the std plotting
# DONE: parameter plot error in code

# For publication fig 1

In [43]:
# Output folder for the publication figures
folder_path = '/Users/yuhongliu/Documents/OV/figures/ifac_first_model/'

# Plot setting: global font size for all following figures
plt.rcParams.update({'font.size': 30})

# Figure size in inches for a 2560 x 1600 pixel canvas at the given dpi
dpi = 100
wid, hei = (int(pixels / dpi) for pixels in (2560, 1600))

In [None]:
# Two stacked panels: individual trajectories (top, labelled 'b') and
# per-condition mean +/- standard deviation (bottom, labelled 'c').
# NOTE(review): if the rescaling cell ran with scaled=True beforehand, the frames
# hold cell numbers rather than volumes -- confirm the y-axis label.
fig, axs = plt.subplots(2, 1, figsize=(10, 16))
individual_ax, population_ax = axs

# Top panel: one dashed line per sample; x is shifted by 3 so the 0-4 index maps to days 3-7.
# Only the first line of each condition is labelled so the legend has one entry per condition.
for frame, colour, legend_label in (
    (tumor_vol_vvDD_df, 'blue', 'vvDD'),
    (tumor_vol_pbs_df, 'red', 'ctrl'),
):
    for i in range(10):
        individual_ax.plot(frame.index+3, frame.iloc[:, i],
                           marker='o', linestyle='--', dashes=(5, 3), color=colour, alpha=1,
                           label=legend_label if i == 0 else "")

# Bottom panel: mean and standard deviation across samples, computed from the individual data
vvDD_mean = tumor_vol_vvDD_df.mean(axis=1)
vvDD_std = tumor_vol_vvDD_df.std(axis=1)
pbs_mean = tumor_vol_pbs_df.mean(axis=1)
pbs_std = tumor_vol_pbs_df.std(axis=1)

population_ax.errorbar(vvDD_mean.index+3, vvDD_mean, yerr=vvDD_std,
                       fmt='s--', dashes=(5, 3), color='blue', ecolor='blue', capsize=5,
                       label='vvDD (calculated)')
population_ax.errorbar(pbs_mean.index+3, pbs_mean, yerr=pbs_std,
                       fmt='s--', dashes=(5, 3), color='red', ecolor='red', capsize=5,
                       label='PBS (calculated)')

# Shared styling for both panels
for ax, panel_title, panel_letter in (
    (individual_ax, 'Individual Data', 'b'),
    (population_ax, 'Population Data', 'c'),
):
    ax.set_xlabel('Time (days)')
    ax.set_ylabel(r'Tumor Volume ($\mu m^3$)')
    ax.set_title(panel_title, pad=20, loc='center')
    ax.grid(False)
    ax.set_xticks([3, 4, 5, 6, 7])
    # Bold panel letter placed above the top-left corner of the axes
    ax.text(-0.1, 1.22, panel_letter, transform=ax.transAxes,
            fontsize=40, fontweight='bold', va='top', ha='right')

    # Remove top and right lines, thicken the remaining spines and the ticks
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    for side in ('left', 'bottom'):
        ax.spines[side].set_linewidth(2)
    ax.tick_params(width=2)

# One figure-level legend built from the top panel's entries, placed below the panels
handles, labels = individual_ax.get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncol=2, frameon=False, bbox_to_anchor=(0.5, -0.05))
plt.tight_layout()
# NOTE(review): wspace is the horizontal gap between subplot columns; with a single
# column it likely has no visible effect -- hspace may have been intended.
plt.subplots_adjust(wspace=0.6)
plt.savefig(folder_path + 'original_data.pdf', dpi=300, bbox_inches='tight')
plt.show()