# Modelling Zi
## Objectives

### Model Alternatives


### Imports

In [1]:
%matplotlib inline
import site
site.addsitedir('/home/b3053674/Documents/pycotools')
from pycotools.Examples.zi_model_varients_competitive_inhibition import Models
from pycotools import model, tasks, viz, misc
import os
from lxml import etree

root:INFO:27:    Initializing pycotools
root:INFO:28:    Initializing logging System
root:INFO:29:    logging config file at: /home/b3053674/Documents/pycotools/pycotools/logging_config.conf


  from pandas.parser import CParserError


## Get Modified Zi Models
Nine model variants have been prebuilt using the COPASI GUI. The models were opened with a text editor and the XML strings were copied and pasted into a Python class called `Models` in the `zi_model_varients` module. The `Models` class has one property per model, each of which simply returns that model's XML string. Handling many COPASI models this way keeps them organized and minimizes mistakes caused by human error.

### A function to get models as strings

In [2]:
import zi_model_varients 


def get_models(directory):
    """
    Write every model held by the Models class to its own cps file.

    Each property of `zi_model_varients.Models` is read as a model string
    and written to `<directory>/<property name>.cps`, replacing any file
    that is already there. The `published_zi` entry and magic attributes
    are skipped.

    :param directory: where to save the model varients (created if missing)
    :return: dict[model_id] = FullPathToModel
    :raises IOError: if a model file is missing after it has been written
    """
    ## if directory not exists create it
    if not os.path.isdir(directory):
        os.makedirs(directory)

    ## candidate names: drop magic methods AND the published model in one
    ## pass (the second filter used to restart from the unfiltered list,
    ## silently discarding the first one)
    all_model_methods = [
        i for i in dir(Models)
        if not i.startswith('__') and i != 'published_zi'
    ]

    ## NOTE(review): names are listed from the imported `Models` class but
    ## resolved on `zi_model_varients.Models`; this assumes both expose the
    ## same properties -- confirm they are meant to be the same class.
    M = zi_model_varients.Models()
    dct = {}
    for model_id in all_model_methods:
        m = getattr(zi_model_varients.Models, model_id)

        ## only properties hold model strings; skip everything else
        if not isinstance(m, property):
            continue

        model_str = m.fget(M)
        cps_file = os.path.join(directory, '{}.cps'.format(model_id))
        dct[model_id] = cps_file

        ## if already exists remove
        if os.path.isfile(cps_file):
            os.remove(cps_file)

        ## write file
        with open(cps_file, 'w') as f:
            f.write(model_str)

        ## raise error if not exists
        if not os.path.isfile(cps_file):
            raise IOError(
                'model file "{}" was not written'.format(cps_file))

    return dct

   
## root folder for this analysis; the fit folder and the data files are
## all addressed relative to it
directory = r'/home/b3053674/Documents/Models/2017/10_Oct/Smad7Fit2'

## folder the model varients are written to
fit_dir = os.path.join(directory, 'Fit1Dir')

## write the cps files; model_paths maps model id -> full path of the file
model_paths = get_models(fit_dir)

## Read models into pycotools

In [3]:
def read_models_into_pycotools(files_dct):
    """
    Build a pycotools model for every file path in `files_dct`.

    :param files_dct: dict whose values are paths to model files
                      (the keys are not used)
    :return: dict[path] = model.Model(path)
    """
    return {
        cps_path: model.Model(cps_path)
        for cps_path in files_dct.values()
    }

## load every written model file, keyed by its path
models = read_models_into_pycotools(model_paths)
## call open() on each model and print whatever it returns
## (the recorded output of this cell shows None for each of three models)
for k, v in models.items():
    print (v.open())

None
None
None


# Experimental Data
## Parse into pandas

In [None]:
import pandas

def read_data_file(fle):
    """
    Load one data set from a csv file into a pandas dataframe.

    The 'Cell Type' and 'Repeat' columns become the index. Every remaining
    column label is parsed as an integer and multiplied by 60.

    :param fle: path to the csv file
    :return: pandas.DataFrame indexed by ('Cell Type', 'Repeat')
    """
    frame = pandas.read_csv(fle).set_index(['Cell Type', 'Repeat'])
    frame.columns = [60 * int(label) for label in frame.columns]
    return frame
    
## the two pcr data files sit directly in the analysis root folder
smad7_mRNA_data_file = os.path.join(directory, 'smad7_pcr_data.csv')
ski_data_file = os.path.join(directory, 'ski_pcr_data.csv')

## one dataframe per data set, indexed by ('Cell Type', 'Repeat')
smad7_mRNA_data = read_data_file(smad7_mRNA_data_file)
ski_data = read_data_file(ski_data_file)


## Derive Smad7 data from Smad7 mRNA data
A (rather bold) assumption being made here is that Smad7 translation is delayed by 30 minutes compared to transcription and is 100 times the magnitude. 

Here we compute the Smad7 protein data from the Smad7 mRNA data.

In [None]:
import copy
def make_smad_protein_data(df):
    """
    Derive a protein data set from an mRNA data set.

    Every column label is shifted by +30, a new column 0 is filled with
    75% of the column labelled 30, the columns are put in ascending
    order and all values are multiplied by 100.

    :param df: mRNA dataframe with numeric column labels; left unmodified
    :return: new dataframe holding the derived data
    """
    ## work on an independent copy so the caller's frame stays as it was
    shifted = copy.deepcopy(df)
    shifted.columns = [label + 30 for label in shifted.columns]

    ## synthetic first time point
    shifted[0] = shifted[30] * 0.75

    ordered = shifted[sorted(shifted.columns)]
    return ordered * 100

## Smad7 protein data derived from the mRNA data (shifted by 30, scaled by 100)
smad7_protein_data = make_smad_protein_data(smad7_mRNA_data)


## Visualize the data

In [None]:
import matplotlib.pyplot as plt
import seaborn
## select seaborn's 'poster' plotting context for the data figures
seaborn.set_context(context='poster')

def plot_experimental(data, title, cell_type='Neonatal',
                      output_directory='/home/b3053674/Documents/Models/2017/10_Oct'):
    """
    Plot every repeat of one cell type against time and save it as a pdf.

    The figure is written to `<output_directory>/<title>_<cell type>.pdf`
    and the path is printed.

    :param data: dataframe whose first index level is the cell type and
                 whose column labels are numeric time points; the labels
                 are divided by 60 for the x axis, which is labelled in hours
    :param title: figure title, also used as the stem of the pdf file name
    :param cell_type: first-level index label to plot; other groups are skipped
    :param output_directory: folder the pdf is written to
    :return: None
    """
    for label, df in data.groupby(level=0):
        if label != cell_type:
            continue

        ## global matplotlib setting; set before the figure is created so
        ## it applies to this figure as well as later ones
        plt.rcParams.update({'figure.autolayout': True})

        ## float division: under Python 2 `i/60` floors integer labels
        ## (30 -> 0, 90 -> 1). Relabel a copy so the group frame is untouched.
        hours = df.copy()
        hours.columns = [i / 60.0 for i in hours.columns]

        ## draw on an explicit axes: DataFrame.plot() without `ax` opens
        ## its own figure, which left the preceding plt.figure() blank
        fig, ax = plt.subplots()
        hours.transpose().plot(ax=ax, legend=False)
        ax.set_title('{}'.format(title))
        ax.set_ylabel('Signal (AU)')
        ax.set_xlabel('Time (hours)')

        f = os.path.join(output_directory, "{}_{}.pdf".format(title, label))
        fig.savefig("{}".format(f), bbox_inches='tight', dpi=400)
        print('saved to {}'.format(f))
        
        
## one figure per data set; the title doubles as the pdf file name stem
plot_experimental(smad7_mRNA_data, 'Smad7 mRNA')
plot_experimental(ski_data, 'ski mRNA')
plot_experimental(smad7_protein_data, 'Smad7')


## Create Copasi Style data files
-  Format data files for use in copasi.
-  Organize into folders
-  Make sure data has labels that correspond exactly to model quantities 
    -  (i.e. here it's the global quantities: `Smad7Obs`, `SkiObs` and `Smad7mRNAObs`)
-  The interesting dynamic in this data seems to be over by 12h. Truncate the data at 12h
-  Ensure data is in the same time units as the model (minutes)

In [None]:
def format_copasi(data, data_directory, data_name, truncate_number, time_scale=60):
    """
    Write one tab separated, copasi style data file per experimental repeat.

    Repeats are grouped by the first index level of `data` (the cell type).
    Each repeat becomes a two column table, 'Time' and `data_name`, saved as
    `<data_directory>/<cell type>/<repeat number>_<data_name>_data.csv`,
    where the repeat number is the 0-based position within the cell type.

    :param data: dataframe indexed by (cell type, repeat) with numeric time
                 points as column labels
    :param data_directory: root folder; one subfolder is made per cell type
    :param data_name: header of the value column (the model variable name)
    :param truncate_number: number of leading time points to keep
    :param time_scale: factor the time labels are multiplied by before
                       writing; defaults to 60, the value that used to be
                       hard coded
    :return: dict[cell_type][repeat_number] = path of the written file
    """
    ## NOTE(review): `read_data_file` already multiplies the column labels
    ## by 60, so with the default `time_scale` the labels are scaled twice.
    ## Confirm the unit the model expects; pass time_scale=1 to write the
    ## labels unchanged.
    file_dct = {}
    ## iterate over cell types
    for label, df in data.groupby(level=0):
        ## create subdirectory for each cell type (once per cell type)
        dir2 = os.path.join(data_directory, label)
        if not os.path.isdir(dir2):
            os.makedirs(dir2)

        ## drop the index, transpose so that rows are time points and
        ## columns are repeats, then name the index
        repeats = df.reset_index(drop=True).transpose()
        repeats.index.name = 'Time'

        ## nested dict for results collection
        file_dct[label] = {}

        ## iterate over each experimental repeat
        for i in repeats.columns:
            repeat = pandas.DataFrame(repeats[i]).astype(float).reset_index()

            ## relabel to match model variable
            repeat.columns = ['Time', data_name]

            ## rescale the time axis and keep the leading time points
            repeat['Time'] = repeat['Time'].astype(float) * time_scale
            repeat = repeat.iloc[:truncate_number]

            ## write to file
            fle = os.path.join(dir2, '{}_{}_data.csv'.format(i, data_name))
            file_dct[label][i] = fle
            repeat.to_csv(fle, index=False, sep='\t')
    return file_dct


## Directories for data: one output folder per data set under the analysis root
smad7_protein_data_directory = os.path.join(directory, 'Smad7ProteinDataDirectory')
ski_data_directory = os.path.join(directory, 'SkiDataDirectory')
smad7_mRNA_directory = os.path.join(directory, 'Smad7mRNADataDirectory')

## format and write the data files
## each result is dict[cell_type][repeat_number] = file path; only the
## first 6 time points of every repeat are kept
smad7_protein_data_files = format_copasi(smad7_protein_data, smad7_protein_data_directory, 'Smad7Obs', truncate_number=6)
ski_data_files = format_copasi(ski_data, ski_data_directory, 'SkiObs', truncate_number=6)
smad7_mRNA_data_files = format_copasi(smad7_mRNA_data, smad7_mRNA_directory, 'Smad7mRNAObs', truncate_number=6)


## Move data into fit folder


In [None]:
import shutil

def move_data_into_fit_folder(folder):
    """
    Copy every 'Neonatal' data file into `folder`.

    Despite the name the files are copied, not moved; the originals stay
    in their data directories. The files are taken from the module level
    dicts `smad7_protein_data_files`, `ski_data_files` and
    `smad7_mRNA_data_files`.

    :param folder: destination directory for the copies
    :return: None
    """
    data_file_dcts = (
        smad7_protein_data_files,
        ski_data_files,
        smad7_mRNA_data_files,
    )
    ## loop over the dicts rather than adding `.values()` together, which
    ## only works where `.values()` returns a list (Python 2)
    for file_dct in data_file_dcts:
        for fle in file_dct['Neonatal'].values():
            ## copy into the requested folder (the argument used to be
            ## ignored in favour of the global `fit_dir`)
            shutil.copy(fle, folder)
    
## put the Neonatal data files next to the model files in the fit folder
move_data_into_fit_folder(fit_dir)

## Set Initial Conditions of New Components
Compute averages of the 6 repeats and set as initial concentrations 
### Compute averages of repeat experiments

In [None]:
import numpy
## mean of the column labelled 0 across repeats, per cell type (first index
## level); each result is wrapped in a one column dataframe indexed by cell type
smad7_mRNA_starting_values = pandas.DataFrame(smad7_mRNA_data[0].groupby(level=0).agg(numpy.mean))
ski_starting_values = pandas.DataFrame(ski_data[0].groupby(level=0).agg(numpy.mean))
smad7_protein_starting_values = pandas.DataFrame(smad7_protein_data[0].groupby(level=0).agg(numpy.mean))

## Set initial conditions of new species 

In [None]:
def set_initial_values(all_models, cell_type):
    """
    Set the initial value of three global quantities on every model.

    For each model, 'Smad7mRNAInitial', 'SkiInitial' and
    'Smad7ProteinInitial' are set to the `cell_type` entry of the module
    level starting value tables, in that order.

    :param all_models: dict whose values are models (the keys are not used)
    :param cell_type: row label to look up in the starting value tables
    :return: list with the object returned by the last `set` call of each model
    """
    updated = []
    for current in all_models.values():
        ## quantity name paired with the table its starting value comes from
        targets = (
            ('Smad7mRNAInitial', smad7_mRNA_starting_values),
            ('SkiInitial', ski_starting_values),
            ('Smad7ProteinInitial', smad7_protein_starting_values),
        )
        for quantity_name, table in targets:
            current = current.set(
                'global_quantity', quantity_name,
                float(table.loc[cell_type]),
                match_field='name', change_field='initial_value'
            )
        updated.append(current)
    return updated

## NOTE: `models` is rebound here from a dict to the list that
## set_initial_values returns
models = set_initial_values(models, 'Neonatal')
## bare expression: shows the list as the notebook cell output
models

## Save the model to file
Thus far our changes have only been held in memory. To write them to file, use the `save` method.

In [None]:
## call save() on every model; the comprehension is used for its side
## effect and the list of return values becomes the cell output
[i.save() for i in models]

# Run Parameter Estimation

In [None]:
## configure a fit over the contents of fit_dir
## NOTE(review): the meaning of the keyword arguments is read off their
## names only -- confirm against pycotools.tasks.MultiModelFit
PE = tasks.MultiModelFit(
    fit_dir,
    # smad7_mRNA_data_files['Neonatal'].values() + smad7_protein_data_files['Neonatal'].values() + ski_data_files['Neonatal'].values(),
    metabolites=['Ski'], global_quantities=['Smad7SF', 'SkiSF'],
    overwrite_config_file=True,
    method='genetic_algorithm', population_size=150, number_of_generations=300,
    upper_bound=1e4, run_mode='multiprocess', copy_number=2, pe_number=1,
)
PE.write_config_file()
PE.setup()
## the run itself is left disabled in this notebook
# PE.run()

# Visualize Results
## Compare Model Selection Criteria

In [None]:
## switch seaborn from the 'poster' context used for the data figures to
## 'notebook' for the result plots
seaborn.set_context(context='notebook')

In [None]:
## build the model selection object from the fit; the plotting cells below
## iterate over it
## NOTE(review): presumably log10=True reports values on a log10 scale -- confirm
MS = viz.ModelSelection(PE, log10=True)

## Visualize Rss Vs Iterations

In [None]:
## one RssVsIterations plot per item yielded by MS
## NOTE(review): presumably savefig=True also writes the figure to disk -- confirm
for m in MS:
    viz.RssVsIterations(m, savefig=True)

## Ensemble Time Courses

In [None]:
## one ensemble time course plot per item yielded by MS (not saved: no
## savefig argument is passed here)
for m in MS:
    viz.PlotTimeCourseEnsemble(m)

## Plot parameter distributions as box plots

In [None]:
## one set of parameter box plots per item yielded by MS
## NOTE(review): presumably log10=True plots the parameters on a log10
## scale and savefig=True writes the figures to disk -- confirm
for m in MS:
    viz.Boxplots(m, savefig=True, log10=True)