In [54]:
import pandas as pd
import scipy as sc
import matplotlib.pylab as pl
import seaborn as sns
import numpy as np
from scipy import stats
import math

In [55]:
# Load the prepared growth-curve table that the rest of the notebook works on.
data = pd.read_csv(filepath_or_buffer="../data/modified_data.csv")

In [56]:
# Print the column labels of the loaded table as a plain array to see what was read in.
print(data.columns.values)

['Unnamed: 0' 'ID' 'Time' 'PopBio' 'Species' 'Citation' 'Log_PopBio']


In [57]:
# Remove the leftover 'Unnamed: 0' column that came in with the CSV.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: running it a second time no longer raises KeyError because
# the column has already been removed.
data = data.drop(columns='Unnamed: 0', errors='ignore')
data

Unnamed: 0,ID,Time,PopBio,Species,Citation,Log_PopBio
0,1,669.879518,0.283276,Chryseobacterium.balustinum,"Bae, Y.M., Zheng, L., Hyun, J.E., Jung, K.S., ...",-1.261335
1,1,646.987952,0.283342,Chryseobacterium.balustinum,"Bae, Y.M., Zheng, L., Hyun, J.E., Jung, K.S., ...",-1.261100
2,1,622.891566,0.285151,Chryseobacterium.balustinum,"Bae, Y.M., Zheng, L., Hyun, J.E., Jung, K.S., ...",-1.254736
3,1,597.590361,0.281746,Chryseobacterium.balustinum,"Bae, Y.M., Zheng, L., Hyun, J.E., Jung, K.S., ...",-1.266749
4,1,574.698795,0.273117,Chryseobacterium.balustinum,"Bae, Y.M., Zheng, L., Hyun, J.E., Jung, K.S., ...",-1.297856
...,...,...,...,...,...,...
4230,295,0.057355,2.447187,Lactobaciulus plantarum,"Zwietering, M.H., De Wit, J.C., Cuppers, H.G.A...",0.894939
4231,295,2.492604,2.327517,Lactobaciulus plantarum,"Zwietering, M.H., De Wit, J.C., Cuppers, H.G.A...",0.844802
4232,295,1.743012,2.485061,Lactobaciulus plantarum,"Zwietering, M.H., De Wit, J.C., Cuppers, H.G.A...",0.910297
4233,295,0.994529,2.182619,Lactobaciulus plantarum,"Zwietering, M.H., De Wit, J.C., Cuppers, H.G.A...",0.780526


In [58]:
def cal_start_value(curve_subset, window=4):
    """Estimate model-fitting starting values for one growth curve.

    The curve is sorted by ``Time`` and a straight line is fitted to
    ``Log_PopBio`` over every run of ``window`` consecutive points. The
    steepest positive slope is taken as the maximum growth rate, and the
    remaining starting values are derived from that fit.

    Parameters
    ----------
    curve_subset : pandas.DataFrame
        Rows of a single curve. Must contain the columns ``Time``,
        ``PopBio`` and ``Log_PopBio``.
    window : int, default 4
        Number of consecutive points used for each local linear fit.

    Returns
    -------
    pandas.DataFrame
        A copy of ``curve_subset`` (same rows, same index, same order) with
        eight extra columns, each holding one constant value per curve:

        - ``Rmax``: steepest positive slope of ``Log_PopBio`` against ``Time``.
        - ``Tlag``: x-intercept of the line that produced ``Rmax``.
        - ``Nmax``: maximum ``Log_PopBio`` (log scale, despite the name).
        - ``Tmax``: ``Time`` at which ``PopBio`` is largest.
        - ``N0``: ``Log_PopBio`` of the earliest point (log scale).
        - ``A``: ``log(max PopBio / PopBio of the earliest point)``.
        - ``H0``: ``1 / (exp(Tlag * Rmax) - 1)``.
        - ``u``: ``(Nmax - N0) / (Tmax - Tlag)``.

        All eight columns are NaN when no window yields a positive slope
        (including curves with fewer than ``window`` points), so every curve
        comes back with the same set of columns.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 2 (a line needs at least two points).

    Notes
    -----
    The input frame is not modified; the new columns are added to a copy.
    """
    if window < 2:
        raise ValueError("window must be at least 2 points to fit a line")

    column_order = ('Rmax', 'Tlag', 'Nmax', 'Tmax', 'N0', 'A', 'H0', 'u')
    start_values = dict.fromkeys(column_order, np.nan)

    # Work on a time-ordered copy with a clean 0..n-1 index.
    curve = curve_subset.sort_values('Time').reset_index(drop=True)
    n_points = curve.shape[0]

    # Slide the window over the curve and keep the steepest positive fit.
    # The range ends at n_points - window (inclusive) so that the final
    # window, which contains the last observation, is evaluated as well.
    best_fit = None
    best_slope = 0
    for start in range(n_points - window + 1):
        points = curve.iloc[start:start + window]
        times = points['Time']
        if times.nunique() < 2:
            # No spread in time: a slope cannot be estimated for this window.
            continue
        fit = stats.linregress(times, points['Log_PopBio'])
        # Strict '>' keeps the earliest window on ties; NaN slopes never win.
        if fit.slope > best_slope:
            best_slope = fit.slope
            best_fit = fit

    if best_fit is not None:
        rmax = best_fit.slope
        tlag = -best_fit.intercept / rmax  # x-intercept of the steepest line

        # Curve-level quantities; they do not depend on the window.
        peak_biomass = curve['PopBio'].max()
        start_biomass = curve.at[0, 'PopBio']
        log_peak = curve['Log_PopBio'].max()
        log_start = curve.at[0, 'Log_PopBio']
        # Time of the first row reaching the maximum PopBio, looked up by
        # column name rather than by column position.
        tmax = curve.at[curve['PopBio'].idxmax(), 'Time']

        start_values.update({
            'Rmax': rmax,
            'Tlag': tlag,
            'Nmax': log_peak,
            'Tmax': tmax,
            'N0': log_start,
            'A': np.log(peak_biomass / start_biomass),
            # expm1 computes exp(x) - 1 without losing precision for small x.
            'H0': 1.0 / np.expm1(tlag * rmax),
            'u': (log_peak - log_start) / (tmax - tlag),
        })

    return curve_subset.assign(**start_values)


In [59]:
# Split the table into one subset per curve ID and attach the starting values to each.
# group_keys=False keeps the original row index on the result; without it, pandas >= 2.0
# prepends the ID as an extra index level when the applied function returns a
# like-indexed frame, which would change the table shown and saved below.
data = data.groupby("ID", group_keys=False).apply(cal_start_value)



In [60]:
data # display the table to check the starting-value columns added for each ID

Unnamed: 0,ID,Time,PopBio,Species,Citation,Log_PopBio,Rmax,Tlag,Nmax,Tmax,N0,A,H0,u
0,1,669.879518,0.283276,Chryseobacterium.balustinum,"Bae, Y.M., Zheng, L., Hyun, J.E., Jung, K.S., ...",-1.261335,0.022817,225.961154,-1.254736,622.891566,-4.752592,3.497856,0.005800,0.008812
1,1,646.987952,0.283342,Chryseobacterium.balustinum,"Bae, Y.M., Zheng, L., Hyun, J.E., Jung, K.S., ...",-1.261100,0.022817,225.961154,-1.254736,622.891566,-4.752592,3.497856,0.005800,0.008812
2,1,622.891566,0.285151,Chryseobacterium.balustinum,"Bae, Y.M., Zheng, L., Hyun, J.E., Jung, K.S., ...",-1.254736,0.022817,225.961154,-1.254736,622.891566,-4.752592,3.497856,0.005800,0.008812
3,1,597.590361,0.281746,Chryseobacterium.balustinum,"Bae, Y.M., Zheng, L., Hyun, J.E., Jung, K.S., ...",-1.266749,0.022817,225.961154,-1.254736,622.891566,-4.752592,3.497856,0.005800,0.008812
4,1,574.698795,0.273117,Chryseobacterium.balustinum,"Bae, Y.M., Zheng, L., Hyun, J.E., Jung, K.S., ...",-1.297856,0.022817,225.961154,-1.254736,622.891566,-4.752592,3.497856,0.005800,0.008812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4230,295,0.057355,2.447187,Lactobaciulus plantarum,"Zwietering, M.H., De Wit, J.C., Cuppers, H.G.A...",0.894939,1.909012,5.903457,10.209196,47.954541,0.894939,9.314257,0.000013,0.221499
4231,295,2.492604,2.327517,Lactobaciulus plantarum,"Zwietering, M.H., De Wit, J.C., Cuppers, H.G.A...",0.844802,1.909012,5.903457,10.209196,47.954541,0.894939,9.314257,0.000013,0.221499
4232,295,1.743012,2.485061,Lactobaciulus plantarum,"Zwietering, M.H., De Wit, J.C., Cuppers, H.G.A...",0.910297,1.909012,5.903457,10.209196,47.954541,0.894939,9.314257,0.000013,0.221499
4233,295,0.994529,2.182619,Lactobaciulus plantarum,"Zwietering, M.H., De Wit, J.C., Cuppers, H.G.A...",0.780526,1.909012,5.903457,10.209196,47.954541,0.894939,9.314257,0.000013,0.221499


In [61]:
# Save the table, now carrying the starting-value columns, for the model-fitting step.
# The row index is written too (to_csv default), so the file gains an unnamed first column.
data.to_csv(path_or_buf='../data/start_val_data.csv')