In [1]:
# Script to convert pickled simulation data to information
# plotted in the parameter scan - DS8 two-state model

# Load necessary packages
import pandas as pd
import numpy as np

In [2]:
def analyze_model_DS8(pkl_file, population): #, data, num_params):
    ## Read in dataset
    df = pd.read_pickle(pkl_file)
    df['cell line'] = population

    ## Create pandas objects from simulated data parameters
    df_dip = pd.DataFrame(df['DIP rate'].values.tolist(), columns=['DIP1','DIP2'])
    df_div = pd.DataFrame(df['division rate'].values.tolist(), columns=['div1','div2'])
    df_dth = pd.DataFrame(df['death rate'].values.tolist(), columns=['dth1','dth2'])
#     df_prop = pd.DataFrame(df['proportions'].values.tolist(), columns=['prop1','prop2'])
    df_cellline = df['cell line']

    sim = np.array(df['sim DIPs'])

    KSval = []
    ADval = []
#     sumLL = []
#     aic_n = []
#     aic_c = []

    for index, row in df.iterrows():
#         KSval.append(np.mean(np.array(df['KS p-value'][index])) + 2*np.std(np.array(df['KS p-value'][index])))
#         ADval.append(np.mean(np.array(df['AD p-value'][index])) + 2*np.std(np.array(df['AD p-value'][index])))
        KSval.append(np.mean(np.array(df['KS p-value'][index])) - 2*np.std(np.array(df['KS p-value'][index])))
        ADval.append(np.mean(np.array(df['AD p-value'][index])) - 2*np.std(np.array(df['AD p-value'][index])))
#         sll, aicn, aicc = model_comparison(dat = data, sim = sim, i = index, param_num = num_params)
#         sumLL.append(sll)
#         aic_n.append(aicn)
#         aic_c.append(aicc)

    df_c = pd.concat([df_dip.reset_index(drop=True),
                      df_div.reset_index(drop=True),
                      df_dth.reset_index(drop=True),
                      df_cellline.reset_index(drop=True)],
                     axis = 1)

    df_c['KS val'] = KSval
    df_c['AD val'] = ADval
#     df_c['LLC'] = sumLL
#     df_c['AIC'] = aic_n
#     df_c['AICc'] = aic_c
    
    return(df_c)

In [None]:
df_DS8_00_1 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs00_1_allDIP.pkl', population='PC9.DS8')
df_DS8_00_2 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs00_2_allDIP.pkl', population='PC9.DS8')
df_DS8_1 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs1_allDIP.pkl', population='PC9.DS8')
df_DS8_2 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs2_allDIP.pkl', population='PC9.DS8')
df_DS8_3 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs3_allDIP.pkl', population='PC9.DS8')
df_DS8_4 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs4_allDIP.pkl', population='PC9.DS8')
df_DS8_5 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs5_allDIP.pkl', population='PC9.DS8')
df_DS8_6 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs6_allDIP.pkl', population='PC9.DS8')
df_DS8_7 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs7_allDIP.pkl', population='PC9.DS8')
df_DS8_8 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs8_allDIP.pkl', population='PC9.DS8')
df_DS8_9 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs9_allDIP.pkl', population='PC9.DS8')
df_DS8_10 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs10_allDIP.pkl', population='PC9.DS8')
df_DS8_11 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs11_allDIP.pkl', population='PC9.DS8')
df_DS8_12 = analyze_model_DS8(pkl_file='PC9-DS8_param-scan_twoState_Divs12_allDIP.pkl', population='PC9.DS8')

In [52]:
## Compile into a single common data frame
DS8_all = pd.concat([df_DS8_00_1, df_DS8_00_2, df_DS8_1, df_DS8_2,
                    df_DS8_3, df_DS8_4, df_DS8_5, df_DS8_6,
                    df_DS8_7, df_DS8_8, df_DS8_9, df_DS8_10,
                    df_DS8_11, df_DS8_12], ignore_index = True)

In [53]:
df_DS8_00_2.head()

Unnamed: 0,DIP1,DIP2,div1,div2,dth1,dth2,cell line,KS val,AD val
0,0.0005,0.0055,0.016,0.016,0.0155,0.0105,PC9.DS8,0.026799,0.014046
1,0.0005,0.0057,0.016,0.016,0.0155,0.0103,PC9.DS8,0.111203,0.041085
2,0.0005,0.0059,0.016,0.016,0.0155,0.0101,PC9.DS8,0.050645,0.023568
3,0.0005,0.0061,0.016,0.016,0.0155,0.0099,PC9.DS8,0.012772,0.006181
4,0.0005,0.0063,0.016,0.016,0.0155,0.0097,PC9.DS8,0.005985,0.004279


In [54]:
## Annotate with modified identifier (ease of plotting)
DS8_all['cell line'] = np.where(DS8_all['AD val']>0.05, 'PC9-DS8', 'not.assigned')
DS8_all['param pair'] = range(DS8_all.shape[0])

In [55]:
## Create subsets of data frame (see below)
DS8_sig1 = DS8_all[['DIP1', 'div1', 'dth1', 'cell line', 'param pair', 'KS val', 'AD val']]
DS8_sig2 = DS8_all[['DIP2', 'div2', 'dth2', 'cell line', 'param pair', 'KS val', 'AD val']]

In [56]:
DS8_sig1.head()

Unnamed: 0,DIP1,div1,dth1,cell line,param pair,KS val,AD val
0,0.0005,0.011,0.0105,not.assigned,0,0.002805,0.001039
1,0.0005,0.011,0.0105,not.assigned,1,0.021955,0.005497
2,0.0005,0.011,0.0105,not.assigned,2,0.012806,0.00405
3,0.0005,0.011,0.0105,not.assigned,3,0.005202,0.003276
4,0.0005,0.011,0.0105,PC9-DS8,4,0.181521,0.08119


In [57]:
## Rename columns of data frame subsets
DS8_sig1.rename(columns={'DIP1': 'DIP Rate',
                         'div1': 'Division Rate',
                         'dth1': 'Death Rate'},
                 inplace=True)
DS8_sig2.rename(columns={'DIP2': 'DIP Rate',
                         'div2': 'Division Rate',
                         'dth2': 'Death Rate'},
                inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(**kwargs)


In [58]:
## Separate subsets based on which DS8 state it resides in
DS8_sig1['Cell Line'] = np.where(DS8_sig1['cell line'] == "PC9-DS8", 'PC9-DS8.1', 'not.assigned')
DS8_sig2['Cell Line'] = np.where(DS8_sig2['cell line'] == "PC9-DS8", 'PC9-DS8.2', 'not.assigned')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [59]:
## Concatenate datasets back together
DS8_sig_all = pd.concat([DS8_sig1, DS8_sig2])

## Save concatenated datasets into common CSV (for plotting)
DS8_sig_all.to_csv('DS8_twoState_tile_expandedRange_lowVal_forPaper.csv')

In [48]:
DS8_sig_all.sort_values(by='AD val', ascending=False).head(20)

Unnamed: 0,DIP Rate,Division Rate,Death Rate,cell line,param pair,KS val,AD val,Cell Line
9402,0.0071,0.038,0.0309,PC9-DS8,9402,0.625541,0.34631,PC9-DS8.2
9402,0.0009,0.039,0.0381,PC9-DS8,9402,0.625541,0.34631,PC9-DS8.1
7685,0.0069,0.032,0.0251,PC9-DS8,7685,0.733946,0.344746,PC9-DS8.2
7685,0.0009,0.034,0.0331,PC9-DS8,7685,0.733946,0.344746,PC9-DS8.1
3858,0.0009,0.022,0.0211,PC9-DS8,3858,0.575798,0.334753,PC9-DS8.1
3858,0.0071,0.024,0.0169,PC9-DS8,3858,0.575798,0.334753,PC9-DS8.2
8079,0.0009,0.035,0.0341,PC9-DS8,8079,0.854173,0.333692,PC9-DS8.1
8079,0.0065,0.033,0.0265,PC9-DS8,8079,0.854173,0.333692,PC9-DS8.2
8419,0.0063,0.038,0.0317,PC9-DS8,8419,0.588825,0.328968,PC9-DS8.2
8419,0.0011,0.036,0.0349,PC9-DS8,8419,0.588825,0.328968,PC9-DS8.1


In [5]:
a = np.load('/Users/Corey/Documents/GMM_res.npy')
b = np.load('/Users/Corey/Documents/DS9_res.npy')
import scipy.stats as sp
st,cv,sv = sp.anderson_ksamp([a,b])
D,p = sp.ks_2samp(a,b)
print(sv)
print(p)

