In [None]:
import pickle
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import pandas as pd
import shutil
import time

#import pickle5 as pickle

import warnings
warnings.simplefilter("ignore")

### Read pickles

In [None]:
# Build the per-step cluster summary table `df` from the pickled simulation states.
#
# For every simulation folder, every 10th saved step is loaded, a contact graph
# is built (cells = nodes, cell-cell contacts = edges) and the sizes of its
# connected components are summarised into one row of `df`.
file_path = '../../../cellmodeller/data/killifish/Sims 11-05_long'

# Location of the data we want to analyse. Every sub-directory is assumed to be
# the result of one simulation; stray files (e.g. .DS_Store) are ignored because
# their names cannot be parsed as parameter sets.
folders = sorted(
    entry for entry in os.listdir(file_path)
    if os.path.isdir(os.path.join(file_path, entry))
)

df_columns = ['Wc', 'psi', 'D', 'density', 'sim', 'time', 'max_size', 'mean_size', 'number']
# Per-cluster size distribution table; it stays empty unless the optional
# snippet at the bottom of the loop is enabled.
df_dist = pd.DataFrame(columns=['Wc', 'psi', 'D', 'density', 'time', 'size'])

# Number of repeats already seen for each parameter combination. Using a
# counter gives every combination a 0-based `sim` index, regardless of the
# order in which the folders are listed.
sim_counter = {}

# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copies the frame on every call.
records = []

steps = 10000
for folder in folders:
    fname = os.path.join(file_path, folder, 'step-%05d.pickle')

    ## NEW SIMS VERSION:
    # Extract parameters from the folder name: tokens are separated by '__'
    # and '_' stands in for the decimal point inside a number.
    parts = folder.split('__')
    Wc = float(parts[1].replace('_', '.'))
    psi = float(parts[3].replace('_', '.'))
    D = float(parts[5].replace('_', '.'))
    N = int(parts[8].split('_')[0])
    sphere_rad = float(parts[10].replace('_', '.'))
    density = N / (4 * sphere_rad**2)
    forg = parts[12].split('-')[0] # later check if necessary to add to the df

    # Index of this repeat within its parameter combination
    pars = (Wc, psi, D, density)
    i = sim_counter.get(pars, 0)
    sim_counter[pars] = i + 1

    # Look at every 10th saved step from 0 up to (but excluding) `steps`
    for t in range(0, steps, 10):
        # Get the cell states.
        # NOTE: pickle can execute arbitrary code while loading - only read
        # simulation output that you produced yourself or otherwise trust.
        with open(fname % t, 'rb') as pickle_file:
            data = pickle.load(pickle_file)
        cs = data['cellStates']

        # Construct a graph with cells as nodes and cell-cell contacts as edges
        G = nx.Graph()
        for cell_id, cell in cs.items():
            G.add_edges_from((cell_id, n) for n in cell.neighbours)

        # Get the sizes of the connected components = clusters
        sizes = [len(c) for c in nx.connected_components(G)]
        if not sizes:
            # No contacts at all at this step: no row is recorded
            continue

        records.append({
            'Wc': Wc,
            'psi': psi,
            'D': D,
            'density': density,
            'sim': i,
            'time': t,
            # Dominant cluster size
            'max_size': np.max(sizes),
            'mean_size': np.mean(sizes),
            # Number of clusters, counting every cell without contacts
            # (absent from the graph) as a cluster of its own
            'number': len(sizes) + N - np.sum(sizes),
        })

        ## Distribution dataframe (disabled)
        #rows = pd.DataFrame({'size': sizes})
        #rows['Wc'], rows['psi'], rows['D'], rows['time'], rows['density'] = Wc, psi, D, t, density
        #df_dist = pd.concat([df_dist, rows])

df = pd.DataFrame(records, columns=df_columns)

In [None]:
# Persist the per-step cluster summary table to JSON, relative to the current
# working directory.
df.to_json('df_sims_11-05_long.JSON')
# Disabled: export of the per-cluster size distribution table.
#df_dist.reset_index(inplace=True)
#df_dist.to_json('df_dist_sims_10-21_2.JSON')

In [None]:
# Reload the summary table from the JSON file written by the export cell above,
# replacing whatever `df` currently holds.
df = pd.read_json('df_sims_11-05_long.JSON')

## Parameters that form one cluster

In [None]:
# Parameter combinations at density 0.2 for which a single cluster was recorded;
# `Count` is the number of recorded rows (steps x sims) in that state.
is_single_cluster = (df.density == 0.2) & (df.number == 1)
rows_per_combination = df[is_single_cluster].groupby(['Wc', 'psi', 'D', 'density']).size()
df_param_comb = rows_per_combination.reset_index().rename(columns={0: 'Count'})
df_param_comb

In [None]:
# One figure per parameter combination listed in `df_param_comb`: number of
# clusters against time, every simulation repeat drawn as its own dot series.
for _, combo in df_param_comb.iterrows():
    same_parameters = (
        (df.Wc == combo['Wc'])
        & (df.psi == combo['psi'])
        & (df.D == combo['D'])
        & (df.density == combo['density'])
    )
    runs = df[same_parameters].groupby('sim')

    fig, ax = plt.subplots()
    for _, run in runs:
        ax.plot(run['time'], run['number'], '.')
    ax.set_title(
        f"Wc={combo['Wc']}, psi={combo['psi']}, D={combo['D']}, density={combo['density']}"
    )
    plt.show()

In [None]:
# Same selection as the one-cluster table, but broken down per simulation
# repeat: `Count` is the number of recorded steps each repeat spends as a
# single cluster.
is_single_cluster = (df.density == 0.2) & (df.number == 1)
group_keys = ['Wc', 'psi', 'D', 'density', 'sim', 'number']
rows_per_run = df[is_single_cluster].groupby(group_keys).size()
df_param_comb = rows_per_run.reset_index().rename(columns={0: 'Count'})
df_param_comb

## Computing slopes

In [None]:
# Replace `df` with a previously exported summary table.
# NOTE(review): this is a different file from the 'df_sims_11-05_long.JSON'
# written earlier, and no cell visible in this notebook produces it - confirm
# where it comes from and that it contains the `sim` column used below.
df = pd.read_json('df_sims_10-21_index.JSON')

In [None]:
# Fit a power law  number ~ time**slope  to every simulation and collect, for
# each parameter combination, one end-of-run snapshot row per simulation with
# the fitted slope attached in a new `slope` column.

# Candidate snapshot times, in order of preference: the first one at which
# every simulation of a combination has exactly one row is used.
SNAPSHOT_TIMES = (990, 980)

ds = df.density.unique()
snapshots = []

for d in ds:
    # Within one density, (Wc, psi, D) identifies a parameter combination
    for _, df_selected in df[df.density == d].groupby(['Wc', 'psi', 'D']):

        # Slope of the log-log linear fit, keyed by simulation index
        slopes = {}
        for sim, df_sim in df_selected.groupby('sim'):
            times = df_sim['time'].to_numpy(dtype=float)
            counts = df_sim['number'].to_numpy(dtype=float)
            # t = 0 has no logarithm; select it by value instead of assuming
            # it is the first row of the group.
            usable = times > 0
            if usable.sum() < 2:
                # Not enough points to fit a straight line
                slopes[sim] = np.nan
                continue
            slope, _intercept = np.polyfit(np.log(times[usable]), np.log(counts[usable]), 1)
            slopes[sim] = slope

        # Pick the snapshot time explicitly (no bare `except`): it must provide
        # exactly one row for each simulation that was fitted.
        for snapshot_time in SNAPSHOT_TIMES:
            snapshot = df_selected[df_selected.time == snapshot_time]
            if len(snapshot) == len(slopes) and snapshot['sim'].is_unique:
                break
        else:
            first_row = df_selected.iloc[0]
            raise ValueError(
                "No snapshot time in %s has exactly one row per simulation for "
                "Wc=%s, psi=%s, D=%s, density=%s"
                % (SNAPSHOT_TIMES, first_row['Wc'], first_row['psi'], first_row['D'], d)
            )

        # Attach slopes by simulation index rather than by row position, on a
        # copy (assign) instead of writing into a slice of `df`.
        snapshots.append(snapshot.assign(slope=snapshot['sim'].map(slopes)))

# Concatenate once instead of growing the frame inside the loop
df_final = pd.concat(snapshots, ignore_index=True) if snapshots else pd.DataFrame()

In [None]:
# Display the snapshot table with the fitted `slope` column.
df_final

In [None]:
# Save the snapshot/slope table to CSV in the current working directory.
# The row index is written too (pandas default), so it comes back as an extra
# unnamed column when the file is read with pd.read_csv.
df_final.to_csv('df_all_clusters.csv')

## Heatmaps

In [None]:
# Load the snapshot/slope table from the CSV written by
# df_final.to_csv('df_all_clusters.csv'), replacing the in-memory `df_final`.
df_final = pd.read_csv('df_all_clusters.csv')

### Slopes

In [None]:
# Filled contour map of the fitted slope over the (Wc, psi) plane, one figure
# (saved as PNG) per combination of D and density.
plt.rcParams['figure.figsize'] = 8, 8

for dr, by_dr in df_final.groupby('D'):
    for dens_value, subset in by_dr.groupby('density'):
        dens_label = round(dens_value, 2)

        fig, ax = plt.subplots()
        contours = ax.tricontourf(subset['Wc'], subset['psi'], subset['slope'], cmap="jet")
        fig.colorbar(contours, ax=ax).set_label("Slope (power law)")

        ax.set_aspect("equal")
        ax.set(
            title=f"Dr = {dr}; Density = {dens_label}",
            xlabel='Wc',
            ylabel='Psi',
        )
        fig.savefig(f"heatmap_slope__Dr__{dr}__dens__{dens_label}.png", dpi=300)

plt.show()

### N° of clusters

In [None]:
# Filled contour map of the number of clusters at time 990 over the (Wc, psi)
# plane, one figure (saved as PNG) per combination of D and density.
at_t990 = df_final[df_final['time'] == 990]

for dr, by_dr in at_t990.groupby('D'):
    for dens_value, subset in by_dr.groupby('density'):
        dens_label = round(dens_value, 2)

        fig, ax = plt.subplots()
        contours = ax.tricontourf(subset['Wc'], subset['psi'], subset['number'], cmap="jet")
        fig.colorbar(contours, ax=ax).set_label("N° of Clusters")

        ax.set_aspect("equal")
        ax.set(
            title=f"Dr = {dr}; Density = {dens_label}",
            xlabel='Wc',
            ylabel='Psi',
        )
        fig.savefig(f"heatmap_number__Dr__{dr}__dens__{dens_label}.png", dpi=300)

plt.show()

### Imshow

In [None]:
# Load the snapshot/slope table from 'df_all_clusters.csv' again, replacing the
# in-memory `df_final` before drawing the imshow heatmaps.
df_final = pd.read_csv('df_all_clusters.csv')

In [None]:
# Annotated imshow heatmaps over the (Wc, psi) grid: one figure (saved as PNG)
# per quantity ('number', 'slope'), D and density. Cells are averaged over the
# simulation repeats.
plt.rcParams['figure.figsize'] = 8, 8

# Colour-bar label for each plotted quantity
vals = {'number':'N° of clusters', 'slope': 'Power law'}
# Fixed colour range per quantity, so figures are comparable across D/density
color_limits = {'number': (1, 500), 'slope': (-1.05, 0.15)}

for val in ['number', 'slope']:
    for dr, dr_df in df_final.groupby('D'):
        # Loop names deliberately avoid `ds` and `i`, which other cells of the
        # notebook use as globals.
        for dens_value, dens_df in dr_df.groupby('density'):
            # The string 'mean' is the non-deprecated spelling of aggfunc=np.mean
            df_heatmap = dens_df.pivot_table(values=val,
                                             index='psi',
                                             columns='Wc', aggfunc='mean')
            # Largest psi in the top row of the image
            df_heatmap = df_heatmap.sort_index(axis=0, ascending=False)

            fig, ax = plt.subplots()

            vmin, vmax = color_limits[val]
            im = ax.imshow(df_heatmap, vmin=vmin, vmax=vmax)

            # We want to show all ticks...
            ax.set_xticks(np.arange(len(df_heatmap.columns)))
            ax.set_yticks(np.arange(len(df_heatmap.index)))
            # ... and label them with the respective list entries
            ax.set_xticklabels(df_heatmap.columns, fontsize=16)
            ax.set_yticklabels(df_heatmap.index, fontsize=18)
            # Set axis titles
            ax.set_xlabel("Cell adhesion", fontsize=20)
            ax.set_ylabel("CIL", fontsize=20)

            # Annotate only the cells of interest:
            #   number: mean number of clusters below 1.5 (raw value)
            #   slope:  slope at or below -2/3 (rounded to 2 decimals)
            for row_pos in range(len(df_heatmap.index)):
                for col_pos in range(len(df_heatmap.columns)):
                    cell_value = df_heatmap.iloc[row_pos, col_pos]
                    if val == 'number' and cell_value < 1.5:
                        label = cell_value
                    elif val == 'slope' and cell_value <= -2/3:
                        label = round(cell_value, 2)
                    else:
                        continue
                    ax.text(col_pos, row_pos, label,
                            ha="center", va="center", color="w", fontsize=16)

            # colorbar set up (bound explicitly to this figure/axes)
            cb = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
            cb.ax.tick_params(labelsize=18)
            cb.set_label(vals[val], size=20)

            ax.set_title(f"Dr={dr}, density={round(dens_value, 2)}", fontsize=20)
            fig.tight_layout()
            fig.savefig(f"heatmap_{val}__Dr__{dr}__dens__{round(dens_value, 2)}.png", dpi=300)
plt.show()

### 8x8 images

#### Moving folders to Videos/

In [None]:
# Load the snapshot/slope table from 'df_all_clusters.csv'; its unique Wc and
# psi values are used below to build the folder names to look for.
df_final = pd.read_csv('df_all_clusters.csv')

In [None]:
# Move one simulation folder per (Wc, psi) combination into Videos/All/.
file_path = '../../../cellmodeller/data/killifish/Sims 10-21'
dest_dir = "Videos/All/"

# Location of the data we want to analyse, assume all folders are results
folders = os.listdir(file_path)
folders.sort()


def _param_token(value):
    """Format a parameter value the way it appears in the folder names.

    The decimal point is written as '_' and an all-zero fractional part is
    dropped: 1.0 -> '1', 0.5 -> '0_5'. Values whose string form has no decimal
    point (e.g. integers) are returned unchanged.
    """
    text = str(value).replace('.', '_')
    whole, _, fraction = text.partition('_')
    if fraction and not fraction.strip('0'):
        return whole
    return text


wcs = [_param_token(value) for value in df_final.Wc.unique()]
psis = [_param_token(value) for value in df_final.psi.unique()]

# Without an existing destination directory, shutil.move would rename the
# first folder to "Videos/All" instead of moving it inside.
os.makedirs(dest_dir, exist_ok=True)

dens = 100
i = 0  # number of folders actually moved
for wc in wcs:
    for psi in psis:
        name = f"Wc__{wc}__psi__{psi}__D__1__ftax__0__500__cells_sphere__{dens}__"
        # First folder (in sorted order) whose name contains the pattern
        folder = next((candidate for candidate in folders if name in candidate), None)
        if folder is None:
            # E.g. already moved by a previous run of this cell
            print(f"No folder matching {name!r} in {file_path!r}; skipping")
            continue
        shutil.move(os.path.join(file_path, folder), dest_dir)
        i += 1

print(i)

#### Get generated images

In [None]:
# Copy the last rendered frame of every simulation in Videos/All/ to Videos/,
# named after the simulation parameters.
path = "Videos/All/"
# Frame file names to try, in order of preference
FINAL_FRAMES = ("step-00990.png", "step-00980.png")

for folder in os.listdir(path):
    if not os.path.isdir(os.path.join(path, folder)):
        # Stray files cannot be parsed as a simulation folder name
        continue

    # Parameters are kept as the raw folder-name tokens (strings)
    parts = folder.split('__')
    Wc = parts[1]
    psi = parts[3]
    D = parts[5]
    N = int(parts[8].split('_')[0])
    sphere_rad = float(parts[10].replace('_', '.'))
    density = round(N / (4 * sphere_rad**2),2)
    density = str(density).replace('.', '_')

    # The original target name started with a stray quote character
    # ("Videos/'Wc__..."); it is dropped here.
    target = f"Videos/Wc__{Wc}__psi__{psi}__D__{D}__dens__{density}.png"

    # Explicit existence check instead of a bare `except` around the copy
    for frame_name in FINAL_FRAMES:
        file = os.path.join(path, folder, frame_name)
        if os.path.exists(file):
            shutil.copyfile(file, target)
            break
    else:
        raise FileNotFoundError(
            f"None of {FINAL_FRAMES} found in {os.path.join(path, folder)}"
        )

### Mean size

In [None]:
# Mean cluster size against time (log-log) for D = 0.1: one panel per
# (density, psi) pair, one curve per Wc, averaged over the rows sharing a time.
D = 0.1
plt.rcParams['figure.figsize'] = 30,16
fig, axs = plt.subplots(4, 5)

panel = 0
for sr, by_density in df[df.D == D].groupby('density'):
    for psi, by_psi in by_density.groupby('psi'):
        # Panels are filled row by row
        ax = axs[panel // 5][panel % 5]
        curve_labels = []
        for Wc, by_wc in by_psi.groupby('Wc'):
            mean = by_wc.sort_values('time').groupby('time').mean()
            mean.plot(y='mean_size', ax=ax, loglog=True, style='.-')
            curve_labels.append(r'$\psi=%0.2g$, $\phi=%0.2g$, Wc=%0.2g' % (psi, sr, Wc))
        ax.legend(curve_labels)
        # Dashed guide line spanning t = 10..1000 and rising by 2/3 of a
        # decade, anchored at the smallest value of the last curve drawn
        log_min = np.log10(mean['mean_size'].min())
        ax.plot([1e1, 1e3], [10**log_min, 10**(log_min + 2/3)], 'k--')
        ax.set_ylim([0, 500])
        panel += 1

plt.suptitle(f"Mean cluster size, Dr = {D}")
plt.tight_layout()

### Max size

In [None]:
# Largest cluster size against time (log-log) for D = 0.1: one panel per
# (density, psi) pair, one curve per Wc, averaged over the rows sharing a time.
D = 0.1
# Column plotted in this cell. The guide line is anchored on the same column;
# it was previously anchored on 'mean_size', a leftover from the mean-size cell.
size_column = 'max_size'
plt.rcParams['figure.figsize'] = 30,16
fig,axs = plt.subplots(4,5)

i = 0
for sr, gg_sr in df[df.D==D].groupby('density'):
    for psi, gg_psi in gg_sr.groupby('psi'):
        # Panels are filled row by row
        ax = axs[i // 5][i % 5]
        legend = []
        for Wc, gg_Wc in gg_psi.groupby('Wc'):
            mean = gg_Wc.sort_values('time').groupby('time').mean()
            mean.plot(y=size_column, ax=ax, loglog=True, style='.-')
            legend.append(r'$\psi=%0.2g$, $\phi=%0.2g$, Wc=%0.2g'%(psi,sr, Wc))
        ax.legend(legend)
        # Dashed guide line spanning t = 10..1000 and rising by 2/3 of a
        # decade, anchored at the smallest value of the last curve drawn
        log_min = np.log10(mean[size_column].min())
        ax.plot([1e1,1e3], [10**log_min, 10**(log_min+2/3)], 'k--')
        ax.set_ylim([0,500])
        i+=1

plt.suptitle(f"Max cluster size, Dr = {D}")
plt.tight_layout()

### Cluster-size distributions

In [None]:
# Histograms (log counts) of cluster sizes at time 990 for Wc = 1, comparing
# psi = 1 with psi = 0; one figure for `df_ld_dist`, one for `df_md_dist`.
# NOTE(review): neither frame is created in the cells visible in this notebook -
# confirm where they are loaded. Also, only the first selection below filters
# on D and density; verify whether the others should as well.
# Boolean masks are combined with `&` (the element-wise "and" for masks) and
# the labels are raw strings so that '\p' is not parsed as an escape sequence.
gg = df_ld_dist[(df_ld_dist.time==990) & (df_ld_dist.Wc==1) & (df_ld_dist.psi==1) & (df_ld_dist.D==0.1) & (df_ld_dist.density==0.1)]
plt.hist(gg['size'], bins=20, log=True, alpha=0.5)
gg = df_ld_dist[(df_ld_dist.time==990) & (df_ld_dist.Wc==1) & (df_ld_dist.psi==0)]
plt.hist(gg['size'], bins=20, log=True, alpha=0.5)
plt.legend([r'$\psi=1$', r'$\psi=0$'])

plt.figure()
gg = df_md_dist[(df_md_dist.time==990) & (df_md_dist.Wc==1) & (df_md_dist.psi==1)]
plt.hist(gg['size'], bins=20, log=True, alpha=0.5)
gg = df_md_dist[(df_md_dist.time==990) & (df_md_dist.Wc==1) & (df_md_dist.psi==0)]
plt.hist(gg['size'], bins=20, log=True, alpha=0.5)
plt.legend([r'$\psi=1$', r'$\psi=0$'])

In [None]:
# Quick look at the first rows of the summary table currently bound to `df`.
df.head()

In [None]:
# Scratch cell: show the current value of `D`. Several cells assign it (parsed
# from folder names, or set to 0.1 by the plotting cells), so the value depends
# on which cell ran last.
D

In [None]:
# Scratch cell: show the current value of `density`; it is a float after the
# pickle-reading cell and a string after the image-copy cell.
density

In [None]:
# First rows of the cluster-size distribution table. In the cells visible here
# `df_dist` is only created empty (the code that fills it is commented out).
df_dist.head()

In [None]:
# End-of-run (time 990) statistics for psi = 1, plotted against Wc: one figure
# each for the number of clusters, the largest and the mean cluster size.
# The two conditions are combined into a single mask; chaining df[...][...]
# with a mask built from the unfiltered frame makes pandas re-align the mask
# and emit a warning. The selection is also computed only once.
df_end_psi1 = df[(df.time == 990) & (df.psi == 1)].sort_values('Wc')
for column in ('number', 'max_size', 'mean_size'):
    df_end_psi1.plot(x='Wc', y=column, style='-')

In [None]:
# Tables of end-of-run (time 990) statistics averaged over a 10 x 10 grid of
# equal-width Wc bins (rows) and psi bins (columns).
dfend = df[df.time == 990]
c1, bins1 = pd.cut(dfend.Wc, bins=10, retbins=True)
c2, bins2 = pd.cut(dfend.psi, bins=10, retbins=True)

binned = dfend.groupby([c1, c2])
hm_number = binned['number'].mean().unstack()
hm_max_size = binned['max_size'].mean().unstack()
hm_mean_size = binned['mean_size'].mean().unstack()

In [None]:
# For Wc = 1: number of clusters (top) and mean cluster size (bottom) against
# time on log-log axes, one curve per psi, averaged over the rows sharing a time.
fig,axs = plt.subplots(2,1, figsize=(5,7))
df_Wc1 = df[df.Wc==1]
grouped = df_Wc1.groupby('psi')
legend = []
for psi,g in grouped:
    mean = g.sort_values('time').groupby('time').mean()
    mean.plot(y='number', ax=axs[0], loglog=True, style='-.')
    mean.plot(y='mean_size', ax=axs[1], loglog=True, style='-.')
    # Raw string: '\p' is not a valid escape sequence in a normal literal.
    # Built in the same pass as the curves, so labels and lines stay in step.
    legend.append(r'$\psi=%0.2g$'%psi)
# Hide the legend on the top panel; the shared one sits next to the bottom panel
axs[0].legend([])
axs[1].legend(legend, loc=(1.1,0))
# Dashed guide lines over t = 10..1000: falling / rising by 2/3 of a decade
axs[0].plot([1e1,1e3], [10**1.5, 10**(1.5-2/3)], 'k--')
axs[0].set_ylim([1e0,1e2])
axs[1].plot([1e1,1e3], [1e2, 10**(2+2/3)], 'k--')
axs[1].set_ylim([10**1.75,1e3])
plt.tight_layout()