In [7]:
# IPython/Jupyter setup magics (not Python statements): a completer option
# and the matplotlib backend used for every figure in this notebook.
%config IPCompleter.greedy=True
# Backend selection: only one %matplotlib line is active, the commented-out
# ones are alternatives kept for reference.
#%matplotlib inline
%matplotlib notebook
#%matplotlib widget # for ipympl (needs jupyterlab etc.)

import os, glob
from lammps_multistate_rods.tools.clusters import read_cluster_data, composition_by_states
from functools import reduce

import numpy as np

import matplotlib
from matplotlib import pyplot as plt
from matplotlib import cm
from matplotlib import colors

# Global matplotlib font settings, applied to every figure created below.
font = {'size' : 13}
matplotlib.rc('font', **font)

def calc_c(number, volume):
    '''
    Molar concentration (mol/L, i.e. M) of `number` particles in `volume` nm^3.

    With 1 nm^3 = 1e-24 L and N_A = 6.022e23 1/mol this is
    number / (0.6022 * volume), evaluated as (10 * number) / (6.022 * volume).
    '''
    scaled_count = 10.0*number
    scaled_volume = 6.022*volume
    return scaled_count/scaled_volume

# Root directory of the simulation output; every entry of data_dirs is a
# sub-directory of it.
# NOTE(review): absolute, machine-specific paths - adjust before running elsewhere.
#base_dir = '/media/data_ntfs/znanost/projects/amyloid MD simulations/simulation data/soluble_only/5p_v3/'
base_dir = '/home/simpsonim/programming/LAMMPS/amyloids/test/'
# Sub-directories to analyse; each one is searched for '*_micelle_data' files
# by the loading cell below.
data_dirs = []
# Alternative sets of directories (disabled); uncomment to include them.
#data_dirs.extend(['cossq-1.0_3.00', 'LJ-0.6_5.5', 'LJ-1.0_3.90', 'LJ-1.75_3.25'])
#data_dirs.extend(['cossq-1.0_3.25', 'LJ-0.6_6.0', 'LJ-1.0_4.25', 'LJ-1.75_3.50'])
#data_dirs.extend(['cossq-1.0_3.37', 'LJ-0.6_6.25', 'LJ-1.0_4.42', 'LJ-1.75_3.62'])
#data_dirs.extend(['cossq-1.0_3.50', 'LJ-0.6_6.5', 'LJ-1.0_4.60', 'LJ-1.75_3.75'])
#data_dirs.extend(['cossq-1.0_3.75', 'LJ-0.6_7.0', 'LJ-1.0_4.95', 'LJ-1.75_4.00'])
data_dirs.append('5p_v3_base_micelles')

In [8]:
# get all the data into appropriate structures
#
# all_data maps (data_dir, cell_size, num_cells) -> {tag: data}, where `data`
# is a 7-element list whose entries are named, in index order, by data_struct.

cell_sizes = set()
nums_cells = set()
all_data = {}
# data[0] timesteps, data[1] box volumes, data[2] total soluble rods,
# data[3] free soluble monomers, data[4] total clusters (incl. free monomers),
# data[5] per-snapshot list of (size, occurrences) pairs, data[6] max micelle size
data_struct = ['ts', 'Vs', 'Nsols', 'Nmons', 'Nclusts', 'mic_stats', 'max_mic_size']

for data_dir in data_dirs:
    filepaths = sorted(glob.glob(os.path.join(base_dir, data_dir, '*_micelle_data')))
    for filepath in filepaths:
        # the first three '_'-separated fields of the file name identify the run
        cell_size, num_cells, tag = os.path.basename(filepath).split('_')[:3]
        cell_size = float(cell_size)
        num_cells = float(num_cells)
        cell_sizes.add(cell_size)
        nums_cells.add(num_cells)
        key = (data_dir, cell_size, num_cells)
        data = []
        all_data.setdefault(key, {})[tag] = data

        timesteps, box_sizes, rod_counts, cluster_data = read_cluster_data(filepath)
        n_snapshots = len(timesteps)
        data.append(np.array(timesteps)) # timesteps
        data.append(np.array([reduce(lambda x,y: x*y, box_dims) for box_dims in box_sizes])) # volumes
        data.append(np.zeros(n_snapshots)) # total soluble rods
        data.append(np.zeros(n_snapshots)) # free soluble monomers
        data.append(np.zeros(n_snapshots)) # total clusters (of any size, including free)
        data.append([None]*n_snapshots) # lists of (size,occurrences) pairs
        data.append(0) # max micelle size

        for i in range(n_snapshots):
            # sizes of all clusters in this snapshot (includes free monomers)
            sizes = [len(cluster) for cluster in cluster_data[i].values()]
            data[2][i] = sum(sizes)
            data[3][i] = sizes.count(1)
            data[4][i] = len(sizes)
            # default=0 keeps a snapshot without any clusters from raising in max()
            data[6] = max(data[6], max(sizes, default=0))
            # materialise the pairs: a bare zip() is a one-shot iterator in
            # Python 3 and would be empty the second time it is traversed
            data[5][i] = list(zip(*np.unique(sizes, return_counts=True)))
            # total soluble rods has to match!
            assert data[2][i] == rod_counts[i][0], \
                'soluble rod count mismatch in {} (snapshot {})'.format(filepath, i)

cell_sizes = sorted(cell_sizes)
nums_cells = sorted(nums_cells)

In [9]:
# process the data into useful stuff
#
# For every (data_dir, cell_size, num_cells) key this fills:
#   cmc_stats[key]     = (common_ts, (mean, std) of total conc., (mean, std) of free conc.)
#   distributions[key] = (common_ts, (mean, std) of the cluster-size distribution, max_mic_size)
# Statistics are pooled over all simulations (tags) stored under the key.

skip_first = 10 # how many snapshots
# how many of the last common snapshots the size distribution is averaged over
# NOTE(review): this name was used but never defined in the notebook (NameError
# on a fresh kernel); 10 is a placeholder - confirm the intended value.
avg_over = 10
cmc_stats = {}
distributions = {}
for key, sims in all_data.items():
    sim_data = list(sims.values())
    n_sims = len(sim_data)
    # simulations may have different lengths; only the common prefix is used
    max_i = min(len(data[0]) for data in sim_data)
    common_ts = sim_data[0][0][:max_i]

    # concentrations use the volume of the first snapshot (data[1][0])
    all_total_cs = np.concatenate([[calc_c(n, data[1][0]) for n in data[2][skip_first:max_i]]
                                   for data in sim_data])
    all_free_cs = np.concatenate([[calc_c(n, data[3][0] * 0 + data[1][0]) for n in data[3][skip_first:max_i]]
                                  for data in sim_data]) if False else \
                  np.concatenate([[calc_c(n, data[1][0]) for n in data[3][skip_first:max_i]]
                                  for data in sim_data])
    total_sols = (np.mean(all_total_cs), np.std(all_total_cs))
    free_sols = (np.mean(all_free_cs), np.std(all_free_cs))
    cmc_stats[key] = (common_ts, total_sols, free_sols)

    max_mic_size = max(data[6] for data in sim_data)
    # never reach back past the first snapshot: a negative index would silently
    # wrap around to snapshots at the end of the (untruncated) lists
    n_avg = min(avg_over, max_i)
    agg_occurr = []
    for t in range(max_i - n_avg, max_i):
        # rows: cluster size (1-based, hence size-1), columns: simulation
        occurr_t = np.zeros((max_mic_size, n_sims), float)
        for j, data in enumerate(sim_data):
            for size, occurr in data[5][t]:
                # fraction of clusters of this size among all clusters
                occurr_t[size-1,j] = float(occurr)/data[4][t] #float(occurr*size)/data[2][t]
        agg_occurr.append(occurr_t)
    # pool snapshots and simulations as columns, then average per cluster size
    agg_occurr = np.concatenate(agg_occurr, 1)
    avg_dist = (np.mean(agg_occurr, 1), np.std(agg_occurr, 1))

    distributions[key] = (common_ts, avg_dist, max_mic_size)

In [10]:
# Averaged micelle size distribution P(N) for a single data directory:
# one curve per cell size, shaded by +/- one standard deviation.
data_dir = data_dirs[0]
num_cells = nums_cells[0]

fig = plt.figure('Averaged micelle size distributions for {}'.format(data_dir), figsize=[9,6])
ax = plt.axes()

for i, cell_size in enumerate(cell_sizes):
    key = (data_dir, cell_size, num_cells)
    try:
        data = all_data[key]
    except KeyError:
        print("ERROR: No simulations with those parameters!")
        assert False
    data_values = list(data.values())
    common_ts, avg_dist, max_mic_size = distributions[key]
    mean_dist, std_dist = avg_dist
    # total concentration of the first simulation's first snapshot, in mM
    first_sim = data_values[0]
    c = calc_c(first_sim[2][0], first_sim[1][0])*1000
    sizes = range(1, max_mic_size+1)

    color = colors.to_rgb(cm.tab10(i))
    color_fill = color + (0.1,) # same colour, alpha 0.1
    curve_label = r'{} @ {:.2f} mM'.format(data_dir, c)
    ax.plot(sizes, mean_dist, '.-', lw=1, color=color, label=curve_label)
    # upper band first, then lower band
    for band_edge in (mean_dist+std_dist, mean_dist-std_dist):
        ax.fill_between(sizes, mean_dist, band_edge, color=color_fill)

ax.set_xlabel(r'micelle size ($N$)')
ax.set_ylabel(r'$P(N)$', rotation='vertical')
ax.set_xlim(0, 22)
ax.set_ylim(0.0, 0.05)
ax.grid(linestyle='--')
ax.legend(loc='upper right')

plt.show()

<IPython.core.display.Javascript object>

In [11]:
# draw free vs. total concentration (CMC graph)
#
# One curve per data directory: ln(c_free) against ln(c_total), one point per
# cell size. The dashed diagonal marks c_free == c_total.

num_cells = nums_cells[0]

xs = []
xdevs = []
ys = []
ydevs = []
for data_dir in data_dirs:
    dir_xs, dir_xdevs, dir_ys, dir_ydevs = [], [], [], []
    for cell_size in cell_sizes:
        key = (data_dir, cell_size, num_cells)
        # not every directory has every cell size; skip only missing
        # combinations so that any other error still surfaces
        if key not in cmc_stats:
            continue
        common_ts, total_sols, free_sols = cmc_stats[key]
        dir_xs.append(np.log(total_sols[0]))
        # relative deviation std/mean, used as the spread of the logarithm
        dir_xdevs.append(total_sols[1]/total_sols[0])
        dir_ys.append(np.log(free_sols[0]))
        dir_ydevs.append(free_sols[1]/free_sols[0])
    xs.append(np.array(dir_xs))
    xdevs.append(np.array(dir_xdevs))
    ys.append(np.array(dir_ys))
    ydevs.append(np.array(dir_ydevs))

# directories may contribute different numbers of points, so flatten them
# instead of stacking into a (possibly ragged) 2-D array
all_xs = np.concatenate(xs) if xs else np.array([])
if all_xs.size == 0:
    raise ValueError('No CMC data found for num_cells = {}'.format(num_cells))
xmin = np.min(all_xs)
xmax = np.max(all_xs)

fig = plt.figure('CMC', figsize=(8,6))
ax = plt.axes()
for i in range(len(data_dirs)):
    # colours repeat every 4 directories
    color = colors.to_rgb(cm.tab10(i%4))
    color_fill = (color + (0.1,))
    #ax.errorbar(xs[i], ys[i], xerr=xdevs[i], fmt='.-', capsize=4, lw=1.0, color=color, label=data_dirs[i])
    ax.plot(xs[i], ys[i], '.-', lw=1.0, color=color, label=data_dirs[i])
    ax.fill_between(xs[i], ys[i], ys[i]+ydevs[i], color=color_fill)
    ax.fill_between(xs[i], ys[i], ys[i]-ydevs[i], color=color_fill)
# same range on both axes, so the reference diagonal spans the whole plot
lims = (xmin-0.5, xmax+0.5)
ax.plot(lims, lims, 'k--', lw=1.0)
ax.set_xlabel(r'$\ln\left(c_{total}\;[M]\right)$')
ax.set_ylabel(r'$\ln\left(c_{free}\;[M]\right)$', rotation='vertical')
ax.set_xlim(*lims)
ax.set_ylim(*lims)
ax.grid(linestyle='--')
ax.legend(loc='upper left')

plt.show()

<IPython.core.display.Javascript object>