In [1]:
import pandas as pd
import pathlib
import polyflexmd.data_analysis.data.constants as data_constants
import polyflexmd.data_analysis.transform.transform as transform
import polyflexmd.data_analysis.theory.kremer_grest as kremer_grest
import numpy as np
import dask.dataframe
import dask.distributed

import copy

In [2]:
# Load IPython's autoreload extension and set it to mode 2, so edits to the
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# CSV file that the trajectory-loading cell reads with dask.dataframe.read_csv.
# NOTE(review): this is an absolute path on one specific machine; it has to be
# adjusted before the notebook can be re-run anywhere else.
PATH_DF_TRAJECTORY_PROCESSED = pathlib.Path("/home/egor/Projects/polyflexmd/data/trajectories_test.csv")

In [4]:
# Start a distributed client with 8 process-based workers; the client is the
# last expression of the cell so its summary is rendered as the cell output.
worker_options = {"n_workers": 8, "processes": True}
client = dask.distributed.Client(**worker_options)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 8
Total threads: 8,Total memory: 15.51 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:33229,Workers: 8
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 15.51 GiB

0,1
Comm: tcp://127.0.0.1:33117,Total threads: 1
Dashboard: http://127.0.0.1:33855/status,Memory: 1.94 GiB
Nanny: tcp://127.0.0.1:38859,
Local directory: /tmp/dask-scratch-space/worker-buh36024,Local directory: /tmp/dask-scratch-space/worker-buh36024

0,1
Comm: tcp://127.0.0.1:40111,Total threads: 1
Dashboard: http://127.0.0.1:38981/status,Memory: 1.94 GiB
Nanny: tcp://127.0.0.1:39113,
Local directory: /tmp/dask-scratch-space/worker-_umacdtz,Local directory: /tmp/dask-scratch-space/worker-_umacdtz

0,1
Comm: tcp://127.0.0.1:35525,Total threads: 1
Dashboard: http://127.0.0.1:36155/status,Memory: 1.94 GiB
Nanny: tcp://127.0.0.1:36565,
Local directory: /tmp/dask-scratch-space/worker-ylb5kfjm,Local directory: /tmp/dask-scratch-space/worker-ylb5kfjm

0,1
Comm: tcp://127.0.0.1:42035,Total threads: 1
Dashboard: http://127.0.0.1:44245/status,Memory: 1.94 GiB
Nanny: tcp://127.0.0.1:44293,
Local directory: /tmp/dask-scratch-space/worker-ti5oriwh,Local directory: /tmp/dask-scratch-space/worker-ti5oriwh

0,1
Comm: tcp://127.0.0.1:40525,Total threads: 1
Dashboard: http://127.0.0.1:46045/status,Memory: 1.94 GiB
Nanny: tcp://127.0.0.1:41801,
Local directory: /tmp/dask-scratch-space/worker-pl6vsve3,Local directory: /tmp/dask-scratch-space/worker-pl6vsve3

0,1
Comm: tcp://127.0.0.1:34395,Total threads: 1
Dashboard: http://127.0.0.1:43193/status,Memory: 1.94 GiB
Nanny: tcp://127.0.0.1:46857,
Local directory: /tmp/dask-scratch-space/worker-8j2dmplc,Local directory: /tmp/dask-scratch-space/worker-8j2dmplc

0,1
Comm: tcp://127.0.0.1:36687,Total threads: 1
Dashboard: http://127.0.0.1:45987/status,Memory: 1.94 GiB
Nanny: tcp://127.0.0.1:42971,
Local directory: /tmp/dask-scratch-space/worker-i_qqs71a,Local directory: /tmp/dask-scratch-space/worker-i_qqs71a

0,1
Comm: tcp://127.0.0.1:33811,Total threads: 1
Dashboard: http://127.0.0.1:34741/status,Memory: 1.94 GiB
Nanny: tcp://127.0.0.1:43143,
Local directory: /tmp/dask-scratch-space/worker-ej23q475,Local directory: /tmp/dask-scratch-space/worker-ej23q475


In [5]:
# Column dtypes for reading the CSV: start from a private copy of the project's
# raw-trajectory dtype mapping, remove the ix / iy / iz entries, and read the
# kappa column as a categorical.
traj_column_types = copy.deepcopy(data_constants.RAW_TRAJECTORY_DF_COLUMN_TYPES)
traj_column_types.pop("ix")
traj_column_types.pop("iy")
traj_column_types.pop("iz")
traj_column_types["kappa"] = "category"

# Lazily read the trajectory CSV as a Dask DataFrame.
df_trajectories = dask.dataframe.read_csv(
    PATH_DF_TRAJECTORY_PROCESSED,
    dtype=traj_column_types
)

# Width of one partition, measured in values of the "t" column.
time_steps_per_partition = 30000

# Partition boundaries: every distinct t that is an exact multiple of the
# partition width, in ascending order.
# NOTE(review): set_index(divisions=...) expects the first and last division to
# bound the index values, so this relies on the smallest and largest t in the
# file being multiples of time_steps_per_partition themselves - confirm for
# other input files.
divisions = df_trajectories["t"].loc[
    df_trajectories["t"] % time_steps_per_partition == 0
].unique().compute().sort_values().tolist()

# Index by t using the precomputed boundaries.
df_trajectories = df_trajectories.set_index("t", divisions=divisions)

# persist() does not work in place: it returns a new collection backed by the
# computed partitions. The result must be bound to the name, otherwise
# df_trajectories keeps pointing at the lazy graph and every later cell
# re-reads and re-indexes the CSV from scratch.
df_trajectories = df_trajectories.persist()

df_trajectories

Unnamed: 0_level_0,id,type,x,y,z,molecule-ID,kappa
npartitions=2832,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
450030000,uint16,uint8,float32,float32,float32,int64,category[unknown]
450060000,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...
534960000,...,...,...,...,...,...,...
534990000,...,...,...,...,...,...,...


In [6]:
# Preview the first five rows of the t-indexed trajectory frame.
df_trajectories.head(n=5)

Unnamed: 0_level_0,id,type,x,y,z,molecule-ID,kappa
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
450030000,1,1,0.0,0.0,0.0,1,190.2
450030000,2,1,-0.336178,0.186169,0.890632,1,190.2
450030000,3,2,-0.635311,0.332767,1.84965,1,190.2
450030000,4,2,-0.900711,0.595613,2.73062,1,190.2
450030000,5,2,-1.18856,0.787639,3.60975,1,190.2


In [7]:
# Parameters used by the Kuhn-length cells that follow.
N_beads = 64   # forwarded as N_beads to transform.estimate_kuhn_length_df
kappa = 190.2  # same value as the kappa column in the trajectory preview
l_b = 0.97     # forwarded as l_b; presumably the bond length - TODO confirm

In [8]:
# Reference Kuhn length from kremer_grest.bare_kuhn_length for the kappa and
# l_b configured in this notebook; shown as the cell output so it can be
# compared with the estimate computed from the trajectories.
l_k_theory = kremer_grest.bare_kuhn_length(kappa, l_b)
l_k_theory

368.018

In [9]:
# Describe the Kuhn-length estimate (grouped by kappa) as a lazy computation
# first, then materialise it. The displayed result has the columns l_K and
# d_l_K.
kuhn_length_lazy = transform.estimate_kuhn_length_df(
    df_trajectory=df_trajectories,
    group_by_params=["kappa"],
    N_beads=N_beads,
    l_b=l_b,
)
l_K_estimate = kuhn_length_lazy.compute()
l_K_estimate

Unnamed: 0_level_0,Unnamed: 1_level_0,l_K,d_l_K
t,kappa,Unnamed: 2_level_1,Unnamed: 3_level_1
450030000,190.2,428.037989,2.493113
450040000,190.2,389.221405,1.571433
450050000,190.2,371.061552,1.378175
450060000,190.2,362.321018,1.278343
450070000,190.2,344.221371,1.648810
...,...,...,...
534960000,190.2,361.655764,1.652846
534970000,190.2,376.939321,1.604062
534980000,190.2,354.721865,1.545850
534990000,190.2,359.192390,1.927330


In [10]:
# Mean of the l_K column over every row of the estimate table.
l_K_estimate.loc[:, "l_K"].mean()

373.61305845426693

In [11]:
# Standard deviation of the l_K column over every row of the estimate table.
l_K_estimate.loc[:, "l_K"].std()

36.86202451568973