In [1]:
import os

import pulse3D
from pulse3D import constants
from pulse3D import metrics
from pulse3D import peak_detection
from pulse3D import plate_recording
from pulse3D.metrics import *
from pulse3D.constants import *

from pulse3D.plate_recording import WellFile

import pandas as pd
from pandas import DataFrame, Series

import pyarrow as pa
import pyarrow.parquet as pq

In [2]:
PATH_OF_CURRENT_FILE='/Users/kristianeschenburg/Documents/Code/CuriBio/sdk_refactor'

In [3]:
data_file='./src/tests/h5/v0.3.1/MA201110001__2020_09_03_213024/MA201110001__2020_09_03_213024__A1.h5'
w = WellFile(os.path.join(PATH_OF_CURRENT_FILE,
                          "src",
                          "tests",
                          "h5", 
                          "v0.3.1",
                          "MA201110001__2020_09_03_213024",
                          "MA201110001__2020_09_03_213024__A1.h5"))

In [7]:
twitch_width_percents = np.arange(10, 95, 5)

filtered_data = w.noise_filtered_magnetic_data
peaks_and_valleys = pulse3D.peak_detection.peak_detector(filtered_data)
twitch_indices = pulse3D.peak_detection.find_twitch_indices(peaks_and_valleys)

metric_parameters = {
                    "peak_and_valley_indices": peaks_and_valleys,
                    "filtered_data": filtered_data,
                    "twitch_indices": twitch_indices
                    }

rounded=False

metric_mapper = {
    AMPLITUDE_UUID: TwitchAmplitude(rounded=rounded),
    AUC_UUID: TwitchAUC(rounded=rounded),
    BASELINE_TO_PEAK_UUID: TwitchPeakToBaseline(rounded=rounded, is_contraction=True),
    CONTRACTION_TIME_UUID: TwitchPeakTime(rounded=rounded, is_contraction=True),
    CONTRACTION_VELOCITY_UUID: TwitchVelocity(rounded=rounded, is_contraction=True),
    FRACTION_MAX_UUID: TwitchFractionAmplitude(rounded=rounded),
    IRREGULARITY_INTERVAL_UUID: TwitchIrregularity(rounded=rounded),
    PEAK_TO_BASELINE_UUID: TwitchPeakToBaseline(rounded=rounded, is_contraction=False),
    RELAXATION_TIME_UUID: TwitchPeakTime(rounded=rounded, is_contraction=False),
    RELAXATION_VELOCITY_UUID: TwitchVelocity(rounded=rounded, is_contraction=False),
    TWITCH_FREQUENCY_UUID: TwitchFrequency(rounded=rounded),
    TWITCH_PERIOD_UUID: TwitchPeriod(rounded=rounded),
    WIDTH_UUID: TwitchWidth(rounded=rounded),
}

metric_mapper_names = {
    AMPLITUDE_UUID: 'amplitude',
    AUC_UUID: 'auc',
    BASELINE_TO_PEAK_UUID: 'baseline_to_peak',
    CONTRACTION_TIME_UUID: 'contraction_time',
    CONTRACTION_VELOCITY_UUID: 'contraction_velocity',
    FRACTION_MAX_UUID: 'fraction_max',
    IRREGULARITY_INTERVAL_UUID: 'irregularity_interval',
    PEAK_TO_BASELINE_UUID: 'peak_to_baseline',
    RELAXATION_TIME_UUID: 'relaxation_time',
    RELAXATION_VELOCITY_UUID: 'relaxation_velocity',
    TWITCH_FREQUENCY_UUID: 'twitch_frequency',
    TWITCH_PERIOD_UUID: 'twitch_period',
    WIDTH_UUID: 'width',
}

In [9]:
# per scalar dataframes
metric_list=[AMPLITUDE_UUID, 
             AUC_UUID, 
             BASELINE_TO_PEAK_UUID, 
             CONTRACTION_VELOCITY_UUID, 
             FRACTION_MAX_UUID, 
             IRREGULARITY_INTERVAL_UUID, 
             PEAK_TO_BASELINE_UUID, 
             RELAXATION_VELOCITY_UUID, 
             TWITCH_FREQUENCY_UUID,
             TWITCH_PERIOD_UUID]

print('##### SCALAR #####')
per_twitch_scalar = pd.DataFrame(index=list(twitch_indices.keys()), 
                                 columns=metric_list)
columns = pd.MultiIndex.from_product([metric_list,['n','mean','std','min','max','cov','sem']], 
                                     names=['metric', 'statistic'])
aggregate_scalar = pd.DataFrame(index=[0], 
                                columns=columns)

for i, metric_id in enumerate(metric_list):
    print(f'Metric {i}')
    metric = metric_mapper[metric_id]
    estimate = metric.fit(**metric_parameters)
    estimate_df = pd.DataFrame(estimate)
    table = pa.Table.from_pandas(estimate_df, preserve_index=True)
    
    metric_name=metric_mapper_names[metric_id]
    file_path = os.path.join(PATH_OF_CURRENT_FILE, 
                             "src",
                             "tests",
                             "data_metrics", 
                             "v0.3.1", 
                             f'{metric_name}_MA201110001__2020_09_03_213024__A1.parquet'
                            )

    pq.write_table(table=table, where=file_path)
    
    print(type(estimate))
    metric.add_per_twitch_metrics(per_twitch_scalar, metric_id, estimate)
    metric.add_aggregate_metrics(aggregate_scalar, metric_id, estimate)
    print()
    
print('##### BY WIDTH #####')
metric_list=[CONTRACTION_TIME_UUID, RELAXATION_TIME_UUID, WIDTH_UUID]
columns = pd.MultiIndex.from_product([metric_list, list(np.arange(10,95,5))], names=['metric', 'width'])
per_twitch_by_width = pd.DataFrame(index=list(twitch_indices.keys()), columns=columns)

columns = pd.MultiIndex.from_product([metric_list, np.arange(10,95,5), 
                                      ['n','mean','std','min','max','cov','sem']], 
                                     names=['metric', 'width', 'statistic'])
aggregate_by_width = pd.DataFrame(index=[0], columns=columns)

for i, metric_id in enumerate(metric_list):
    print(f'Metric {i}')
    metric = metric_mapper[metric_id]
    estimate = metric.fit(**metric_parameters)
    metric_name=metric_mapper_names[metric_id]
    file_path = os.path.join(PATH_OF_CURRENT_FILE, 
                             "src",
                             "tests",
                             "data_metrics", 
                             "v0.3.1", 
                             f'{metric_name}_MA201110001__2020_09_03_213024__A1.parquet'
                            )

    table = pa.Table.from_pandas(estimate, preserve_index=True)
    pq.write_table(table=table, where=file_path)
    print(type(estimate))
    metric.add_per_twitch_metrics(per_twitch_by_width, metric_id, estimate)
    metric.add_aggregate_metrics(aggregate_by_width, metric_id, estimate)
    print()

##### SCALAR #####
Metric 0
<class 'pandas.core.series.Series'>

Metric 1
<class 'pandas.core.series.Series'>

Metric 2
<class 'pandas.core.series.Series'>

Metric 3
<class 'pandas.core.series.Series'>

Metric 4
<class 'pandas.core.series.Series'>

Metric 5
<class 'pandas.core.series.Series'>

Metric 6
<class 'pandas.core.series.Series'>

Metric 7
<class 'pandas.core.series.Series'>

Metric 8
<class 'pandas.core.series.Series'>

Metric 9
<class 'pandas.core.series.Series'>

##### BY WIDTH #####
Metric 0
<class 'pandas.core.frame.DataFrame'>

Metric 1
<class 'pandas.core.frame.DataFrame'>

Metric 2
<class 'pandas.core.frame.DataFrame'>



In [14]:
metric_id = AUC_UUID
metric = metric_mapper[metric_id]
estimate = metric.fit(**metric_parameters)

In [15]:
parquet_file='/Users/kristianeschenburg/Documents/Code/CuriBio/sdk_refactor/src/tests/data_metrics/v0.3.1/auc_MA201110001__2020_09_03_213024__A1.parquet'
parquet = pq.read_table(parquet_file).to_pandas().squeeze()

In [16]:
parquet

134      6.171740e+10
237      5.448984e+10
338      5.174481e+10
442      5.571002e+10
544      5.365478e+10
             ...     
22541    2.802726e+10
22633    2.792326e+10
22721    2.742706e+10
22805    2.724373e+10
22894    2.803332e+10
Name: 0, Length: 242, dtype: float64

In [17]:
estimate

134      6.171740e+10
237      5.448984e+10
338      5.174481e+10
442      5.571002e+10
544      5.365478e+10
             ...     
22541    2.802726e+10
22633    2.792326e+10
22721    2.742706e+10
22805    2.724373e+10
22894    2.803332e+10
Length: 242, dtype: float64

In [144]:
w = WellFile(os.path.join(PATH_OF_CURRENT_FILE,
                          "src",
                          "tests",
                          "h5", 
                          "v0.3.1",
                          "MA201110001__2020_09_03_213024",
                          "MA201110001__2020_09_03_213024__A1.h5"))
pv = peak_detection.peak_detector(w.noise_filtered_magnetic_data)
twitch_indices = peak_detection.find_twitch_indices(pv)

metric = metrics.TwitchFractionAmplitude()
estimate_2 = metric.fit(pv, w.force, twitch_indices)

In [152]:
estimate_2[237]

0.9929389890022211

In [153]:
estimate[237]

0.9929388451895301

In [145]:
estimate

134      0.954977
237      0.992939
338      0.961568
442      0.961285
544      0.968974
           ...   
22541    0.901879
22633    0.928281
22721    0.915745
22805    0.903670
22894    0.925244
Length: 242, dtype: float64

In [132]:
try:
    table = pq.read_table(file_path)
except:
    raise FileNotFoundError('Parquet file for width not found.')
else:
    expected = table.to_pandas()

In [133]:
file_path

'/Users/kristianeschenburg/Documents/Code/CuriBio/sdk_refactor/src/tests/data_metrics/v0.3.1/width_MA201110001__2020_09_03_213024__A1.parquet'

In [70]:
from stdlib_utils import get_current_file_abs_directory

get_current_file_abs_directory()

''

In [90]:
def concat(dfs, axis=0, *args, **kwargs):   
    """
    Wrapper for `pandas.concat'; concatenate pandas objects even if they have 
    unequal number of levels on concatenation axis.
    
    Levels containing empty strings are added from below (when concatenating along
    columns) or right (when concateniting along rows) to match the maximum number 
    found in the dataframes.
    
    Parameters
    ----------
    dfs : Iterable
        Dataframes that must be concatenated.
    axis : int, optional
        Axis along which concatenation must take place. The default is 0.

    Returns
    -------
    pd.DataFrame
        Concatenated Dataframe.
    
    Notes
    -----
    Any arguments and kwarguments are passed onto the `pandas.concat` function.
    
    See also
    --------
    pandas.concat
    """
    def index(df):
        return df.columns if axis==1 else df.index
    
    def add_levels(df):
        need = want - index(df).nlevels
        if need > 0:
            df = pd.concat([df], keys=[('',)*need], axis=axis) # prepend empty levels
            for i in range(want-need): # move empty levels to bottom
                df = df.swaplevel(i, i+need, axis=axis) 
        return df
    
    want = np.max([index(df).nlevels for df in dfs])    
    dfs = [add_levels(df) for df in dfs]
    return pd.concat(dfs, axis=axis, *args, **kwargs)

In [91]:
per_twitch = concat([per_twitch_scalar, per_twitch_by_width], axis=1)
aggregate = concat([aggregate_scalar, aggregate_by_width], axis=1)

In [82]:
import pyarrow as pa

In [158]:
# Pandas.DataFrame to Arrow.Table
table = pa.Table.from_pandas(aggregate)
# Arrow.Table to Pandas.DataFrame
df_new = table.to_pandas()
# convert str uuids to UUID
df_new.columns.set_levels(list(map(uuid.UUID, df_new.columns.levels[0])), level=0, inplace=True)
df_new.columns.set_levels([str2int(k) for k in df_new.columns.levels[1]], level=1, inplace=True)

  


ValueError: invalid literal for int() with base 10: 'cov'

In [140]:
def str2int(k):
    
    if k != '':
        return int(k)
    else:
        return k

In [102]:
import uuid
from uuid import UUID

In [196]:
per_twitch_metrics = concat([per_twitch_scalar, per_twitch_metrics.T.loc[pd.IndexSlice[:, [50, 90]], :].T], axis=1).T
aggregate_metrics = concat([aggregate_scalar, aggregate_by_width.T.loc[pd.IndexSlice[:, [50, 90]], :].T], axis=1).T