In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm.notebook import trange
from tqdm import tqdm
from einops import rearrange, repeat
from pathlib import Path
import seaborn as sns
import datetime
import math
import os
import time
import sys
import re

%matplotlib inline

In [2]:
# Every CSV file in the anode folder, sorted by path.
Anode_datasets = sorted(Path("../data/230412_Anode/").glob("*.csv"))
Anode_datasets

[PosixPath('../data/230412_Anode/20230412_105553_0__01_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_105804_0__02_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_110032_0__03_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_110249_0__04_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_110451_0__05_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_110735_0__06_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_110928_0__07_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_111152_0__08_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_111719_0__09_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_111935_0__10_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_112153_0__11_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_112415_0__12_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_112642_0__13_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_112855_0__14_Total.csv'),
 PosixPath('../data/230412_Anode/20230412_113056

In [3]:
# Every CSV file in the cathode folder, sorted by path.
Cathode_datasets = sorted(Path("../data/230330_Cathode/").glob("*.csv"))
Cathode_datasets

[PosixPath('../data/230330_Cathode/20230330_134146_0__02_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_134728_0__03_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_134935_0__04_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_135219_0__05_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_135426_0__06_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_140214_0__07_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_140723_0__08_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_140937_0__09_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_141214_0__10_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_141439_0__11_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_141754_0__12_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_142019_0__13_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_142247_0__14_Total.csv'),
 PosixPath('../data/230330_Cathode/20230330_142451_0__15_Total.csv'),
 PosixPath('../data/

## Functions

In [99]:
def detect_bead(signal, degree=0.3, verbose=True):
    """Locate beads in a 1-D signal as (rising edge, falling edge) index pairs.

    An edge is a sample-to-sample step whose size is at least
    ``degree * max(signal)``. A bead opens on a rising edge and closes on the
    next falling edge.

    Parameters
    ----------
    signal : array-like
        1-D numeric samples (list, ndarray or pandas Series). Samples are
        addressed by position, so a Series index is ignored.
    degree : float, default 0.3
        Fraction of the signal maximum that a single step must reach to count
        as an edge.
    verbose : bool, default True
        When True, print one line per detected bead.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_beads, 2)``. Each row is ``[start, end]``:
        ``start`` is the index just before the rising step and ``end`` is the
        index just after the falling step. The shape is ``(0, 2)`` when no bead
        is found, including for empty or single-sample input.

    Notes
    -----
    ``error index: i`` is printed when a rising edge arrives while a bead is
    already open, or a falling edge arrives while none is open; the edge is
    then ignored. The final bead count is printed regardless of ``verbose``.
    """
    values = np.asarray(signal, dtype=float)
    beads = []

    if values.size > 1:
        threshold = np.max(values) * degree
        steps = np.diff(values)
        rising = steps >= threshold
        falling = -steps >= threshold

        start = None
        # Only steps that cross the threshold can change state, so visit just those.
        # steps[k] is the step from sample k to k + 1, hence the +1.
        for i in (np.flatnonzero(rising | falling) + 1).tolist():
            if rising[i - 1]:
                # A rising edge wins if a step satisfies both tests (threshold <= 0).
                if start is None:
                    start = i - 1
                else:
                    print(f'error index: {i}')
            elif start is not None:
                beads.append((start, i))
                if verbose:
                    print(f"Value at index start: {start}, end: {i}, length: {i - start}")
                start = None
            else:
                print(f'error index: {i}')

    print(f'Detected bead num: {len(beads)}')
    # reshape keeps the (n, 2) layout even when no bead was found.
    return np.array(beads, dtype=int).reshape(-1, 2)

In [157]:
def LO_features(bead_sequence):
    """Summarise one bead segment of the LO channel.

    Height statistics use only the samples that are at least 90 % of the
    segment mean. The length (reported in the ``FWHM`` slot) and the area use
    the whole segment.

    Parameters
    ----------
    bead_sequence : numpy.ndarray
        1-D samples of a single bead.

    Returns
    -------
    numpy.ndarray
        ``[height_mean, height_min, height_max, height_std, FWHM, area]``.
    """
    cutoff = 0.9 * np.mean(bead_sequence)
    plateau = bead_sequence[bead_sequence >= cutoff]

    stats = [
        np.mean(plateau),
        np.min(plateau),
        np.max(plateau),
        np.std(plateau),
        len(bead_sequence),    # segment length in samples
        np.sum(bead_sequence),  # area under the whole segment
    ]
    return np.array(stats)

def BR_features(bead_sequence):
    """Summarise one bead segment of the BR channel.

    Parameters
    ----------
    bead_sequence : numpy.ndarray
        1-D samples of a single bead.

    Returns
    -------
    numpy.ndarray
        ``[peak, peak_time, diff_peak]``: the maximum value, the index of its
        first occurrence, and the drop from the peak to the last sample.
    """
    top = np.max(bead_sequence)
    hit_positions = np.where(bead_sequence == top)[0]
    first_hit = hit_positions[0]
    drop_to_end = top - bead_sequence[-1]
    return np.array([top, first_hit, drop_to_end])

def NIR_feautures(NIR, bead_len, half_ratio=0.5, over_threshold=1, tail_threshold=0.03):
    """Extract features from the NIR window that belongs to one bead.

    Parameters
    ----------
    NIR : array-like
        1-D samples starting at the bead start. The window may run past the
        bead end so that the decaying tail is included.
    bead_len : int
        Number of samples from the window start to the bead end.
    half_ratio : float, default 0.5
        Fraction of the peak that defines the rise time.
    over_threshold : float, default 1
        Samples at or above this level are summed into ``over_area``.
    tail_threshold : float, default 0.03
        Level used to find the last "still active" sample of the tail.

    Returns
    -------
    numpy.ndarray
        ``[peak, nir_time, over_area, extra_time, extra_area, sum_area]``:

        * ``peak`` - window maximum.
        * ``nir_time`` - first index at or above ``peak * half_ratio``.
        * ``over_area`` - sum of the samples at or above ``over_threshold``.
        * ``extra_time`` - last index at or above ``tail_threshold``, minus
          ``bead_len``. It is negative when the signal dies before the bead ends.
        * ``extra_area`` - sum of the samples from ``bead_len`` up to, but not
          including, that last index.
        * ``sum_area`` - sum of the samples from ``nir_time`` to ``bead_len``.

    Raises
    ------
    ValueError
        If ``NIR`` is empty (raised by ``np.max``).

    Notes
    -----
    Two degenerate cases used to raise ``IndexError`` and now return values:
    when the peak is negative no sample reaches the half-peak level, so
    ``nir_time`` falls back to the peak position; when no sample reaches
    ``tail_threshold``, ``extra_time`` and ``extra_area`` are 0.
    """
    NIR = np.asarray(NIR)
    peak = np.max(NIR)

    rise_idx = np.where(NIR >= peak * half_ratio)[0]
    # A negative peak puts the half-peak level above every sample.
    nir_time = rise_idx[0] if rise_idx.size else np.argmax(NIR)

    over_area = np.sum(NIR[NIR >= over_threshold])

    tail_idx = np.where(NIR >= tail_threshold)[0]
    if tail_idx.size:
        tail_end = tail_idx[-1]
        extra_time = tail_end - bead_len
        # The slice is empty (area 0) when the tail ends before the bead does.
        extra_area = np.sum(NIR[bead_len:tail_end])
    else:
        extra_time = 0
        extra_area = 0.0

    sum_area = np.sum(NIR[nir_time:bead_len])
    return np.array([peak, nir_time, over_area, extra_time, extra_area, sum_area])
    
def VIS_feaures(VIS, bead_len, half_ratio=0.5, over_threshold=0.7, tail_threshold=0.03):
    """Extract features from the VIS window that belongs to one bead.

    Parameters
    ----------
    VIS : array-like
        1-D samples starting at the bead start. The window may run past the
        bead end so that the decaying tail is included.
    bead_len : int
        Number of samples from the window start to the bead end.
    half_ratio : float, default 0.5
        Fraction of the peak that defines the rise time.
    over_threshold : float, default 0.7
        Samples at or above this level are summed into ``over_area``.
    tail_threshold : float, default 0.03
        Level used to find the last "still active" sample of the tail.

    Returns
    -------
    numpy.ndarray
        ``[peak, vis_time, over_area, extra_time, extra_area]``:

        * ``peak`` - window maximum.
        * ``vis_time`` - first index at or above ``peak * half_ratio``.
        * ``over_area`` - sum of the samples at or above ``over_threshold``.
        * ``extra_time`` - last index at or above ``tail_threshold``, minus
          ``bead_len``. It is negative when the signal dies before the bead ends.
        * ``extra_area`` - sum of the samples from ``bead_len`` up to, but not
          including, that last index.

    Raises
    ------
    ValueError
        If ``VIS`` is empty (raised by ``np.max``).

    Notes
    -----
    Two degenerate cases used to raise ``IndexError`` and now return values:
    when the peak is negative no sample reaches the half-peak level, so
    ``vis_time`` falls back to the peak position; when no sample reaches
    ``tail_threshold``, ``extra_time`` and ``extra_area`` are 0.
    """
    VIS = np.asarray(VIS)
    peak = np.max(VIS)

    rise_idx = np.where(VIS >= peak * half_ratio)[0]
    # A negative peak puts the half-peak level above every sample.
    vis_time = rise_idx[0] if rise_idx.size else np.argmax(VIS)

    over_area = np.sum(VIS[VIS >= over_threshold])

    tail_idx = np.where(VIS >= tail_threshold)[0]
    if tail_idx.size:
        tail_end = tail_idx[-1]
        extra_time = tail_end - bead_len
        # The slice is empty (area 0) when the tail ends before the bead does.
        extra_area = np.sum(VIS[bead_len:tail_end])
    else:
        extra_time = 0
        extra_area = 0.0

    return np.array([peak, vis_time, over_area, extra_time, extra_area])
    

In [158]:
def rule_based(signal, tail_window=100):
    """Build a per-bead feature table from a four-channel recording.

    Beads are located on ``ch0`` with ``detect_bead``. For each bead the
    LO (``ch0``) and BR (``ch1``) features are computed on the bead itself.
    The NIR (``ch2``) and VIS (``ch3``) features are computed on a window that
    runs from the bead start to the start of the next bead, so the tail after
    the bead is included.

    Parameters
    ----------
    signal : pandas.DataFrame
        Must provide the columns ``ch0``, ``ch1``, ``ch2`` and ``ch3``.
    tail_window : int, default 100
        Number of samples kept after the end of the last bead, which has no
        following bead to bound its NIR/VIS window.

    Returns
    -------
    pandas.DataFrame
        One row per detected bead and one column per feature, with a default
        integer index. The frame has no rows when no bead is detected.
    """
    LO = np.array(signal['ch0'])
    BR = np.array(signal['ch1'])
    NIR = np.array(signal['ch2'])
    VIS = np.array(signal['ch3'])

    columns = [
        'LO_height_MEAN', 'LO_height_MIN', 'LO_height_Peak', 'LO_height_STD',
        'LO_FWHM', 'LO_AREA', 'BR_Peak', 'BR_Peak_Time', 'BR_diff_peak', 'NIR_Peak',
        'NIR_Time', 'NIR_Over_AREA', 'NIR_extra_time', 'NIR_extra_area', 'NIR_sum_area',
        'VIS_Peak', 'VIS_Time', 'VIS_Over_AREA', 'VIS_extra_time', 'VIS_extra_area'
    ]

    bead_index = detect_bead(LO, verbose=False)

    # Collect the rows and build the frame once. DataFrame.append was removed
    # in pandas 2.0, and growing a frame row by row is quadratic.
    rows = []
    for i, (start, end) in enumerate(bead_index):
        bead_len = end - start
        is_last = i + 1 == len(bead_index)
        next_start = end + tail_window if is_last else bead_index[i + 1][0]

        rows.append(np.concatenate([
            LO_features(LO[start:end]),
            BR_features(BR[start:end]),
            NIR_feautures(NIR[start:next_start], bead_len),
            VIS_feaures(VIS[start:next_start], bead_len),
        ]))

    return pd.DataFrame(rows, columns=columns)


## Anode

In [5]:
# Reference recording: drop every column that contains a NaN, then name the
# remaining four columns as channels.
ref_data = (
    pd.read_csv(Anode_datasets[2], header=None)
    .dropna(axis=1)
    .set_axis(['ch0', 'ch1', 'ch2', 'ch3'], axis=1)
)
ref_data

Unnamed: 0,ch0,ch1,ch2,ch3
0,-0.037602,-0.046263,-0.001663,-0.000759
1,-0.004319,-0.008454,-0.001663,-0.000759
2,0.005698,0.004149,-0.001663,-0.000435
3,0.009899,0.010935,-0.001663,-0.000759
4,0.028640,0.031940,-0.001663,-0.000759
...,...,...,...,...
149995,-0.035017,-0.045293,-0.001987,-0.000435
149996,0.005052,0.007380,-0.001339,-0.000435
149997,0.026378,0.025800,-0.000692,-0.000759
149998,0.050613,0.046158,-0.001339,-0.000435


In [97]:
# Recording to compare against the reference: drop every column that contains
# a NaN, then name the remaining four columns as channels.
test_data = (
    pd.read_csv(Anode_datasets[7], header=None)
    .dropna(axis=1)
    .set_axis(['ch0', 'ch1', 'ch2', 'ch3'], axis=1)
)
test_data

Unnamed: 0,ch0,ch1,ch2,ch3
0,0.010222,0.008673,-0.001663,-0.000435
1,0.009899,0.008673,-0.001987,-0.000759
2,0.014422,0.011581,-0.001339,-0.000435
3,0.009899,0.006734,-0.001663,-0.000435
4,0.014422,0.008350,-0.001663,-0.000435
...,...,...,...,...
152495,0.014422,0.012551,-0.001339,-0.000759
152496,0.018300,0.017398,-0.001663,-0.000435
152497,0.037042,0.039695,-0.001339,-0.000759
152498,0.009252,0.006411,-0.001663,-0.000435


In [159]:
# Features of the bead at position 24 in the reference recording.
ref_features = rule_based(ref_data)
ref_features.iloc[24]

Detected bead num: 54


LO_height_MEAN      3.281964
LO_height_MIN       3.054144
LO_height_Peak      3.346257
LO_height_STD       0.035058
LO_FWHM           100.000000
LO_AREA           323.012597
BR_Peak             7.969796
BR_Peak_Time       21.000000
BR_diff_peak        6.594522
NIR_Peak            0.359710
NIR_Time           46.000000
NIR_Over_AREA       0.000000
NIR_extra_time      6.000000
NIR_extra_area      0.475089
NIR_sum_area       12.447495
VIS_Peak            0.205081
VIS_Time           42.000000
VIS_Over_AREA       0.000000
VIS_extra_time      4.000000
VIS_extra_area      0.498295
Name: 24, dtype: float64

In [163]:
# Side-by-side features of the bead at position 24: reference vs. test recording.
ref_bead = rule_based(ref_data).iloc[24]
test_bead = rule_based(test_data).iloc[24]
pd.concat([ref_bead, test_bead], axis=1)

Detected bead num: 54
error index: 43546
Detected bead num: 54


Unnamed: 0,24,24.1
LO_height_MEAN,3.281964,3.279973
LO_height_MIN,3.054144,2.989517
LO_height_Peak,3.346257,3.347873
LO_height_STD,0.035058,0.036923
LO_FWHM,100.0,120.0
LO_AREA,323.012597,387.863219
BR_Peak,7.969796,7.861218
BR_Peak_Time,21.0,22.0
BR_diff_peak,6.594522,6.624898
NIR_Peak,0.35971,0.813369


In [134]:
# Features of the bead at position 24 in the test recording.
test_features = rule_based(test_data)
test_features.iloc[24]

error index: 43546
Detected bead num: 54


LO_height_MEAN      3.279973
LO_height_MIN       2.989517
LO_height_Peak      3.347873
LO_height_STD       0.036923
LO_FWHM           120.000000
LO_AREA           387.863219
BR_Peak             7.861218
BR_Peak_Time       22.000000
NIR_Peak            6.624898
NIR_Time            0.813369
NIR_Over_AREA       8.000000
NIR_extra_time      0.000000
NIR_extra_area    518.000000
NIR_sum_area      170.724576
VIS_Peak           60.008563
VIS_Time            0.237122
VIS_Over_AREA      47.000000
VIS_extra_time      0.000000
VIS_extra_area      4.000000
VIS_sum_area        0.504121
Name: 24, dtype: float64