# Extracting sample of features for prototyping

In [1]:
# data
import numpy as np
import pandas as pd
import time
from scipy import stats
from scipy.special import binom
from scipy.spatial import distance_matrix
import itertools
from collections import Counter
from math import sin
from random import random
import random
from numpy.random import choice
import math
from db import DB
from scipy.spatial import distance_matrix
import datetime

# dionysus
import dionysus as d

# plotting
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import collections  as mc
import matplotlib.path as mpath
import matplotlib.patches as mpatches


# format notebook output
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from IPython.core.display import display, HTML, Javascript
display(HTML("<style>.container { width:90% !important; }</style>"))

# style pandas display
pd.set_option('display.max_columns', None)

# matplotlib magic
%matplotlib inline

In [2]:
def rolling_window(a, window):
    '''
    Take in an array and return array of rolling windows of specified length
    
    Parameters:
    - a: numpy array that will be windowed
    - window: integer that will be the length of the window
    
    Returns:
    - a_windowed: array where each entry is an array of length window
    '''
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    a_windowed = np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
    return a_windowed

In [3]:
def time_series_to_point_cloud(time_series, dimension_embed=2):
    '''
    Convert a time series into a point cloud in the dimension specified by dimension_embed
    
    Parameters:
    - time_series: numpy array of time series values
    - dimension_embed: dimension of Euclidean space in which to embed the time series into by taking windows of dimension_embed length,
        e.g. if the time series is [t_1,...,t_n] and dimension_embed is 2, then the point cloud would be [(t_0, t_1), (t_1, t_2),...,(t_(n-1), t_n)]
        
    Returns:
    - point_cloud_normalized: point cloud normalized to take values between [0, 1]
    '''

    assert len(time_series) >= dimension_embed, 'dimension_embed larger than length of time_series'
    
    # time series to float type
    time_series = time_series.astype(float)

    # compute point cloud
    point_cloud = rolling_window(a=time_series, window=dimension_embed)
    
    return np.array(point_cloud)

In [4]:
def point_cloud_to_persistent_homology(point_cloud, epsilon_range, max_simplex_dim):
    # take unique points in the point cloud only
    point_cloud = np.unique(point_cloud, axis=0)
    
    # compute distance matrix
    distance_mtx = distance_matrix(x=point_cloud, y=point_cloud, p=2)
    
    # find max distance between points
    max_distance = distance_mtx.max()
    
    # scale epsilon_range
    epsilon_range = epsilon_range*(max_distance**2)

    # build filtration with fill_rips
    filtration = d.fill_rips(point_cloud, k=max_simplex_dim, r=max_distance**2)
    
    # calculate persistent homology
    persistence = d.homology_persistence(filtration)
    
    # initialize persistence diagrams
    diagrams = d.init_diagrams(persistence, filtration)
    
    ep_ran_len = len(epsilon_range)
    
    homology = {dimension: np.zeros(ep_ran_len) for dimension in range(max_simplex_dim)}

    for dimension, diagram in enumerate(diagrams):
        if dimension < max_simplex_dim and len(diagram) > 0:
            homology[dimension] = np.array([np.array(((epsilon_range >= point.birth) & (epsilon_range <= point.death)).astype(int)) 
                                        for point in diagram
                                       ]).sum(axis=0)
        
    return homology

In [5]:
def time_series_to_persistent_homology(time_series, epsilon_range, max_simplex_dim):
    '''
    Wrapper function that takes in a time series and outputs
    the persistent homology object, along with other
    auxiliary objects.
    
    Parameters: 
    - time_series: Numpy array of time series values
    - num_epsilon_steps: Either a positive integer or a list of epsilon values. If an integer, it is the number of segments the inverval [0, 1] 
    will be divided into. The method parameter will determine how the division will be set.
    - max_simplex_dim: Integer denoting the maximum dimension of simplexes to create in filtration
    
    Returns:
    '''
    
    # create point cloud from time series
    point_cloud = time_series_to_point_cloud(time_series, dimension_embed=max_simplex_dim)
    
#     # take unique points in the point cloud only
#     point_cloud = np.unique(point_cloud, axis=0)

#     # build filtration with fill_rips
#     filtration = d.fill_rips(point_cloud, max_simplex_dim, 1.0)
    
#     # calculate persistent homology
#     persistence = d.homology_persistence(filtration)
    
#     # initialize persistence diagrams
#     diagrams = d.init_diagrams(persistence, filtration)
    
#     homology = {}

#     for dimension in range(max_simplex_dim):
#         homology[dimension] = np.array([np.array(((epsilon_range >= point.birth) & (epsilon_range <= point.death)).astype(int)) 
#                                         for point in diagrams[dimension]
#                                        ]).sum(axis=0)
        
#     return homology

    homology = point_cloud_to_persistent_homology(point_cloud=point_cloud, epsilon_range=epsilon_range, max_simplex_dim=max_simplex_dim)
    return homology

In [6]:
def betti(time_series, epsilon_range, max_simplex_dim, agg=None):
    homology = time_series_to_persistent_homology(time_series=time_series,
                                                  epsilon_range=epsilon_range,
                                                  max_simplex_dim=max_simplex_dim)
    
    if agg == 'sum':
        return np.array(homology.values()).sum().sum()
    elif agg == 'mean':
#         return np.array(homology.values()).mean()
        return np.array(homology.values()).mean(axis=1).sum()
    elif agg == 'median':
        return np.median(np.array(homology.values()))
    else:
        return np.array(homology.values())

In [7]:
def time_series_rolling_betti(time_series, epsilon_range, max_simplex_dim, window, agg=None):
    '''
    Parameters:
    - time_series: pandas Series
    - epsilon_range: numpy array
    - max_simplex_dim: int
    - window: int
    - agg: string or None. If string, one of 'sum', 'mean', or 'median'
    '''
    betti_results = [betti(time_series=wdw,
                       epsilon_range=epsilon_range,
                       max_simplex_dim=max_simplex_dim,
                       agg=agg) for wdw in rolling_window(time_series, window)]
    
    return pd.concat([time_series, pd.Series(data=betti_results, index=time_series.index[window-1:], name='betti')], axis=1)

In [8]:
def simplex_conditions(row, columns, n):
    '''
    Helper function to apply conditions that single 
    out a simplex. To be used applied to a pandas DataFrame.
    The columns are assumed to be {1, 2,..., n, (1, 2), (1, 3), ...} where 
    n is the dimension of simplex being checked for. The columns 1, ..., n
    are assumed to be integer type and the columns of the form (i, j) are 
    assumed to be tuple type, and to have i < j.
    
    Parameters:
    - row: row of the DataFrame
    - columns: list of the columns of the DataFrame
    - n: dimnesion of the simplex being checked for
    
    Returns:
    - Boolean based on whether the row in question satisfies simplex conditions
    '''
    
    for col in columns:
        if type(col) == int and col < n:
            if row[col] == row[col+1]:
                return False
            
        elif type(col) == tuple:
            if row[col[1]] != row[col]:
                return False
            
    else:
        return True

In [9]:
def create_n_simplexes(one_spxs, n):
    '''
    Helper function that creates an array of the 
    n simplexes from the data frame of the 
    one simplexes
    
    Parameters:
    - one_spxs: DataFrame of one simplexes 
    - n: dimension of simplexes being created
    
    Returns:
    - list of n+1-tuples that represent n simplexes
    '''
    
    # create copy of one simplexes
    df = one_spxs
    
    # iteratively merge on one simplexes
    for combo in list(itertools.combinations(range(n+1), 2))[1:]:
        if combo[0] == 0:
            df = pd.merge(
                left=df,
                right=one_spxs.rename(columns={1: combo[1]}),
                on=0,
                how='left'
            ).dropna().drop_duplicates().astype(int)
        else:
            df = pd.merge(
                left=df,
                right=one_spxs.rename(columns={0: combo[0], 1: combo}),
                on=combo[0],
                how='left'
            ).dropna().drop_duplicates().astype(int)
    
    # extract vertexes that make up the n simplexes
    cols = df.columns

    n_simplexes = []
    if len(df) > 0:
        n_simplexes = [tuple(simplex) 
                       for simplex in df[df.apply(lambda x: simplex_conditions(x, cols, n), axis=1)][[col 
                                                                                                      for col in df.columns if type(col) == int]].values]
    
    return n_simplexes

In [10]:
def plot_two_simplexes(point_cloud, distance_matrix, epsilon, one_simplexes_color='#6DA5BA', two_simplexes_color='#E0513B'):
    one_simplicies = [(x,y) for (x,y) in zip(*np.where(distance_matrix <= epsilon)) if x < y]
    two_simplicies = create_n_simplexes(one_spxs=pd.DataFrame(one_simplicies), n=2)

    lines = [(point_cloud[x], point_cloud[y]) for (x, y) in one_simplicies]

    fig, ax = plt.subplots(figsize=(7, 7))

    lc2 = mc.LineCollection(
        lines,
        colors=one_simplexes_color,
        linewidths=0.5
    );

    pd.DataFrame(point_cloud).rename(columns={0: 'x', 1: 'y'}).plot(
        kind='scatter',
        x='x',
        y='y',
        ax=ax,
        s=7,
        c=one_simplexes_color
    );

    ax.add_collection(lc2);
    ax.autoscale();

    for two_simplex in two_simplicies:
        Path = mpath.Path
        path_data = [
                (Path.MOVETO, point_cloud[two_simplex[0]]),
                (Path.LINETO, point_cloud[two_simplex[1]]),
                (Path.LINETO, point_cloud[two_simplex[2]]),
                (Path.CLOSEPOLY, point_cloud[two_simplex[0]]),
                ]
        codes, verts = zip(*path_data)
        path = mpath.Path(verts, codes)
        patch = mpatches.PathPatch(path, facecolor=two_simplexes_color, alpha=0.2);
        ax.add_patch(patch);

    plt.show();

In [11]:
def plot_two_simplexes_multi(point_cloud, distance_matrix, epsilon_list, one_simplexes_color='#6DA5BA', two_simplexes_color='#E0513B'):
    lgth = len(epsilon_list)
    
    fig, axs = plt.subplots(1,lgth, figsize=(7*lgth, 7))
    
    for idx, epsilon in enumerate(epsilon_list):
        one_simplicies = [(x,y) for (x,y) in zip(*np.where(distance_matrix <= epsilon)) if x < y]
        two_simplicies = create_n_simplexes(one_spxs=pd.DataFrame(one_simplicies), n=2)

        lines = [(point_cloud[x], point_cloud[y]) for (x, y) in one_simplicies]

    #     fig, ax = plt.subplots(figsize=(7, 7))

        lc2 = mc.LineCollection(
            lines,
            colors=one_simplexes_color,
            linewidths=0.5
        );

        pd.DataFrame(point_cloud).rename(columns={0: 'x', 1: 'y'}).plot(
            kind='scatter',
            x='x',
            y='y',
            ax=axs[idx],
            s=7,
            c=one_simplexes_color
        );
        
        axs[idx].set_title('epsilon = {EPSILON}'.format(EPSILON=epsilon))

        axs[idx].add_collection(lc2);
        axs[idx].autoscale();

        for two_simplex in two_simplicies:
            Path = mpath.Path
            path_data = [
                    (Path.MOVETO, point_cloud[two_simplex[0]]),
                    (Path.LINETO, point_cloud[two_simplex[1]]),
                    (Path.LINETO, point_cloud[two_simplex[2]]),
                    (Path.CLOSEPOLY, point_cloud[two_simplex[0]]),
                    ]
            codes, verts = zip(*path_data)
            path = mpath.Path(verts, codes)
            patch = mpatches.PathPatch(path, facecolor=two_simplexes_color, alpha=0.2);
            axs[idx].add_patch(patch);

plt.show();

In [121]:
def compute_persistent_homology_df(df, value_col, epsilon_range, max_simplex_dim, window):
    tmp = df.copy()
    
    # compute rolling betti matrix
    bt_mtx = time_series_rolling_betti(time_series=tmp[value_col],
               epsilon_range=epsilon_range,
               max_simplex_dim=max_simplex_dim,
               window=window,
               agg=None)
    
    # add betti matrix column, being careful with the index
    tmp.loc[tmp.index,'betti'] = pd.Series(data=bt_mtx['betti'].values, index=tmp.index)
    
    # drop nulls that arise from windowing
    tmp.dropna(inplace=True)
    
    tmp.reset_index(inplace=True)
    
    tmp.rename(columns={'index': 'second'}, inplace=True)
    
    # separate out dimensions of betti matrix
    for dim in range(max_simplex_dim):
        tmp['betti_'+str(dim)] = tmp['betti'].apply(lambda x: x if type(x) == float else x[dim])
    
    # create list that will be turned into data frame
    # this is the equivalent of 'exploding' (a la SQL) the lists in
    # the betti column
    rows = []
    _ = tmp.apply(lambda row: [rows.append(
        [
#         row['account_id'],
#         row['date'],
#         row['total_amount_sum']
            row['second']
    ]+
        list(tpl)) for tpl in list(zip(*( [row['betti_{DIM}'.format(DIM=dim)] for dim in range(max_simplex_dim) ] + [epsilon_range] ))) ], axis=1)
    
    df_new = pd.DataFrame(
        rows, 
        columns=['second'] + ['betti_{DIM}'.format(DIM=dim) for dim in range(max_simplex_dim)] + ['epsilon']
    )
    
    return df_new

In [12]:
def desc(df):
    display(df.head())
    print(df.shape)
    print(df.dtypes)

# Data

In [94]:
df = pd.read_csv('/Users/eric.bunch/seizure_sample_12_27.csv', encoding='utf-8')

In [95]:
df_secs = df.groupby(by=lambda x: int(x/float(2**8))).agg('mean')

In [117]:
df_secs.head()
df_secs.shape

Unnamed: 0,F7-CS2,T7-CS2,P7-CS2,FP1-CS2,F3-CS2,C3-CS2,P3-CS2,O1-CS2,FZ-CS2,CZ-CS2,PZ-CS2,FP2-CS2,F4-CS2,C4-CS2,P4-CS2,O2-CS2,F8-CS2,T8-CS2,P8-CS2,C2-CS2,C6-CS2,CP2-CS2,CP4-CS2,CP6-CS2,seizure_flag
0,-16.733822,-5.844017,-0.320513,-3.930098,-7.876984,-3.246337,-1.884921,-13.34707,-6.336996,-3.047924,-4.154457,-2.910562,-41.953602,-122.367216,-83.568376,-16.678877,-2.864774,-2.553419,-0.154151,-15.737179,-9.960317,-2.040598,-1.498779,-4.212454,0
1,8.936203,4.136142,46.228632,52.48779,-0.23199,47.762515,0.367827,3.093712,-1.106532,47.611416,53.37149,51.166056,51.080586,53.696581,47.278694,11.700244,43.925519,36.053114,38.353175,12.100122,2.284799,38.021978,-1.593407,-0.645604,0
2,2.599206,0.108364,8.008242,9.810745,4.310134,13.931624,4.409341,5.865385,5.473138,10.934066,8.815629,11.016484,14.004884,6.112637,11.851343,6.979548,9.134615,15.405983,11.837607,4.467338,5.979853,11.147741,4.253663,2.443529,0
3,12.069597,7.942613,-2.904457,0.592186,9.175824,-9.282662,13.949939,15.538767,1.935287,-0.815018,-4.723748,-2.606838,-6.10348,-3.45696,-6.671245,9.026252,0.593712,-2.931929,-3.913309,11.961233,2.351954,0.117521,-5.821123,-4.635226,0
4,1.434676,-2.96398,-9.137668,-12.156593,2.837302,-9.047619,-4.490232,-4.7558,2.693834,-8.253968,-6.259158,-12.593101,-9.194139,-11.094322,-9.604701,-4.65812,-11.831502,-12.68315,-4.90232,-5.277778,1.008852,-10.094628,-6.097375,-2.457265,0


(3600, 25)

In [118]:
df_secs['seizure_flag'].value_counts()

0    3365
1     235
Name: seizure_flag, dtype: int64

In [119]:
epsilon_range = np.array([y*.01 for y in range(100)])

In [122]:
%%time

bt_df = compute_persistent_homology_df(
    df=df_secs,
    value_col='F7-CS2',
    epsilon_range=epsilon_range,
    max_simplex_dim=3,
    window=10
)

CPU times: user 6.6 s, sys: 143 ms, total: 6.74 s
Wall time: 6.69 s


In [123]:
desc(bt_df)

Unnamed: 0,second,betti_0,betti_1,betti_2,epsilon
0,9,8.0,0.0,0.0,0.0
1,9,8.0,0.0,0.0,0.01
2,9,8.0,0.0,0.0,0.02
3,9,8.0,0.0,0.0,0.03
4,9,8.0,0.0,0.0,0.04


(359100, 5)
second       int64
betti_0    float64
betti_1    float64
betti_2    float64
epsilon    float64
dtype: object


In [128]:
bt_df.describe()

Unnamed: 0,second,betti_0,betti_1,betti_2,epsilon
count,359100.0,359100.0,359100.0,359100.0,359100.0
mean,1804.0,1.975394,0.027664,0.000482,0.495
std,1036.633812,1.923356,0.174743,0.021944,0.288661
min,9.0,1.0,0.0,0.0,0.0
25%,906.0,1.0,0.0,0.0,0.2475
50%,1804.0,1.0,0.0,0.0,0.495
75%,2702.0,2.0,0.0,0.0,0.7425
max,3599.0,8.0,3.0,1.0,0.99


In [124]:
%%time

homology_cnn_features = bt_df.groupby(['second'], as_index=True).apply(
    lambda x: x.set_index('epsilon').drop(['second'], axis=1).T
).reset_index().rename(columns={'level_1': 'betti'})

CPU times: user 3.58 s, sys: 24.7 ms, total: 3.6 s
Wall time: 3.6 s


In [125]:
homology_cnn_features.head()
homology_cnn_features.shape

epsilon,second,betti,0.0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99
0,9,betti_0,8.0,8.0,8.0,8.0,8.0,7.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,3.0,3.0,3.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,9,betti_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,9,betti_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10,betti_0,8.0,8.0,8.0,8.0,8.0,7.0,7.0,7.0,7.0,7.0,7.0,6.0,6.0,6.0,6.0,6.0,6.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,10,betti_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


(10773, 102)

In [129]:
def compute_features_from_ts(df, value_col, epsilon_range, max_simplex_dim, window):
    df_new = compute_persistent_homology_df(
        df=df,
        value_col=value_col,
        epsilon_range=epsilon_range,
        max_simplex_dim=3,
        window=10
    )
    
    features = bt_df.groupby(['second'], as_index=True).apply(
        lambda x: x.set_index('epsilon').drop(['second'], axis=1).T
    ).reset_index().rename(columns={'level_1': 'betti'})
    
    features['channel'] = value_col
    
    return features

In [133]:
features_dict = {
    channel: compute_features_from_ts(
        df=df_secs,
        value_col=channel,
        epsilon_range=epsilon_range,
        max_simplex_dim=3,
        window=10
    )
    for channel in df_secs.columns if channel not in ['-', 'seizure_flag']
}

In [136]:
seiz_betti = pd.concat(features_dict.values(), ignore_index=True)

In [137]:
seiz_betti.head()
seiz_betti.shape

epsilon,second,betti,0.0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99,channel
0,9,betti_0,8.0,8.0,8.0,8.0,8.0,7.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,3.0,3.0,3.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,F8-CS2
1,9,betti_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F8-CS2
2,9,betti_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F8-CS2
3,10,betti_0,8.0,8.0,8.0,8.0,8.0,7.0,7.0,7.0,7.0,7.0,7.0,6.0,6.0,6.0,6.0,6.0,6.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,F8-CS2
4,10,betti_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F8-CS2


(258552, 103)