In [1]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np

In [2]:
## Load the umap csv
df = pd.read_csv('umap_appended.csv')

## One sub-frame per value of the 'labels' column (0, 1, 2 -> df_umap_1, df_umap_2, df_umap_3)
df_umap_1, df_umap_2, df_umap_3 = (df[df['labels'].eq(label)] for label in range(3))

## describe() summary of each sub-frame, in the same order
df_umap_1_describe, df_umap_2_describe, df_umap_3_describe = (
    subset.describe() for subset in (df_umap_1, df_umap_2, df_umap_3)
)

In [4]:
## (rows, columns) of the labels == 0 subset
df_umap_1.shape

(7172, 344)

In [6]:
## Outlier check: rows of the labels == 0 subset whose respiratory_rate2 exceeds 100.
## Any row returned here pulls the column mean reported by describe() away from its median.
subsetDataFrame = df_umap_1[df_umap_1['respiratory_rate2'] > 100]
## only the last expression of a cell is displayed, so show the offending values directly
subsetDataFrame['respiratory_rate2']

5818    1177788.5
Name: respiratory_rate2, dtype: float64

In [18]:
## describe() statistics of respiratory_rate2 for the labels == 0 subset
## (compare 'mean' with '50%' and 'max' to see how strongly outliers skew the mean)
df_umap_1_describe['respiratory_rate2']

count    7.172000e+03
mean     1.830226e+02
std      1.390722e+04
min      0.000000e+00
25%      1.466667e+01
50%      1.800000e+01
75%      2.200000e+01
max      1.177788e+06
Name: respiratory_rate2, dtype: float64

In [9]:
## sampling function for each label
def sample_plot(df_label, df_describe, n, features, y_label, random_state=None):
    """Plot ``n`` randomly sampled rows of one label against that label's mean and median.

    Parameters
    ----------
    df_label : pandas.DataFrame
        Rows of a single label. Only columns whose name matches ``features``
        are plotted.
    df_describe : pandas.DataFrame
        ``describe()``-style summary with the index labels ``'mean'`` and
        ``'50%'`` (a ``KeyError`` is raised by pandas if either is missing).
    n : int
        Number of rows to draw with ``DataFrame.sample`` (without replacement).
    features : str
        Pattern handed to ``str.contains`` to select columns; it is interpreted
        as a regular expression, so a plain substring such as ``'heart_rate'`` works.
    y_label : str
        Title of the y axis.
    random_state : int, optional
        Seed forwarded to ``DataFrame.sample``. Leave as ``None`` for a fresh
        draw on every call; pass an int to make the figure reproducible.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``; it is not returned, so
        calling this as the last expression of a cell does not render it twice.
    """
    sample = df_label.sample(n, random_state=random_state)

    ## one column per sampled row, one row per matching feature column
    sample_cols = sample.columns.to_series().str.contains(features)
    transpose = sample.loc[:, sample_cols].transpose()
    transpose.columns = ['sample_{}'.format(i) for i in range(len(transpose.columns))]

    ## mean / median of the same feature columns, taken from the describe() summary
    stat_cols = df_describe.columns.to_series().str.contains(features)
    mean_median_Transpose = df_describe.loc[['mean', '50%'], stat_cols].transpose()
    mean_median_Transpose.columns = ['mean', 'median']

    fig = go.Figure()
    for sample_name, values in transpose.items():
        fig.add_trace(go.Scatter(x=transpose.index, y=values, name=sample_name, line=dict(width=2, dash='dash')))
    for stat, color in (('mean', 'firebrick'), ('median', 'royalblue')):
        fig.add_trace(go.Scatter(x=mean_median_Transpose.index, y=mean_median_Transpose[stat], name=stat, line=dict(color=color, width=2)))
    fig.update_layout(xaxis_title='Time', yaxis_title=y_label)
    fig.show()
        

In [11]:
## 5 random rows of the labels == 1 subset against that subset's mean/median heart_rate columns
sample_plot(df_umap_2, df_umap_2_describe, 5, 'heart_rate', 'heart_rate')

In [8]:
def find_mean_median(df, features):
    """Return the mean and median of every column of ``df`` matching ``features``.

    ``df`` is expected to be a ``describe()``-style frame whose index holds
    the labels ``'mean'`` and ``'50%'``. ``features`` is passed to
    ``str.contains`` to pick the columns. The result has one row per matching
    column and the two columns ``'mean'`` and ``'median'``.
    """
    matching = df.columns.to_series().str.contains(features)
    ## one single-column frame per statistic, indexed by the matching column names
    per_stat = [df.loc[[row_label], matching].T for row_label in ('mean', '50%')]
    combined = per_stat[0].merge(per_stat[1], left_index=True, right_index=True)
    combined.columns = ['mean', 'median']
    return combined

def plot_feature_label(features, label1, label2, label3, y_label, stat='mean'):
    """Plot one statistic of the ``features`` columns for three labels on a single figure.

    Parameters
    ----------
    features : str
        Column-name pattern forwarded to ``find_mean_median``.
    label1, label2, label3 : pandas.DataFrame
        ``describe()`` summaries (not the raw rows, and not label names) of the
        three labels; each is passed to ``find_mean_median``.
    y_label : str
        Title of the y axis; the figure title is ``y_label + '_' + stat``.
    stat : {'mean', 'median'}, default 'mean'
        Which statistic to draw.

    Raises
    ------
    ValueError
        If ``stat`` is neither ``'mean'`` nor ``'median'``. Previously such a
        value silently produced no figure at all.
    """
    if stat not in ('mean', 'median'):
        raise ValueError("stat must be 'mean' or 'median', got {!r}".format(stat))

    ## (legend name, describe() frame, line colour) for each label, in drawing order
    series_specs = (
        ('label 1', label1, 'firebrick'),
        ('label 2', label2, 'royalblue'),
        ('label 3', label3, 'green'),
    )
    fig = go.Figure()
    for trace_name, describe_df, color in series_specs:
        mean_median = find_mean_median(describe_df, features)
        fig.add_trace(go.Scatter(x=mean_median.index, y=mean_median[stat], name=trace_name, line=dict(color=color, width=2)))
    fig.update_layout(xaxis_title='Time', yaxis_title=y_label, title=y_label + '_' + stat)
    fig.show()


In [9]:
## Mean respiratory_rate columns of the three labels on one figure.
## NOTE(review): the describe() output for the labels == 0 subset shows a respiratory_rate2
## max of about 1.18e6 (mean ~183 vs. median 18), so the 'label 1' mean curve is likely
## dominated by that single reading -- confirm before interpreting this plot.
plot_feature_label('respiratory_rate', df_umap_1_describe, df_umap_2_describe, df_umap_3_describe, 'respiratory_rate', 'mean')

In [14]:
## Median heart_rate columns of the three labels on one figure.
## plot_feature_label needs the describe() frames themselves (it indexes them with .loc),
## not label names as strings.
plot_feature_label('heart_rate', df_umap_1_describe, df_umap_2_describe, df_umap_3_describe, 'heart_rate', 'median')

In [3]:
## Label analysis function
def plot_features(features, df, y_label):
    """Build a line figure of the mean and median of the ``features`` columns.

    ``df`` is a ``describe()``-style frame whose index contains ``'mean'`` and
    ``'50%'``; ``features`` is passed to ``str.contains`` to choose the
    columns. The figure is returned (not shown), with the matching column
    names on the x axis and ``y_label`` as the y-axis title.
    """
    selected = df.columns.to_series().str.contains(features)
    mean_part = df.loc[['mean'], selected].T
    median_part = df.loc[['50%'], selected].T
    ## join the two single-column frames on their shared index of column names
    stats = mean_part.merge(median_part, left_index=True, right_index=True)
    stats.columns = ['mean', 'median']

    ## one line per statistic, 'mean' first
    fig = go.Figure()
    for stat in ('mean', 'median'):
        fig.add_trace(go.Scatter(x=stats.index, y=stats[stat], name=stat, mode='lines'))
    fig.update_layout(xaxis_title='Time', yaxis_title=y_label)
    return fig

## randomly sample 10 points from each label and plot against the mean and median for each feature
def random_sample(features, df, y_label):
    """Unfinished: prepares the mean/median table for ``features`` and returns None.

    ``df`` is a ``describe()``-style frame whose index contains ``'mean'`` and
    ``'50%'``; columns are chosen with ``str.contains(features)``. The table
    is built but neither used nor returned, and ``y_label`` is not read yet.
    """
    wanted = df.columns.to_series().str.contains(features)
    mean_part = df.loc[['mean'], wanted].T
    median_part = df.loc[['50%'], wanted].T
    stats = mean_part.merge(median_part, left_index=True, right_index=True)
    stats.columns = ['mean', 'median']
    ## TODO: randomly sample 10 points from each label and plot against the mean and median for each feature
    

In [4]:
## Draw 5 rows from the labels == 1 subset. random_state pins the draw so the rows
## displayed here (and anything built from new_df) are the same on every run.
sample_2 = df_umap_2.sample(n=5, random_state=42)
## keep only the columns whose name contains 'heart_rate'
new_df = sample_2.loc[:, sample_2.columns.to_series().str.contains('heart_rate')]
new_df.head()

Unnamed: 0,heart_rate0,heart_rate1,heart_rate2,heart_rate3,heart_rate4,heart_rate5,heart_rate6,heart_rate7,heart_rate8,heart_rate9,...,heart_rate38,heart_rate39,heart_rate40,heart_rate41,heart_rate42,heart_rate43,heart_rate44,heart_rate45,heart_rate46,heart_rate47
7974,78.666667,78.666667,78.666667,78.666667,64.75,62.5,63.5,63.5,59.0,59.0,...,68.0,71.0,73.0,73.0,71.0,93.0,98.0,100.0,104.0,87.0
1988,78.0,78.0,78.0,78.0,78.0,78.0,78.0,78.0,78.0,81.142857,...,84.0,81.0,82.0,82.0,82.0,89.0,83.0,91.0,79.0,80.0
2304,80.25,80.25,80.25,80.25,77.0,80.0,80.0,80.0,80.5,81.2,...,123.0,109.666667,115.5,113.2,112.5,121.0,112.0,124.0,111.0,101.0
6511,60.333333,60.333333,60.333333,60.333333,72.0,68.0,70.0,71.0,78.0,82.0,...,72.0,74.0,78.0,83.0,88.0,97.0,82.0,86.0,92.0,94.0
7077,80.0,80.0,87.833333,87.5,90.0,79.5,67.5,90.0,77.0,74.0,...,85.0,81.0,81.0,86.0,80.0,79.0,93.0,78.0,75.0,74.0


In [5]:
## one column per sampled row, one row per heart_rate column
transpose = new_df.transpose()
## replace the original row ids in the column header with sample_0 ... sample_{k-1}
transpose.columns = ['sample_{}'.format(i) for i in range(len(transpose.columns))]
## only the last expression of a cell is displayed, so preview once, after renaming
transpose.head()


Unnamed: 0,sample_0,sample_1,sample_2,sample_3,sample_4
heart_rate0,78.666667,78.0,80.25,60.333333,80.0
heart_rate1,78.666667,78.0,80.25,60.333333,80.0
heart_rate2,78.666667,78.0,80.25,60.333333,87.833333
heart_rate3,78.666667,78.0,80.25,60.333333,87.5
heart_rate4,64.75,78.0,77.0,72.0,90.0


In [6]:
## boolean mask of the describe() columns whose name contains 'heart_rate'
heart_rate_cols = df_umap_2_describe.columns.to_series().str.contains('heart_rate')
mean_df = df_umap_2_describe.loc[['mean'], heart_rate_cols]
median_df = df_umap_2_describe.loc[['50%'], heart_rate_cols]
## flip each one-row frame so the heart_rate column names become the index
mean_Transpose, median_Transpose = mean_df.T, median_df.T
## join the two on that shared index and give the columns readable names
mean_median_Transpose = mean_Transpose.merge(median_Transpose, left_index=True, right_index=True)
mean_median_Transpose.columns = ['mean', 'median']

In [7]:
## plot the dataframe: every sampled row (dash='dash') plus the mean and median lines
fig = go.Figure()
for i, sample_name in enumerate(transpose.columns):
    fig.add_trace(go.Scatter(x=transpose.index, y=transpose.iloc[:, i], name=sample_name, line=dict(width=2, dash='dash')))
for stat, color in (('mean', 'firebrick'), ('median', 'royalblue')):
    fig.add_trace(go.Scatter(x=mean_median_Transpose.index, y=mean_median_Transpose[stat], name=stat, line=dict(color=color, width=2)))
fig.update_layout(xaxis_title='Time', yaxis_title='Heart Rate')
fig.show()

In [8]:
## mean and median of the heart_rate columns only, one line each
fig = go.Figure()
for stat in ('mean', 'median'):
    fig.add_trace(go.Scatter(x=mean_median_Transpose.index, y=mean_median_Transpose[stat], name=stat, mode='lines'))
fig.update_layout(xaxis_title='Time', yaxis_title='Heart Rate')
fig.show()

In [None]:
## Label analysis function
def plot_features(features, df, y_label):
    """Return a line figure of the mean and median of the ``features`` columns.

    ``df`` must be a ``describe()``-style frame with the index labels
    ``'mean'`` and ``'50%'``; the columns are selected with
    ``str.contains(features)``. The x axis holds the matching column names
    and ``y_label`` is used as the y-axis title.
    """
    ## NOTE(review): an earlier cell of this notebook defines plot_features with the same
    ## body; whichever cell runs last wins. Consider keeping a single definition.
    col_mask = df.columns.to_series().str.contains(features)
    stat_frames = {row: df.loc[[row], col_mask].T for row in ('mean', '50%')}
    table = stat_frames['mean'].merge(stat_frames['50%'], left_index=True, right_index=True)
    table.columns = ['mean', 'median']

    fig = go.Figure()
    for column in ('mean', 'median'):
        fig.add_trace(go.Scatter(x=table.index, y=table[column], name=column, mode='lines'))
    fig.update_layout(xaxis_title='Time', yaxis_title=y_label)
    return fig