In [None]:
import numpy as np
import os
import pandas as pd
import plotly.io as pio

# Directory that contains the dataset file (empty string = current working directory).
data_dir = ""

# Name of the pickle file to read; replace with the actual file name if it differs.
file_path = "HRV_ACC_RR_timeseries_24hour_clean_25percent_classes_090124.pkl"

# NOTE(security): unpickling can execute arbitrary code, so only load files from a trusted source.
data = pd.read_pickle(os.path.join(data_dir, file_path))

# Transpose the loaded object into a DataFrame and move the former index into a regular column.
data_all = pd.DataFrame(data).transpose().reset_index()

# Short summary of the loaded dataset.
print(f"Dataset includes {len(data_all)} data points")
print(f"from {len(data_all['Participant_ID'].unique())} unique participants")
depressed_ids = data_all.loc[data_all["depression"] == 1, "Participant_ID"].unique()
print(f"of which {len(depressed_ids)} have a depression diagnosis")

## Count of data entries for each participant

This section generates a bar plot to show the count of data entries for each participant. The bars are color-coded based on the 'depression' status, while a horizontal line represents the median count of entries.


In [None]:
# Bar chart with the number of data entries for each participant
import plotly.express as px

# Number of rows (data entries) recorded per participant
grouped_data = data_all.groupby('Participant_ID').size().reset_index(name='Count')

# Attach each participant's depression flag (first recorded value) and a readable group label.
# Flags other than 1 / 0 get no label (NaN).
depression_values = data_all.groupby('Participant_ID')['depression'].first().reset_index()
depression_values['group_label'] = depression_values['depression'].map(
    {1: 'Depressed patient', 0: 'Healthy control'})

grouped_data = grouped_data.merge(depression_values, on='Participant_ID')

# Median (not mean) number of entries per participant, drawn as a reference line
median_count = grouped_data['Count'].median()

# `labels` is keyed by column name; the former 'color' key matched no column and was ignored,
# leaving the raw column name 'group_label' as the legend title.
fig = px.bar(grouped_data, x='Participant_ID', y='Count', color='group_label',
             title='Data Entry Count per Participant (Color by Depression), Total Entries: ' + str(len(data_all)),
             labels={'group_label': 'Group'},
             color_discrete_map={'Depressed patient': 'darkslategrey', 'Healthy control': 'lightseagreen'})
fig.add_hline(y=median_count, line_dash="dot", annotation_text=f'Median: {median_count:.2f}', annotation_position='bottom right')

# Pseudonymized tick labels, one per participant in the row order of `grouped_data`.
pseudonym_labels = ['P001','P002','P003','P004','P005','P006','P007','H001','P008','H002','H003','P009','P010','H004','P011','P012','P013','H005','H006','H007',
                    'H008','H009','H010','P014','P015','P016']

xaxis_settings = dict(tickmode='array', tickangle=45, tickvals=grouped_data['Participant_ID'])
if len(pseudonym_labels) == len(grouped_data):
    xaxis_settings['ticktext'] = pseudonym_labels
# Otherwise the hard-coded list no longer matches the dataset: leave `ticktext` unset so the
# axis shows the real participant IDs instead of silently attaching labels to the wrong bars.

fig.update_layout(xaxis_title='Participant', yaxis_title='Data Entry Count', xaxis=xaxis_settings)

# Display the plot
fig.show()

## Plot the average HR for each participant

In [None]:
import plotly.graph_objects as go

# Empty figure; the per-participant loop later in this cell adds one line trace to it per participant.
fig = go.Figure()

def calculate_mean_curve(data, expected_length=288):
    """Average equally long curves into one element-wise mean curve.

    Parameters
    ----------
    data : iterable of sequences
        One sequence per recording, e.g. a pandas Series whose cells hold lists or arrays.
    expected_length : int, default 288
        Only sequences with exactly this many samples are averaged; all others
        (including entries without a length, such as a missing value) are skipped.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``expected_length`` holding the NaN-ignoring mean of the
        selected sequences. If no sequence has the expected length, an all-NaN array is
        returned instead of raising, so a participant without a complete recording does
        not abort the surrounding loop.
    """
    complete_curves = [
        np.asarray(curve, dtype=float)
        for curve in data
        if hasattr(curve, "__len__") and len(curve) == expected_length
    ]
    if not complete_curves:
        # np.vstack raises on an empty list; report "no data" explicitly instead.
        return np.full(expected_length, np.nan)
    return np.nanmean(np.vstack(complete_curves), axis=0)

def moving_average(data, window_size):
    """Return the simple moving average of a 1-D signal.

    Parameters
    ----------
    data : array-like
        Input samples; flattened to one dimension before averaging.
    window_size : int
        Number of consecutive samples averaged per output point. Must be positive.

    Returns
    -------
    numpy.ndarray
        Array of length ``len(data) - window_size + 1`` where element ``i`` is the mean of
        ``data[i:i + window_size]``. Empty if ``window_size`` exceeds the number of samples.

    Raises
    ------
    ValueError
        If ``window_size`` is not positive.

    Notes
    -----
    The earlier cumulative-sum version subtracted ``cumsum[:-window_size]`` from
    ``cumsum[window_size:]`` without a leading zero, which dropped the first window.
    A single NaN also contaminated every later output through the running sum; here a
    NaN only affects the windows that actually contain it.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")
    values = np.asarray(data, dtype=float).ravel()
    if window_size > values.size:
        # 'valid' convolution would swap the operands in this case, so return no windows.
        return np.empty(0, dtype=float)
    window_sums = np.convolve(values, np.ones(window_size), mode="valid")
    return window_sums / window_size

# Participant IDs in order of first appearance in the dataset.
unique_participants = data_all['Participant_ID'].unique()

# Number of consecutive samples averaged per smoothed point.
# presumably one hour if the 288-sample curves are 5-minute bins; TODO confirm
smoothing_window = 12

for participant_id in unique_participants:
    belongs_to_participant = data_all['Participant_ID'] == participant_id
    participant_data = data_all[belongs_to_participant]

    # NOTE(review): the curve is built from the 'step_count' column although the figure is
    # labeled as heart rate; confirm which signal is intended.
    mean_curve = calculate_mean_curve(participant_data['step_count'])

    # The first row's flag is taken as the participant's depression status (assumed constant).
    depression_status = participant_data['depression'].iloc[0]
    if depression_status == 1:
        color = 'purple'
    else:
        color = 'green'

    smoothed_curve = moving_average(mean_curve, smoothing_window)

    participant_trace = go.Scatter(
        y=smoothed_curve,
        mode='lines',
        name=f'Participant {participant_id}',
        line=dict(color=color),
    )
    fig.add_trace(participant_trace)

layout_options = dict(
    title='Smoothed Average Heart Rate Curves for Participants by Depression Status',
    xaxis_title='Time',
    yaxis_title='Heart Rate',
)
fig.update_layout(**layout_options)

fig.show()

## Plot valence level distribution 

### Depressed vs Healthy

In [None]:
# Stacked histogram of valence level, one color per depression status.
# `labels` is keyed by column name: the former 'color' key matched no column and was ignored,
# so the legend title is now attached to the 'depression' column it was meant for.
fig = px.histogram(data_all, x="valence_level", color="depression", barmode='stack',
                   title="Distribution of Valence Level by Depression Status",
                   labels={'depression': 'Depression (1/0)'},
                   color_discrete_map={1: 'purple', 0: 'green'},
                   nbins=12)

fig.update_layout(bargap=0.1)  # To adjust the gap between bars

fig.show()

from plotly.subplots import make_subplots

# Get unique participants
unique_participants = data_all['Participant_ID'].unique()

# Size the grid from the number of participants. A fixed 6 x 4 grid only holds 24 subplots,
# and placing a 25th participant in row 7 makes `add_trace` fail with an out-of-range error.
num_cols = 4  # Number of columns in the grid
num_rows = max(1, -(-len(unique_participants) // num_cols))  # ceiling division

fig = make_subplots(rows=num_rows, cols=num_cols,
                    subplot_titles=[str(participant) for participant in unique_participants])

for position, participant_id in enumerate(unique_participants):
    # 1-based grid position, filled row by row
    row = position // num_cols + 1
    col = position % num_cols + 1

    # Filter data for the participant
    participant_data = data_all[data_all['Participant_ID'] == participant_id]

    # Set the color based on depression status (taken from the participant's first row)
    color = 'purple' if participant_data['depression'].iloc[0] == 1 else 'green'

    # Create a histogram for valence level with color based on depression status
    fig.add_trace(go.Histogram(x=participant_data['valence_level'], name=str(participant_id), marker_color=color), row=row, col=col)

# Update layout; the height grows by 150 px per row beyond the original six (6 * 150 = 900)
fig.update_layout(height=max(900, 150 * num_rows), width=1200, title='Valence Level Histograms for Each Participant')
fig.update_xaxes(title_text='Valence Level')
fig.update_yaxes(title_text='Frequency')

fig.show()

In [None]:
# Overlaid histogram of the 'kss' column, one semi-transparent trace per depression status
import plotly.express as px

# NOTE: this changes the default template for every figure created from here on.
pio.templates.default = "plotly_white"

# The former `labels={0: ..., 1: ...}` argument was dropped: `labels` is keyed by column
# names, so integer keys were ignored. Legend entries are renamed through `newnames` below.
fig = px.histogram(data_all, x="kss", color="depression",
                   color_discrete_map={0: 'lightseagreen', 1: 'darkslategrey'},
                   barmode="overlay", opacity=.7, range_y=[0, 160])

# Trace names are the stringified values of the 'depression' column
newnames = {'1': 'Depressed patient', '0': 'Healthy control'}

# NOTE(review): the x axis shows 'kss' but is titled 'Arousal'; confirm this is intended.
fig.update_layout(
    height=500,
    width=450,
    autosize=False, legend_title_text='', xaxis_title='Arousal', yaxis_title='Number of assessments', font=dict(size=14))

# Keep the original name for any trace not listed in `newnames` instead of raising KeyError
fig.for_each_trace(lambda t: t.update(name=newnames.get(t.name, t.name)))
fig.show()

### Morning vs Afternoon vs Evening (depressed participants only)

In [None]:
# Keep only the rows flagged as depressed (depression == 1).
data_depressed = data_all.loc[data_all["depression"] == 1]

# Grouped (side-by-side) histogram of valence level, one color per `quest_type` value;
# histnorm="percent" plots percentages instead of raw counts.
valence_histogram_options = dict(
    x="valence_level",
    color="quest_type",
    barmode='group',
    title="Distribution of Valence Level by Period of the Day",
    nbins=12,
    histnorm="percent",
)
fig = px.histogram(data_depressed, **valence_histogram_options)

# Leave a small gap between neighboring bars.
fig.update_layout(bargap=0.1)

fig.show()

## Plot arousal level distribution 

In [None]:
# Stacked histogram of arousal level, one color per depression status.
# `labels` is keyed by column name: the former 'color' key matched no column and was ignored,
# so the legend title is now attached to the 'depression' column it was meant for.
fig = px.histogram(data_all, x="arousal_level", color="depression", barmode='stack',
                   title="Distribution of Arousal Level by Depression Status",
                   labels={'depression': 'Depression (1/0)'},
                   color_discrete_map={1: 'purple', 0: 'green'},
                   nbins=12)

fig.update_layout(bargap=0.1)  # To adjust the gap between bars

fig.show()

# Get unique participants
unique_participants = data_all['Participant_ID'].unique()

# Size the grid from the number of participants. A fixed 6 x 4 grid only holds 24 subplots,
# and placing a 25th participant in row 7 makes `add_trace` fail with an out-of-range error.
num_cols = 4  # Number of columns in the grid
num_rows = max(1, -(-len(unique_participants) // num_cols))  # ceiling division

fig = make_subplots(rows=num_rows, cols=num_cols,
                    subplot_titles=[str(participant) for participant in unique_participants])

for position, participant_id in enumerate(unique_participants):
    # 1-based grid position, filled row by row
    row = position // num_cols + 1
    col = position % num_cols + 1

    # Filter data for the participant
    participant_data = data_all[data_all['Participant_ID'] == participant_id]

    # Set the color based on depression status (taken from the participant's first row)
    color = 'purple' if participant_data['depression'].iloc[0] == 1 else 'green'

    # Create a histogram for arousal level with color based on depression status
    fig.add_trace(go.Histogram(x=participant_data['arousal_level'], name=str(participant_id), marker_color=color), row=row, col=col)

# Update layout; the height grows by 150 px per row beyond the original six (6 * 150 = 900)
fig.update_layout(height=max(900, 150 * num_rows), width=1200, title='Arousal Level Histograms for Each Participant')
fig.update_xaxes(title_text='Arousal Level')
fig.update_yaxes(title_text='Frequency')

fig.show()

### Morning vs Afternoon vs Evening (depressed participants only)

In [None]:
# Keep only the rows flagged as depressed (depression == 1).
data_depressed = data_all.loc[data_all["depression"] == 1]

# Grouped (side-by-side) histogram of arousal level, one color per `quest_type` value;
# histnorm="percent" plots percentages instead of raw counts.
arousal_histogram_options = dict(
    x="arousal_level",
    color="quest_type",
    barmode='group',
    title="Distribution of Arousal Level by Period of the Day",
    nbins=12,
    histnorm="percent",
)
fig = px.histogram(data_depressed, **arousal_histogram_options)

# Leave a small gap between neighboring bars.
fig.update_layout(bargap=0.1)

fig.show()

## Plot MOOD 1 level distribution 

In [None]:
# Stacked histogram of the mood1 score, one color per depression status.
# `labels` is keyed by column name: the former 'color' key matched no column and was ignored,
# so the legend title is now attached to the 'depression' column it was meant for.
fig = px.histogram(data_all, x="mood1", color="depression", barmode='stack',
                   title="Distribution of Mood 1 Level by Depression Status",
                   labels={'depression': 'Depression (1/0)'},
                   color_discrete_map={1: 'purple', 0: 'green'},
                   nbins=12)

fig.update_layout(bargap=0.1)  # To adjust the gap between bars

fig.show()

# Get unique participants
unique_participants = data_all['Participant_ID'].unique()

# Size the grid from the number of participants. A fixed 6 x 4 grid only holds 24 subplots,
# and placing a 25th participant in row 7 makes `add_trace` fail with an out-of-range error.
num_cols = 4  # Number of columns in the grid
num_rows = max(1, -(-len(unique_participants) // num_cols))  # ceiling division

fig = make_subplots(rows=num_rows, cols=num_cols,
                    subplot_titles=[str(participant) for participant in unique_participants])

for position, participant_id in enumerate(unique_participants):
    # 1-based grid position, filled row by row
    row = position // num_cols + 1
    col = position % num_cols + 1

    # Filter data for the participant
    participant_data = data_all[data_all['Participant_ID'] == participant_id]

    # Set the color based on depression status (taken from the participant's first row)
    color = 'purple' if participant_data['depression'].iloc[0] == 1 else 'green'

    # Create a histogram for the mood1 score with color based on depression status
    fig.add_trace(go.Histogram(x=participant_data['mood1'], name=str(participant_id), marker_color=color), row=row, col=col)

# Update layout; the height grows by 150 px per row beyond the original six (6 * 150 = 900)
fig.update_layout(height=max(900, 150 * num_rows), width=1200, title='Mood1 Level Histograms for Each Participant')
fig.update_xaxes(title_text='Mood 1 Level')
fig.update_yaxes(title_text='Frequency')

fig.show()