In [27]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np


In [28]:
from pathlib import Path
# Respondent folders; each one holds one JSON file per recorded session.
paths = ['respondent_1', 'respondent_2', 'respondent_3', 'respondent_4']

# Maps (respondent, 1-based position in chronological order, timestamp string) -> file path.
file_paths = {}

# Session that must be skipped for one particular respondent.
excluded_session = pd.to_datetime('2025-04-21 20:20:49')

for name in paths:
    # The 19 characters before the 5-character extension are the session timestamp
    # (assumes names end in "YYYY_MM_DD_HH_MM_SS.json" - TODO confirm), so sorting
    # on that slice orders the sessions chronologically.
    session_files = sorted(
        (name + '/' + entry.name for entry in Path(name).iterdir() if entry.is_file()),
        key=lambda path: path[-24:-5],
    )
    for position, file in enumerate(session_files, start=1):
        timestamp = file[-24:-5]
        # NOTE(review): none of the folder names in `paths` equals 'Амина', so this
        # exclusion currently never triggers - confirm which respondent it was meant for.
        if name == 'Амина' and pd.to_datetime(timestamp, format='%Y_%m_%d_%H_%M_%S') == excluded_session:
            continue
        file_paths[(name, position, timestamp)] = file

# file_paths

In [29]:
def calculate_fatigue_score(num, answer):
    """
    Score a single questionnaire answer.

    Scoring rules:
      * the answer "Затрудняюсь" scores 1, whatever the question number;
      * questions 1, 2, 5-16: "Да" scores 2, any other answer scores 0;
      * questions 3, 17, 18: "Нет" scores 2, any other answer scores 0;
      * every other question number (for example 4, 19, 20) scores 0.

    NOTE(review): because the "Затрудняюсь" check runs before the question
    lookup, it also gives 1 point for question numbers that are otherwise
    unscored - confirm this is intended.

    :param num: question number.
    :param answer: answer text for that question.
    :return: integer score (0, 1 or 2) for this one answer.
    """
    # An undecided answer is worth one point regardless of the question.
    if answer == "Затрудняюсь":
        return 1

    # Questions where agreement indicates fatigue.
    direct_items = {1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
    # Questions where disagreement indicates fatigue.
    reversed_items = {3, 17, 18}

    if num in direct_items:
        return 2 if answer == "Да" else 0
    if num in reversed_items:
        return 2 if answer == "Нет" else 0
    return 0



In [32]:
import pandas as pd
import json

def transform_df(name: str, df: pd.DataFrame, date=None):
    """
    Attach respondent and session-time columns to one session's log.

    The frame is modified in place and also returned.

    :param name: respondent identifier, stored in the ``id`` column.
    :param df: rows of a single session (``answersLog`` or ``testAnswersLog``).
    :param date: session timestamp string in ``%Y_%m_%d_%H_%M_%S`` format.
        When omitted, the notebook-level variable ``date`` is used, which is how
        the timestamp was obtained before this parameter existed.
    :return: ``df`` with the columns ``id``, ``datetime``, ``weekday``,
        ``weekday_num``, ``hour`` and ``date`` added.
    """
    if date is None:
        # Legacy two-argument call: fall back to the notebook-level loop variable.
        date = globals()["date"]

    df["id"] = name
    df['datetime'] = pd.to_datetime(date, format='%Y_%m_%d_%H_%M_%S')

    # NOTE(review): weekday columns use the calendar day, while `hour` and `date`
    # below are shifted for after-midnight sessions - confirm this is intended.
    df['weekday'] = df['datetime'].dt.day_name()
    df['weekday_num'] = df['datetime'].dt.dayofweek

    # Sessions recorded before 05:00 are treated as part of the previous day:
    # their hour becomes 24..28 and their date is moved one day back.
    clock_hour = df['datetime'].dt.hour
    after_midnight = clock_hour < 5
    df["hour"] = clock_hour + 24 * after_midnight.astype('int64')
    df['date'] = df['datetime'].mask(after_midnight, df['datetime'] - pd.Timedelta(days=1)).dt.date
    return df


# Per-session frames are collected first and concatenated once at the end.
answers_frames = []
test_frames = []
arr = []
for (name, num, date), file_path in file_paths.items():
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    arr.append(data.items())

    # Arithmetic task log of this session.
    if "answersLog" in data:
        temp_df = transform_df(name, pd.DataFrame(data["answersLog"]), date)
        answers_frames.append(temp_df)

    # Questionnaire log of this session, scored answer by answer.
    if "testAnswersLog" in data:
        temp_df = transform_df(name, pd.DataFrame(data["testAnswersLog"]), date)
        temp_df["score"] = temp_df.apply(lambda x: calculate_fatigue_score(x['num'], x['answer']), axis=1)
        test_frames.append(temp_df)

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
answersLog_df = pd.concat(answers_frames, ignore_index=True) if answers_frames else pd.DataFrame()
testAnswersLog_df = pd.concat(test_frames, ignore_index=True) if test_frames else pd.DataFrame()

display(testAnswersLog_df.head())
answersLog_df.head()

Unnamed: 0,num,question,answer,id,datetime,weekday,weekday_num,hour,date,score
0,1,Чувствую общую слабость,Нет,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10,0
1,2,Мне приходится заставлять себя как можно быстр...,Да,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10,2
2,3,Я спокоен и собран,Затрудняюсь,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10,1
3,4,Мне душно,Нет,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10,0
4,5,Хочется хоть немного отвлечься от работы,Да,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10,2


Unnamed: 0,q_num,startTime,endTime,num1,num2,formattedNum1,formattedNum2,formatType,operation,question,answer,correct,correctFlg,id,datetime,weekday,weekday_num,hour,date
0,0,1741590576813,1741590580346,6,5,шесть,пять,2,+,(шесть + пять) % 10 = ?,1,1,1,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10
1,1,1741590580346,1741590582990,5,2,5,2,0,-,|5 - 2| = ?,3,3,1,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10
2,2,1741590582990,1741590585852,6,5,6,5,0,-,|6 - 5| = ?,1,1,1,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10
3,3,1741590585852,1741590590315,9,7,9,7,0,+,(9 + 7) % 10 = ?,6,6,1,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10
4,4,1741590590315,1741590594251,4,1,4,1,0,-,|4 - 1| = ?,3,3,1,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10


### Обработка опросника острого умственного утомления

Индекс умственного утомления (ИУУ)
1) ИУУ < 10 баллов - Отсутствие признаков умственного утомления
2) 10 <= ИУУ < 16 баллов - Легкая степень умственного утомления
3) 16 <= ИУУ < 28 баллов - Умеренная степень умственного утомления
4) ИУУ >= 28 баллов - Сильная степень умственного утомления

In [33]:
# Question 19: its answer is kept as the activity type; the flag is 1 when that
# text starts with 'умственная работа' and 0 otherwise.
# Built with .loc + rename + assign so that each result is a new frame and no
# column is assigned on a slice of testAnswersLog_df.
activity_df = (
    testAnswersLog_df
    .loc[testAnswersLog_df['num'] == 19, ['id', 'datetime', 'answer']]
    .rename(columns={'answer': 'activity_type'})
    .assign(cog_load_flg=lambda frame: frame['activity_type'].str.startswith('умственная работа').astype(int))
)

# Question 20: its answer is converted to an integer self-assessment score.
self_capacity_df = (
    testAnswersLog_df
    .loc[testAnswersLog_df['num'] == 20, ['id', 'datetime', 'answer']]
    .rename(columns={'answer': 'self_score'})
    .assign(self_score=lambda frame: frame['self_score'].astype(int))
)

display(activity_df.head())
self_capacity_df.head()

Unnamed: 0,id,datetime,activity_type,cog_load_flg
18,respondent_1,2025-03-10 10:09:26,"отдых (не у экрана, сон)",0
38,respondent_1,2025-03-10 15:39:44,умственная работа(включая комп игры)/работа с ...,1
58,respondent_1,2025-03-10 19:27:25,умственная работа(включая комп игры)/работа с ...,1
78,respondent_1,2025-03-10 20:39:20,умственная работа(включая комп игры)/работа с ...,1
98,respondent_1,2025-03-11 08:55:58,"отдых (не у экрана, сон)",0


Unnamed: 0,id,datetime,self_score
19,respondent_1,2025-03-10 10:09:26,7
39,respondent_1,2025-03-10 15:39:44,7
59,respondent_1,2025-03-10 19:27:25,4
79,respondent_1,2025-03-10 20:39:20,2
99,respondent_1,2025-03-11 08:55:58,8


In [34]:
# One row per questionnaire session: the fatigue index is the sum of the answer scores.
score_keys = ['id', 'datetime', 'date', 'weekday', 'weekday_num', 'hour']
fatigue_test_scores = (
    testAnswersLog_df
    .groupby(score_keys)
    .agg(score=('score', 'sum'))
    .reset_index()
)

# Attach the activity type and the self-assessment given in the same session;
# left joins keep sessions that lack either answer.
fatigue_test_scores = (
    fatigue_test_scores
    .merge(activity_df, on=['id', 'datetime'], how='left')
    .merge(self_capacity_df, on=['id', 'datetime'], how='left')
)

# Fatigue level: 1 below 10 points, 2 for 10-15, 3 for 16-27, 4 from 28 upwards.
total_score = fatigue_test_scores['score']
fatigue_test_scores['fatigue_lvl'] = np.select(
    [total_score >= 28, total_score >= 16, total_score >= 10],
    [4, 3, 2],
    default=1,
)

fatigue_test_scores.head()

Unnamed: 0,id,datetime,date,weekday,weekday_num,hour,score,activity_type,cog_load_flg,self_score,fatigue_lvl
0,respondent_1,2025-03-10 10:09:26,2025-03-10,Monday,0,10,15,"отдых (не у экрана, сон)",0.0,7.0,2
1,respondent_1,2025-03-10 15:39:44,2025-03-10,Monday,0,15,9,умственная работа(включая комп игры)/работа с ...,1.0,7.0,1
2,respondent_1,2025-03-10 19:27:25,2025-03-10,Monday,0,19,25,умственная работа(включая комп игры)/работа с ...,1.0,4.0,3
3,respondent_1,2025-03-10 20:39:20,2025-03-10,Monday,0,20,30,умственная работа(включая комп игры)/работа с ...,1.0,2.0,4
4,respondent_1,2025-03-11 08:55:58,2025-03-11,Tuesday,1,8,6,"отдых (не у экрана, сон)",0.0,8.0,1


In [35]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Panels drawn for every respondent, top to bottom: (column, panel title).
# The title is used as subplot title, y-axis title and trace name, so every
# panel is labelled with the quantity it actually shows.
panels = [
    ('score', "Индекс умственного утомления"),
    ('fatigue_lvl', "Уровень утомления"),
    ('self_score', "Самооценка работоспособности"),
]

for name, group in fatigue_test_scores.groupby('id'):
    # One figure per respondent with the panels stacked over a shared time axis.
    fig = make_subplots(rows=len(panels), cols=1, shared_xaxes=True,
                        subplot_titles=[title for _, title in panels])

    for row, (column, title) in enumerate(panels, start=1):
        # Line connecting the sessions in time order.
        fig.add_trace(
            go.Scatter(x=group['datetime'], y=group[column], name=title),
            row=row, col=1
        )
        # Semi-transparent markers on top of the line for the individual sessions.
        fig.add_trace(
            go.Scatter(x=group['datetime'],
                       y=group[column],
                       mode='markers',
                       marker=dict(opacity=0.6),
                       name=title),
            row=row, col=1
        )
        fig.update_yaxes(title_text=title, row=row, col=1)

    fig.update_layout(height=300, title_text=f'{name}. Динамика утомления', showlegend=False)

    # The x axis is shared, so only the bottom panel carries its title.
    fig.update_xaxes(title_text="Время", row=len(panels), col=1)

    fig.show()


## Арифметические задачи

In [36]:
# `df` is an alias of answersLog_df, not a copy: the new column appears on both names.
df = answersLog_df
# startTime / endTime are divided by 1000 here (presumably milliseconds -> seconds; confirm).
df['response_time'] = df['endTime'].sub(df['startTime']).div(1000)
df.head()

Unnamed: 0,q_num,startTime,endTime,num1,num2,formattedNum1,formattedNum2,formatType,operation,question,answer,correct,correctFlg,id,datetime,weekday,weekday_num,hour,date,response_time
0,0,1741590576813,1741590580346,6,5,шесть,пять,2,+,(шесть + пять) % 10 = ?,1,1,1,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10,3.533
1,1,1741590580346,1741590582990,5,2,5,2,0,-,|5 - 2| = ?,3,3,1,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10,2.644
2,2,1741590582990,1741590585852,6,5,6,5,0,-,|6 - 5| = ?,1,1,1,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10,2.862
3,3,1741590585852,1741590590315,9,7,9,7,0,+,(9 + 7) % 10 = ?,6,6,1,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10,4.463
4,4,1741590590315,1741590594251,4,1,4,1,0,-,|4 - 1| = ?,3,3,1,respondent_1,2025-03-10 10:09:26,Monday,0,10,2025-03-10,3.936


In [37]:
# Per-session summary of the arithmetic task: response-time statistics and the
# mean of correctFlg. Named aggregation yields the flat column names directly.
task_keys = ['id', 'date', 'weekday', 'datetime', 'hour', 'weekday_num']
meta_df = (
    df.groupby(task_keys)
    .agg(
        response_time_mean=('response_time', 'mean'),
        response_time_std=('response_time', 'std'),
        response_time_median=('response_time', 'median'),
        response_time_max=('response_time', 'max'),
        response_time_min=('response_time', 'min'),
        correctFlg_mean=('correctFlg', 'mean'),
    )
    .reset_index()
)

# Keep only sessions that have both questionnaire scores and task statistics.
q_df = fatigue_test_scores.merge(
    meta_df,
    on=['id', 'date', 'hour', 'datetime', 'weekday', 'weekday_num'],
    how='inner',
)
q_df.set_index(['id', 'date', 'weekday']).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,datetime,weekday_num,hour,score,activity_type,cog_load_flg,self_score,fatigue_lvl,response_time_mean,response_time_std,response_time_median,response_time_max,response_time_min,correctFlg_mean
id,date,weekday,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
respondent_1,2025-03-10,Monday,2025-03-10 10:09:26,0,10,15,"отдых (не у экрана, сон)",0.0,7.0,2,3.09848,1.193218,2.8975,6.148,1.283,0.94
respondent_1,2025-03-10,Monday,2025-03-10 15:39:44,0,15,9,умственная работа(включая комп игры)/работа с ...,1.0,7.0,1,3.04702,1.411728,2.621,7.762,0.33,0.96
respondent_1,2025-03-10,Monday,2025-03-10 19:27:25,0,19,25,умственная работа(включая комп игры)/работа с ...,1.0,4.0,3,3.1307,1.548254,2.6245,7.491,1.331,0.92
respondent_1,2025-03-10,Monday,2025-03-10 20:39:20,0,20,30,умственная работа(включая комп игры)/работа с ...,1.0,2.0,4,2.98634,1.550592,2.544,9.57,0.156,0.94
respondent_1,2025-03-11,Tuesday,2025-03-11 08:55:58,1,8,6,"отдых (не у экрана, сон)",0.0,8.0,1,3.20938,1.450811,3.1585,7.734,1.2,1.0


### Корреляции

In [39]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import dcor

# Materialise the groups once so that subplot titles and heatmaps are produced
# from the same respondent order (groupby order can differ from unique() order).
respondent_groups = list(q_df.groupby('id'))
ids = q_df['id'].unique()
num_ids = len(respondent_groups)

# One row per respondent, one column per correlation method.
methods = ['Pearson', 'Spearman', 'Kendall']
rows, cols = num_ids, len(methods)

# Correlations are computed over every numeric column of q_df ...
numeric_df = q_df.select_dtypes(include=[np.number])
numeric_columns = numeric_df.columns
# ... but only these rows of each matrix are displayed.
selected_rows = ['fatigue_lvl', 'score',  'weekday_num', 'hour', 'cog_load_flg']

fig = make_subplots(
    rows=rows,
    cols=cols,
    subplot_titles=[f"ID {group_name} - {method_name}"
                    for group_name, _ in respondent_groups
                    for method_name in methods],
    horizontal_spacing=0.1,
    vertical_spacing=0.05
)

for i, (name, sub_df) in enumerate(respondent_groups):
    for j, method in enumerate(methods):
        corr = sub_df[numeric_columns].corr(method=method.lower())

        # Blank out the diagonal (self-correlation), then keep only the selected rows.
        off_diagonal = corr.mask(np.eye(len(corr), dtype=bool))
        corr_values = off_diagonal.loc[selected_rows].to_numpy()
        corr_index = selected_rows

        heatmap = go.Heatmap(
            z=corr_values,
            x=corr.columns,
            y=corr_index,
            colorscale='RdBu',
            zmid=0,
            colorbar=dict(title="r", len=0.3),
            showscale=(i == num_ids - 1 and j == len(methods) - 1)  # colorbar only on the last plot
        )

        fig.add_trace(heatmap, row=i+1, col=j+1)

fig.update_layout(
    height=300 * num_ids,  # one 300 px band per respondent
    width=900,
    title_text="Correlation Matrices by Respondent and Method",
    font=dict(size=10),
    margin=dict(l=50, r=80, t=100, b=50)
)

# Rotate the x tick labels of every subplot.
fig.update_xaxes(tickangle=45)

fig.show()

In [43]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import dcor

# Materialise the groups once so that titles and heatmaps share one respondent order.
respondent_groups = list(q_df.groupby('id'))
ids = q_df['id'].unique()
num_ids = len(respondent_groups)

# A single row with one Pearson heatmap per respondent.
rows, cols = 1, num_ids

# Correlations are computed over every numeric column of q_df ...
numeric_df = q_df.select_dtypes(include=[np.number])
numeric_columns = numeric_df.columns

# ... but only these rows of each matrix are displayed.
selected_rows = ['fatigue_lvl', 'score',  'weekday_num', 'hour', 'cog_load_flg']

fig = make_subplots(
    rows=rows,
    cols=cols,
    subplot_titles=[f"ID {group_name} - Pearson" for group_name, _ in respondent_groups],
    horizontal_spacing=0.1,
    vertical_spacing=0.05
)

for i, (name, sub_df) in enumerate(respondent_groups):
    pearson_corr = sub_df[numeric_columns].corr(method='pearson')

    # Blank out the diagonal (self-correlation), then keep only the selected rows.
    # Everything is taken from pearson_corr itself, not from a variable left over
    # from a previously executed cell.
    off_diagonal = pearson_corr.mask(np.eye(len(pearson_corr), dtype=bool))
    corr_values = off_diagonal.loc[selected_rows].to_numpy()
    corr_index = selected_rows

    heatmap = go.Heatmap(
        z=corr_values,
        x=pearson_corr.columns,
        y=corr_index,
        colorscale='RdBu',
        zmid=0,
        colorbar=dict(title="r", len=0.3),
        showscale=(i == num_ids - 1)  # colorbar only on the last plot
    )

    fig.add_trace(heatmap, row=1, col=i+1)

fig.update_layout(
    height=300,
    width=900,
    title_text="Correlation Matrices by Respondent and Method",
    font=dict(size=10),
    margin=dict(l=50, r=80, t=100, b=50)
)

# Rotate the x tick labels of every subplot.
fig.update_xaxes(tickangle=45)

fig.show()

### Все метрики и утомление

In [47]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression
import numpy as np
from scipy import stats

# Respondents, in order of first appearance; the position selects the colour.
ids = q_df['id'].unique()
num_ids = len(ids)

# Columns plotted against the fatigue score ('score' itself is excluded below).
numeric_columns = ['weekday_num', 'hour', "cog_load_flg", 'score',
                   'fatigue_lvl', 'response_time_mean', 'response_time_median', 'response_time_std',
                   'response_time_max', 'response_time_min', 'correctFlg_mean']

# Colour palette for respondents; reused cyclically when there are more respondents.
colors = [
    '#FF0000',  # Bright Red
    '#00FF00',  # Bright Green
    '#0000FF',  # Bright Blue
    '#FF00FF',  # Magenta
    '#00FFFF',  # Cyan
    '#FFFF00',  # Yellow
    '#FFA500',  # Orange
    '#800080',  # Purple
    '#008000',  # Green
    '#000080',  # Navy
    '#800000',  # Maroon
    '#008080'   # Teal
]

# Subplot grid: three columns and as many rows as needed (ceiling division).
metrics = [col for col in numeric_columns if col != 'score']
n_metrics = len(metrics)
cols = 3
rows = (n_metrics + cols - 1) // cols

fig = make_subplots(
    rows=rows,
    cols=cols,
    subplot_titles=[f"{metric} vs Score" for metric in metrics],
    horizontal_spacing=0.1,
    vertical_spacing=0.1
)

for i, metric in enumerate(metrics):
    row = i // cols + 1
    col = i % cols + 1

    for j, respondent_id in enumerate(ids):
        respondent_data = q_df[q_df['id'] == respondent_id].sort_values('datetime')

        if metric == 'cog_load_flg':
            # NOTE(review): only sessions after this date are used for the activity
            # flag - presumably the question did not exist earlier; confirm.
            respondent_data = respondent_data[respondent_data['datetime'] > pd.to_datetime('2025-02-25 00:00:00')]

        # Keep only sessions where both values are present, so that every statistic
        # and the regression below work on the same aligned, NaN-free pairs.
        respondent_data = respondent_data.dropna(subset=[metric, 'score'])
        x_series = respondent_data[metric].astype(float)
        y_series = respondent_data['score'].astype(float)

        pearson_corr = x_series.corr(y_series, method='pearson')
        spearman_corr = x_series.corr(y_series, method='spearman')
        kendall_corr = x_series.corr(y_series, method='kendall')
        # Distance correlation needs at least two paired observations.
        if len(respondent_data) > 1:
            dcor_corr = dcor.distance_correlation(x_series.to_numpy(), y_series.to_numpy())
        else:
            dcor_corr = np.nan

        scatter = go.Scatter(
            x=respondent_data[metric],
            y=respondent_data['score'],
            mode='markers',
            name=f'ID: {respondent_id}',
            marker=dict(
                color=colors[j % len(colors)],
                size=8,
                opacity=0.8
            ),
            showlegend=(i == 0),  # one legend entry per respondent, taken from the first subplot
            legendgroup=f'{respondent_id}',
            hovertemplate=(
                f"ID: {respondent_id}<br>" +
                "DateTime: %{customdata}<br>" +
                f"{metric}: %{{x:.2f}}<br>" +
                "Score: %{y:.2f}<br>" +
                f"Pearson r: {pearson_corr:.3f}<br>" +
                f"Spearman r: {spearman_corr:.3f}<br>" +
                f"Kendall τ: {kendall_corr:.3f}<br>" +
                f"DCOR: {dcor_corr:.3f}"
            ),
            customdata=respondent_data['datetime']
        )
        fig.add_trace(scatter, row=row, col=col)

        # Least-squares trend line of score against the metric.
        x_vals = x_series.to_numpy().reshape(-1, 1)
        y_vals = y_series.to_numpy()

        if len(x_vals) > 1:
            model = LinearRegression().fit(x_vals, y_vals)
            x_range = np.linspace(x_vals.min(), x_vals.max(), 100).reshape(-1, 1)
            y_pred = model.predict(x_range)

            # Hover text of the trend line repeats the correlation values.
            annotation_text = (
                f"ID: {respondent_id}<br>" +
                f"Pearson r: {pearson_corr:.3f}<br>" +
                f"Spearman r: {spearman_corr:.3f}<br>" +
                f"Kendall τ: {kendall_corr:.3f}<br>" +
                f"DCOR: {dcor_corr:.3f}"
            )

            fig.add_trace(
                go.Scatter(
                    x=x_range.flatten(),
                    y=y_pred,
                    mode='lines',
                    name=f'Trend {respondent_id}',
                    line=dict(
                        color=colors[j % len(colors)],
                        width=2,
                        dash='dash'
                    ),
                    showlegend=False,
                    legendgroup=f'{respondent_id}',
                    hovertemplate=annotation_text
                ),
                row=row, col=col
            )

fig.update_layout(
    height=300 * rows,
    width=1200,
    title_text="Metrics vs Score by Respondent",
    font=dict(size=12),
    margin=dict(l=50, r=50, t=100, b=50),
    showlegend=True,
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=1.05
    )
)

# Axis titles for the subplots that actually hold a metric.
for i, metric in enumerate(metrics):
    fig.update_xaxes(title_text=metric, row=i // cols + 1, col=i % cols + 1)
    fig.update_yaxes(title_text="Fatigue score", row=i // cols + 1, col=i % cols + 1)

fig.show()

In [48]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Fatigue score rounded down to a multiple of 5 (adds a column to q_df).
q_df['f_score_window'] = q_df['score'] // 5 * 5
metric = 'response_time_mean'
group_col = 'fatigue_lvl'

# Two panels per row, with as many rows as the respondent list needs.
cols = 2
rows = max(1, (len(paths) + cols - 1) // cols)
fig = make_subplots(rows=rows, cols=cols, subplot_titles=paths, shared_xaxes=True)

# Group values in ascending order; also used as the category order of every x axis.
ordered_groups = sorted(q_df[group_col].dropna().unique())

for i, name in enumerate(paths):
    row = i // cols + 1
    col = i % cols + 1
    respondent_df = q_df[q_df['id'] == name]

    for group_value in ordered_groups:
        group_df = respondent_df[respondent_df[group_col] == group_value]

        # Box with mean marker; its own points are made invisible.
        fig.add_trace(
            go.Box(
                y=group_df[metric],
                name=str(group_value),
                boxmean=True,
                marker=dict(opacity=0),
                showlegend=False
            ),
            row=row, col=col
        )

        # Individual sessions drawn over the box, with the session time in the hover.
        fig.add_trace(
            go.Scatter(
                x=[str(group_value)] * len(group_df),
                y=group_df[metric],
                mode='markers',
                marker=dict(size=5, color='black', opacity=0.4),
                text=group_df['datetime'].astype(str),
                hovertemplate=(
                    f"{group_col}: {group_value}<br>" +
                    f"{metric}: %{{y}}<br>" +
                    "Время: %{text}<extra></extra>"
                ),
                showlegend=False
            ),
            row=row, col=col
        )

# Apply the category order and axis titles to every subplot, not only the first two.
fig.update_xaxes(
    categoryorder='array',
    categoryarray=[str(v) for v in ordered_groups],
    title_text=group_col,
)
fig.update_yaxes(title_text=metric)

fig.update_layout(
    height=300 * rows,
    width=1200,
    title=f"Распределения метрик по группам {group_col} по метрике {metric}",
    margin=dict(t=100),
    font=dict(size=12)
)

fig.show()


In [49]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Fatigue score rounded down to a multiple of 5 (adds a column to q_df).
q_df['f_score_window'] = q_df['score'] // 5 * 5
metric = 'response_time_mean'
group_col = 'cog_load_flg'

# Two panels per row, with as many rows as the respondent list needs.
cols = 2
rows = max(1, (len(paths) + cols - 1) // cols)
fig = make_subplots(rows=rows, cols=cols, subplot_titles=paths, shared_xaxes=True)

# Group values in ascending order; also used as the category order of every x axis.
ordered_groups = sorted(q_df[group_col].dropna().unique())

for i, name in enumerate(paths):
    row = i // cols + 1
    col = i % cols + 1
    respondent_df = q_df[q_df['id'] == name]

    for group_value in ordered_groups:
        group_df = respondent_df[respondent_df[group_col] == group_value]

        # Box with mean marker; its own points are made invisible.
        fig.add_trace(
            go.Box(
                y=group_df[metric],
                name=str(group_value),
                boxmean=True,
                marker=dict(opacity=0),
                showlegend=False
            ),
            row=row, col=col
        )

        # Individual sessions drawn over the box, with the session time in the hover.
        fig.add_trace(
            go.Scatter(
                x=[str(group_value)] * len(group_df),
                y=group_df[metric],
                mode='markers',
                marker=dict(size=5, color='black', opacity=0.4),
                text=group_df['datetime'].astype(str),
                hovertemplate=(
                    f"{group_col}: {group_value}<br>" +
                    f"{metric}: %{{y}}<br>" +
                    "Время: %{text}<extra></extra>"
                ),
                showlegend=False
            ),
            row=row, col=col
        )

# Apply the category order and axis titles to every subplot, not only the first two.
fig.update_xaxes(
    categoryorder='array',
    categoryarray=[str(v) for v in ordered_groups],
    title_text=group_col,
)
fig.update_yaxes(title_text=metric)

fig.update_layout(
    height=300 * rows,
    width=1200,
    title=f"Распределения метрик по группам {group_col} по метрике {metric}",
    margin=dict(t=100),
    font=dict(size=12)
)

fig.show()


### боксплоты

In [54]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Метрики и группирующее поле
# metrics = [
#     'response_time_mean',
#        'response_time_std', 'response_time_max', 'correctFlg_mean'
# ]
# q_df['f_score_window'] = q_df['score'] // 5
metric = 'response_time_mean'
group_col = 'cog_load_flg'

# Grid: two panels per row, with as many rows as the respondent list needs.
cols = 2
rows = max(1, (len(paths) + cols - 1) // cols)
fig = make_subplots(rows=rows, cols=cols, subplot_titles=paths, shared_xaxes=True)

# Group values in ascending order; also used as the category order of every x axis.
ordered_groups = sorted(q_df[group_col].dropna().unique())

# One panel per respondent, one box per group value.
for i, name in enumerate(paths):
    row = i // cols + 1
    col = i % cols + 1
    respondent_df = q_df[q_df['id'] == name]

    for group_value in ordered_groups:
        group_df = respondent_df[respondent_df[group_col] == group_value]

        # Box with mean marker; its own points are made invisible.
        fig.add_trace(
            go.Box(
                y=group_df[metric],
                name=str(group_value),
                boxmean=True,
                marker=dict(opacity=0),
                showlegend=False
            ),
            row=row, col=col
        )

        # Individual sessions drawn over the box, with the session time in the hover.
        fig.add_trace(
            go.Scatter(
                x=[str(group_value)] * len(group_df),
                y=group_df[metric],
                mode='markers',
                marker=dict(size=5, color='black', opacity=0.4),
                text=group_df['datetime'].astype(str),
                hovertemplate=(
                    f"{group_col}: {group_value}<br>" +
                    f"{metric}: %{{y}}<br>" +
                    "Время: %{text}<extra></extra>"
                ),
                showlegend=False
            ),
            row=row, col=col
        )

# Apply the category order to every subplot, not only the first two.
fig.update_xaxes(categoryorder='array', categoryarray=[str(v) for v in ordered_groups])

# Figure-wide settings.
fig.update_layout(
    height=300 * rows,
    width=1200,
    title=f"Распределения метрик по группам {group_col} по метрике {metric} при наведении",
    margin=dict(t=100),
    font=dict(size=12)
)

fig.show()

In [55]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Метрики и группирующее поле
# metrics = [
#     'response_time_mean',
#        'response_time_std', 'response_time_max', 'correctFlg_mean'
# ]
# q_df['f_score_window'] = q_df['score'] // 5
metric = 'response_time_mean'
group_col = 'fatigue_lvl'

# Grid: two panels per row, with as many rows as the respondent list needs.
cols = 2
rows = max(1, (len(paths) + cols - 1) // cols)
fig = make_subplots(rows=rows, cols=cols, subplot_titles=paths, shared_xaxes=True)

# Group values in ascending order; also used as the category order of every x axis.
ordered_groups = sorted(q_df[group_col].dropna().unique())

# One panel per respondent, one box per group value.
for i, name in enumerate(paths):
    row = i // cols + 1
    col = i % cols + 1
    respondent_df = q_df[q_df['id'] == name]

    for group_value in ordered_groups:
        group_df = respondent_df[respondent_df[group_col] == group_value]

        # Box with mean marker; its own points are made invisible.
        fig.add_trace(
            go.Box(
                y=group_df[metric],
                name=str(group_value),
                boxmean=True,
                marker=dict(opacity=0),
                showlegend=False
            ),
            row=row, col=col
        )

        # Individual sessions drawn over the box, with the session time in the hover.
        fig.add_trace(
            go.Scatter(
                x=[str(group_value)] * len(group_df),
                y=group_df[metric],
                mode='markers',
                marker=dict(size=5, color='black', opacity=0.4),
                text=group_df['datetime'].astype(str),
                hovertemplate=(
                    f"{group_col}: {group_value}<br>" +
                    f"{metric}: %{{y}}<br>" +
                    "Время: %{text}<extra></extra>"
                ),
                showlegend=False
            ),
            row=row, col=col
        )

# Apply the category order to every subplot, not only the first two.
fig.update_xaxes(categoryorder='array', categoryarray=[str(v) for v in ordered_groups])

# Figure-wide settings.
fig.update_layout(
    height=300 * rows,
    width=1200,
    title=f"Распределения метрик по группам {group_col} по метрике {metric} при наведении",
    margin=dict(t=100),
    font=dict(size=12)
)

fig.show()