In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go

In [2]:
# Columns read from the survey export: eight response-time columns and the
# five SD18 items that are summed into the experience score.
time_cols = ['TIME009', 'TIME010', 'TIME011', 'TIME012',
             'TIME013', 'TIME014', 'TIME015', 'TIME016']
sd18_cols = ['SD18_01', 'SD18_02', 'SD18_03', 'SD18_04', 'SD18_05']

df_time = (
    pd.read_csv(
        '../data/data_polar-diagrams_2024-08-09_17-48.csv',
        encoding='utf-16',
        usecols=time_cols + sd18_cols,
    )
    # The first data row is discarded (presumably a second header/label row
    # in the export -- TODO confirm) and the index is renumbered from 0.
    .drop(index=[0])
    .reset_index(drop=True)
    # Only the SD18 items are cast here; the TIME columns are cast later,
    # after reshaping.
    .astype({col: float for col in sd18_cols})
)

# Per-respondent score: row-wise sum of the five SD18 items.
# NOTE(review): DataFrame.sum skips NaN by default, so a missing item counts
# as 0 and lowers the score -- confirm there are no incomplete responses.
df_time['SGL_Score'] = df_time[sd18_cols].sum(axis=1)
# Respondents scoring strictly above 28 are flagged as experienced.
df_time["Experience"] = df_time["SGL_Score"].gt(28)

## Train and Test Data

In [3]:
def _melt_times(time_columns, question_label):
    """Reshape selected response-time columns of ``df_time`` to long format.

    Parameters
    ----------
    time_columns : list of str
        Names of the ``df_time`` columns to stack into a single ``Time`` column.
    question_label : str
        Constant written to the ``Question`` column of every resulting row.

    Returns
    -------
    pandas.DataFrame
        Columns ``Experience``, ``Original_Column`` (the source column name),
        ``Time`` (cast to float) and ``Question``.
    """
    return (
        df_time
        .melt(id_vars=['Experience'], value_vars=time_columns,
              var_name='Original_Column', value_name='Time')
        .assign(Time=lambda long_df: long_df["Time"].astype(float),
                Question=question_label)
    )


# Odd-numbered TIME columns are labelled "Train", even-numbered ones "Test".
df_train_sgl = _melt_times(['TIME009', 'TIME011', 'TIME013', 'TIME015'], "Train")
df_test_sgl = _melt_times(['TIME010', 'TIME012', 'TIME014', 'TIME016'], "Test")

In [4]:
# Stack the train and test frames; the original row labels are kept, so index
# values repeat across the two halves.
df_sgl = pd.concat([df_train_sgl, df_test_sgl])

# Split on the boolean Experience flag: True -> experienced, False -> novice.
df_experienced = df_sgl[df_sgl["Experience"]]
df_novive = df_sgl[~df_sgl["Experience"]]

## Results

In [5]:
# Restrict both experience groups to the rows labelled "Train".
df_novive_train = df_novive.loc[df_novive["Question"].eq("Train")]
df_experienced_train = df_experienced.loc[df_experienced["Question"].eq("Train")]

# Independent two-sample t-test on response time (experienced vs. novice).
# Only the two samples are passed, so SciPy's default options apply.
# NOTE(review): each respondent appears to contribute several rows after the
# melt, so the samples may not be fully independent -- confirm this is intended.
train_ttest = stats.ttest_ind(df_experienced_train["Time"], df_novive_train["Time"])
t_statistic, p_value = train_ttest.statistic, train_ttest.pvalue

# Report the test statistic and its p-value.
print("T-statistic:", t_statistic)
print("P-value:", p_value)


T-statistic: -0.3452109757437086
P-value: 0.7308197269982053


In [6]:
# Restrict both experience groups to the rows labelled "Test".
df_experienced_test = df_experienced.loc[df_experienced["Question"].eq("Test")]
df_novive_test = df_novive.loc[df_novive["Question"].eq("Test")]

# Independent two-sample t-test on response time (experienced vs. novice).
# Only the two samples are passed, so SciPy's default options apply.
# NOTE(review): each respondent appears to contribute several rows after the
# melt, so the samples may not be fully independent -- confirm this is intended.
test_ttest = stats.ttest_ind(df_experienced_test["Time"], df_novive_test["Time"])
t_statistic, p_value = test_ttest.statistic, test_ttest.pvalue

# Report the test statistic and its p-value.
print("T-statistic:", t_statistic)
print("P-value:", p_value)

T-statistic: -0.8342281258794537
P-value: 0.4065761566072903


In [7]:
# Grouped box plot of response times on a log scale: one box trace per
# experience group, with the question type (Train/Test) on the x axis.
box_groups = [
    # (legend name, data frame, box colour)
    ('Expert', df_experienced, '#FC8D62'),
    ('Non-expert', df_novive, '#8DA0CB'),
]

fig = go.Figure()
for group_name, group_df, group_color in box_groups:
    fig.add_trace(go.Box(
        y=group_df["Time"],
        x=group_df["Question"],
        name=group_name,
        marker_color=group_color,
    ))

fig.update_xaxes(showgrid=False, zeroline=True)
fig.update_yaxes(
    type="log",
    zeroline=True, zerolinewidth=1, zerolinecolor='#C3C3C3',
    showgrid=True, gridcolor='#C3C3C3',
)
fig.update_layout(
    template='plotly_dark',
    # Fully transparent plot and paper backgrounds.
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    width=500,  # Figure width in pixels
    yaxis=dict(
        title='Response time (s)',
        # Explicit tick positions for the log axis. The former 0 entry was
        # removed: zero has no logarithm and so no position on this axis.
        tickvals=[5, 10, 20, 40, 100, 200, 400, 900, 2000, 5000],
    ),
    font=dict(
        size=12,
        color="black",
    ),
    # Place the two groups' boxes side by side within each x category.
    boxmode='group',
)
fig.show(config={'displaylogo': False})