In [None]:
# One-time setup: uncomment and run the next line if matplotlib is not installed
# in this kernel's environment.
# %pip install matplotlib

import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


# CSV files this notebook writes further down; any copies left over from an
# earlier run are removed here before the analysis starts.
files_to_delete = [
    "rrnl_all_tidy_cleaned_times.csv",
    "rrnl_all_tidy_cleaned_times_1.csv",
]

for stale_path in files_to_delete:
    # Guard first: report a missing file and move on to the next name.
    if not os.path.exists(stale_path):
        print(f"Not found: {stale_path}")
        continue
    os.remove(stale_path)
    print(f"Deleted: {stale_path}")

In [None]:
# Display settings: never truncate columns of wide frames, and cap the number
# of rows pandas prints at 50.
pd.options.display.max_columns = None
pd.options.display.max_rows = 50

In [None]:
# Load the raw tidy export that the following cells clean and analyse.
df = pd.read_csv("rrnl_all_tidy(6).csv")

# Remember the starting row count and report it.
original_rows = df.shape[0]
print("Number of rows:", original_rows)

In [None]:
# Preview the first rows of the loaded frame (rendered as the cell output).
df.head()

In [None]:
# Show the column labels of the loaded frame.
df.columns

In [None]:
# Columns removed from the working frame before the cleaning steps.
columns_to_drop = [
    "Reject reason",
    "reject time",
    "instruction",
    "description",
    "trialNumber",
]

# errors="ignore" skips any listed label that is absent instead of raising.
df = df.drop(labels=columns_to_drop, axis="columns", errors="ignore")

In [None]:
original_rows = len(df)

# Flag rows whose start or end timestamp is the literal text "undefined"
# (compared after trimming surrounding whitespace and lower-casing).
start_undefined = df["startTime"].str.strip().str.lower() == "undefined"
end_undefined = df["endTime"].str.strip().str.lower() == "undefined"

# Keep only the rows where neither timestamp was flagged.
df = df[~(start_undefined | end_undefined)]

new_rows = len(df)

print("Rows removed:", original_rows - new_rows)
print("Remaining rows:", new_rows)

In [None]:
# Write the current frame to CSV; index=False leaves the row index out of the file.
df.to_csv("rrnl_all_tidy_cleaned_times.csv", index=False)

In [None]:
original_rows = len(df)

# Trial-ID prefixes that identify the trials kept for analysis:
# groups A-D, each with blood-glucose / blood-pressure, normal / abnormal.
valid_prefixes = (
    "A_bloodGlucose_abnormal",
    "A_bloodGlucose_normal",
    "A_bloodPressure_normal",
    "A_bloodPressure_abnormal",
    "B_bloodGlucose_abnormal",
    "B_bloodGlucose_normal",
    "B_bloodPressure_normal",
    "B_bloodPressure_abnormal",
    "C_bloodGlucose_abnormal",
    "C_bloodGlucose_normal",
    "C_bloodPressure_normal",
    "C_bloodPressure_abnormal",
    "D_bloodGlucose_abnormal",
    "D_bloodGlucose_normal",
    "D_bloodPressure_normal",
    "D_bloodPressure_abnormal"
)

# Keep only trials whose ID starts with one of the prefixes; na=False makes a
# missing trialId count as a non-match, so those rows are dropped too.
df = df[df["trialId"].str.startswith(valid_prefixes, na=False)]

cols_to_check = ["startTime", "endTime", "answer", "correctAnswer"]

for col in cols_to_check:
    # Cast to str before using the .str accessor: if pandas parsed the column
    # as numeric (e.g. every answer is a number), .str would raise
    # AttributeError. Missing values are rejected by notna() below, so the
    # "nan" text produced by the cast never decides the outcome.
    as_text = df[col].astype(str).str.strip().str.lower()
    df = df[df[col].notna() & (as_text != "undefined")]

new_rows = len(df)

print("Rows removed:", original_rows - new_rows)
print("Remaining rows:", new_rows)

In [None]:
# Write the current frame to CSV; index=False leaves the row index out of the file.
df.to_csv("rrnl_all_tidy_cleaned_times_1.csv", index=False)

In [None]:
import pandas as pd


# Reload the cleaned trials from disk.
df = pd.read_csv("rrnl_all_tidy_cleaned_times_1.csv")

# Parse both timestamp columns as timezone-aware (UTC) datetimes.
for time_col in ("startTime", "endTime"):
    df[time_col] = pd.to_datetime(df[time_col], utc=True)

# Reaction time is the end-minus-start difference, expressed in seconds.
elapsed = df["endTime"] - df["startTime"]
df["reactionTime_seconds"] = elapsed.dt.total_seconds()

# Overwrite the same CSV so it now carries the new column.
df.to_csv("rrnl_all_tidy_cleaned_times_1.csv", index=False)

print("Reaction time column added successfully.")


In [None]:
import pandas as pd

# Reload the cleaned trials file; reactionTime_seconds is read from it.
df = pd.read_csv("rrnl_all_tidy_cleaned_times_1.csv")

# The group label is the first character of trialId.
df["Group"] = df["trialId"].str.get(0)

# Mean reaction time per group, returned as a two-column frame.
reaction_by_group = df.groupby("Group")["reactionTime_seconds"]
avg_times = reaction_by_group.mean().reset_index()

print(avg_times)

In [None]:
# Reload the cleaned trials file and derive the group label from trialId.
df = pd.read_csv("rrnl_all_tidy_cleaned_times_1.csv")
df["Group"] = df["trialId"].str.get(0)

# Per-group mean, standard deviation and number of non-missing reaction times.
grouped_times = df.groupby("Group")["reactionTime_seconds"]
summary = grouped_times.agg(["mean", "std", "count"]).reset_index()

# Standard error of the mean: std divided by the square root of the count.
summary["sem"] = summary["std"].div(np.sqrt(summary["count"]))

In [None]:
# Plot the per-group mean from `summary` with its SEM as error bars.
fig, ax = plt.subplots(figsize=(10, 6))

ax.errorbar(
    summary["Group"],
    summary["mean"],
    yerr=summary["sem"],
    linestyle='--',
    marker='o',
    capsize=5,
    # The legend needs at least one labelled artist; without a label,
    # legend() only emits a warning and draws nothing.
    label="Mean ± SEM",
)

ax.set_title('Total time completion for Value & Verbatim Tasks')
ax.set_ylabel("Task completion time (s)")
ax.set_xlabel("Graph Group")
# Fixed y-range kept as in the original figure.
# NOTE(review): a group mean outside 10-25 s would fall off the axes; confirm
# this range still fits the data.
ax.set_ylim(10, 25)
ax.grid(True, alpha=0.3)
ax.legend()
plt.show()