# Hover Time

In [1]:
import os
import pandas as pd
import numpy as np

In [4]:
# Load data
# The path is kept in a named constant because the export cell further down
# derives its output directory from INPUT_CSV (os.path.dirname(INPUT_CSV)).
# NOTE(review): this is an absolute, machine-specific path — adjust it (or make
# it relative to the project) before running on another machine.
INPUT_CSV = "/Users/marlenerueschoff/Documents/Uni/UzK Master/Masterarbeit/Experiment/masterthesis_experiment/Data/mouse_hovers_rows.csv"
df = pd.read_csv(INPUT_CSV)

INPUT CSV (must contain):
  - participant_id : string
  - t_enter_ms     : number (ms since task start when entering area)
  - t_leave_ms     : number (ms since task start when leaving area)
  - duration_ms    : number (hover duration in ms; required by the validation cell below, nominally t_leave_ms - t_enter_ms)

OUTPUT:
  - hover_metrics_participant.csv
      columns: participant_id, total_ms, n_visits, mean_ms, median_ms, std_ms

In [15]:
# Per-participant hover metrics.
# Reads `df` (the raw hover rows), validates and cleans it (`df` is rebound to
# the cleaned copy, which later cells reuse), and builds `metrics` with one row
# per participant_id.

# Validate required columns
required = {"participant_id", "t_enter_ms", "t_leave_ms", "duration_ms"}
missing = required - set(df.columns)
if missing:
    raise ValueError(f"Missing required columns: {', '.join(sorted(missing))}")

# Ensure numeric types.
# duration_ms is coerced as well: it is the column that is filtered and
# aggregated below, and np.isfinite raises TypeError on an object-dtype column
# (which is what read_csv produces if any value is non-numeric). Unparseable
# values become NaN and are dropped by the cleaning step.
for col in ["t_enter_ms", "t_leave_ms", "duration_ms"]:
    df[col] = pd.to_numeric(df[col], errors="coerce")


# Data cleaning: keep rows with a participant_id and a finite, strictly
# positive hover duration.
df = df.loc[
    df["participant_id"].notna()
    & np.isfinite(df["duration_ms"])
    & (df["duration_ms"] > 0)
].copy()


# Aggregate per participant
metrics = (
    df.groupby("participant_id")["duration_ms"]
      .agg(total_hovertime_ms="sum",
           n_hovers="count",
           mean_hovertime_ms="mean",
           median_hovertime_ms="median",
           sd_hovertime_ms="std")
      .reset_index()
)

# The sample standard deviation is NaN for participants with a single hover;
# report 0 instead.
metrics["sd_hovertime_ms"] = metrics["sd_hovertime_ms"].fillna(0.0)

# Same metrics expressed in seconds
metrics["total_hovertime_s"]  = metrics["total_hovertime_ms"]  / 1000.0
metrics["mean_hovertime_s"]   = metrics["mean_hovertime_ms"]   / 1000.0
metrics["median_hovertime_s"] = metrics["median_hovertime_ms"] / 1000.0
metrics["sd_hovertime_s"]    = metrics["sd_hovertime_ms"]    / 1000.0


# Show the first rows
display(metrics.head(10))


Unnamed: 0,participant_id,total_hovertime_ms,n_hovers,mean_hovertime_ms,median_hovertime_ms,sd_hovertime_ms,total_hovertime_s,mean_hovertime_s,median_hovertime_s,sd_hovertime_s
0,5cb2ad99-b1e4-4f9e-b88f-24cb9369f149,13511,10,1351.1,967.0,1155.25759,13.511,1.3511,0.967,1.155258


### Per Participant AND per Product

In [None]:
# Hover metrics per (participant_id, prod_id) pair, exported to CSV.
# Aggregates duration_ms — the column that the per-participant cell validates
# and cleans — rather than "dwell_ms", which no cell in this notebook checks
# or creates.
required_product_cols = {"participant_id", "prod_id", "duration_ms"}
missing_product_cols = required_product_cols - set(df.columns)
if missing_product_cols:
    raise ValueError(
        f"Missing required columns: {', '.join(sorted(missing_product_cols))}"
    )

g2 = df.groupby(["participant_id", "prod_id"])["duration_ms"]
by_participant_product = pd.DataFrame({
    "total_ms": g2.sum(),
    "n_visits": g2.size(),
    "mean_ms": g2.mean(),
    "median_ms": g2.median(),
    "std_ms": g2.std(ddof=1),
}).reset_index()

# The sample standard deviation is NaN for groups with a single hover;
# report 0 instead.
by_participant_product["std_ms"] = by_participant_product["std_ms"].fillna(0.0)

# The file is written next to the input CSV. INPUT_CSV is expected to hold the
# path of the loaded CSV; it is not assigned in this cell.
by_participant_product.to_csv(
    os.path.join(os.path.dirname(INPUT_CSV), "hover_metrics_participant_product.csv"),
    index=False
)
