Upload & Load Dataset

In [None]:
from google.colab import files
import pandas as pd

# Open Colab's upload widget; the result maps each uploaded file name to its
# contents.
uploaded = files.upload()

if not uploaded:
    # An empty result (e.g. the dialog was dismissed) would otherwise surface
    # as an opaque IndexError when picking the first file name.
    raise ValueError(
        "No file was uploaded; re-run this cell and choose a dataset file."
    )

# Only the first uploaded file is loaded; any additional files are ignored.
file_name = next(iter(uploaded))

# Pick the reader from the (case-insensitive) file extension.
if file_name.lower().endswith((".xlsx", ".xls")):
    df = pd.read_excel(file_name)
else:
    # Everything else is treated as CSV and decoded as latin-1.
    df = pd.read_csv(file_name, encoding="latin1")


Saving Merge.csv.xlsx to Merge.csv.xlsx


Define Required Columns

In [None]:
# Names of the dataset columns that the rest of the notebook reads.

# Text column; later lower-cased and matched against a list of "attempted" labels.
COL_ATTEMPT_TEXT = "Attempt Status"
# Text column; later lower-cased and matched against a list of "correct" labels.
COL_CORRECT_TEXT = "Is Correct"
# Response time; coerced to numeric later (seconds, going by the column name).
COL_RT_SEC = "Response Time (sec)"
# Round-trip time; coerced to numeric later (milliseconds, going by the column name).
COL_RTT = "RTT (ms)"
# Network jitter; coerced to numeric later (milliseconds, going by the column name).
COL_JITTER = "Jitter (ms)"


Create Binary Flags

In [None]:
def _flag_from_labels(column, accepted):
    """Return a 0/1 integer Series marking entries whose text is in `accepted`.

    Each value is converted to text, stripped of surrounding whitespace and
    lower-cased before matching. This keeps the cell working when the column
    was parsed as boolean or numeric (where the `.str` accessor would raise)
    and recognises padded strings such as " Yes ". Missing values never match
    and therefore map to 0.
    """
    normalised = column.astype(str).str.strip().str.lower()
    return normalised.isin(accepted).astype(int)


# 1 when the attempt-status text is one of the "attempted" labels, else 0.
df["attempt_status"] = _flag_from_labels(
    df[COL_ATTEMPT_TEXT], ["completed", "attempted", "done"]
)

# 1 when the answer is marked correct, else 0. "1.0" covers a numeric 1 that
# was stored as a float (e.g. a 0/1 column that also contains blanks).
df["is_correct"] = _flag_from_labels(
    df[COL_CORRECT_TEXT], ["yes", "true", "1", "1.0"]
)


Process Network Parameters

In [None]:
# Timing / network columns handled by this cell.
metric_columns = [COL_RT_SEC, COL_RTT, COL_JITTER]

# Force each column to a numeric dtype; entries that cannot be parsed become NaN.
for metric in metric_columns:
    df[metric] = pd.to_numeric(df[metric], errors="coerce")

# 1 only when both RTT and jitter hold a value. This must be computed before
# the NaNs are replaced below, otherwise every row would look "retrieved".
df["net_retrieved"] = df[[COL_RTT, COL_JITTER]].notna().all(axis=1).astype(int)

# Replace the remaining gaps with 0 so later arithmetic and comparisons see no NaN.
df[metric_columns] = df[metric_columns].fillna(0)


Initialize Engagement Score

In [None]:
# Every row starts at 0.0; the following cells overwrite the score per group.
df["engagement_score"] = 0.0

# Boolean row masks derived from the 0/1 attempt flag.
attempt_flag = df["attempt_status"]
attempted = attempt_flag.eq(1)
not_attempted = attempt_flag.eq(0)


Engagement Score – Attempted Students

In [None]:
# Negative response times are clamped to 0: a value of -1 would otherwise
# divide by zero (giving inf) and anything below -1 would give a negative
# speed term. Zero is also what missing response times were filled with.
response_time = df.loc[attempted, COL_RT_SEC].clip(lower=0)

# Speed term: 1 for an instant answer, falling towards 0 as the time grows.
speed_term = 1 / (response_time + 1)

# Attempted rows: 60% weight on correctness, 40% on the speed term.
df.loc[attempted, "engagement_score"] = (
    df.loc[attempted, "is_correct"] * 0.6 +
    speed_term * 0.4
)


Engagement Score – Not Attempted

In [None]:
# Thresholds above which the connection is treated as poor (same units as
# the RTT / jitter columns).
POOR_RTT = 3000
POOR_JITTER = 1500

# A row has a poor network when either metric exceeds its threshold.
rtt_too_high = df[COL_RTT].gt(POOR_RTT)
jitter_too_high = df[COL_JITTER].gt(POOR_JITTER)
poor_network = rtt_too_high | jitter_too_high

# Rows without an attempt: 0.0 on a usable network, 0.45 on a poor one.
df.loc[not_attempted & ~poor_network, "engagement_score"] = 0.0
df.loc[not_attempted & poor_network, "engagement_score"] = 0.45


Apply Network Penalty

In [None]:
# Rows with no attempt even though both network metrics were retrieved.
penalised_rows = df["attempt_status"].eq(0) & df["net_retrieved"].eq(1)

# Take 0.1 off those rows. NOTE: this subtracts from the current values, so
# running this cell more than once applies the penalty repeatedly.
df.loc[penalised_rows, "engagement_score"] = (
    df.loc[penalised_rows, "engagement_score"] - 0.1
)

# Scores may not go below zero.
df["engagement_score"] = df["engagement_score"].clip(lower=0)


Scale Engagement Score

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the score (zero mean, unit variance) into a separate column.
# NOTE(review): the scaler is fitted on every row, before the dataset is split
# into train/val/test later in the notebook, so held-out rows contribute to
# the mean and standard deviation — confirm this is intended.
scaler = StandardScaler()
raw_scores = df[["engagement_score"]]
scaler.fit(raw_scores)
df["engagement_score_scaled"] = scaler.transform(raw_scores)


Split Dataset (70 / 15 / 15)

In [None]:
from sklearn.model_selection import train_test_split

# Both splits use the same fixed seed so the partition is reproducible.
split_options = {"random_state": 42}

# First hold out 30% of the rows ...
train_df, temp_df = train_test_split(df, test_size=0.30, **split_options)

# ... then cut the held-out part in half: validation and test (15% each overall).
val_df, test_df = train_test_split(temp_df, test_size=0.50, **split_options)


Save & Download Files

In [None]:
# Output file name -> split to store in it.
split_outputs = {
    "engagement_train.csv": train_df,
    "engagement_val.csv": val_df,
    "engagement_test.csv": test_df,
}

# Write all three CSVs first (without the index column) ...
for csv_name, split_frame in split_outputs.items():
    split_frame.to_csv(csv_name, index=False)

# ... then trigger a browser download for each of them.
for csv_name in split_outputs:
    files.download(csv_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>