# Exploratory Analysis: Wearable Stress and Exam Performance

This notebook explores the structure, quality, and basic patterns of the wearable
physiological dataset used to model exam performance under stress.

In [31]:
import os
import pandas as pd

# Root directory (relative path) of the raw wearable exam-stress dataset
BASE_DATA_PATH = "../data/raw/wearable-exam-stress-dataset"

# Read the grades text file into a headerless frame using the Python parser
# with latin1 decoding. Lines the parser flags as bad are skipped, and missing
# cells are replaced with empty strings.
grades_raw = pd.read_csv(
    os.path.join(BASE_DATA_PATH, "StudentGrades.txt"),
    header=None,
    engine="python",
    encoding="latin1",
    on_bad_lines="skip",
)
grades_raw = grades_raw.fillna("")

# Detect exam sections and extract grades.
# Assumes each exam section starts with a header line containing the exam name,
# followed by one line per student that begins with an "S"-prefixed ID and
# carries the grade as a standalone integer token — TODO confirm against the raw file.
records = []
current_exam = None  # 1 = Midterm 1, 2 = Midterm 2, 3 = Final; None until a header is seen

for _, row in grades_raw.iterrows():
    row_str = " ".join(row.astype(str))

    # Update current exam section
    if "MIDTERM 1" in row_str:  # M1 is out of 100 marks
        current_exam = 1
    elif "MIDTERM 2" in row_str:  # M2 is out of 100 marks
        current_exam = 2
    elif "FINAL" in row_str:  # Final is out of 200 marks
        current_exam = 3

    # Extract student grades (only once a section header has been seen)
    elif current_exam and row_str.strip().startswith("S"):
        parts = row_str.split()
        student_id = next((p for p in parts if p.startswith("S")), None)
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²" that int() rejects with ValueError
        grade_val = next((int(p) for p in parts if p.isdecimal()), None)

        # Compare against None explicitly so a legitimate grade of 0 is kept
        # (a plain truthiness check would silently drop it)
        if student_id is not None and grade_val is not None:
            records.append({
                "student": student_id.replace("S0", "S"),  # e.g. "S01" -> "S1"
                "grade": grade_val,
                "exam": current_exam
            })

# Explicit columns keep the schema stable even when no grade lines were found
grades = pd.DataFrame(records, columns=["student", "grade", "exam"])

grades.head()

Unnamed: 0,student,grade,exam
0,S1,78,1
1,S2,82,1
2,S3,77,1
3,S4,75,1
4,S5,67,1
