In [10]:
# Alex Tabakian

import os
import re
import pandas as pd
from statistics import median

# Path of the raw attendance export; it is opened below as UTF-8 text.
INPUT_FILE = "/content/class.txt"
# Evaluation days written as "<month abbreviation> <day>". They carry no year
# here; normalize_eval() attaches one and rewrites each entry as 'YYYY-MM-DD'
# so the values can be compared with the normalized class dates.
EVAL_DATES = ["Oct 7", "Nov 11", "Nov 18"]

import datetime
def normalize_eval(d, year=2025):
    """Convert a year-less evaluation date such as 'Oct 7' to 'YYYY-MM-DD'.

    Args:
        d: Month abbreviation followed by the day of the month, e.g. 'Oct 7'.
            Leading/trailing whitespace is ignored.
        year: Calendar year to attach to the date. Defaults to 2025, the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        The date as an ISO 'YYYY-MM-DD' string.

    Raises:
        ValueError: If ``d`` does not match the '<Mon> <day>' layout or is not
            a valid day in ``year``.
    """
    # The year is parsed together with month/day so that strptime validates
    # the full date (e.g. 'Feb 29' is only accepted in a leap year).
    parsed = datetime.datetime.strptime(f"{d.strip()} {year}", "%b %d %Y")
    return parsed.strftime("%Y-%m-%d")

# Rewrite the configured evaluation dates in place as 'YYYY-MM-DD' strings.
EVAL_DATES = list(map(normalize_eval, EVAL_DATES))

# Read the whole export into memory.
with open(INPUT_FILE, mode="r", encoding="utf-8") as source:
    text = source.read()

# Keep only non-blank lines, with surrounding whitespace removed.
lines = []
for raw_line in text.splitlines():
    stripped = raw_line.strip()
    if stripped:
        lines.append(stripped)

# Parser state: normalized date -> list of (number, name, username) rows,
# plus the date section currently being filled.
attendance = {}
current_date = None

def normalize_date(date_str):
    """Convert '19 Aug 2025' or '19 August 2025' to '2025-08-19'.

    Args:
        date_str: Day, month name (abbreviated or full) and four-digit year,
            separated by whitespace.

    Returns:
        The date as an ISO 'YYYY-MM-DD' string.

    Raises:
        ValueError: If ``date_str`` matches neither supported layout.
    """
    # Try the abbreviated month first, then the full month name. Only
    # ValueError (a format mismatch) triggers the fallback; anything else,
    # such as KeyboardInterrupt or a TypeError for non-string input,
    # propagates unchanged instead of being swallowed.
    for fmt in ("%d %b %Y", "%d %B %Y"):
        try:
            dt = datetime.datetime.strptime(date_str, fmt)
        except ValueError:
            continue
        return dt.strftime("%Y-%m-%d")
    raise ValueError(f"Unrecognized date format: {date_str!r}")

# Walk the cleaned lines and group student rows under the most recent date
# header. Three line shapes are recognised:
#   * "Date,<day> <month> <year>"  -> starts a new class section
#   * "Number,Name,Username"       -> column header, ignored
#   * "<digits>,<name>[,<user>]"   -> one attending student
# Anything else is skipped.
for line in lines:
    lowered = line.lower()

    if lowered.startswith("date"):
        # partition() never raises, so a header without a comma can be
        # reported with a clear message instead of a bare IndexError.
        _, comma, raw_date = line.partition(",")
        raw_date = raw_date.strip()
        if not comma or not raw_date:
            raise ValueError(f"Malformed date header line: {line!r}")
        current_date = normalize_date(raw_date)
        # NOTE(review): a date that appears twice replaces the earlier
        # section's rows (unchanged behavior) - confirm that is intended.
        attendance[current_date] = []
        continue

    if lowered.startswith("number,name,username"):
        continue

    parts = [p.strip() for p in line.split(",")]
    if len(parts) >= 2 and parts[0].isdigit():
        if current_date is None:
            # Previously this surfaced as an opaque "KeyError: None".
            raise ValueError(
                f"Student row found before any date header: {line!r}"
            )
        number = int(parts[0])
        name = parts[1]
        username = parts[2] if len(parts) > 2 else ""
        attendance[current_date].append((number, name, username))

# Flatten the per-date attendance mapping into one dict per student row,
# then load those rows into a DataFrame.
records = [
    {"date": class_date, "number": num, "name": name, "username": user}
    for class_date, students in attendance.items()
    for num, name, user in students
]

df = pd.DataFrame(records)

# Summary statistics over the parsed attendance. Every statistic falls back
# to None when there is no data to compute it from, so the report can still
# be printed for an empty file or when every class is an evaluation day.

# a) number of classes + list of dates
num_classes = len(attendance)
class_dates = sorted(attendance)

# b) median attendance (statistics.median raises on empty data, so guard it)
attendance_counts = [len(rows) for rows in attendance.values()]
median_attendance = median(attendance_counts) if attendance_counts else None

# c) lowest + highest attendance; on ties the first date in file order wins
lowest_date = min(attendance, key=lambda d: len(attendance[d]), default=None)
highest_date = max(attendance, key=lambda d: len(attendance[d]), default=None)

lowest_att = len(attendance[lowest_date]) if lowest_date is not None else None
highest_att = len(attendance[highest_date]) if highest_date is not None else None

# d) compare attendance on evaluation days with all other class days
eval_date_set = set(EVAL_DATES)
eval_attendance = [
    len(rows) for d, rows in attendance.items() if d in eval_date_set
]
non_eval_attendance = [
    len(rows) for d, rows in attendance.items() if d not in eval_date_set
]

avg_eval_att = (
    sum(eval_attendance) / len(eval_attendance) if eval_attendance else None
)
# Guarded like avg_eval_att: the unguarded division raised ZeroDivisionError
# whenever every class date was an evaluation date.
avg_noneval_att = (
    sum(non_eval_attendance) / len(non_eval_attendance)
    if non_eval_attendance
    else None
)


# Print the attendance report: class list, median, extremes, and the
# comparison between evaluation days and regular days.
print("\nRESULTS\n")

print(f"Number of classes: {num_classes}")
print("Class dates:")
for class_day in class_dates:
    print(" -", class_day)

print("\nMedian class attendance:", median_attendance)

# Extremes: (section title, date, head count), printed in a fixed order.
extremes = (
    ("\nLowest attendance:", lowest_date, lowest_att),
    ("\nHighest attendance:", highest_date, highest_att),
)
for title, when, count in extremes:
    print(title)
    print(f" - Date: {when}, Attendance: {count}")

print("\nEvaluation Dates Attendance:")
for eval_day in EVAL_DATES:
    # An evaluation date with no matching class section is reported as 0.
    present = len(attendance.get(eval_day, []))
    print(f" - {eval_day}: {present} students")

print("\nAverage attendance on evaluation days:", avg_eval_att)
print("Average attendance on NON-evaluation days:", avg_noneval_att)




RESULTS

Number of classes: 25
Class dates:
 - 2025-08-19
 - 2025-08-21
 - 2025-08-26
 - 2025-08-28
 - 2025-09-04
 - 2025-09-09
 - 2025-09-11
 - 2025-09-16
 - 2025-09-18
 - 2025-09-23
 - 2025-09-25
 - 2025-09-30
 - 2025-10-02
 - 2025-10-07
 - 2025-10-14
 - 2025-10-16
 - 2025-10-21
 - 2025-10-23
 - 2025-10-28
 - 2025-10-30
 - 2025-11-04
 - 2025-11-11
 - 2025-11-13
 - 2025-11-18
 - 2025-11-20

Median class attendance: 33

Lowest attendance:
 - Date: 2025-11-20, Attendance: 14

Highest attendance:
 - Date: 2025-08-21, Attendance: 49

Evaluation Dates Attendance:
 - 2025-10-07: 45 students
 - 2025-11-11: 41 students
 - 2025-11-18: 34 students

Average attendance on evaluation days: 40.0
Average attendance on NON-evaluation days: 32.36363636363637
