In [None]:
# import csv


In [78]:
import csv
import random

def load_data(filepath):
    patients = []

    try:
        with open(filepath,  mode='r', newline='', encoding='utf-8-sig') as file:
            # utf-8-sig automatically removes BOM if present
            reader = csv.DictReader(file)
            
            for row in reader:   # coverting of each patient record based on row
                try:
                    patient = {     
                        "patient_id": int(row["Subject ID"].strip()), 
                        "age": random.randint(20, 60),  # random age between 20 and 60
                        "temperature": float(row["Temperature reading 1"].strip()),
                        "heart_rate": int(row["Heart rate reading 1"].strip()),
                        "systolic_bp": int(row["Systolic blood pressure reading 1"].strip()),
                        "oxygen_saturation": int(row["Oxygen saturation reading 1"].strip())
                    }
                    patients.append(patient)

                except ValueError:
                    # Skip rows with bad numeric data
                    continue

    except FileNotFoundError:
        print("Error: File not found.")

    return patients




In [79]:
import pandas as pd
data = load_data("data/patient_vitals.csv")  
df = pd.DataFrame(data[:5])
print(df)


   patient_id  age  temperature  heart_rate  systolic_bp  oxygen_saturation
0           1   28         36.0          77          110                 98
1           2   26         35.8          67          110                 99
2           3   31         35.6          86          104                 98
3           4   60         37.1          87           96                 97
4           5   34         35.3          95          121                 94


In [80]:
import math

def calculate_mean(values):
    return sum(values) / len(values)

def calculate_median(values):
    sorted_vals = sorted(values)
    n = len(sorted_vals)
    if n % 2 == 0:
        return (sorted_vals[n//2 - 1] + sorted_vals[n//2]) / 2
    else:
        return sorted_vals[n//2]

def calculate_mode(values):
    freq = {}
    for v in values:
        freq[v] = freq.get(v, 0) + 1
    max_freq = max(freq.values())
    modes = [k for k, v in freq.items() if v == max_freq]
    return modes  

# IQR outlier detection 
def calculate_outliers(values):
    if not values:
        return []
    sorted_vals = sorted(values)
    n = len(sorted_vals)
    Q1 = sorted_vals[n // 4]
    Q3 = sorted_vals[(3 * n) // 4]
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return [x for x in values if x < lower_bound or x > upper_bound]


In [81]:
simple_list = [1, 2, 3, 100]

print("Data:", simple_list)
print("Mean:", calculate_mean(simple_list))
print("Median:", calculate_median(simple_list))
print("Mode:", calculate_mode(simple_list))
print("Outliers:", calculate_outliers(simple_list))


Data: [1, 2, 3, 100]
Mean: 26.5
Median: 2.5
Mode: [1, 2, 3, 100]
Outliers: []


In [82]:
patients = load_data("data/patient_vitals.csv")

temperatures = [p["temperature"] for p in patients]
heart_rates = [p["heart_rate"] for p in patients]
systolic = [p["systolic_bp"] for p in patients]
oxygen_saturated = [p["oxygen_saturation"] for p in patients]
age = [p["age"] for p in patients]




In [72]:
print(f"{'Vital':<20} {'Mean':>10}")
print("-" * 30)

mean_temp = calculate_mean(temperatures)
print(f"{'Temperature (°C)':<20} {mean_temp:>10.2f}")

mean_hr = calculate_mean(heart_rates)
print(f"{'Heart Rate (bpm)':<20} {mean_hr:>10.2f}")

mean_sys = calculate_mean(systolic)
print(f"{'Systolic BP':<20} {mean_sys:>10.2f}")

mean_oxy = calculate_mean(oxygen_saturated)
print(f"{'Oxygen Saturation (%)':<20} {mean_oxy:>10.2f}")

mean_age = calculate_mean(age)
print(f"{'age':<20} {mean_age:>10.2f}")




Vital                      Mean
------------------------------
Temperature (°C)          36.20
Heart Rate (bpm)          82.24
Systolic BP              105.99
Oxygen Saturation (%)      97.40
age                       41.16


In [None]:
# Calculate statistics
median_temp = calculate_median(temperatures)
median_hr = calculate_median(heart_rates)
median_sys = calculate_median(systolic)
median_oxy = calculate_median(oxygen_saturated)
median_age = calculate_median(age)

print(f"{'Vital':<20} {'Median':>10}")
print("-" * 30)
print(f"{'Temperature (°C)':<20} {median_temp:>10.2f}")
print(f"{'Heart Rate (bpm)':<20} {median_hr:>10.2f}")
print(f"{'Systolic BP':<20} {median_sys:>10.2f}")
print(f"{'Oxygen Saturation (%)':<20} {median_oxy:>10.2f}")
print(f"{'age' :20} {median_age:>10.2f}")


Vital                    Median
------------------------------
Temperature (°C)          36.35
Heart Rate (bpm)          81.00
Systolic BP              106.00
Oxygen Saturation (%)      98.00
age                       41.50


In [74]:
mode_temp = calculate_mode(temperatures)
mode_hr = calculate_mode(heart_rates)
mode_sys = calculate_mode(systolic)
mode_oxy = calculate_mode(oxygen_saturated)
mode_age = calculate_mode(age)

def format_number(x):
    if isinstance(x, float):
        return f"{x:.2f}"         # 2 decimal places for float
    else:
        return str(x)             # just convert int to string

print(f"{'Vital':<25} {'Mode':>10}")
print("-" * 36)
print(f"{'Temperature (°C)':<25} {format_number(mode_temp):>10}")
print(f"{'Heart Rate (bpm)':<25} {format_number(mode_hr):>10}")
print(f"{'Systolic BP':<25} {format_number(mode_sys):>10}")
print(f"{'Oxygen Saturation (%)':<25} {format_number(mode_oxy):>10}")
print(f"{'age':<25} {format_number(mode_age):>10}")

Vital                           Mode
------------------------------------
Temperature (°C)          [36.6, 35.5]
Heart Rate (bpm)                [67]
Systolic BP               [96, 97, 113]
Oxygen Saturation (%)           [98]
age                         [20, 60]


In [None]:
outlier_temp = calculate_outliers(temperatures)
outlier_hr = calculate_outliers(heart_rates)
outlier_sys = calculate_outliers(systolic)
outlier_oxy = calculate_outliers(oxygen_saturated)
outlier_age = calculate_outliers(age)

print(f"{'Vital':<25} {'outliers':>10}")
print("-" * 36)
print(f"{'Temperature (°C)':<25} {format_number(outlier_temp):>10}")
print(f"{'Heart Rate (bpm)':<25} {format_number(outlier_hr):>10}")
print(f"{'Systolic BP':<25} {format_number(outlier_sys):>10}")
print(f"{'Oxygen Saturation (%)':<25} {format_number(outlier_oxy):>10}")
print(f"{'age' :<25} {format_number(outlier_age) :>10}")


Vital                       outliers
------------------------------------
Temperature (°C)                  []
Heart Rate (bpm)                  []
Systolic BP                       []
Oxygen Saturation (%)          [102]
age                               []


In [None]:
temperatures = [p["temperature"] for p in patients]
heart_rates = [p["heart_rate"] for p in patients]
systolic_bp = [p["systolic_bp"] for p in patients]
oxygen_saturation = [p["oxygen_saturation"] for p in patients]
age = [p["age"] for p in patients]

# outliers for each vital
temp_outliers = calculate_outliers(temperatures)
hr_outliers = calculate_outliers(heart_rates)
sys_outliers = calculate_outliers(systolic_bp)
oxy_outliers = calculate_outliers(oxygen_saturation)
age_outliers = calculate_outliers(age)

print("Temperature Outliers:", temp_outliers)
print("Heart Rate Outliers:", hr_outliers)
print("Systolic BP Outliers:", sys_outliers)
print("Oxygen Saturation Outliers:", oxy_outliers)
print("age:", age_outliers)

Temperature Outliers: []
Heart Rate Outliers: []
Systolic BP Outliers: []
Oxygen Saturation Outliers: [102]
age: []


In [None]:
def generate_summary_report(patients):
    
    vitals_map = {
        "Temperature (°C)": "temperature",
        "Heart Rate (bpm)": "heart_rate"
    }

    print(f"{'Vital':<20} {'Mean':>8} {'Median':>8} {'Mode':>10} {'Outlier IDs':>15}")
    print("-" * 70)

    for display_name, key in vitals_map.items():
        values = [p[key] for p in patients]

        mean_val = calculate_mean(values)
        median_val = calculate_median(values)
        mode_val = calculate_mode(values)
        outliers = calculate_outliers(values)

        # connecting outlier values to patient_ids
        outlier_ids = [p["patient_id"] for p in patients if p[key] in outliers]

        
        mode_str = ", ".join(str(m) for m in mode_val) # for formating

        print(f"{display_name:<20} {mean_val:>8.2f} {median_val:>8.2f} {mode_str:>10} {str(outlier_ids):>15}")




In [85]:
patients = load_data("data/patient_vitals.csv")

generate_summary_report(patients)


Vital                    Mean   Median       Mode     Outlier IDs
----------------------------------------------------------------------
Temperature (°C)        36.20    36.35 36.6, 35.5              []
Heart Rate (bpm)        82.24    81.00         67              []
