In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Seed NumPy's global RNG so every run draws the same synthetic dataset.
np.random.seed(42)

# Size of the synthetic dataset, and how many of its leading rows are later
# overwritten with outlier values.
num_samples = 100_000
num_outliers = 10_000

In [None]:
# Draw the three synthetic signals. The order of the RNG calls below is part of
# the result: reordering them changes every generated value for a fixed seed.

# Gaussian jitter (mean 0, std 5) that is added to the heart rates only.
noise = np.random.normal(loc=0, scale=5, size=num_samples)

# Integer base heart rate in [50, 199] BPM (randint's upper bound is exclusive),
# made fractional by the jitter, which can also push values slightly outside that range.
heart_rate = np.random.randint(low=50, high=200, size=num_samples) + noise
# Integer SpO2 percentage in [85, 100].
spo2 = np.random.randint(low=85, high=101, size=num_samples)
# Integer skating time in minutes, in [1, 44] (upper bound 45 is exclusive).
time_spent_skating = np.random.randint(low=1, high=45, size=num_samples)



In [None]:
# Define performance levels based on HR, SpO2, and time spent skating
def classify_performance(hr, sp, time):
    """Map one sample to a performance label.

    Args:
        hr: Heart rate in BPM.
        sp: SpO2 percentage.
        time: Time spent skating, in minutes.

    Returns:
        "Elite", "Advanced", "Intermediate" or "Beginner". A tier is awarded only
        when the heart rate falls inside that tier's band AND both the SpO2 and the
        skating time are strictly above the tier's minimums; everything else is
        "Beginner" (a sample that misses its own band's minimums does not fall
        through to a lower tier).
    """
    # (label, hr lower bound inclusive, hr upper bound exclusive, SpO2 floor, time floor)
    tiers = (
        ("Elite", float("-inf"), 100, 96, 30),
        ("Advanced", 100, 130, 94, 20),
        ("Intermediate", 130, 160, 92, 10),
    )
    for label, hr_low, hr_high, sp_floor, time_floor in tiers:
        if hr_low <= hr < hr_high and sp > sp_floor and time > time_floor:
            return label
    return "Beginner"

# Label every sample by walking the three feature arrays in lockstep.
performance_levels = list(map(classify_performance, heart_rate, spo2, time_spent_skating))


In [None]:
# Define risk levels based on HR, SpO2, and time spent skating
def classify_risk(hr, sp, time):
    """Map one sample to a risk label.

    Args:
        hr: Heart rate in BPM.
        sp: SpO2 percentage.
        time: Time spent skating, in minutes.

    Returns:
        "High Risk" if any single high-risk condition holds, otherwise "Mid Risk"
        if any single moderate condition holds, otherwise "Low Risk".
    """
    # Any one of these alone is enough to flag the sample as high risk.
    high_risk_signs = (hr >= 180, sp < 88, time > 100)
    if any(high_risk_signs):
        return "High Risk"

    # Checked only after the high-risk test has failed.
    moderate_signs = (160 <= hr < 180, 88 <= sp < 92, 80 <= time <= 100)
    return "Mid Risk" if any(moderate_signs) else "Low Risk"

# Label every sample by walking the three feature arrays in lockstep.
risk_levels = list(map(classify_risk, heart_rate, spo2, time_spent_skating))

In [None]:
# Assemble the features and both label lists into one frame, one row per sample.
# Keyword order fixes the column order.
df = pd.DataFrame(
    dict(
        Heart_Rate=heart_rate,
        SpO2=spo2,
        Time_Spent_Skating=time_spent_skating,
        Performance_Level=performance_levels,
        Risk_Level=risk_levels,
    )
)

In [None]:
# Overwrite the first `num_outliers` rows with extreme values. `.loc` label slices
# include the end label, so `:num_outliers-1` covers exactly `num_outliers` rows on
# the default integer index.
# NOTE(review): Performance_Level / Risk_Level were computed before this overwrite,
# so the labels on these rows still describe the pre-outlier values — confirm that
# this label noise is intended.
df.loc[:num_outliers-1, 'Heart_Rate'] = np.random.choice([220, 30], num_outliers)
df.loc[:num_outliers-1, 'SpO2'] = np.random.choice([70, 110], num_outliers)
# NOTE(review): the frame has 'Time_Spent_Skating', not 'Time_Skating', so this line
# adds a brand-new column (NaN outside the outlier rows) instead of modifying the
# skating time — presumably a typo. The results cell drops 'Time_Skating' before
# printing, so a rename has to be made in both places together.
df.loc[:num_outliers-1, 'Time_Skating'] = np.random.choice([0, 300], num_outliers)

In [None]:
# Replace the string labels of both targets with integer category codes.
# Codes follow the sorted order of the labels present in each column, not any
# notion of rank. With every label present that is
#   Performance_Level: Advanced=0, Beginner=1, Elite=2, Intermediate=3
#   Risk_Level:        High Risk=0, Low Risk=1, Mid Risk=2
for target_col in ("Performance_Level", "Risk_Level"):
    df[target_col] = df[target_col].astype("category").cat.codes

In [None]:
# Feature matrix and the two encoded targets.
# NOTE(review): only Heart_Rate and SpO2 are used as features, although both labelling
# functions also depend on skating time — confirm that leaving it out is intentional.
X = df[["Heart_Rate", "SpO2"]]  # Features
y_performance = df["Performance_Level"]
y_risk = df["Risk_Level"]

# Split once, passing both targets together: a single shuffle selects the train/test
# rows for X and for each target, so all pieces are aligned by construction. The
# previous two separate calls overwrote X_train/X_test and stayed aligned only
# because they shared the same size and random_state; the resulting split is the same.
(
    X_train,
    X_test,
    y_train_perf,
    y_test_perf,
    y_train_risk,
    y_test_risk,
) = train_test_split(X, y_performance, y_risk, test_size=0.2, random_state=42)



In [None]:
# One 100-tree random forest per target, each built and fitted on the shared
# training features; the fixed random_state keeps the fitted trees repeatable.
clf_performance = RandomForestClassifier(n_estimators=100, random_state=42)
clf_performance.fit(X_train, y_train_perf)

clf_risk = RandomForestClassifier(n_estimators=100, random_state=42)
clf_risk.fit(X_train, y_train_risk)

In [None]:
# Score the performance-level model on the held-out rows: fraction of exact matches.
y_pred_perf = clf_performance.predict(X_test)
accuracy_perf = accuracy_score(y_true=y_test_perf, y_pred=y_pred_perf)


In [None]:
# Score the risk-level model on the held-out rows: fraction of exact matches.
y_pred_risk = clf_risk.predict(X_test)
accuracy_risk = accuracy_score(y_true=y_test_risk, y_pred=y_pred_risk)


In [None]:
# Report the held-out accuracy of both classifiers.
print(f"Performance Classification Accuracy: {accuracy_perf}")
print(f"Risk Classification Accuracy: {accuracy_risk}")

# Show the last 30 rows. The columns are selected by name instead of dropping
# "Time_Skating": the printed table is the same, but this cell no longer raises a
# KeyError if that helper column is absent from the frame.
print("\nSample Data with Risk Levels:")
print(
    df[
        [
            "Heart_Rate",
            "SpO2",
            "Time_Spent_Skating",
            "Performance_Level",
            "Risk_Level",
        ]
    ].tail(30)
)

Performance Classification Accuracy: 0.8902
Risk Classification Accuracy: 0.9406

Sample Data with Risk Levels:
       Heart_Rate  SpO2  Time_Spent_Skating  Performance_Level  Risk_Level
99970   67.005641    86                  12                  1           0
99971   91.762154    94                  20                  1           1
99972  105.411423    96                  39                  0           1
99973  111.791609    87                  38                  1           0
99974   79.602182    91                  26                  1           2
99975  188.751100    86                  34                  1           0
99976  114.891611    94                  12                  1           1
99977  155.261916    90                  11                  1           2
99978  150.436634    92                  39                  1           1
99979   86.156542    99                  39                  2           1
99980  126.640843    94                  40                  1 