#### Actual synthetic dataset creation from random points: anomalies are injected with probability 0.3, and each sample is then labelled as anomalous or not using threshold values

In [3]:
import numpy as np
import pandas as pd

# Seed NumPy's global (legacy) RNG so re-running this cell from the top
# reproduces the same samples.
np.random.seed(42)

# Number of rows to generate.
N_SAMPLES = 10000
# Foot zones; each one contributes a temp_<zone> and a press_<zone> column.
ZONES = ["heel", "ball", "arch", "toe"]

def is_anomaly(row, zones=None, *, press_limit=80, temp_limit=38,
               spo2_floor=90, accel_limit=15, step_floor=40):
    """Label one sensor reading as anomalous (1) or normal (0) via threshold rules.

    A reading is anomalous as soon as one of these rules fires:

    1. some zone has ``press_<zone> > press_limit`` AND ``temp_<zone> > temp_limit``;
    2. ``spo2 < spo2_floor``;
    3. the acceleration magnitude ``sqrt(acc_x^2 + acc_y^2 + acc_z^2)`` exceeds
       ``accel_limit`` while ``stepCount < step_floor``.

    Parameters
    ----------
    row : mapping
        Anything indexable by column name (dict, pandas Series) holding the
        ``press_<zone>``/``temp_<zone>`` entries for every zone plus ``spo2``,
        ``acc_x``, ``acc_y``, ``acc_z`` and ``stepCount``.
    zones : iterable of str, optional
        Zone names to inspect. Defaults to the module-level ``ZONES``.
    press_limit, temp_limit, spo2_floor, accel_limit, step_floor : number, keyword-only
        Rule thresholds. The defaults are the values that were previously
        hard-coded, so ``is_anomaly(row)`` behaves exactly as before.

    Returns
    -------
    int
        1 if any rule fires, otherwise 0.
    """
    if zones is None:
        zones = ZONES

    # Rule 1: pressure and temperature must both be elevated in the SAME zone.
    if any(
        row[f"press_{z}"] > press_limit and row[f"temp_{z}"] > temp_limit
        for z in zones
    ):
        return 1

    # Rule 2: low blood-oxygen saturation.
    if row["spo2"] < spo2_floor:
        return 1

    # Rule 3: large acceleration magnitude combined with a low step count.
    accel_mag = np.sqrt(
        row["acc_x"]**2 + row["acc_y"]**2 + row["acc_z"]**2
    )
    if accel_mag > accel_limit and row["stepCount"] < step_floor:
        return 1

    return 0


def generate_sample():
    """Draw one synthetic sensor reading and label it with ``is_anomaly``.

    Every value comes from NumPy's global RNG, so results depend on the seed
    and on the exact order of the draws below.

    Returns
    -------
    dict
        ``temp_<zone>``/``press_<zone>`` for each zone in ``ZONES``, then
        ``spo2``, ``heartRate``, ``acc_x/y/z``, ``gyro_x/y/z``, ``stepCount``,
        ``batteryLevel`` and finally ``label`` (1 = anomaly, 0 = normal).
    """
    sample = {}

    # Baseline per-zone readings.
    for zone in ZONES:
        sample[f"temp_{zone}"] = np.random.uniform(32, 36)
        sample[f"press_{zone}"] = np.random.uniform(20, 60)

    # With probability 0.3, overwrite one randomly chosen zone with elevated
    # temperature and pressure readings.
    inject_anomaly = np.random.rand() < 0.3
    if inject_anomaly:
        hot_zone = np.random.choice(ZONES)
        sample[f"temp_{hot_zone}"] = np.random.uniform(38, 41)
        sample[f"press_{hot_zone}"] = np.random.uniform(80, 120)

    # A dict literal is evaluated top to bottom, so the RNG is consumed in the
    # order written here and the keys are inserted in that same order.
    # np.random.randint excludes its upper bound (e.g. heartRate is 60..129).
    sample.update({
        "spo2": np.random.uniform(85, 100),
        "heartRate": np.random.randint(60, 130),
        "acc_x": np.random.normal(0, 3),
        "acc_y": np.random.normal(0, 3),
        "acc_z": np.random.normal(9.8, 3),
        "gyro_x": np.random.normal(0, 80),
        "gyro_y": np.random.normal(0, 80),
        "gyro_z": np.random.normal(0, 80),
        "stepCount": np.random.randint(0, 120),
        "batteryLevel": np.random.randint(20, 100),
    })

    # The label is derived from the finished reading, not from the injection flag.
    sample["label"] = is_anomaly(sample)
    return sample


# Generate dataset: N_SAMPLES independent draws, one dict per row.
data = [generate_sample() for _ in range(N_SAMPLES)]
# Columns follow the key insertion order of the sample dicts.
df = pd.DataFrame(data)

# Save directly (NO balancing): the class ratio is left as generated.
df.to_csv("synthetic_foot_ulcer_dataset.csv", index=False)

# Quick sanity report: row/column counts and how many rows got each label.
print("Dataset shape:", df.shape)
print("Class distribution:\n", df["label"].value_counts())

Dataset shape: (10000, 19)
Class distribution:
 label
1    5370
0    4630
Name: count, dtype: int64


##### Synthetic Data based on Risk Score

In [1]:
import numpy as np
import pandas as pd

# Seed NumPy's global (legacy) RNG so re-running this cell from the top
# reproduces the same samples.
np.random.seed(42)

# Number of rows to generate.
N_SAMPLES = 10000
# Foot zones; each one contributes a temp_<zone> and a press_<zone> column.
ZONES = ["heel", "ball", "arch", "toe"]

# -----------------------------
# Risk helper functions
# -----------------------------
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``.

    Maps any real input into the open interval (0, 1), with ``sigmoid(0) == 0.5``.
    Relies on ``np.exp``, so NumPy arrays are handled element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def foot_stress_risk(row):
    """Return the highest per-zone stress risk for one reading.

    For each zone in ``ZONES`` the pressure is rescaled as ``(press - 40) / 40``
    and the temperature as ``(temp - 35) / 3``; their sum is squashed with
    ``sigmoid``. The largest zone value is returned, so a single bad zone
    dominates the result.
    """
    def zone_risk(zone):
        # Rescale both signals before combining them.
        pressure_term = (row[f"press_{zone}"] - 40) / 40
        temperature_term = (row[f"temp_{zone}"] - 35) / 3
        return sigmoid(pressure_term + temperature_term)

    return max(map(zone_risk, ZONES))


def circulation_risk(row):
    """Return a (0, 1) risk that rises as ``row["spo2"]`` falls.

    The risk is exactly 0.5 at an SpO2 of 92, and the shortfall is divided
    by 3 before being passed through ``sigmoid``.
    """
    # Lower SpO2 -> larger shortfall -> higher risk.
    spo2_shortfall = 92 - row["spo2"]
    return sigmoid(spo2_shortfall / 3)


def gait_risk(row):
    """Return a gait risk that blends acceleration magnitude and step count.

    The acceleration part is ``sigmoid((|acc| - 12) / 3)`` and the step part is
    ``sigmoid((40 - stepCount) / 10)``; they are mixed with weights 0.6 and 0.4.
    """
    x, y, z = row["acc_x"], row["acc_y"], row["acc_z"]
    magnitude = np.sqrt(x**2 + y**2 + z**2)

    # Larger acceleration magnitude -> higher risk.
    accel_component = sigmoid((magnitude - 12) / 3)
    # Fewer steps -> higher risk.
    step_component = sigmoid((40 - row["stepCount"]) / 10)

    return 0.6 * accel_component + 0.4 * step_component


def total_risk_score(row):
    """Combine the three component risks into one overall score.

    The score is ``0.5 * foot_stress_risk + 0.3 * circulation_risk +
    0.2 * gait_risk``. The weights sum to 1.0.
    """
    stress = foot_stress_risk(row)
    circulation = circulation_risk(row)
    gait = gait_risk(row)

    # Weighted sum; foot stress carries the most weight.
    return 0.5 * stress + 0.3 * circulation + 0.2 * gait


# -----------------------------
# Generate one sample
# -----------------------------
def generate_sample():
    """Draw one synthetic sensor reading with a risk score and a binary label.

    Every value comes from NumPy's global RNG, so results depend on the seed
    and on the exact order of the draws below.

    Returns
    -------
    dict
        ``temp_<zone>``/``press_<zone>`` for each zone in ``ZONES``, then
        ``spo2``, ``heartRate``, ``acc_x/y/z``, ``gyro_x/y/z``, ``stepCount``,
        ``batteryLevel``, ``risk_score`` (from ``total_risk_score``) and
        ``label`` (1 when ``risk_score > 0.6``, otherwise 0).
    """
    sample = {}

    # Baseline per-zone readings.
    for zone in ZONES:
        sample[f"temp_{zone}"] = np.random.uniform(32, 36)
        sample[f"press_{zone}"] = np.random.uniform(20, 60)

    # With probability 0.3, overwrite one randomly chosen zone with elevated
    # temperature and pressure readings.
    inject_abnormality = np.random.rand() < 0.3
    if inject_abnormality:
        hot_zone = np.random.choice(ZONES)
        sample[f"temp_{hot_zone}"] = np.random.uniform(37.5, 41)
        sample[f"press_{hot_zone}"] = np.random.uniform(70, 120)

    # A dict literal is evaluated top to bottom, so the RNG is consumed in the
    # order written here and the keys are inserted in that same order.
    # np.random.randint excludes its upper bound (e.g. heartRate is 60..129).
    sample.update({
        "spo2": np.random.uniform(85, 100),
        "heartRate": np.random.randint(60, 130),
        "acc_x": np.random.normal(0, 3),
        "acc_y": np.random.normal(0, 3),
        "acc_z": np.random.normal(9.8, 3),
        "gyro_x": np.random.normal(0, 80),
        "gyro_y": np.random.normal(0, 80),
        "gyro_z": np.random.normal(0, 80),
        "stepCount": np.random.randint(0, 120),
        "batteryLevel": np.random.randint(20, 100),
    })

    # Score the finished reading before the score column itself is added.
    score = total_risk_score(sample)
    sample["risk_score"] = score

    # Optional classification; the 0.6 cut-off can be tuned.
    if score > 0.6:
        sample["label"] = 1
    else:
        sample["label"] = 0

    return sample


# -----------------------------
# Generate dataset
# -----------------------------
# N_SAMPLES independent draws, one dict per row.
data = [generate_sample() for _ in range(N_SAMPLES)]
# Columns follow the key insertion order of the sample dicts.
df = pd.DataFrame(data)

# Persist the dataset, including the risk_score and label columns.
df.to_csv("synthetic_foot_ulcer_dataset_RISK.csv", index=False)

# Quick sanity report: shape, label counts, and the risk score distribution.
print("Dataset shape:", df.shape)
print("Class distribution:\n", df["label"].value_counts())
print("\nRisk score stats:\n", df["risk_score"].describe())

Dataset shape: (10000, 20)
Class distribution:
 label
0    6692
1    3308
Name: count, dtype: int64

Risk score stats:
 count    10000.000000
mean         0.545164
std          0.133786
min          0.222999
25%          0.439658
50%          0.543516
75%          0.630402
max          0.924720
Name: risk_score, dtype: float64
