In [1]:
import csv
import random

# Number of synthetic records to generate
num_rows = 50000

# Open a file to write the CSV
with open('depression_vitals_dataset.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    
    # Write header
    writer.writerow([
        "spo2",
        "heart_rate",
        "body_temperature",
        "preexisting_heart_disease",
        "preexisting_diabetes",
        "depression"
    ])
    
    for _ in range(num_rows):
        # Generate synthetic vital signs
        spo2 = round(random.uniform(90, 100), 1)  # typical spo2 values between 90 and 100
        heart_rate = random.randint(50, 100)       # heart rate between 50 and 100 bpm
        body_temperature = round(random.uniform(36.0, 38.0), 1)  # temperature between 36.0 and 38.0 °C
        
        # Random binary flags for preexisting conditions
        preexisting_heart_disease = random.randint(0, 1)
        preexisting_diabetes = random.randint(0, 1)
        
        # A simple synthetic correlation:
        # Assume a person with lower spo2, higher heart rate, or having preexisting conditions
        # has a slightly higher chance to be labeled as depressed.
        depression_prob = 0.1  # baseline probability
        if spo2 < 94:
            depression_prob += 0.15
        if heart_rate > 90:
            depression_prob += 0.15
        if body_temperature > 37.5:
            depression_prob += 0.1
        if preexisting_heart_disease:
            depression_prob += 0.2
        if preexisting_diabetes:
            depression_prob += 0.2
        
        depression = 1 if random.random() < min(depression_prob, 0.95) else 0
        
        writer.writerow([
            spo2,
            heart_rate,
            body_temperature,
            preexisting_heart_disease,
            preexisting_diabetes,
            depression
        ])

print("CSV dataset 'depression_vitals_dataset.csv' with 1,000 rows has been created.")


CSV dataset 'depression_vitals_dataset.csv' with 1,000 rows has been created.


In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

def generate_depression_data(n_samples=5000):
    np.random.seed(42)
    
    data = {
        # Physiological features
        'resting_hr': np.concatenate([
            np.random.normal(70, 5, int(n_samples*0.8)),  # Healthy
            np.random.normal(85, 7, int(n_samples*0.2))    # Depressed
        ]),
        'hrv_rmssd': np.concatenate([
            np.random.gamma(2, 15, int(n_samples*0.8)),    # Healthy
            np.random.gamma(1, 10, int(n_samples*0.2))     # Depressed
        ]),
        'night_o2': np.concatenate([
            np.random.normal(97.5, 0.5, int(n_samples*0.8)),
            np.random.normal(94.5, 1.2, int(n_samples*0.2))
        ]),
        # ... (similar logic for other features)
        
        # Labels (PHQ-9 based depression severity)
        'depression_status': np.concatenate([
            np.zeros(int(n_samples*0.8)),  # 0: Healthy
            np.ones(int(n_samples*0.2))    # 1: Depressed
        ])
    }
    
    df = pd.DataFrame(data)
    return df.sample(frac=1).reset_index(drop=True)

# Generate and save dataset
dataset = generate_depression_data(10000)
dataset.to_csv('synthetic_depression_vitals.csv', index=False)