In [2]:
import numpy as np
import pandas as pd

In [3]:
# Seed NumPy's global random state so the np.random draws made later in the
# notebook are repeatable from a fresh kernel.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [4]:
# Number of rows used to size the per-class counts.
num_rows = 50_000

In [5]:
# Target class mix: "Yes" gets 33%, "No" five percentage points less, and
# "N/A" whatever is left over.
yes_percentage = 0.33  # 33% for "Yes"
no_percentage = yes_percentage - 0.05  # "No" will have 5% less
remaining_percentage = 1 - (yes_percentage + no_percentage)  # Remaining for "N/A"

# Use round() rather than int(): in binary floating point the remaining share
# can land just below 0.39, and int() truncation would then silently drop a
# row, leaving the counts one short of num_rows.
yes_count = round(yes_percentage * num_rows)
no_count = round(no_percentage * num_rows)

class_counts = {
    'Yes': yes_count,
    'No': no_count,
    # Derived as the integer remainder so the three counts always sum to
    # exactly num_rows.
    'N/A': num_rows - yes_count - no_count,
}

In [6]:
def generate_conditional_data(class_counts, rng=None):
    """Generate synthetic sensor rows labelled with a heart attack risk class.

    Rows are drawn one at a time by rejection sampling: each candidate row gets
    uniformly random vitals and a random device status, is labelled by the
    rules below, and is kept only while its label still has quota left.

    Labelling rules, checked in this order:
      * 'Yes' - HR > 85, SpO2 < 95, PRV < 50 and temperature > 37.5.
      * 'No'  - device status is 'Online'.
      * 'N/A' - anything else.
    A row whose earlier label is already full falls through to the later
    labels; if none has quota left the row is discarded.

    Parameters
    ----------
    class_counts : dict
        Requested number of rows per label, keyed by 'Yes', 'No' and 'N/A'.
        A missing label is treated as 0. Keys other than these three are
        ignored, because no row can ever receive such a label.
    rng : optional
        Object providing ``uniform(low, high)`` and ``choice(seq)``, e.g. a
        ``numpy.random.Generator``. Defaults to NumPy's global random state,
        so a prior ``np.random.seed(...)`` call controls the output.

    Returns
    -------
    pandas.DataFrame
        One row per kept sample, in generation order, with the six feature
        columns plus 'Heart Attack Risk'. The columns are present even when
        no rows were requested.
    """
    if rng is None:
        rng = np.random

    labels = ('Yes', 'No', 'N/A')
    columns = [
        'HR (BPM)',
        'SpO2 (%)',
        'Pulse Amplitude (AU)',
        'PRV (ms)',
        'Temperature (°C)',
        'Device Status',
        'Heart Attack Risk',
    ]

    # Only the labels this function can emit count toward the target; summing
    # every value in class_counts would make the loop below unreachable (and
    # therefore endless) if the dict carried any extra key.
    targets = {label: class_counts.get(label, 0) for label in labels}
    # Computed once: the 'Yes' rule matches only about 1% of draws, so the
    # loop typically runs many more iterations than rows it keeps.
    total_rows = sum(targets.values())

    # Track how many of each class we've added
    counts = dict.fromkeys(labels, 0)
    data = []

    while len(data) < total_rows:
        # The order of these draws is fixed so seeded output stays stable.
        hr = rng.uniform(60, 100)
        spo2 = rng.uniform(90, 100)
        pulse_amplitude = rng.uniform(0.5, 1.0)
        prv = rng.uniform(40, 100)
        temperature = rng.uniform(35.5, 38.5)
        device_status = rng.choice(['Online', 'Offline'])

        # Determine heart attack risk using if-else logic
        if (hr > 85) and (spo2 < 95) and (prv < 50) and (temperature > 37.5) and (counts['Yes'] < targets['Yes']):
            heart_attack_risk = 'Yes'
        elif (device_status == 'Online') and (counts['No'] < targets['No']):
            heart_attack_risk = 'No'
        elif counts['N/A'] < targets['N/A']:
            heart_attack_risk = 'N/A'
        else:
            continue  # Skip if we have already filled the required counts

        counts[heart_attack_risk] += 1

        # Append the generated row
        data.append({
            'HR (BPM)': hr,
            'SpO2 (%)': spo2,
            'Pulse Amplitude (AU)': pulse_amplitude,
            'PRV (ms)': prv,
            'Temperature (°C)': temperature,
            'Device Status': device_status,
            'Heart Attack Risk': heart_attack_risk
        })

    # Passing columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(data, columns=columns)


In [41]:
# Build the labelled synthetic dataset with the requested rows per class.
data = generate_conditional_data(class_counts=class_counts)

In [42]:
# Shuffle all rows with a fixed seed, then renumber the index from 0 so the
# row order no longer reflects the order in which rows were generated.
shuffled_rows = data.sample(frac=1, random_state=42)
data = shuffled_rows.reset_index(drop=True)

In [None]:
# Report how many rows ended up in each risk class.
class_distribution = data['Heart Attack Risk'].value_counts()
print("Final Class Distribution:", class_distribution, sep="\n")

In [None]:
# Write the shuffled dataset to a CSV file in the working directory, without
# the index column, and confirm the file name to the reader.
output_csv = 'synthetic_heart_attack_data_balanced.csv'
data.to_csv(path_or_buf=output_csv, index=False)
print("\nDataset saved as 'synthetic_heart_attack_data_balanced.csv'.")