In [12]:
import pandas as pd

In [13]:
# Load the dataset
# The path is relative, so it is resolved against the notebook's working directory.
# Every later cell adds its derived column to this same `df` in place.
file_path = '../Data/heart_attack_dataset.csv'
df = pd.read_csv(file_path)

In [14]:
# 1. Age Group
def age_group(age):
    """Map a numeric age to its bracket label.

    Parameters
    ----------
    age : int or float
        A single value from the 'Age' column.

    Returns
    -------
    str or None
        "<20", a decade label from "20-29" to "70-79", or "80+".
        None when ``age`` is missing (NaN / None): every comparison with NaN
        is False, so without this guard an unknown age would fall through to
        the last bracket and be counted as "80+".
    """
    if pd.isna(age):
        return None

    # (exclusive upper bound, label) in ascending order; the first bound the
    # age is below decides the bracket.
    brackets = (
        (20, "<20"),
        (30, "20-29"),
        (40, "30-39"),
        (50, "40-49"),
        (60, "50-59"),
        (70, "60-69"),
        (80, "70-79"),
    )
    for upper_bound, label in brackets:
        if age < upper_bound:
            return label
    return "80+"

# Label every row with its age bracket; age_group is called once per value in 'Age'.
df['Age Group'] = df['Age'].apply(age_group)

In [15]:
# 2. Cholesterol Level Category
def cholesterol_category(cholesterol):
    """Bucket a cholesterol reading into "Low", "Borderline High" or "High".

    Parameters
    ----------
    cholesterol : int or float
        A single value from the 'Cholesterol (mg/dL)' column.

    Returns
    -------
    str or None
        "Low" below 200, "Borderline High" from 200 up to (not including) 240,
        "High" from 240 upwards.
        None when the reading is missing (NaN / None); NaN fails every
        comparison, so it would otherwise be reported as "High".
    """
    if pd.isna(cholesterol):
        return None
    if cholesterol < 200:
        return "Low"
    if cholesterol < 240:
        return "Borderline High"
    return "High"

# Bucket each cholesterol reading into its category label, one value at a time.
df['Cholesterol Level Category'] = df['Cholesterol (mg/dL)'].apply(cholesterol_category)

In [16]:
# 3. Blood Pressure Category
def bp_category(bp):
    """Bucket a blood-pressure reading into one of four category labels.

    Parameters
    ----------
    bp : int or float
        A single value from the 'Blood Pressure (mmHg)' column.

    Returns
    -------
    str or None
        "Normal" below 120, "Elevated" from 120 up to 130,
        "High Blood Pressure Stage 1" from 130 up to 140, and
        "High Blood Pressure Stage 2" from 140 upwards (upper bounds exclusive).
        None when the reading is missing (NaN / None); NaN fails every
        comparison, so it would otherwise be reported as Stage 2.
    """
    if pd.isna(bp):
        return None

    # (exclusive upper bound, label) in ascending order; first match wins.
    thresholds = (
        (120, "Normal"),
        (130, "Elevated"),
        (140, "High Blood Pressure Stage 1"),
    )
    for upper_bound, label in thresholds:
        if bp < upper_bound:
            return label
    return "High Blood Pressure Stage 2"

# Bucket each blood-pressure reading into its category label, one value at a time.
df['Blood Pressure Category'] = df['Blood Pressure (mmHg)'].apply(bp_category)

In [17]:
# 4. Risk Score
def risk_score(row):
    """Count how many of four risk factors are present in a patient row.

    One point is given for each of:
      * 'Cholesterol (mg/dL)' at or above 240
      * 'Blood Pressure (mmHg)' at or above 140
      * 'Has Diabetes' equal to 1
      * 'Smoking Status' equal to 'Current'

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the four columns listed above.

    Returns
    -------
    int
        Number of risk factors present, from 0 to 4.
    """
    risk_factors = (
        # Inclusive bound: cholesterol_category labels 240 and above as "High",
        # and the blood-pressure bound below is inclusive too, so a reading of
        # exactly 240 has to count here as well.
        row['Cholesterol (mg/dL)'] >= 240,
        row['Blood Pressure (mmHg)'] >= 140,
        row['Has Diabetes'] == 1,
        row['Smoking Status'] == 'Current',
    )
    # Count explicitly so the result is always a plain Python int.
    return sum(1 for present in risk_factors if present)

# axis=1 hands each row to risk_score, which reads several columns at once.
df['Risk Score'] = df.apply(risk_score, axis=1)

In [18]:
# 5. Heart Health Status
def heart_health_status(row):
    """Combine the blood-pressure and cholesterol categories into one status.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'Blood Pressure Category' and 'Cholesterol Level Category'.

    Returns
    -------
    str
        "Unhealthy" when either measure is in its severe range (blood pressure
        Stage 1 or Stage 2, or cholesterol "High");
        "Healthy" when blood pressure is "Normal" and cholesterol is "Low";
        "At Risk" when neither is severe but at least one is "Elevated" /
        "Borderline High".
        Any other (unrecognised or missing) combination is reported as
        "Unhealthy".
    """
    bp_cat = row['Blood Pressure Category']
    chol_cat = row['Cholesterol Level Category']

    # Severe findings are checked first. Testing the "At Risk" condition
    # before them let a mild value on one measure mask a severe value on the
    # other (e.g. Stage 2 + Borderline High came out milder than Stage 2 + Low).
    hypertensive = bp_cat in (
        "High Blood Pressure Stage 1",
        "High Blood Pressure Stage 2",
    )
    if hypertensive or chol_cat == "High":
        return "Unhealthy"
    if bp_cat == "Normal" and chol_cat == "Low":
        return "Healthy"
    if bp_cat == "Elevated" or chol_cat == "Borderline High":
        return "At Risk"
    # Only reachable with a category label that none of the checks recognise.
    return "Unhealthy"

# Row-wise (axis=1) because the status depends on both category columns, which
# must already exist on df ('Blood Pressure Category', 'Cholesterol Level Category').
df['Heart Health Status'] = df.apply(heart_health_status, axis=1)

In [19]:
# 6. Lifestyle Modification Necessity
# 'Yes' when the row has a Risk Score of 2 or more, or a Smoking Status of
# 'Current'; 'No' otherwise. Built as one boolean mask over the two columns
# and then translated to the labels.
df['Lifestyle Modification Necessity'] = (
    (df['Risk Score'] >= 2) | (df['Smoking Status'] == 'Current')
).map({True: 'Yes', False: 'No'})

In [20]:
# 7. Treatment Effectiveness Category
def treatment_effectiveness(row):
    """Rate a patient row's treatment as "High", "Moderate" or "Low".

    Only two pairings are rated above "Low":
      * 'Typical Angina' treated with 'Lifestyle Changes' -> "High"
      * 'Atypical Angina' treated with 'Medication'       -> "Moderate"

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'Chest Pain Type'; 'Treatment' is read only when the
        chest pain type is one of the two listed above.

    Returns
    -------
    str
        "High", "Moderate" or "Low".
    """
    # chest pain type -> (treatment that earns a rating, the rating it earns)
    rated_pairings = {
        'Typical Angina': ('Lifestyle Changes', "High"),
        'Atypical Angina': ('Medication', "Moderate"),
    }
    pairing = rated_pairings.get(row['Chest Pain Type'])
    if pairing is not None:
        matching_treatment, rating = pairing
        if row['Treatment'] == matching_treatment:
            return rating
    return "Low"

# Row-wise (axis=1): the category is decided from 'Chest Pain Type' and 'Treatment' together.
df['Treatment Effectiveness Category'] = df.apply(treatment_effectiveness, axis=1)

In [21]:
# 8. Create a Unique Identifier (Patient ID)
# IDs are 1-based and follow the current row order of df, so they are only
# reproducible while the input file's row order stays the same.
df['Patient ID'] = range(1, len(df) + 1)

In [22]:
# Save the updated dataframe to a new CSV file
output_path = '../Data/heart_attack_dataset_updated.csv'  # Update this path if needed
# index=False keeps the DataFrame's row index out of the written file.
df.to_csv(output_path, index=False)

print("New columns added and saved to", output_path)

New columns added and saved to ../Data/heart_attack_dataset_updated.csv
