In [1]:
#Installing Nececcary Libraries With Messages Supressed
suppressPackageStartupMessages({
    library(tidyverse)
    library(repr)
    library(tidymodels)
})

options(repr.matrix.max.rows = 10)

#Set seed to make random results reproducible 
set.seed(13) 

In [2]:
url <- "https://archive.ics.uci.edu/static/public/45/heart+disease.zip"
temp <- tempfile()
download.file(url, temp)
data.hungary <- read.csv(unz(temp, "processed.hungarian.data"))
unlink(temp)

In [3]:
formatted_heart_disease<-data.hungary |> 
    rename(age = X28, sex = X1, chest_pain_type = X2, resting_bp = X130, 
        cholesterol = X132, fasting_blood_sugar = X0, resting_ekg = X2.1, max_hr = X185, exercise_induced_angina = X0.1, 
        ST_drop_exercise = X0.2, ST_slope = X., fluoroscopy_count = X..1, thalassemia_type = X..2, vessel_narrow = X0.3) |>
    transform(resting_bp = as.integer(resting_bp), cholesterol = as.integer(cholesterol), 
        fasting_blood_sugar = as.factor(fasting_blood_sugar), resting_ekg = as.factor(resting_ekg), max_hr = as.integer(max_hr), 
        exercise_induced_angina = as.factor(exercise_induced_angina), ST_slope = as.factor(ST_slope),fluoroscopy_count = as.integer(fluoroscopy_count),
        thalassemia_type = as.factor(thalassemia_type), vessel_narrow = as.factor(vessel_narrow), sex=as.factor(sex),
        chest_pain_type=as.factor(chest_pain_type))

formatted_heart_disease

“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”
“NAs introduced by coercion”


age,sex,chest_pain_type,resting_bp,cholesterol,fasting_blood_sugar,resting_ekg,max_hr,exercise_induced_angina,ST_drop_exercise,ST_slope,fluoroscopy_count,thalassemia_type,vessel_narrow
<int>,<fct>,<fct>,<int>,<int>,<fct>,<fct>,<int>,<fct>,<dbl>,<fct>,<int>,<fct>,<fct>
29,1,2,120,243,0,0,160,0,0,?,,?,0
29,1,2,140,,0,0,170,0,0,?,,?,0
30,0,1,170,237,0,1,170,0,0,?,,6,0
31,0,2,100,219,0,1,150,0,0,?,,?,0
32,0,2,105,198,0,0,165,0,0,?,,?,0
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
52,1,4,160,331,0,0,94,1,2.5,?,,?,1
54,0,3,130,294,0,1,100,1,0.0,2,,?,1
56,1,4,155,342,1,0,150,1,3.0,2,,?,1
58,0,2,180,393,0,0,110,1,1.0,2,,7,1


In [4]:
#According to the database: sex (1 = male; 0 = female);

#According to the database: chest pain type
        # -- Value 1: typical angina
        # -- Value 2: atypical angina
        # -- Value 3: non-anginal pain
        # -- Value 4: asymptomatic
#According to the database:exercise induced angina (1 = yes; 0 = no)

#According to the database: resting electrocardiographic results
        # -- Value 0: normal
        # -- Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
        # -- Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria
#According to the database: (fasting blood sugar > 120 mg/dl)  (1 = true; 0 = false)

#According to the database: slope: the slope of the peak exercise ST segment
#         -- Value 1: upsloping
#         -- Value 2: flat
#         -- Value 3: downsloping
#Refactoring the above categorical variables
#According to the database: thal: 3 = normal; 6 = fixed defect; 7 = reversable defect
#According to the database: num: diagnosis of heart disease (angiographic disease status)
        # -- Value 0: < 50% diameter narrowing
        # -- Value 1: > 50% diameter narrowing
        # (in any major vessel: attributes 59 through 68 are vessels)

formatted_heart_disease<-formatted_heart_disease|>
    mutate(sex=fct_recode(sex, "male"="1", "female"="0"
    ))|>
    mutate(chest_pain_type=fct_recode(chest_pain_type,
        "typical_angina"="1",
        "atypical_angina"="2",
        "non_anginal_pain"="3",
        "asymptomatic"="4"
    ))|>
    mutate(exercise_induced_angina=fct_recode(exercise_induced_angina,
        "yes"="1",
        "no"="0"
    ))|>
    mutate(resting_ekg=fct_recode(resting_ekg,
        "normal"="0",
        "ST-T_wave_abnormality"="1",
        "left_ventricular_hypertrophy"="2"
    ))|>
    mutate(fasting_blood_sugar=fct_recode(fasting_blood_sugar,
        "above120mg/dl"="1",
        "below120mg/dl"="0"
    ))|>
    mutate(ST_slope=fct_recode(ST_slope,
        "upsloping"="1",
        "flat"="2",
        "downsloping"="3"
    ))|>
    mutate(thalassemia_type=fct_recode(thalassemia_type,
        "normal"="3",
        "fixed_defect"="6",
        "reversable_defect"="7")
    )|>
    mutate(vessel_narrow=fct_recode(vessel_narrow,
        "below_50%of_d"="0",
        "above_50%of_d"="1"))
        
    
        
formatted_heart_disease


age,sex,chest_pain_type,resting_bp,cholesterol,fasting_blood_sugar,resting_ekg,max_hr,exercise_induced_angina,ST_drop_exercise,ST_slope,fluoroscopy_count,thalassemia_type,vessel_narrow
<int>,<fct>,<fct>,<int>,<int>,<fct>,<fct>,<int>,<fct>,<dbl>,<fct>,<int>,<fct>,<fct>
29,male,atypical_angina,120,243,below120mg/dl,normal,160,no,0,?,,?,below_50%of_d
29,male,atypical_angina,140,,below120mg/dl,normal,170,no,0,?,,?,below_50%of_d
30,female,typical_angina,170,237,below120mg/dl,ST-T_wave_abnormality,170,no,0,?,,fixed_defect,below_50%of_d
31,female,atypical_angina,100,219,below120mg/dl,ST-T_wave_abnormality,150,no,0,?,,?,below_50%of_d
32,female,atypical_angina,105,198,below120mg/dl,normal,165,no,0,?,,?,below_50%of_d
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
52,male,asymptomatic,160,331,below120mg/dl,normal,94,yes,2.5,?,,?,above_50%of_d
54,female,non_anginal_pain,130,294,below120mg/dl,ST-T_wave_abnormality,100,yes,0.0,flat,,?,above_50%of_d
56,male,asymptomatic,155,342,above120mg/dl,normal,150,yes,3.0,flat,,?,above_50%of_d
58,female,atypical_angina,180,393,below120mg/dl,normal,110,yes,1.0,flat,,reversable_defect,above_50%of_d
