In [None]:
library(tidyverse)
library(repr)
library(tidymodels)
library(GGally)
library(ISLR)

In [None]:

# Load the processed Cleveland heart-disease file from the UCI repository.
# The file is read without a header row: the 14 column names are supplied
# explicitly below.
# "?" is added to the default missing-value markers so that any column using
# it is parsed as numeric with NA instead of as text.
# NOTE(review): the UCI description of this file reports "?" entries in the
# ca and thal columns -- confirm against the downloaded data.
rd <- read_csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data",
  col_names = c(
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal",
    "the_predicted_attribute"
  ),
  na = c("", "NA", "?")
)
rd
# Keep the outcome column plus the six columns used in the rest of the
# analysis.
HD_selected <- rd %>%
  select(age, sex, trestbps, chol, thalach, cp, the_predicted_attribute)

# Recode the outcome: 0 becomes "Negative", every other value "Positive",
# and the result is stored as a factor.
# cp stays numeric here; add mutate(cp = as.factor(cp)) to treat it as
# categorical.
HD_mutated <- mutate(
  HD_selected,
  the_predicted_attribute = as.factor(
    ifelse(the_predicted_attribute == 0, "Negative", "Positive")
  )
)
HD_mutated
# Split the recoded data by the sex column (the summary table in this file
# labels sex == 1 as "Male" and everything else as "Female").
# The column is compared with numbers rather than the strings "1"/"0" so the
# filter does not depend on implicit number-to-text coercion.
HD_mutated_male <- HD_mutated %>%
  filter(sex == 1)
HD_mutated_female <- HD_mutated %>%
  filter(sex == 0)
# Fix the random seed BEFORE splitting: initial_split() draws random numbers,
# so without a seed the train/test partition (and every result computed from
# it) changes on each run.
set.seed(7594)

# Hold out one quarter of the rows for testing, stratified on the outcome so
# both parts keep a similar Negative/Positive balance.
HD_split <- initial_split(
  HD_mutated,
  prop = 3 / 4,
  strata = the_predicted_attribute
)
HD_train <- training(HD_split)
HD_test <- testing(HD_split)
# Mean chol, thalach and trestbps for every sex / outcome combination.
# The summary columns keep the expression text as their names, and sex is
# relabelled afterwards: 1 -> "Male", anything else -> "Female".
HD_summarize <- HD_mutated %>%
  group_by(sex, the_predicted_attribute) %>%
  summarise(
    `mean(chol)` = mean(chol),
    `mean(thalach)` = mean(thalach),
    `mean(trestbps)` = mean(trestbps)
  ) %>%
  mutate(sex = ifelse(sex == 1, "Male", "Female"))
HD_summarize
# Mean and median of trestbps within each outcome class.
HD_summarize_2 <- summarise(
  group_by(HD_mutated, the_predicted_attribute),
  mean = mean(trestbps),
  median = median(trestbps)
)
HD_summarize_2

In [None]:
# Distribution of chol, one histogram panel per outcome class.
HD_mutated %>%
  ggplot(aes(x = chol, fill = the_predicted_attribute)) +
  geom_histogram() +
  facet_grid(rows = vars(the_predicted_attribute))

# Mean and median of chol within each outcome class, shown after the plot.
HD_summarize_3 <- summarise(
  group_by(HD_mutated, the_predicted_attribute),
  mean = mean(chol),
  median = median(chol)
)
HD_summarize_3

In [None]:
# Mean and median of chol within each outcome class.
HD_summarize_3 <- summarise(
  group_by(HD_mutated, the_predicted_attribute),
  mean = mean(chol),
  median = median(chol)
)
HD_summarize_3

# Extract the median chol of the "Negative" class as a plain value.
HD_summarize_3 %>%
  filter(the_predicted_attribute == "Negative") %>%
  pull(median)

In [None]:
# Distribution of trestbps, one histogram panel per outcome class.
HD_mutated %>%
  ggplot(aes(x = trestbps, fill = the_predicted_attribute)) +
  geom_histogram() +
  facet_grid(rows = vars(the_predicted_attribute))

In [None]:
# Distribution of thalach, one histogram panel per outcome class.
HD_mutated %>%
  ggplot(aes(x = thalach, fill = the_predicted_attribute)) +
  geom_histogram() +
  facet_grid(rows = vars(the_predicted_attribute))

# Mean and median of thalach within each outcome class.
# Note: this overwrites the HD_summarize_3 table computed earlier for chol.
HD_summarize_3 <- summarise(
  group_by(HD_mutated, the_predicted_attribute),
  mean = mean(thalach),
  median = median(thalach)
)
HD_summarize_3

In [None]:
# Count of observations at each cp value, stacked by outcome class.
# cp holds a handful of discrete codes rather than a continuous measurement,
# so a bar chart (one bar per distinct value) is used instead of a histogram
# with the default 30 bins.
# NOTE(review): assumes cp only takes a few integer codes -- confirm.
ggplot(HD_mutated, aes(x = cp, fill = the_predicted_attribute)) +
  geom_bar()

In [None]:
# Tune the number of neighbours of a K-nearest-neighbours classifier with
# 5-fold cross-validation on the training set.

# Seed the random number generator so the fold assignment below is repeatable.
set.seed(7594)

# Preprocessing: predict the outcome from five columns, scaling and then
# centring every predictor.
HD_recipe <- recipe(
  the_predicted_attribute ~ age + sex + thalach + chol + cp,
  data = HD_train
) %>%
  step_scale(all_predictors()) %>%
  step_center(all_predictors())

# Model: unweighted ("rectangular") KNN; neighbors is a tune() placeholder
# that tune_grid() fills in from the grid below.
HD_spec <- nearest_neighbor(weight_func = "rectangular", neighbors = tune()) %>%
  set_engine("kknn") %>%
  set_mode("classification")
HD_spec

# Five cross-validation folds, stratified on the outcome.
HD_vfold <- vfold_cv(HD_train, v = 5, strata = the_predicted_attribute)

# Evaluate every neighbour count from 1 to 60 on the folds and collect the
# resulting metrics into one table.
HD_fit <- workflow() %>%
  add_recipe(HD_recipe) %>%
  add_model(HD_spec) %>%
  tune_grid(resamples = HD_vfold, grid = tibble(neighbors = seq_len(60))) %>%
  collect_metrics()
HD_fit

In [None]:
# Line plot of the mean cross-validated accuracy against the number of
# neighbours, using only the "accuracy" rows of the tuning results.
accuracy_graph <- ggplot(
  filter(HD_fit, .metric == "accuracy"),
  aes(x = neighbors, y = mean)
) +
  geom_line()
accuracy_graph

In [None]:
# Final model: unweighted ("rectangular") KNN with the neighbour count fixed
# at 20 (a literal value here, not a tune() placeholder).
HD_best_spec <- nearest_neighbor(weight_func = "rectangular", neighbors = 20) %>%
  set_engine("kknn") %>%
  set_mode("classification")
HD_best_spec

# Re-create the 5-fold split of the training set. The fit below is trained on
# HD_train directly and does not use these folds.
HD_vfold <- vfold_cv(HD_train, v = 5, strata = the_predicted_attribute)

# Combine the preprocessing recipe with the fixed-k model and train it on the
# whole training set.
HD_best_fit <- fit(
  workflow() %>%
    add_recipe(HD_recipe) %>%
    add_model(HD_best_spec),
  data = HD_train
)
HD_best_fit

In [None]:
# Predict the outcome for every test row and put the predictions
# (.pred_class) in front of the original test columns.
HD_test_predictions <- bind_cols(predict(HD_best_fit, HD_test), HD_test)

# Metrics comparing the predicted class with the true outcome on the test set.
HD_prediction_accuracy <- metrics(
  HD_test_predictions,
  truth = the_predicted_attribute,
  estimate = .pred_class
)
HD_prediction_accuracy

# Confusion matrix of true versus predicted classes.
HD_test_predictions %>%
  conf_mat(truth = the_predicted_attribute, estimate = .pred_class)

In [None]:
# Scatter plot of the test set (chol against thalach) comparing the true and
# the predicted outcome of every observation.
#   * small points: the true class from HD_test
#   * large translucent points drawn on top: the class predicted by the model
#     (.pred_class in HD_test_predictions)
# A point whose small and large markers have different colours was
# misclassified.
#
# Fixes relative to the earlier version:
#   * the colour scale is now joined to the plot with `+`; previously it was
#     a separate statement and never applied;
#   * the prediction layer is coloured by .pred_class instead of repeating
#     the true label, so the two layers actually differ;
#   * colours are assigned by level name, so "Positive" is always orange and
#     "Negative" always steelblue regardless of factor level order.
graph_analysis <- ggplot() +
  geom_point(
    data = HD_test,
    mapping = aes(x = chol, y = thalach, color = the_predicted_attribute),
    alpha = 0.75
  ) +
  geom_point(
    data = HD_test_predictions,
    mapping = aes(x = chol, y = thalach, color = .pred_class),
    alpha = 0.5,
    size = 5
  ) +
  scale_color_manual(
    values = c(Positive = "orange", Negative = "steelblue")
  ) +
  labs(color = "Diagnosis")
graph_analysis