In [13]:
library(tidyverse)
library(modeldata)
library(leaps)
library(nnet)
library(caret)
library(corrplot)
library(ISLR)
require(boot)
library(pROC)

-- [1mAttaching core tidyverse packages[22m ------------------------ tidyverse 2.0.0 --
[32mv[39m [34mdplyr    [39m 1.1.2     [32mv[39m [34mreadr    [39m 2.1.4
[32mv[39m [34mforcats  [39m 1.0.0     [32mv[39m [34mstringr  [39m 1.5.0
[32mv[39m [34mlubridate[39m 1.9.2     [32mv[39m [34mtibble   [39m 3.2.1
[32mv[39m [34mpurrr    [39m 1.0.2     [32mv[39m [34mtidyr    [39m 1.3.0
-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mcombine()[39m  masks [34mrandomForest[39m::combine()
[31mx[39m [34mdplyr[39m::[32mfilter()[39m   masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m      masks [34mstats[39m::lag()
[31mx[39m [34mpurrr[39m::[32mlift()[39m     masks [34mcaret[39m::lift()
[31mx[39m [34mggplot2[39m::[32mmargin()[39m masks [34mrandomForest[39m::margin()
[36mi[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>

In [7]:
# import the data from the file

data <- read.csv("data/clean_data/for_model_kidney_disease_multi.csv")
head(data)

# transform columns into factors
data$class <- as.factor(data$class)

Unnamed: 0_level_0,age,sg,al,su,bgr,bu,sod,pot,hemo,pcv,...,pc,pcc,ba,htn,dm,cad,appet,pe,ane,class
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,...,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
1,48,1.02,1,0,121.0,36,137.53,4.63,15.4,44,...,2,1,1,2,2,1,1,1,1,1
2,7,1.02,4,0,148.04,18,137.53,4.63,11.3,38,...,2,1,1,1,1,1,1,1,1,1
3,62,1.01,2,3,423.0,53,137.53,4.63,9.6,31,...,2,1,1,1,2,1,2,1,2,1
4,48,1.005,4,0,117.0,56,111.0,2.5,11.2,32,...,1,2,1,2,1,1,2,2,2,2
5,51,1.01,2,0,106.0,26,137.53,4.63,11.6,35,...,2,1,1,1,1,1,1,1,1,1
6,60,1.015,3,0,74.0,25,142.0,3.2,12.2,39,...,2,1,1,2,2,1,1,2,1,1


In [8]:
# create a multi-class logistic regression model
library(nnet)
# split the data into training and testing sets
set.seed(123)
# load the required library
library(caret)
trainIndex <- createDataPartition(
  data$class,
  p = .8,
  list = FALSE,
  times = 1
)
data_train <- data[trainIndex, ]
data_test <- data[-trainIndex, ]

# fit the model
model <- multinom(class ~ ., data = data_train)
summary(model)

# make predictions
predictions <- predict(model, data_test)
head(predictions)

# distribution of the predictions and the actual values
table(predictions, data_test$class)

# calculate the accuracy of the model
accuracy <- sum(predictions == data_test$class) / nrow(data_test)

# print the accuracy
accuracy

# weights:  72 (46 variable)
initial  value 352.654545 
iter  10 value 268.218914
iter  20 value 158.466938
iter  30 value 112.782411
iter  40 value 101.671721
iter  50 value 91.419456
iter  60 value 90.564696
iter  70 value 90.321824
iter  80 value 90.134663
iter  90 value 89.806817
iter 100 value 89.696320
final  value 89.696320 
stopped after 100 iterations


Call:
multinom(formula = class ~ ., data = data_train)

Coefficients:
  (Intercept)        age        sg       al        su        bgr         bu
1    42.70135 0.06326376 50.221947 18.51979 -1.353595 0.07423740 0.08962598
2    91.41325 0.07297753  6.614984 18.48372 -1.345413 0.07744796 0.16761114
         sod       pot       hemo        pcv          wbcc      rbcc       rbc
1 -0.2967963 -1.656331 -0.7216016 -0.6778806 -1.414049e-05 -1.572802 -6.927593
2 -0.3348193 -1.706501 -1.2528211 -0.5537296  7.078045e-05 -1.901007 -7.302573
         pc       pcc       ba      htn       dm       cad     appet       pe
1 -11.84251 -20.11621 13.60962 1.701821 4.942768 -8.636185  9.189753 3.379109
2 -11.72347 -20.66535 13.70993 1.634168 4.113746 -8.046486 10.201541 2.664608
       ane
1 17.39607
2 17.12071

Std. Errors:
   (Intercept)        age           sg         al         su        bgr
1 0.0001836983 0.02417805 0.0001923329 0.07469432 0.01105311 0.02168705
2 0.0001775143 0.02562118 0.0001866783 0

           
predictions  0  1  2
          0 28  1  1
          1  1 14  5
          2  0  6 23

In [41]:
# use ann model
library(nnet)
set.seed(123)

model <- nnet(
  class ~ .,
  data = data_train,
  size = 10,
  maxit = 1000
)
# get levels of the class variable
levels(data_train$class)
levels(data_test$class)

# make predictions
predictions <- predict(model, data_test, type = "class")

# distribution of the predictions and the actual values
table(predictions, data_test$class)

# calculate the accuracy of the model
accuracy <- sum(predictions == data_test$class) / nrow(data_test)

# print the accuracy
accuracy

# weights:  263
initial  value 483.034171 
final  value 349.741346 
converged


           
predictions  0  1  2
          2 29 21 29

In [4]:
# use random forest model
library(randomForest)

set.seed(123)

model <- randomForest(
  class ~ .,
  data = data_train,
  ntree = 500
)

# make predictions
predictions <- predict(model, data_test)

# distribution of the predictions and the actual values
table(predictions, data_test$class)

# calculate the accuracy of the model
accuracy <- sum(predictions == data_test$class) / nrow(data_test)

# print the accuracy
accuracy

           
predictions  0  1  2
          0 29  0  1
          1  0 19  7
          2  0  2 21

In [22]:
# SVM
library(e1071)

set.seed(123)

model <- svm(
  class ~ .,
  data = data_train,
  kernel = "linear"
)

# make predictions
predictions <- predict(model, data_test)

# distribution of the predictions and the actual values
table(predictions, data_test$class)

# calculate the accuracy of the model
accuracy <- sum(predictions == data_test$class) / nrow(data_test)

# print the accuracy
accuracy


           
predictions  0  1  2
          0 28  2  1
          1  1 13  6
          2  0  6 22