-
Notifications
You must be signed in to change notification settings - Fork 30
/
logistic_regression.R
64 lines (44 loc) · 1.5 KB
/
logistic_regression.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# LOAD DATA
# Load the Default dataset shipped with the ISLR package.
data <- ISLR::Default
# View a summary of the dataset and its total number of observations.
summary(data)
nrow(data)
# CREATE TRAINING AND TESTING SAMPLES
# Fix the RNG seed so the random split below is reproducible.
set.seed(1)
# Flag every row independently as training (TRUE, probability 0.7) or testing
# (FALSE, probability 0.3); the split is therefore roughly, not exactly, 70/30.
# The mask is called `in_train` rather than `sample` so that it does not
# shadow base::sample() in the workspace.
in_train <- sample(
  c(TRUE, FALSE), nrow(data),
  replace = TRUE, prob = c(0.7, 0.3)
)
train <- data[in_train, ]
test <- data[!in_train, ]
# FIT THE LOGISTIC REGRESSION MODEL
# Model `default` from student status, balance and income, using the training
# rows only.
model <- glm(
  default ~ student + balance + income,
  family = "binomial",
  data = train
)
# Disable scientific notation for the model summary.
# NOTE: options() changes a session-wide setting that stays in effect after
# this script finishes.
options(scipen = 999)
# View the model summary.
summary(model)
# Calculate McFadden's R-squared.
pscl::pR2(model)["McFadden"]
# Calculate variable importance. The namespace operator is `::`; a single `:`
# is the sequence operator and fails because no object named `caret` exists.
caret::varImp(model)
# Calculate VIF values.
car::vif(model)
# USE MODEL TO MAKE PREDICTIONS
# Response-scale predictions (type = "response") for the held-out test rows.
predicted <- predict(model, test, type = "response")
# MODEL DIAGNOSTICS
library(InformationValue)
# Convert defaults from "Yes" and "No" to 1's and 0's. The column is
# overwritten in place, so the conversion is skipped when it is already
# numeric; re-running this section would otherwise compare 0/1 against "Yes"
# and turn every label into 0.
if (!is.numeric(test$default)) {
  test$default <- ifelse(test$default == "Yes", 1, 0)
}
# Find the optimal cutoff probability to use to maximize accuracy.
optimal <- optimalCutoff(test$default, predicted)[1]
optimal
# Every metric below is evaluated at the `optimal` cutoff so that they all
# describe the same classifier. Without an explicit `threshold` these
# functions use their own default cutoff instead of the one computed above.
# Create the confusion matrix.
confusionMatrix(test$default, predicted, threshold = optimal)
# Calculate sensitivity.
sensitivity(test$default, predicted, threshold = optimal)
# Calculate specificity.
specificity(test$default, predicted, threshold = optimal)
# Calculate the total misclassification error rate.
misClassError(test$default, predicted, threshold = optimal)
# Plot the ROC curve (threshold-independent).
plotROC(test$default, predicted)