# Anomaly Detection

# Mary Donovan Martello

## Part 7:  Use K Means Clusters as an Input Feature in a Supervised Feature.

In [147]:
# Importing required libraries
library(dplyr)
library(caret)
library(ggplot2)
library(caTools)
library(ROSE)
library(smotefamily)
library(rpart)
library(rpart.plot)
library(psych)
library(ltm)
library(corrplot)
library(e1071)
library(data.table)
library(factoextra)
library(NbClust)
library(rminer)
library(MLmetrics)


suppressMessages(library(dplyr))
suppressMessages(library(caTools))
suppressMessages(library(ROSE))
suppressMessages(library(smotefamily))
suppressMessages(library(rpart.plot))
suppressMessages(library(psych))
suppressMessages(library(ltm))
suppressMessages(library(corrplot))
suppressMessages(library(e1071))
suppressMessages(library(data.table))
suppressMessages(library(factoextra))
suppressMessages(library(NbClust))
suppressMessages(library(rminer))
suppressMessages(library(MLmetrics))

In [3]:
#Loading the dataset
dfAll <- read.csv('creditFraud20.csv')

### Create Subset of Features from Feature Selection

In [4]:
dfSub <- dfAll[, c(5:5, 11:11, 13:13, 15:15, 18:18, 31:31)]
head(dfSub, 2)

V4,V10,V12,V14,V17,Class
-2.3710149,-0.3395744,-0.8130265,-1.17911,0.5734176,0
-0.7282708,-1.0898177,0.922056,-0.2163698,-0.1641099,0


### Data Preparation

### Drop Non-Normal Distribution Features

In [5]:
df <- dfAll[, c(3:24, 26:31)]
head(df, 2)

V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,...,V20,V21,V22,V23,V25,V26,V27,V28,Amount,Class
-0.3135721,-0.8799915,-2.3710149,1.83282,3.3410355,-0.8045669,0.01251665,-0.9891706,-0.3395744,-0.05770153,...,0.1487577,0.674059981,-0.8824945,-0.26334793,1.0777848,-0.3662552,0.13243425,0.22821326,80.08,0
-0.3239156,0.9178714,-0.7282708,-1.180023,-0.8173157,-0.5494987,-0.05259299,1.7699065,-1.0898177,-0.38001273,...,-0.1039912,-0.008405836,0.2422148,-0.03969362,0.5116329,-0.6877417,0.09587039,0.03602943,1.0,0


In [6]:
dim(df)

**Split the Data into Training and Testing Sets**

**> Full Normal Dataset**

In [7]:
# set.seed(102) 
sample = sample.split(df$Amount, SplitRatio = .80)
Train = subset(df, sample == TRUE)
Test  = subset(df, sample == FALSE)


In [98]:
dim(df)

In [102]:
dim(Test)

In [28]:
head(Train, 2)

Unnamed: 0,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,...,V20,V21,V22,V23,V25,V26,V27,V28,Amount,Class
1,-0.3135721,-0.8799915,-2.371015,1.83282,3.3410355,-0.8045669,0.01251665,-0.9891706,-0.3395744,-0.05770153,...,0.14875768,0.67406,-0.8824945,-0.2633479,1.0777848,-0.36625523,0.1324342,0.2282133,80.08,0
3,0.7837216,0.7148632,-1.068535,1.238372,0.4431591,0.8106104,0.13607391,-0.7054481,-0.6262094,0.29127375,...,-0.07905931,-0.0511307,-0.1710939,-0.1074708,-0.5394949,-0.04552565,0.1218609,0.1478635,11.09,0


In [29]:
dim(Train)

In [8]:
# # drop the target variable
XTrain <- Train[, c(1:27)]
yTrain <- subset(Train, select=c("Class"))

XTest <- Test[, c(1:27)]
yTest <- subset(Test, select=c("Class"))

In [96]:
dim(Test)

**> Select Subset**

In [99]:
# set.seed(103) 
sample2 = sample.split(dfSub$V4, SplitRatio = .80)
TrainSub = subset(dfSub, sample2 == TRUE)
TestSub  = subset(dfSub, sample2 == FALSE)

In [97]:
dim(dfSub)

In [100]:
yTrainSub <- subset(TrainSub, select=c("Class"))
yTestSub <- subset(TestSub, select=c("Class"))
XTrainSub <- TrainSub[, c(1:5)]
XTestSub <- TestSub[, c(1:5)]

In [101]:
dim(TestSub)

### Modeling

**K Means Models (number of clusters determined in file 6_KMeansPlotClusters)**

**> Full Normal Dataset**

In [11]:
# Compute k-means with k = 10
set.seed(567)
resultK10 <- kmeans(XTrain, 10, iter.max = 15, nstart = 50)


In [12]:
# Cluster size
resultK10$size

In [13]:
table(resultK10$cluster, yTrain$Class)

    
         0     1
  1     37     0
  2   9419    15
  3      4     0
  4     88     0
  5   2034     5
  6    247     1
  7    602     1
  8   4334     4
  9   1078     4
  10 29742    61

In [46]:
# Add the clusters as features to the original data 
# Train instead of XTrain for for training the model
clustDf <- cbind(Train, kclusters = resultK10$cluster)
head(clustDf, 2)

Unnamed: 0,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,...,V21,V22,V23,V25,V26,V27,V28,Amount,Class,kclusters
1,-0.3135721,-0.8799915,-2.371015,1.83282,3.3410355,-0.8045669,0.01251665,-0.9891706,-0.3395744,-0.05770153,...,0.67406,-0.8824945,-0.2633479,1.0777848,-0.36625523,0.1324342,0.2282133,80.08,0,2
3,0.7837216,0.7148632,-1.068535,1.238372,0.4431591,0.8106104,0.13607391,-0.7054481,-0.6262094,0.29127375,...,-0.0511307,-0.1710939,-0.1074708,-0.5394949,-0.04552565,0.1218609,0.1478635,11.09,0,10


In [31]:
dim(clustDf)

In [54]:
# drop the target variable
XclustDf <- clustDf[, c(1:27, 29:29)]
dim(XclustDf)

**> Selected Subset**

In [15]:
# Compute k-means with k = 2
set.seed(567)
resultK2sub <- kmeans(XTrainSub, 2, iter.max = 15, nstart = 50)

In [16]:
# Cluster size
resultK2sub$size

In [17]:
table(resultK2sub$cluster, yTrainSub$Class)

   
        0     1
  1 23186    79
  2 22303     0

In [47]:
# Add the cluster classifications to the original data
# TrainSub instead of XTrainSub
clustDfSub <- cbind(TrainSub, kclusters = resultK2sub$cluster)
head(clustDfSub, 2)

Unnamed: 0,V4,V10,V12,V14,V17,Class,kclusters
1,-2.371015,-0.3395744,-0.8130265,-1.17911,0.57341763,0,2
4,2.813508,0.8926488,0.8545856,-0.5980116,0.04031759,0,1


In [56]:
# drop the target variable
XclustDfSub <- clustDfSub[, c(1:5, 7:7)]
dim(XclustDfSub)

## Compare with Supervised Model

**Supervised Classification DataSet**

In [148]:
# import full dataset to be underbalanced
underDf <- read.csv('creditcard.csv')

In [149]:
head(underDf, 3)

Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,-1.359807,-0.07278117,2.5363467,1.3781552,-0.33832077,0.46238778,0.23959855,0.0986979,0.363787,...,-0.01830678,0.2778376,-0.1104739,0.06692807,0.1285394,-0.1891148,0.133558377,-0.02105305,149.62,0
0,1.191857,0.26615071,0.1664801,0.4481541,0.06001765,-0.08236081,-0.07880298,0.08510165,-0.2554251,...,-0.22577525,-0.638672,0.101288,-0.33984648,0.1671704,0.1258945,-0.008983099,0.01472417,2.69,0
1,-1.358354,-1.34016307,1.7732093,0.3797796,-0.50319813,1.80049938,0.79146096,0.24767579,-1.5146543,...,0.24799815,0.7716794,0.9094123,-0.68928096,-0.3276418,-0.1390966,-0.055352794,-0.05975184,378.66,0


In [150]:
dim(underDf)

**> Full Dataset**

In [151]:
# dataframe with original labels
dfClass <- underDf[, c(3:24, 26:31)]
head(underDf, 3)

Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,-1.359807,-0.07278117,2.5363467,1.3781552,-0.33832077,0.46238778,0.23959855,0.0986979,0.363787,...,-0.01830678,0.2778376,-0.1104739,0.06692807,0.1285394,-0.1891148,0.133558377,-0.02105305,149.62,0
0,1.191857,0.26615071,0.1664801,0.4481541,0.06001765,-0.08236081,-0.07880298,0.08510165,-0.2554251,...,-0.22577525,-0.638672,0.101288,-0.33984648,0.1671704,0.1258945,-0.008983099,0.01472417,2.69,0
1,-1.358354,-1.34016307,1.7732093,0.3797796,-0.50319813,1.80049938,0.79146096,0.24767579,-1.5146543,...,0.24799815,0.7716794,0.9094123,-0.68928096,-0.3276418,-0.1390966,-0.055352794,-0.05975184,378.66,0


**> Selected Subset**

In [152]:
dfSubClass <- underDf[, c(5:5, 11:11, 13:13, 15:15, 18:18, 31:31)]
head(dfSubClass, 2)

V4,V10,V12,V14,V17,Class
1.3781552,0.09079417,-0.6178009,-0.3111694,0.2079712,0
0.4481541,-0.16697441,1.0652353,-0.1437723,-0.1148047,0


**Split the Data into Training and Test Data**

**> Full Normal Data**

In [153]:
set.seed(102) 
sample3 = sample.split(dfClass$Amount, SplitRatio = .80)
TrainClass = subset(dfClass, sample3 == TRUE)
TestClass  = subset(dfClass, sample3 == FALSE)



**> Select Subset**

In [154]:
set.seed(103) 
sample4 = sample.split(dfSubClass$V4, SplitRatio = .80)
TrainClassSub = subset(dfSubClass, sample4 == TRUE)
TestClassSub  = subset(dfSubClass, sample4 == FALSE)


**Undersample Training Data Only**

**> Full Normal Dataset**

In [155]:
table(TrainClass$Class)


     0      1 
231496    419 

In [157]:
# Random Under-Sampling (RUS)

# set the number of non-fraud records to under sample to
n_fraud <- 419
new_frac_fraud <- 0.50
new_n_total <- n_fraud/new_frac_fraud

undersampling_result <- ovun.sample(Class ~ .,
                                   data = TrainClass,
                                   method = "under",
                                   N = new_n_total,
                                   seed =123)

TrainClass <- undersampling_result$data

table(TrainClass$Class)


  0   1 
419 419 

In [159]:
dim(TrainClass)

**> Select Features Subset**

In [156]:
table(TrainClassSub$Class)


     0      1 
227453    392 

In [160]:
# Random Under-Sampling (RUS)

# set the number of non-fraud records to under sample to
n_fraud <- 392
new_frac_fraud <- 0.50
new_n_total <- n_fraud/new_frac_fraud

undersampling_result <- ovun.sample(Class ~ .,
                                   data = TrainClassSub,
                                   method = "under",
                                   N = new_n_total,
                                   seed =123)

TrainClassSub <- undersampling_result$data

table(TrainClassSub$Class)


  0   1 
392 392 

In [161]:
dim(TrainClassSub)

**Separate Predictor and Dependent Features**

**> Full Normal Dataset**

In [162]:
# drop the target variable
XTrainClass <- TrainClass[, c(1:27)]
yTrainClass <- subset(TrainClass, select=c("Class"))

XTestClass <- TestClass[, c(1:27)]
yTestClass <- subset(TestClass, select=c("Class"))

**> Select Features Subset**

In [163]:
# drop the target variable
yTrainClassSub <- subset(TrainClassSub, select=c("Class"))
yTestClassSub <- subset(TestClassSub, select=c("Class"))
XTrainClassSub <- TrainClassSub[, c(1:5)]
XTestClassSub <- TestClassSub[, c(1:5)]

### Modeling

**> Full Normal Dataset**

In [49]:
# LR model with the kmeans cluster result as the target variable
lrModelClust <- glm(Class ~ ., data = clustDf, family = binomial(), maxit = 100)



"glm.fit: fitted probabilities numerically 0 or 1 occurred"

In [191]:
# LR model with original class labels as the target variable
lrModelOrig <- glm(Class ~ ., data = TrainClass, family = binomial(), maxit = 100)

"glm.fit: fitted probabilities numerically 0 or 1 occurred"

**> Selected Subset**

In [51]:
# LR model with the kmeans cluster result as the target variable
lrModelClustSub <- glm(Class ~ ., data = clustDfSub, family = binomial(), maxit = 100)



"glm.fit: fitted probabilities numerically 0 or 1 occurred"

In [193]:
# LR model with original class labels as the target variable
lrModelOrigSub <- glm(Class ~ ., data = TrainClassSub, family = binomial(), maxit = 100)

"glm.fit: fitted probabilities numerically 0 or 1 occurred"

### Predictions

**> Full Normal Data**

In [58]:
# predictions on training data

# make predictions on kmeans clusters as target variable
predLRClust <- predict(lrModelClust, XclustDf)

In [169]:
# make predictions on original target labels
predLROrig <- predict(lrModelOrig, XTrainClass)

**> Selected Subset**

In [63]:
# predictions on training data

# make predictions on kmeans clusters as target variable
predLRClustSub <- predict(lrModelClustSub, XclustDfSub)

In [171]:
# make predictions on original target labels
predLROrigSub <- predict(lrModelOrigSub, XTrainClassSub)

In [172]:
predLRClust <- as.numeric(as.character(predLRClust))

predLROrig <- as.numeric(as.character(predLROrig))

predLRClustSub <- as.numeric(as.character(predLRClustSub))

predLROrigSub <- as.numeric(as.character(predLROrigSub))

### Evaluate Training Models

**> Full Normal Dataset**

**Confusion Matrix**

In [65]:
# https://www.journaldev.com/47628/f1-score-in-r

#error metrics -- Confusion Matrix
err_metric=function(CM)
{
  TN =CM[1,1]
  TP =CM[2,2]
  FP =CM[1,2]
  FN =CM[2,1]
  precision =(TP)/(TP+FP)
  recall_score =(TP)/(TP+FN)
 
  f1_score=2*((precision*recall_score)/(precision+recall_score))
  accuracy_model  =(TP+TN)/(TP+TN+FP+FN)
  False_positive_rate =(FP)/(FP+TN)
  False_negative_rate =(FN)/(FN+TP)
 
  print(paste("Precision value of the model: ",round(precision,2)))
  print(paste("Accuracy of the model: ",round(accuracy_model,2)))
  print(paste("Recall value of the model: ",round(recall_score,2)))
  print(paste("False Positive rate of the model: ",round(False_positive_rate,2)))
 
  print(paste("False Negative rate of the model: ",round(False_negative_rate,2)))
 
  print(paste("f1 score of the model: ",round(f1_score,2)))
}



In [66]:
# make confusion matrix with kmean clusters as a feature
predLRClust <- ifelse(predLRClust > 0.5,1,0) # Probability check
CM= table(yTrain$Class, predLRClust)
print(CM)
err_metric(CM)

   predLRClust
        0     1
  0 47580     5
  1    30    61
[1] "Precision value of the model:  0.92"
[1] "Accuracy of the model:  1"
[1] "Recall value of the model:  0.67"
[1] "False Positive rate of the model:  0"
[1] "False Negative rate of the model:  0.33"
[1] "f1 score of the model:  0.78"


In [67]:
F1_Score(y_pred =predLRClust, y_true = yTrain$Class, positive = "1")
Recall(y_pred = predLRClust, y_true = yTrain$Class, positive = "1")

In [173]:
# make confusion matrix with 
predLROrig <- ifelse(predLROrig > 0.5,1,0) # Probability check
CM= table(yTrainClass$Class, predLROrig)
print(CM)
err_metric(CM)

   predLROrig
      0   1
  0 413   6
  1  33 386
[1] "Precision value of the model:  0.98"
[1] "Accuracy of the model:  0.95"
[1] "Recall value of the model:  0.92"
[1] "False Positive rate of the model:  0.01"
[1] "False Negative rate of the model:  0.08"
[1] "f1 score of the model:  0.95"


In [174]:
F1_Score(y_pred = predLROrig, y_true = yTrainClass$Class, positive = "1")
Recall(y_pred = predLROrig, y_true = yTrainClass$Class, positive = "1")

**> Select Subset**

In [78]:
# make confusion matrix with kmean clusters as input feature
predLRClustSub <- ifelse(predLRClustSub > 0.5,1,0) # Probability check
CM= table(yTrainSub$Class, predLRClustSub)
print(CM)
err_metric(CM)

   predLRClustSub
        0     1
  0 45482     7
  1    30    49
[1] "Precision value of the model:  0.88"
[1] "Accuracy of the model:  1"
[1] "Recall value of the model:  0.62"
[1] "False Positive rate of the model:  0"
[1] "False Negative rate of the model:  0.38"
[1] "f1 score of the model:  0.73"


In [79]:
F1_Score(y_pred =predLRClustSub, y_true = yTrainSub$Class, positive = "1")
Recall(y_pred = predLRClustSub, y_true = yTrainSub$Class, positive = "1")

In [177]:
# make confusion matrix 
predLROrigSub <- ifelse(predLROrigSub > 0.5, 1, 0) # Probability check
CM= table(yTrainClassSub$Class, predLROrigSub)
print(CM)
err_metric(CM)

   predLROrigSub
      0   1
  0 391   1
  1  38 354
[1] "Precision value of the model:  1"
[1] "Accuracy of the model:  0.95"
[1] "Recall value of the model:  0.9"
[1] "False Positive rate of the model:  0"
[1] "False Negative rate of the model:  0.1"
[1] "f1 score of the model:  0.95"


In [178]:
F1_Score(y_pred = predLROrigSub, y_true = yTrainClassSub$Class, positive = "1")
Recall(y_pred = predLROrigSub, y_true = yTrainClassSub$Class, positive = "1")

### Test Data

**Create Clusters for Full and Subset Test Datasets**

**> Full Normal Dataset**

In [203]:
# Compute k-means with k = 10
set.seed(567)
resultK10Test <- kmeans(XTest, 10, iter.max = 15, nstart = 50)


In [204]:
# Add the clusters as features to the original data 
# Test instead of XTest 
clustDfTest <- cbind(Test, kclusters = resultK10Test$cluster)
head(clustDfTest, 2)

# drop the target variable
XclustDfTest <- clustDfTest[, c(1:27, 29:29)]
dim(XclustDfTest)

Unnamed: 0,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,...,V21,V22,V23,V25,V26,V27,V28,Amount,Class,kclusters
2,-0.3239156,0.9178714,-0.7282708,-1.1800225,-0.8173157,-0.5494987,-0.05259299,1.769906,-1.0898177,-0.3800127,...,-0.008405836,0.24221482,-0.03969362,0.5116329,-0.6877417,0.09587039,0.03602943,1.0,0,1
8,-1.5576956,2.1790087,-2.3847499,-0.9778896,-0.4124026,-0.7876442,0.32045757,-2.100417,0.9545297,0.4993855,...,0.022861887,-0.08452142,0.24155116,0.3186859,-0.2840936,0.23457658,0.14608108,152.65,0,10


**> Select Subset**

In [85]:
# Compute k-means with k = 2
set.seed(567)
resultK2TestSub <- kmeans(XTestSub, 2, iter.max = 15, nstart = 50)


In [103]:
# Add the clusters as features to the original data 
# Test instead of XTest 
clustDfTestSub <- cbind(TestSub, kclusters = resultK2TestSub$cluster)
head(clustDfTest, 2)

# drop the target variable
XclustDfTestSub <- clustDfTestSub[, c(1:5, 7:7)]
dim(XclustDfTestSub)

Unnamed: 0,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,...,V21,V22,V23,V25,V26,V27,V28,Amount,Class,kclusters
2,-0.3239156,0.9178714,-0.7282708,-1.1800225,-0.8173157,-0.5494987,-0.05259299,1.769906,-1.0898177,-0.3800127,...,-0.008405836,0.24221482,-0.03969362,0.5116329,-0.6877417,0.09587039,0.03602943,1.0,0,1
8,-1.5576956,2.1790087,-2.3847499,-0.9778896,-0.4124026,-0.7876442,0.32045757,-2.100417,0.9545297,0.4993855,...,0.022861887,-0.08452142,0.24155116,0.3186859,-0.2840936,0.23457658,0.14608108,152.65,0,1


### Predictions

In [205]:
# Full Normal Dataset

# make predictions with kmeans clusters as input features
predLRClustTest <- predict(lrModelClust, XclustDfTest)

# make predictions 
predLROrigTest <- predict(lrModelOrig, XTestClass)


# Select Subset

# make predictions with kmeans clusters as input features
predLRClustTestSub <- predict(lrModelClustSub, XclustDfTestSub)

# make predictions 
predLROrigTestSub <- predict(lrModelOrigSub, XTestClassSub)

In [206]:

predLRClustTest <- as.numeric(as.character(predLRClustTest))

predLROrigTest <- as.numeric(as.character(predLROrigTest))

predLRClustTestSub <- as.numeric(as.character(predLRClustTestSub))

predLROrigTestSub <- as.numeric(as.character(predLROrigTestSub))

In [130]:
# make confusion matrix with kmean clusters as target label
predLRClustTest <- ifelse(predLRClustTest > 0.5,1,0) # Probability check
CM= table(yTest$Class, predLRClustTest)
print(CM)
err_metric(CM)


   predLRClustTest
       0    1
  0 9269    2
  1    7    7
[1] "Precision value of the model:  0.78"
[1] "Accuracy of the model:  1"
[1] "Recall value of the model:  0.5"
[1] "False Positive rate of the model:  0"
[1] "False Negative rate of the model:  0.5"
[1] "f1 score of the model:  0.61"


In [133]:
F1_Score(y_pred = predLRClustTest, y_true = yTest$Class, positive = "1")
Recall(y_pred = predLRClustTest, y_true = yTest$Class, positive = "1")

In [185]:
# make confusion matrix with  original labels as target label
predLROrigTest <- ifelse(predLROrigTest > 0.5,1,0) # Probability check
CM= table(yTestClass$Class, predLROrigTest)
print(CM)
err_metric(CM)

   predLROrigTest
        0     1
  0 51771  1048
  1     7    66
[1] "Precision value of the model:  0.06"
[1] "Accuracy of the model:  0.98"
[1] "Recall value of the model:  0.9"
[1] "False Positive rate of the model:  0.02"
[1] "False Negative rate of the model:  0.1"
[1] "f1 score of the model:  0.11"


In [186]:
F1_Score(y_pred = predLROrigTest, y_true = yTestClass$Class, positive = "1")
Recall(y_pred = predLROrigTest, y_true = yTestClass$Class, positive = "1")

In [143]:
# make confusion matrix with kmean clusters as target label
predLRClustTestSub <- ifelse(predLRClustTestSub > 0.5,1,0) # Probability check
CM= table(yTestSub$Class, predLRClustTestSub)
print(CM)
err_metric(CM)

   predLRClustTestSub
        0     1
  0 11376     2
  1    11     4
[1] "Precision value of the model:  0.67"
[1] "Accuracy of the model:  1"
[1] "Recall value of the model:  0.27"
[1] "False Positive rate of the model:  0"
[1] "False Negative rate of the model:  0.73"
[1] "f1 score of the model:  0.38"


In [144]:
F1_Score(y_pred =predLRClustTestSub, y_true = yTestSub$Class, positive = "1")
Recall(y_pred = predLRClustTestSub, y_true = yTestSub$Class, positive = "1")

In [189]:
# make confusion matrix with original labels as target label
predLROrigTestSub <- ifelse(predLROrigTestSub > 0.5,1,0) # Probability check
CM= table(yTestClassSub$Class, predLROrigTestSub)
print(CM)
err_metric(CM)

   predLROrigTestSub
        0     1
  0 56037   825
  1    16    84
[1] "Precision value of the model:  0.09"
[1] "Accuracy of the model:  0.99"
[1] "Recall value of the model:  0.84"
[1] "False Positive rate of the model:  0.01"
[1] "False Negative rate of the model:  0.16"
[1] "f1 score of the model:  0.17"


In [190]:
F1_Score(y_pred = predLROrigTestSub, y_true = yTestClassSub$Class, positive = "1")
Recall(y_pred = predLROrigTestSub, y_true = yTestClassSub$Class, positive = "1")
