# Detección de clases con aprendizaje supervisado reproducible

## Oier Mentxaka
https://cran.r-project.org/web/packages/arulesCBA/arulesCBA.pdf

### Carga de librerías


In [162]:
library(tidyverse)
library(caret)
library(arulesCBA)
source("Modelos/rCBA.R")
source("Preprocesamiento/utils.R")
source("Preprocesamiento/plots.R")

#### Asignación de la semilla

In [163]:
# Fix the RNG seed so that the random steps that follow (presumably the
# train/test partition) give the same result on every run.
set.seed(123)

#### Lectura de fichero ya filtrado

In [164]:
# Load the pre-filtered dataset from a comma-separated file with a header row
data <- read.csv(
  file = "../data/Statlog_rCBA.csv",
  header = TRUE,
  sep = ","
)

#### Factorización de las variables

In [165]:
# Turn every column into a factor, using across() in place of the superseded
# mutate_all(); the result is the same.
# NOTE(review): this also factorises any instance-weight column that may be
# present in the file — confirm that is intended before the weights are used
# numerically.
data <- mutate(data, across(everything(), as.factor))

#### Separación de train y test

In [166]:
# Draw a 70% partition of the row indices based on the class label
index <- createDataPartition(data[["Class"]], p = 0.7, list = FALSE)

# Rows outside the partition are held out for testing; the selected rows
# form the training set
test <- data[-index, ]
train <- data[index, ]

#### Eliminación de variable a predecir de testing

In [167]:
# Set the true labels aside for evaluation, then remove them from the test
# features
y_test <- test[["Class"]]
test[["Class"]] <- NULL

#### Selección de hiperparámetros

Umbrales mínimos de soporte y confianza, seleccionados tras realizar un estudio con grid search

In [168]:
# Minimum confidence and minimum support handed to the rule miner
# (used in the `parameter` list of mineCARs())
confidence <- 0.1
support <- 0.1

#### Obtención de los pesos

In [169]:
# Pull the weight column out of both partitions and drop it so it is not
# mined as a predictor.
#
# A missing column used to pass silently: `$` returns NULL and the `<- NULL`
# assignment is a no-op, so the weights would be NULL and any differently
# named weight column would stay in the data as a feature. Warn instead.
# NOTE(review): the rule table printed later in this notebook shows a
# `Weights` column, which suggests the CSV may not contain `pesos` at all —
# confirm the column name.
# NOTE(review): every column was converted to a factor earlier, so the
# extracted weights (when present) are factors, not numbers — confirm.
if (!("pesos" %in% names(train)) || !("pesos" %in% names(test))) {
  warning(
    "Column 'pesos' not found in train/test; weights will be NULL",
    call. = FALSE
  )
}

# `[[` matches the column name exactly (no partial matching, unlike `$`)
train_weights <- train[["pesos"]]
train[["pesos"]] <- NULL
test_weights <- test[["pesos"]]
test[["pesos"]] <- NULL

#### Creación del modelo

In [170]:
# Convert the training data frame into arules transactions
trans <- as(train, "transactions")

# Mine Classification Association Rules (CARs) with `Class` as the target,
# using the `support` and `confidence` thresholds set earlier
cars <- mineCARs(
  Class ~ .,
  trans,
  parameter = list(support = support, confidence = confidence)
)

# Keep only the rules that is.redundant() does not flag
cars <- cars[!is.redundant(cars)]

# Order the rules by confidence. The quality-measure name is written out in
# full rather than relying on partial matching of the abbreviation "conf".
cars <- sort(cars, by = "confidence")

Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen
        0.1    0.1    1 none FALSE           FALSE       5     0.1      1
 maxlen target  ext
      5  rules TRUE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 70 

set item appearances ...[112 item(s)] done [0.00s].
set transactions ...[112 item(s), 700 transaction(s)] done [0.00s].
sorting and recoding items ... [110 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 done [0.06s].
writing ... [19378 rule(s)] done [0.01s].
creating S4 object  ... done [0.01s].


In [171]:
# Assemble the rule-based classifier from the mined CARs. The default class
# comes from uncoveredMajorityClass() and matching rules are combined with
# the "majority" method.
# NOTE(review): arulesCBA documents the `weights` argument of CBA_ruleset()
# as per-rule voting weights, while `train_weights` is taken per training
# row — confirm this is intended, or that Modelos/rCBA.R overrides
# CBA_ruleset() with different semantics.
classifier <- CBA_ruleset(
  Class ~ .,
  rules = cars,
  default = uncoveredMajorityClass(Class ~ ., trans, cars),
  method = "majority",
  weights = train_weights
)

#### Obtención de reglas y transformación

In [172]:
# Build the rule table with the project helper extract_rules() (defined
# outside this notebook) and preview its first six rows
rules <- extract_rules(classifier, test)
head(rules, n = 6L)

Unnamed: 0_level_0,support,confidence,coverage,lift,count,Status.of.existing.checking.account,Duration.in.month,Credit.history,Purpose,Credit.amount,...,Housing,Number.of.existing.credits.at.this.bank,Job,Number.of.people.being.liable.to.provide.maintenance.for,Telephone,Foreign.worker,Gender,Marital.Status,Weights,Class
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<chr>,<lgl>,...,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
11492,0.1042857,0.9864865,0.1057143,1.409266,73,no checking account,,critical account,,,...,own,,,,,,,,,1
3590,0.1271429,0.978022,0.13,1.397174,89,no checking account,,critical account,,,...,,,,,,,,,,1
17175,0.1171429,0.9761905,0.12,1.394558,82,no checking account,,,,,...,own,,skilled employee,,,,,,,1
17057,0.1071429,0.974026,0.11,1.391466,75,no checking account,,,,,...,,,skilled employee,,yes~ registered,,,,,1
17272,0.1428571,0.9708738,0.1471429,1.386963,100,no checking account,,,,,...,own,,,,,,,single,,1
10952,0.1071429,0.9615385,0.1114286,1.373626,75,no checking account,,,radio/television,,...,,,,,,,,,1.0,1


#### Conversión del conjunto de test a transacciones

In [173]:
# Coerce the test data frame to arules transactions, mirroring the
# conversion applied to the training set
trans_test <- methods::as(object = test, Class = "transactions")

#### Obtención de predicciones

In [174]:
# Predict a class for every test transaction with the fitted classifier.
# NOTE(review): `weights` is not a documented argument of arulesCBA's predict
# method for CBA models — presumably it is handled by code sourced from
# Modelos/rCBA.R or silently absorbed by `...`; confirm it has an effect.
predictions <- predict(classifier, trans_test, weights = test_weights)

#### Matriz de confusión

In [175]:
# Cross-tabulate predicted vs. actual labels with the project helper
# confusion_matrix() (not defined in this notebook — presumably sourced from
# one of the helper files loaded at the top).
# NOTE(review): the result is stored under the same name as the helper and
# so shadows it in the global environment. Later calls still resolve because
# R skips non-function bindings when looking up a function, but a distinct
# name would be clearer.
confusion_matrix <- confusion_matrix(predictions, y_test)

[1] "Confusion Matrix:"
           actual_labels
predictions   1   2
          1 164  48
          2  46  42


#### Precisión del modelo

In [176]:
# Accuracy = cases on the diagonal of the confusion matrix (predicted label
# equals actual label) divided by the total number of cases
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)

# Report the value rounded to two decimals
print(paste("Accuracy:", round(accuracy, digits = 2)))

[1] "Accuracy: 0.69"
