# Detección de clases con aprendizaje supervisado reproducible

## Oier Mentxaka
https://cran.r-project.org/web/packages/arulesCBA/arulesCBA.pdf

### Carga de librerías


In [283]:
library(tidyverse)
library(caret)
library(arulesCBA)
source("Modelos/rCBA.R")
source("Preprocesamiento/utils.R")
source("Preprocesamiento/plots.R")

#### Asignación de la semilla

In [284]:
# Fix the RNG seed so that the random steps further down (e.g. the
# train/test partition) give the same result on every run.
set.seed(seed = 123)

#### Lectura de fichero ya filtrado

In [285]:
# Load the already-filtered Statlog data set. The file has a header row and
# comma-separated fields (both are also read.csv()'s defaults).
data <- read.csv(
  file = "../data/Statlog_rCBA.csv",
  header = TRUE,
  sep = ","
)

#### Factorización de las variables

In [286]:
# Convert every column to a factor so the data frame can later be coerced to
# arules "transactions" (one item per column/level pair).
# across(everything(), ...) replaces the superseded mutate_all().
# NOTE(review): this also factorizes a `Weights` column if the CSV contains
# one, so the weights extracted further down would be factors, not numbers —
# confirm that this is intended.
data <- mutate(data, across(everything(), as.factor))

#### Separación de train y test

In [287]:
# Sample 70% of the rows for training, based on the Class column.
# With list = FALSE caret returns the selected row numbers as a matrix.
index <- createDataPartition(y = data[["Class"]], p = 0.7, list = FALSE)

# Rows outside the partition are held out for testing; the sampled rows
# form the training set.
test <- data[-index, ]
train <- data[index, ]

#### Eliminación de la variable a predecir del conjunto de test

In [288]:
# Keep the true labels aside for evaluation, then drop the target column
# from the test set so it is not part of the prediction input.
y_test <- test[["Class"]]
test[["Class"]] <- NULL

#### Obtención de los pesos (Weights)

In [289]:
# Copy the Weights column of each partition into its own vector first ...
train_weights <- train$Weights
test_weights <- test$Weights

# ... and then remove it from both data frames so it is not treated as a
# predictor when the data are converted to transactions.
train$Weights <- NULL
test$Weights <- NULL

#### Grid search

In [290]:
# # Example values for the grid search
# support_values <- seq(0.01, 0.1, by = 0.01)
# confidence_values <- seq(0.01, 0.1, by = 0.01)

# best <- grid_search(train, test, y_test, support_values, confidence_values,train_weights = train_weights, test_weights = test_weights)
# print(best)

#### Selección de hiperparámetros

Valores de soporte y confianza seleccionados tras realizar un estudio con grid search

In [291]:
# Minimum confidence and support thresholds handed to the rule miner below.
confidence <- 0.01
support <- 0.01

#### Creación del modelo

In [292]:
# Convert the training data frame (all factor columns) into arules
# transactions.
trans <- as(train, "transactions")

# Mine the Classification Association Rules (CARs): rules whose right-hand
# side is the Class item, subject to the support/confidence thresholds above.
cars <- mineCARs(
  Class ~ .,
  trans,
  parameter = list(support = support, confidence = confidence)
)

# Drop redundant rules, as flagged by is.redundant().
cars <- cars[!is.redundant(cars)]

# Order the rules by confidence. The quality measure is spelled out in full
# instead of relying on partial matching of the abbreviation "conf".
cars <- sort(cars, by = "confidence")

Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen
       0.01    0.1    1 none FALSE           FALSE       5    0.01      1
 maxlen target  ext
      5  rules TRUE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 7 

set item appearances ...[107 item(s)] done [0.00s].
set transactions ...[107 item(s), 700 transaction(s)] done [0.00s].
sorting and recoding items ... [107 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 done [0.36s].
writing ... [327551 rule(s)] done [0.11s].
creating S4 object  ... done [0.10s].


In [293]:
# Display the summary of the training transactions (number of rows and items).
trans

transactions in sparse format with
 700 transactions (rows) and
 107 items (columns)

In [294]:
# Assemble a CBA classifier from the mined CARs, combining matching rules by
# majority vote. The default class is the one returned by
# uncoveredMajorityClass() for the training transactions and the rule set.
# NOTE(review): arulesCBA documents `weights` as per-rule weights (a quality
# measure name or one value per rule), whereas `train_weights` is taken from
# the training rows — confirm this argument does what is intended.
default_class <- uncoveredMajorityClass(Class ~ ., trans, cars)
classifier <- CBA_ruleset(
  Class ~ .,
  rules = cars,
  default = default_class,
  method = "majority",
  weights = train_weights
)

#### Obtención de reglas y transformación

In [295]:
# Build the rule table with the project helper extract_rules() (sourced
# above; presumably one row per rule with its quality measures and items —
# confirm against the helper) and preview its first six rows.
rules <- extract_rules(classifier, test)
head(rules, n = 6L)

Unnamed: 0_level_0,support,confidence,coverage,lift,count,Status.of.existing.checking.account,Duration.in.month,Credit.history,Purpose,Credit.amount,...,Other.installment.plans,Housing,Number.of.existing.credits.at.this.bank,Job,Number.of.people.being.liable.to.provide.maintenance.for,Telephone,Foreign.worker,Gender,Marital.Status,Class
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,...,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>,<chr>
230,0.01,1,0.01,1.428571,7,,,,retraining,,...,,own,,,,,,,,1
356,0.01142857,1,0.01142857,3.333333,8,,,no credits/all paid,,,...,,,,,,yes~ registered,,,,2
480,0.01285714,1,0.01285714,1.428571,9,no checking account,,,,,...,,,(2~4],,,,,,,1
504,0.01,1,0.01,1.428571,7,,(9~12],,,,...,,,,,,,,,,1
555,0.01,1,0.01,1.428571,7,,(3~6],,,,...,,,,,,,no,,,1
556,0.01142857,1,0.01142857,1.428571,8,,,,,,...,,,,,,,no,,,1


#### Conversión del conjunto de test a transacciones

In [296]:
# Coerce the test data frame (target column already removed) to arules
# transactions, mirroring what was done for the training set.
trans_test <- methods::as(object = test, Class = "transactions")

#### Obtención de predicciones

In [297]:
# Predict the class of every test transaction with the fitted classifier.
# NOTE(review): `weights` is forwarded to predict() as in the original; it is
# unclear from here whether the method uses it — confirm.
predictions <- predict(
  classifier,
  trans_test,
  weights = test_weights
)

#### Matriz de confusión

In [298]:
# Cross-tabulate predicted vs. actual labels with the project helper
# confusion_matrix(). The result is stored under its own name: assigning it
# back to `confusion_matrix` would overwrite the helper (presumably defined in
# the global environment by the source() calls at the top — confirm), and
# re-running this cell would then fail because the function no longer exists.
conf_mat <- confusion_matrix(predictions, y_test)

[1] "Confusion Matrix:"
           actual_labels
predictions   1   2
          1 137  22
          2  73  68


#### Precisión del modelo

In [299]:
# Accuracy: share of test cases on the diagonal of the confusion matrix
# (predicted label equals actual label).
accuracy <- sum(diag(conf_mat)) / sum(conf_mat)
print(paste("Accuracy:", round(accuracy, 2)))

[1] "Accuracy: 0.68"


#### División gráfica de las clases

In [300]:
# TODO