# Detección de clases con aprendizaje supervisado reproducible

## Oier Mentxaka
https://cran.r-project.org/web/packages/arulesCBA/arulesCBA.pdf

### Carga de librerías


In [134]:
library(tidyverse)
library(caret)
library(arulesCBA)
source("Modelos/rCBA.R")
source("Preprocesamiento/utils.R")
source("Preprocesamiento/plots.R")

#### Asignación de la semilla

In [135]:
set.seed(123)

#### Lectura de fichero ya filtrado

In [136]:
data <- read.csv("../data/Statlog_rCBA.csv", header = TRUE, sep = ",")

#### Factorización de las variables

In [137]:
data <- mutate_all(data, as.factor)

#### Separación de train y test

In [138]:
index <- createDataPartition(data$Class, p = 0.7, list = FALSE)

# Split the data
train <- data[index, ]
test <- data[-index, ]

#### Eliminacion de variable a predecir de testing

In [139]:
y_test <- test$Class
test$Class <- NULL

#### Obtención de los Weighs

In [140]:
train_weights <- train$Weighs
train$Weighs <- NULL
test_weights <- test$Weighs
test$Weighs <- NULL

#### Grid search

In [141]:
# # Example values for the grid search
# support_values <- seq(0.01, 0.1, by = 0.01)
# confidence_values <- seq(0.01, 0.1, by = 0.01)

# best <- grid_search(train, test, y_test, support_values, confidence_values,train_weights = train_weights, test_weights = NULL)
# print(best)

#### Selección de variables 

Seleccionadas tras realizar un estudio con grid search

In [142]:
support <- 0.9
confidence <- 0.5

#### Creación del modelo

In [143]:
# Convert the training dataset into transactions
trans <- as(train, "transactions")

# Create rule base with CARs (Classification Association Rules)
cars <- mineCARs(Class ~ ., trans, parameter = list(support = support, confidence = confidence))

# Remove redundant rules
cars <- cars[!is.redundant(cars)]

# Sort the rules by confidence
cars <- sort(cars, by = "conf")

Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen
        0.5    0.1    1 none FALSE           FALSE       5     0.9      1
 maxlen target  ext
      5  rules TRUE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 630 

set item appearances ...[112 item(s)] done [0.00s].
set transactions ...[112 item(s), 700 transaction(s)] done [0.00s].
sorting and recoding items ... [15 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 done [0.00s].
writing ... [16 rule(s)] done [0.00s].
creating S4 object  ... done [0.00s].


In [144]:
# Fit the model with weights and the rules
classifier <- CBA_ruleset(Class ~ .,
                           rules = cars,
                           default = uncoveredMajorityClass(Class ~ ., trans, cars),
                           method = "majority",
                           weights = train_weights)

#### Obtención de reglas y transformación

In [145]:
rules <- extract_rules(classifier, test)
head(rules)

Unnamed: 0_level_0,support,confidence,coverage,lift,count,Status.of.existing.checking.account,Duration.in.month,Credit.history,Purpose,Credit.amount,...,Housing,Number.of.existing.credits.at.this.bank,Job,Number.of.people.being.liable.to.provide.maintenance.for,Telephone,Foreign.worker,Gender,Marital.Status,Weights,Class
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,...,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<chr>,<chr>
3,0.6657143,0.7092846,0.9385714,1.013264,466,,,,,,...,,,,,,,,,1.0,1
2,0.6385714,0.7028302,0.9085714,1.004043,447,,,,,,...,,,,,,,,,,1
1,0.7,0.7,1.0,1.0,490,,,,,,...,,,,,,,,,,1


#### Definición de extracción de reglas activadas

In [146]:
trans_test <- as(test, "transactions")

#### Obtención de predicciones

In [147]:
predictions <- predict(classifier, trans_test, weights = test_weights)

#### Matriz de confusión

In [148]:
confusion_matrix <- confusion_matrix(predictions, y_test)

[1] "Confusion Matrix:"
           actual_labels
predictions   1   2
          1 210  90
          2   0   0


#### Precisión del modelo

In [149]:
# Calculate accuracy
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
print(paste("Accuracy:", round(accuracy, 2)))

[1] "Accuracy: 0.7"


#### División grárica de las clases 

In [150]:
# TODO