# RIPPER ( Repeated Incremental Pruning to Produce Error Reduction)
[RIPPER](https://www.geeksforgeeks.org/ripper-algorithm/)



In [1]:
from sklearn.model_selection import train_test_split
import wittgenstein as lw

In [2]:

import sys
sys.path.append("/home/alvaro/Desktop/MachineLearning/PracticalApplications/1-3/my-staff")
from utils import load_ds
from globals import TEST_SIZE, SEED

In [3]:
X, y, features_names, class_names = load_ds("multivariant")
X.shape

(293, 10)

In [4]:
params =  {'alpha': 0.1, 'k': 2, 'n-discretize-bins': 10, 'prune-size': 0.1}

# Evaluate

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                                y,
                                                                test_size=TEST_SIZE,
                                                                random_state=SEED)

print(len(X_train), len(X_test))

205 88


In [6]:
ripper_clf = lw.RIPPER(k=params["k"], alpha=params["alpha"], n_discretize_bins=params["n-discretize-bins"], prune_size=params["prune-size"], verbosity=True) # Or irep_clf = lw.IREP() to build a model using IREP
ripper_clf

<RIPPER(prune_size=0.1, max_total_conds=None, verbosity=True, alpha=0.1, dl_allowance=64, max_rules=None, n_discretize_bins=10, max_rule_conds=None, k=2, random_state=None)>

In [7]:
ripper_clf.fit(X_train, y_train)

discretizing 5 features

GREW INITIAL RULESET:
[[Stage=4.0 ^ N_Days=<602.6] V
[Hepatomegaly_N=False ^ N_Days=602.6-999.8] V
[Hepatomegaly_N=False ^ Prothrombin=>12.0] V
[Copper=>200.6] V
[SGOT=>198.4] V
[Hepatomegaly_N=False ^ Prothrombin=11.5-12.0] V
[Hepatomegaly_N=False ^ SGOT=128.65-140.84] V
[Hepatomegaly_N=False] V
[Copper=<24.0 ^ N_Days=2575.6-3072.4] V
[Bilirubin=1.06-1.3 ^ N_Days=3072.4-3821.8] V
[SGOT=140.84-170.5 ^ Prothrombin=10.7-11.0] V
[Copper=44.4-57.6 ^ Prothrombin=10.7-11.0] V
[N_Days=1945.0-2290.4 ^ Stage=3.0 ^ Bilirubin=0.6-0.8] V
[N_Days=<602.6] V
[Copper=141.8-200.6 ^ Prothrombin=<9.74] V
[Copper=44.4-57.6 ^ Bilirubin=0.6-0.8] V
[SGOT=140.84-170.5 ^ N_Days=999.8-1349.2]]

optimization run 1 of 2

OPTIMIZED RULESET:
[[Stage=4.0 ^ N_Days=<602.6] V
[Hepatomegaly_N=False ^ N_Days=602.6-999.8] V
[Hepatomegaly_N=False ^ Prothrombin=>12.0] V
[Copper=>200.6] V
[SGOT=>198.4] V
[Hepatomegaly_N=False ^ Prothrombin=11.5-12.0] V
[Hepatomegaly_N=False ^ SGOT=128.65-140.84] V
[H

In [8]:
ripper_clf.out_model()

[[Stage=4.0 ^ N_Days=<602.6] V
[Hepatomegaly_N=False ^ N_Days=602.6-999.8] V
[Copper=>200.6] V
[SGOT=>198.4]]


# Evaluation

In [9]:
from utils import eval_model, show_report

In [10]:
y_pred = ripper_clf.predict(X_test)

In [11]:
eval_model(y_test, y_pred)

Métricas de evaluación:
Precision: 0.7612
Recall: 0.7500
F1-score: 0.7400


In [12]:
show_report(y_test, y_pred)

{'0': {'precision': 0.7258064516129032, 'recall': 0.9, 'f1-score': 0.8035714285714286, 'support': 50.0}, '1': {'precision': 0.8076923076923077, 'recall': 0.5526315789473685, 'f1-score': 0.6562500000000001, 'support': 38.0}, 'accuracy': 0.75, 'macro avg': {'precision': 0.7667493796526055, 'recall': 0.7263157894736842, 'f1-score': 0.7299107142857144, 'support': 88.0}, 'weighted avg': {'precision': 0.761166253101737, 'recall': 0.75, 'f1-score': 0.7399553571428572, 'support': 88.0}}
              precision    recall  f1-score   support

           0       0.73      0.90      0.80        50
           1       0.81      0.55      0.66        38

    accuracy                           0.75        88
   macro avg       0.77      0.73      0.73        88
weighted avg       0.76      0.75      0.74        88

