In [1]:
import sys
# Extend the module search path two directory levels up so the `rulelearn`
# imports below can resolve. NOTE(review): presumably that directory is the
# repository root containing the `rulelearn` package -- confirm the layout.
sys.path.append('../../')
import rulelearn.algorithms.r2n.simple_rulenet as rn
import rulelearn.algorithms.r2n.base as base
import rulelearn.algorithms.r2n.utilities as util
import rulelearn.algorithms.r2n.preprocessing_layers as ul
import rulelearn.algorithms.r2n.r2n_algo as algo
from rulelearn.algorithms.r2n.training import train as train_R2N

In [2]:
from sklearn.model_selection import train_test_split
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler

import numpy as np
import matplotlib.pylab as plt

# Seed the global torch and NumPy random number generators with the same value.
torch.manual_seed(2)
np.random.seed(2)

# IPython magics: load the autoreload extension and enable mode 2 so that
# edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Control variables
data_loc = 'data/churn_prob_out_35.csv'
label_col = 'CHURN'
to_drop = ['Id','CHURN','3_Class','5_Class','is_test_set','pChurn']

In [4]:
# Load the raw dataset from the CSV file at `data_loc` into a DataFrame.
data = pd.read_csv(data_loc)

In [5]:
# Model inputs: the raw frame without the columns listed in `to_drop`.
input_data = data.drop(columns=to_drop)

# Encode the label as integers ('F' -> 0, 'T' -> 1) in a single pass, keeping
# it as a one-column DataFrame. `infer_objects()` makes the object -> integer
# dtype conversion explicit instead of relying on the implicit downcasting in
# `replace`, which pandas 2.2 deprecated; values other than 'F'/'T' are left
# untouched, exactly as before.
label = data[[label_col]].replace({'F': 0, 'T': 1}).infer_objects()

In [6]:
# Hold out 25% of the rows for evaluation. `random_state` pins the shuffle
# (same value as the seeds set at the top of the notebook) so re-running this
# cell always yields the same split, rather than depending on how much of
# NumPy's global random stream has already been consumed.
input_train, input_test, label_train, label_test = train_test_split(
    input_data,
    label,
    train_size=0.75,
    random_state=2,
)

In [7]:
# Display the model-input frame (the raw data with the `to_drop` columns removed).
input_data

Unnamed: 0,Gender,Status,Children,EstIncome,CarOwner,Age,Paymethod,Usage,RatePlan
0,F,S,1,38000.00,N,24.393333,CC,229.64,3
1,M,M,2,29616.00,N,49.426667,CH,75.29,2
2,M,M,0,19732.80,N,50.673333,CC,47.25,3
3,M,S,2,96.33,N,56.473333,CC,59.01,1
4,F,M,2,52004.80,N,25.140000,CH,28.14,1
...,...,...,...,...,...,...,...,...,...
1794,M,D,1,90478.60,N,54.553333,CC,154.49,4
1795,F,M,2,56187.00,N,41.000000,CH,21.83,2
1796,F,S,1,42000.00,N,24.393333,CC,229.64,3
1797,M,M,0,9780.66,N,24.546667,CC,21.39,2


In [8]:
# NOTE : R2N requires that column names do not have whitespace or arithmetic operators

# Input columns passed to R2N as categorical features.
cat_cols = [
    'Gender',
    'Status',
    'Children',
    'CarOwner',
    'Paymethod',
]

# Input columns passed to R2N as numerical features.
num_cols = [
    'EstIncome',
    'Age',
    'Usage',
    'RatePlan',
]

In [9]:
# Configure the R2N learner with the categorical/numerical column groups.
# NOTE(review): the exact meaning of the hyperparameters below is defined by
# `R2Nalgo` -- consult its documentation before tuning them.
R2run = algo.R2Nalgo(
    cat_columns=cat_cols,
    num_columns=num_cols,
    n_seeds=1,
    min_temp=10**-4,
    decay_rate=0.98,
    coef=5*10**-4,
    normalize_num=True,
    negation=False,
)

In [10]:
# Train the R2N model on the training split; the recorded output logs
# success rate, loss and temperature every 10 epochs.
R2run.fit(input_train, label_train)

4 numerical features and  5 categorical features
---------
13 categorical dummies
---------
Epoch: 0 Success rate: 58.8 Loss: 5.91878 Temperature: 9.8e-01
Epoch: 10 Success rate: 58.8 Loss: 5.87950 Temperature: 8.0e-01
Epoch: 20 Success rate: 58.8 Loss: 5.84946 Temperature: 6.5e-01
Epoch: 30 Success rate: 58.8 Loss: 5.82898 Temperature: 5.3e-01
Epoch: 40 Success rate: 58.8 Loss: 5.81734 Temperature: 4.4e-01
Epoch: 50 Success rate: 58.1 Loss: 4.08421 Temperature: 3.6e-01
Epoch: 60 Success rate: 73.5 Loss: 2.75227 Temperature: 2.9e-01
Epoch: 70 Success rate: 76.6 Loss: 2.38626 Temperature: 2.4e-01
Epoch: 80 Success rate: 80.4 Loss: 1.99968 Temperature: 1.9e-01
Epoch: 90 Success rate: 85.5 Loss: 1.65741 Temperature: 1.6e-01
Epoch: 100 Success rate: 88.1 Loss: 1.47406 Temperature: 1.3e-01
Epoch: 110 Success rate: 89.3 Loss: 1.31030 Temperature: 1.1e-01
Epoch: 120 Success rate: 89.8 Loss: 1.21900 Temperature: 8.7e-02
Epoch: 130 Success rate: 90.2 Loss: 1.16660 Temperature: 7.1e-02
Epoch: 14

In [11]:
# Run the fitted model on the held-out test inputs.
prediction = R2run.predict(input_test)

In [12]:
# Test accuracy: fraction of test rows whose predicted label equals the true label.
predicted_flat = np.array(prediction).flatten()
actual_flat = np.array(label_test).flatten()
n_correct = np.count_nonzero(predicted_flat == actual_flat)
n_correct / len(label_test)

0.86

In [13]:
# Print the learned rule set in DNF form with thr=0.1; in the recorded output
# this drops some small-coefficient terms that the default export keeps.
# `print` already converts its argument with `str()`.
print(R2run.export_rules_to_trxf_dnf_ruleset(thr=0.1))

if
([-3.0719568e-06*EstIncome - 0.011073619*Age + 0.41905272 >= 0] ^ [Gender == F] ^ [CarOwner == N]) v
([4.235193e-06*EstIncome + 0.00245478*Age + 0.0023219697*Usage + 0.028453792*RatePlan - 0.4903672 >= 0] ^ [-8.199226e-06*EstIncome - 0.0060453876*Age - 0.00047677557*Usage + 0.06189673*RatePlan + 0.5222411 >= 0] ^ [Children == 1] ^ [Paymethod == CC]) v
([-4.3862556e-06*EstIncome + 0.0013715553*Usage - 0.06675427*RatePlan + 0.35130543 >= 0] ^ [-3.0719568e-06*EstIncome - 0.011073619*Age + 0.41905272 >= 0] ^ [4.235193e-06*EstIncome + 0.00245478*Age + 0.0023219697*Usage + 0.028453792*RatePlan - 0.4903672 >= 0] ^ [Gender == M] ^ [Status == S]) v
([-3.3245412e-06*EstIncome + 0.021272553*RatePlan + 0.074754454 >= 0] ^ [2.327939e-06*EstIncome + 0.0028196916*Age + 0.00283685*Usage - 0.37616417 >= 0] ^ [-6.1245596e-06*EstIncome + 0.017771019*Age - 0.00060199713*Usage - 0.14333951 >= 0] ^ [-8.199226e-06*EstIncome - 0.0060453876*Age - 0.00047677557*Usage + 0.06189673*RatePlan + 0.5222411 >= 0] ^

In [14]:
# Print the learned rule set in DNF form using the exporter's default threshold.
# `print` already converts its argument with `str()`.
print(R2run.export_rules_to_trxf_dnf_ruleset())

if
([-3.0719568e-06*EstIncome - 0.011073619*Age - 9.73349e-07*Usage - 0.0003543593*RatePlan + 0.41905272 >= 0] ^ [Gender == F] ^ [CarOwner == N]) v
([4.235193e-06*EstIncome + 0.00245478*Age + 0.0023219697*Usage + 0.028453792*RatePlan - 0.4903672 >= 0] ^ [-8.199226e-06*EstIncome - 0.0060453876*Age - 0.00047677557*Usage + 0.06189673*RatePlan + 0.5222411 >= 0] ^ [Children == 1] ^ [Paymethod == CC]) v
([-4.3862556e-06*EstIncome - 0.0004928683*Age + 0.0013715553*Usage - 0.06675427*RatePlan + 0.35130543 >= 0] ^ [-3.0719568e-06*EstIncome - 0.011073619*Age - 9.73349e-07*Usage - 0.0003543593*RatePlan + 0.41905272 >= 0] ^ [4.235193e-06*EstIncome + 0.00245478*Age + 0.0023219697*Usage + 0.028453792*RatePlan - 0.4903672 >= 0] ^ [Gender == M] ^ [Status == S]) v
([-3.3245412e-06*EstIncome + 0.00015770689*Age - 1.6373829e-08*Usage + 0.021272553*RatePlan + 0.074754454 >= 0] ^ [2.327939e-06*EstIncome + 0.0028196916*Age + 0.00283685*Usage - 0.003067519*RatePlan - 0.37616417 >= 0] ^ [-6.1245596e-06*EstInc