In [1]:
" Import the libraries " 

import os
import sys 
import math
import copy

import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
" Import the scripts of SD for Explaining "

absFilePath = os.path.dirname(os.path.dirname(os.getcwd()))
newPath = os.path.join(absFilePath, 'SplitSD4X')
sys.path.append(newPath)

from fill_missing_values import *
from missing_values_table import *
from neighbors_generation import *
from patterns_extraction import *
from performances import *
from subgroups_discovery import *
from sp_lime import *

## Data Preparation 

In [3]:
"Loading and preparing data" 

datasets_path = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), 'Datasets\\')
data = np.loadtxt(fname = datasets_path +"thyroid_data.dat", delimiter =',')
with open(datasets_path +'thyroid_names.dat', 'r') as f:
    string = f.read()
columns_names = string.split(', ')
columns_names.append('Class')
df = pd.DataFrame(data = data ,columns= columns_names)
df.head()

Unnamed: 0,Age,Sex,On_thyroxine,Query_on_thyroxine,On_antithyroid_medication,Sick,Pregnant,Thyroid_surgery,I131_treatment,Query_hypothyroid,...,Goitre,Tumor,Hypopituitary,Psych,TSH,T3,TT4,T4U,FTI,Class
0,0.73,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0006,0.015,0.12,0.082,0.146,3.0
1,0.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00025,0.03,0.143,0.133,0.108,3.0
2,0.47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0019,0.024,0.102,0.131,0.078,3.0
3,0.64,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0009,0.017,0.077,0.09,0.085,3.0
4,0.23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00025,0.026,0.139,0.09,0.153,3.0


In [4]:
" Handling some data "
df = df.drop(['Hypopituitary'],axis =1)

In [5]:
" Decode Categorical Features " 
sex_mapper = {0 : 'M', 
              1 : 'F'}
sex_mapper_inv = dict(map(reversed, sex_mapper.items()))
df['Sex'] = df['Sex'].replace(sex_mapper)


ft_mapper = {0 : 'F', 
             1 : 'T'}
ft_mapper_inv = dict(map(reversed, ft_mapper.items()))
df['On_thyroxine'] = df['On_thyroxine'].replace(ft_mapper)
df['Query_on_thyroxine'] = df['Query_on_thyroxine'].replace(ft_mapper)
df['On_antithyroid_medication'] = df['On_antithyroid_medication'].replace(ft_mapper)
df['Sick'] = df['Sick'].replace(ft_mapper)
df['Pregnant'] = df['Pregnant'].replace(ft_mapper)
df['Thyroid_surgery'] = df['Thyroid_surgery'].replace(ft_mapper)
df['I131_treatment'] = df['I131_treatment'].replace(ft_mapper)
df['Query_hypothyroid'] = df['Query_hypothyroid'].replace(ft_mapper)
df['Query_hyperthyroid'] = df['Query_hyperthyroid'].replace(ft_mapper)
df['Lithium'] = df['Lithium'].replace(ft_mapper)
df['Goitre'] = df['Goitre'].replace(ft_mapper)
df['Tumor'] = df['Tumor'].replace(ft_mapper)
df['Psych'] = df['Psych'].replace(ft_mapper)

df.head()

Unnamed: 0,Age,Sex,On_thyroxine,Query_on_thyroxine,On_antithyroid_medication,Sick,Pregnant,Thyroid_surgery,I131_treatment,Query_hypothyroid,...,Lithium,Goitre,Tumor,Psych,TSH,T3,TT4,T4U,FTI,Class
0,0.73,M,T,F,F,F,F,F,T,F,...,F,F,F,F,0.0006,0.015,0.12,0.082,0.146,3.0
1,0.24,M,F,F,F,F,F,F,F,F,...,F,F,F,F,0.00025,0.03,0.143,0.133,0.108,3.0
2,0.47,M,F,F,F,F,F,F,F,F,...,F,F,F,F,0.0019,0.024,0.102,0.131,0.078,3.0
3,0.64,F,F,F,F,F,F,F,F,F,...,F,F,F,F,0.0009,0.017,0.077,0.09,0.085,3.0
4,0.23,M,F,F,F,F,F,F,F,F,...,F,F,F,F,0.00025,0.026,0.139,0.09,0.153,3.0


In [6]:
" display the features types "
df.dtypes

Age                          float64
Sex                           object
On_thyroxine                  object
Query_on_thyroxine            object
On_antithyroid_medication     object
Sick                          object
Pregnant                      object
Thyroid_surgery               object
I131_treatment                object
Query_hypothyroid             object
Query_hyperthyroid            object
Lithium                       object
Goitre                        object
Tumor                         object
Psych                         object
TSH                          float64
T3                           float64
TT4                          float64
T4U                          float64
FTI                          float64
Class                        float64
dtype: object

In [7]:
" Checking missing values "
df.replace('?', np.nan, inplace=True)
missing_values_table(df)

Your slelected dataframe has 21 columns.
There are 0 columns that have missing values.


Unnamed: 0,Missing Values,% of Total Values


In [8]:
" separate the data and the target "
data_df = df.drop(columns=['Class'])
target_df = df['Class']

In [9]:
" calculate the categorical features mask "
categorical_feature_mask = (data_df.dtypes == object)
categorical_feature_mask

Age                          False
Sex                           True
On_thyroxine                  True
Query_on_thyroxine            True
On_antithyroid_medication     True
Sick                          True
Pregnant                      True
Thyroid_surgery               True
I131_treatment                True
Query_hypothyroid             True
Query_hyperthyroid            True
Lithium                       True
Goitre                        True
Tumor                         True
Psych                         True
TSH                          False
T3                           False
TT4                          False
T4U                          False
FTI                          False
dtype: bool

In [10]:
categorical_cols_names = data_df.columns[categorical_feature_mask].tolist()
categorical_cols_names

['Sex',
 'On_thyroxine',
 'Query_on_thyroxine',
 'On_antithyroid_medication',
 'Sick',
 'Pregnant',
 'Thyroid_surgery',
 'I131_treatment',
 'Query_hypothyroid',
 'Query_hyperthyroid',
 'Lithium',
 'Goitre',
 'Tumor',
 'Psych']

In [11]:
numerical_cols_names = data_df.columns[~categorical_feature_mask].tolist()
numerical_cols_names

['Age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']

In [12]:
" if no values missed we execute this code : "
data_df = pd.concat([data_df[numerical_cols_names], data_df[categorical_cols_names]],axis = 1)
data_df.head()

Unnamed: 0,Age,TSH,T3,TT4,T4U,FTI,Sex,On_thyroxine,Query_on_thyroxine,On_antithyroid_medication,Sick,Pregnant,Thyroid_surgery,I131_treatment,Query_hypothyroid,Query_hyperthyroid,Lithium,Goitre,Tumor,Psych
0,0.73,0.0006,0.015,0.12,0.082,0.146,M,T,F,F,F,F,F,T,F,F,F,F,F,F
1,0.24,0.00025,0.03,0.143,0.133,0.108,M,F,F,F,F,F,F,F,F,F,F,F,F,F
2,0.47,0.0019,0.024,0.102,0.131,0.078,M,F,F,F,F,F,F,F,F,F,F,F,F,F
3,0.64,0.0009,0.017,0.077,0.09,0.085,F,F,F,F,F,F,F,F,F,F,F,F,F,F
4,0.23,0.00025,0.026,0.139,0.09,0.153,M,F,F,F,F,F,F,F,F,F,F,F,F,F


In [13]:
" Encoding categorical features" 

data_df['Sex'] = data_df['Sex'].replace(sex_mapper_inv)
data_df['On_thyroxine'] = data_df['On_thyroxine'].replace(ft_mapper_inv)
data_df['Query_on_thyroxine'] = data_df['Query_on_thyroxine'].replace(ft_mapper_inv)
data_df['On_antithyroid_medication'] = data_df['On_antithyroid_medication'].replace(ft_mapper_inv)
data_df['Sick'] = data_df['Sick'].replace(ft_mapper_inv)
data_df['Pregnant'] = data_df['Pregnant'].replace(ft_mapper_inv)
data_df['Thyroid_surgery'] = data_df['Thyroid_surgery'].replace(ft_mapper_inv)
data_df['I131_treatment'] = data_df['I131_treatment'].replace(ft_mapper_inv)
data_df['Query_hypothyroid'] = data_df['Query_hypothyroid'].replace(ft_mapper_inv)
data_df['Query_hyperthyroid'] = data_df['Query_hyperthyroid'].replace(ft_mapper_inv)
data_df['Lithium'] = data_df['Lithium'].replace(ft_mapper_inv)
data_df['Goitre'] = data_df['Goitre'].replace(ft_mapper_inv)
data_df['Tumor'] = data_df['Tumor'].replace(ft_mapper_inv)
data_df['Psych'] = data_df['Psych'].replace(ft_mapper_inv)
data_df.head()

Unnamed: 0,Age,TSH,T3,TT4,T4U,FTI,Sex,On_thyroxine,Query_on_thyroxine,On_antithyroid_medication,Sick,Pregnant,Thyroid_surgery,I131_treatment,Query_hypothyroid,Query_hyperthyroid,Lithium,Goitre,Tumor,Psych
0,0.73,0.0006,0.015,0.12,0.082,0.146,0,1,0,0,0,0,0,1,0,0,0,0,0,0
1,0.24,0.00025,0.03,0.143,0.133,0.108,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0.47,0.0019,0.024,0.102,0.131,0.078,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0.64,0.0009,0.017,0.077,0.09,0.085,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0.23,0.00025,0.026,0.139,0.09,0.153,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [14]:
data_target_df = pd.concat([data_df, target_df], axis=1) 

In [15]:
" generate the Test SET "
nb_test_instances = 1000 
test_df = data_target_df.sample(n=nb_test_instances)
data_test_df = test_df.drop(columns=['Class'])
target_test_df = test_df['Class']

In [16]:
" generate the Training SET "
train_df = pd.concat([data_target_df,test_df]).drop_duplicates(keep=False)
data_train_df = train_df.drop(columns=['Class'])
target_train_df = train_df['Class']

In [17]:
" Extract values of the test set to generate the neighbors"

data_test = data_test_df.values
target_test = target_test_df.values

In [18]:
numerical_cols = np.arange(0,len(numerical_cols_names)) 
categorical_cols = np.arange(len(numerical_cols_names),data_df.shape[1])

## Neighbors Generation

In [19]:
nb_neighbors = 50
list_neigh = generate_all_neighbors(data_test,numerical_cols,categorical_cols,nb_neighbors)

In [20]:
" store all the neighbors together "
n = np.size(data_test,0)
all_neighbors = list_neigh[0]
for i in range(1,n) :
    all_neighbors = np.concatenate((all_neighbors, list_neigh[i]), axis=0)

### One hot encoding 

In [21]:
df_neigh = pd.DataFrame(data = all_neighbors,columns= numerical_cols_names + categorical_cols_names)
df_neigh[categorical_cols_names] = df_neigh[categorical_cols_names].astype(int,errors='ignore')

" Decode all the data neighbors to perform one hot encoding "
df_neigh['Sex'] = df_neigh['Sex'].replace(sex_mapper)
df_neigh['On_thyroxine'] = df_neigh['On_thyroxine'].replace(ft_mapper)
df_neigh['Query_on_thyroxine'] = df_neigh['Query_on_thyroxine'].replace(ft_mapper)
df_neigh['On_antithyroid_medication'] = df_neigh['On_antithyroid_medication'].replace(ft_mapper)
df_neigh['Sick'] = df_neigh['Sick'].replace(ft_mapper)
df_neigh['Pregnant'] = df_neigh['Pregnant'].replace(ft_mapper)
df_neigh['Thyroid_surgery'] = df_neigh['Thyroid_surgery'].replace(ft_mapper)
df_neigh['I131_treatment'] = df_neigh['I131_treatment'].replace(ft_mapper)
df_neigh['Query_hypothyroid'] = df_neigh['Query_hypothyroid'].replace(ft_mapper)
df_neigh['Query_hyperthyroid'] = df_neigh['Query_hyperthyroid'].replace(ft_mapper)
df_neigh['Lithium'] = df_neigh['Lithium'].replace(ft_mapper)
df_neigh['Goitre'] = df_neigh['Goitre'].replace(ft_mapper)
df_neigh['Tumor'] = df_neigh['Tumor'].replace(ft_mapper)
df_neigh['Psych'] = df_neigh['Psych'].replace(ft_mapper)
df_neigh.head()

Unnamed: 0,Age,TSH,T3,TT4,T4U,FTI,Sex,On_thyroxine,Query_on_thyroxine,On_antithyroid_medication,Sick,Pregnant,Thyroid_surgery,I131_treatment,Query_hypothyroid,Query_hyperthyroid,Lithium,Goitre,Tumor,Psych
0,0.315479,0.006765,0.024367,0.085653,0.081722,0.105221,M,F,F,F,F,F,F,F,F,F,F,F,F,F
1,0.328172,0.004432,0.025129,0.083415,0.080221,0.101878,M,F,F,F,F,F,F,F,F,F,T,F,F,F
2,0.312417,-0.00157,0.025282,0.086259,0.083029,0.105445,M,F,F,F,F,F,F,F,F,F,F,F,F,F
3,0.283055,0.000322,0.02449,0.08937,0.081956,0.107511,F,T,F,F,F,F,F,F,F,F,F,F,F,F
4,0.316296,0.000567,0.025055,0.080091,0.080168,0.100684,M,F,F,F,F,F,F,F,F,F,F,F,F,F


In [22]:
" One hot encoding "
df_neigh = pd.get_dummies(df_neigh, prefix_sep='_', drop_first=True)
df_neigh.head()

Unnamed: 0,Age,TSH,T3,TT4,T4U,FTI,Sex_M,On_thyroxine_T,Query_on_thyroxine_T,On_antithyroid_medication_T,Sick_T,Pregnant_T,Thyroid_surgery_T,I131_treatment_T,Query_hypothyroid_T,Query_hyperthyroid_T,Lithium_T,Goitre_T,Tumor_T,Psych_T
0,0.315479,0.006765,0.024367,0.085653,0.081722,0.105221,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0.328172,0.004432,0.025129,0.083415,0.080221,0.101878,1,0,0,0,0,0,0,0,0,0,1,0,0,0
2,0.312417,-0.00157,0.025282,0.086259,0.083029,0.105445,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0.283055,0.000322,0.02449,0.08937,0.081956,0.107511,0,1,0,0,0,0,0,0,0,0,0,0,0,0
4,0.316296,0.000567,0.025055,0.080091,0.080168,0.100684,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [23]:
" Store the neighbors in a list"

data_neigh = df_neigh.values
n = np.size(data_test,0)
list_neigh = []
j = 0
for i in range(0,n):
    list_neigh.append(data_neigh[j:(j+nb_neighbors),:])
    j += nb_neighbors

####  One hot encoding for the training and the test sets

In [24]:
data_train_df['Sex'] = data_train_df['Sex'].replace(sex_mapper)
data_train_df['On_thyroxine'] = data_train_df['On_thyroxine'].replace(ft_mapper)
data_train_df['Query_on_thyroxine'] = data_train_df['Query_on_thyroxine'].replace(ft_mapper)
data_train_df['On_antithyroid_medication'] = data_train_df['On_antithyroid_medication'].replace(ft_mapper)
data_train_df['Sick'] = data_train_df['Sick'].replace(ft_mapper)
data_train_df['Pregnant'] = data_train_df['Pregnant'].replace(ft_mapper)
data_train_df['Thyroid_surgery'] = data_train_df['Thyroid_surgery'].replace(ft_mapper)
data_train_df['I131_treatment'] = data_train_df['I131_treatment'].replace(ft_mapper)
data_train_df['Query_hypothyroid'] = data_train_df['Query_hypothyroid'].replace(ft_mapper)
data_train_df['Query_hyperthyroid'] = data_train_df['Query_hyperthyroid'].replace(ft_mapper)
data_train_df['Lithium'] = data_train_df['Lithium'].replace(ft_mapper)
data_train_df['Goitre'] = data_train_df['Goitre'].replace(ft_mapper)
data_train_df['Tumor'] = data_train_df['Tumor'].replace(ft_mapper)
data_train_df['Psych'] = data_train_df['Psych'].replace(ft_mapper)

In [25]:
data_train_df = pd.get_dummies(data_train_df, prefix_sep='_', drop_first=True)
data_train_df.head()

Unnamed: 0,Age,TSH,T3,TT4,T4U,FTI,Sex_M,On_thyroxine_T,Query_on_thyroxine_T,On_antithyroid_medication_T,Sick_T,Pregnant_T,Thyroid_surgery_T,I131_treatment_T,Query_hypothyroid_T,Query_hyperthyroid_T,Lithium_T,Goitre_T,Tumor_T,Psych_T
0,0.73,0.0006,0.015,0.12,0.082,0.146,1,1,0,0,0,0,0,1,0,0,0,0,0,0
1,0.24,0.00025,0.03,0.143,0.133,0.108,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0.47,0.0019,0.024,0.102,0.131,0.078,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0.64,0.0009,0.017,0.077,0.09,0.085,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0.23,0.00025,0.026,0.139,0.09,0.153,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [26]:
data_train = data_train_df.values
target_train = target_train_df.values

In [27]:
data_test_df['Sex'] = data_test_df['Sex'].replace(sex_mapper)
data_test_df['On_thyroxine'] = data_test_df['On_thyroxine'].replace(ft_mapper)
data_test_df['Query_on_thyroxine'] = data_test_df['Query_on_thyroxine'].replace(ft_mapper)
data_test_df['On_antithyroid_medication'] = data_test_df['On_antithyroid_medication'].replace(ft_mapper)
data_test_df['Sick'] = data_test_df['Sick'].replace(ft_mapper)
data_test_df['Pregnant'] = data_test_df['Pregnant'].replace(ft_mapper)
data_test_df['Thyroid_surgery'] = data_test_df['Thyroid_surgery'].replace(ft_mapper)
data_test_df['I131_treatment'] = data_test_df['I131_treatment'].replace(ft_mapper)
data_test_df['Query_hypothyroid'] = data_test_df['Query_hypothyroid'].replace(ft_mapper)
data_test_df['Query_hyperthyroid'] = data_test_df['Query_hyperthyroid'].replace(ft_mapper)
data_test_df['Lithium'] = data_test_df['Lithium'].replace(ft_mapper)
data_test_df['Goitre'] = data_test_df['Goitre'].replace(ft_mapper)
data_test_df['Tumor'] = data_test_df['Tumor'].replace(ft_mapper)
data_test_df['Psych'] = data_test_df['Psych'].replace(ft_mapper)

In [28]:
data_test_df = pd.get_dummies(data_test_df, prefix_sep='_', drop_first=True)
data_test_df.head()

Unnamed: 0,Age,TSH,T3,TT4,T4U,FTI,Sex_M,On_thyroxine_T,Query_on_thyroxine_T,On_antithyroid_medication_T,Sick_T,Pregnant_T,Thyroid_surgery_T,I131_treatment_T,Query_hypothyroid_T,Query_hyperthyroid_T,Lithium_T,Goitre_T,Tumor_T,Psych_T
2028,0.31,0.0022,0.025,0.085,0.081,0.105,1,0,0,0,0,0,0,0,0,0,0,0,0,0
361,0.34,0.00189,0.0206,0.11118,0.099,0.11207,1,0,0,0,0,0,0,0,0,0,0,0,0,0
973,0.47,0.00189,0.0206,0.11118,0.099,0.11207,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3607,0.61,0.001,0.014,0.118,0.102,0.116,1,0,0,0,1,0,0,0,0,0,0,0,0,0
3095,0.44,4e-05,0.0206,0.141,0.103,0.137,0,0,0,0,0,0,0,0,1,0,0,0,0,0


In [29]:
data_test = data_test_df.values
target_test = target_test_df.values

In [30]:
" Define the functions to save and load data "
import pickle
def save_obj(obj, name):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [31]:
'SAVE THE DATA'

path = './saved_data/'
save_obj(data_train, path + 'data_train')
save_obj(target_train, path + 'target_train')
save_obj(data_test, path  + 'data_test')
save_obj(target_test, path + 'target_test')
save_obj(list_neigh, path + 'list_neighbors')

## Training the models

In [32]:
" Logistic Regression : "
lr = LogisticRegression(class_weight = "balanced",random_state=0,max_iter = 1000)
model_lr = lr.fit(data_train,target_train)
target_pred_lr = model_lr.predict(data_test)

In [33]:
" Random Forest : "
rdclassifier = RandomForestClassifier(n_estimators=500,max_depth=3, random_state=0) 
model_rd = rdclassifier.fit(data_train,target_train)
target_pred_rd = model_rd.predict(data_test)

In [34]:
" SVM : "
clf = svm.SVC(class_weight = "balanced",probability=True,decision_function_shape='ovr')
model_svm = clf.fit(data_train, target_train)
target_pred_svm = model_svm.predict(data_test)

In [35]:
" Sklearn MLP Classifier : "
mlp = MLPClassifier(hidden_layer_sizes=(5,), max_iter=500,
                    solver='sgd', random_state=1,
                    learning_rate_init=.1)

model_nt = mlp.fit(data_train, target_train)
target_pred_mlp = model_nt.predict(data_test)

## Scores of the black box models 

In [36]:
print(f"{'The score of the logistic regression model is ' :<50}{': {}'.format(round(f1_score(target_test,target_pred_lr,average='macro'),4))}")
print(f"{'The score of the Random Forest  model is ' :<50}{': {}'.format(round(f1_score(target_test,target_pred_rd,average='macro'),4))}")
print(f"{'The score of the SVM model is ' :<50}{': {}'.format(round(f1_score(target_test,target_pred_svm,average='macro'),4))}")
print(f"{'The score of the Multi-Layer-Perceptron model is ' :<50}{': {}'.format(round(f1_score(target_test,target_pred_mlp,average='macro'),4))}")

The score of the logistic regression model is     : 0.5801
The score of the Random Forest  model is          : 0.6128
The score of the SVM model is                     : 0.4945
The score of the Multi-Layer-Perceptron model is  : 0.6475


## Execution of Split Based Selection Form Algorithm : 


In [37]:
split_point = len(numerical_cols)
nb_models = 100
(L_Subgroups,P) = SplitBasedSelectionForm (data_test, target_test, nb_models, model_nt, list_neigh,split_point,3)

In [38]:
'SAVE THE LIST OF THE SUBGROUPS'
save_obj(L_Subgroups, path + 'list_subgroups')

## Subgroups Descriptions

In [39]:
att_names = data_test_df.columns
patt_descriptions = patterns(P,split_point,data_test,att_names)

subrgoup 0
0.05 < TSH <= 0.5
Query_hypothyroid_T = 1
-------------------------------------------------------------------
subrgoup 1
0.03 < TSH <= 0.05
0.04 < FTI <= 0.08
-------------------------------------------------------------------
subrgoup 2
0.03 < TSH <= 0.05
0.0 < FTI <= 0.04
-------------------------------------------------------------------
subrgoup 3
0.03 < TSH <= 0.05
0.08 < FTI <= 0.55
0.02 < T3 <= 0.07
-------------------------------------------------------------------
subrgoup 4
0.01 < TSH <= 0.02
0.0 < FTI <= 0.09
Sex_M = 0
-------------------------------------------------------------------
subrgoup 5
0.02 < TSH <= 0.03
On_thyroxine_T = 1
0.0 < T3 <= 0.02
-------------------------------------------------------------------
subrgoup 6
0.02 < TSH <= 0.03
On_thyroxine_T = 0
0.09 < FTI <= 0.55
0.02 < T3 <= 0.07
-------------------------------------------------------------------
subrgoup 7
0.03 < TSH <= 0.05
0.08 < FTI <= 0.55
0.01 < T3 <= 0.02
------------------------------

0.01 < TSH <= 0.01
0.07 < TT4 <= 0.6
0.08 < FTI <= 0.11
0.57 < Age <= 0.94
-------------------------------------------------------------------
subrgoup 62
0.0 < TSH <= 0.0
0.03 < TT4 <= 0.07
0.09 < FTI <= 0.1
0.01 < Age <= 0.64
-------------------------------------------------------------------
subrgoup 63
0.01 < TSH <= 0.01
0.07 < TT4 <= 0.13
0.11 < FTI <= 0.55
0.01 < Age <= 0.2
-------------------------------------------------------------------
subrgoup 64
0.0 < TSH <= 0.01
0.07 < TT4 <= 0.13
0.11 < FTI <= 0.55
0.01 < Age <= 0.2
-------------------------------------------------------------------
subrgoup 65
0.0 < TSH <= 0.0
0.07 < TT4 <= 0.13
0.12 < FTI <= 0.55
0.01 < T3 <= 0.07
-------------------------------------------------------------------
subrgoup 66
0.0 < TSH <= 0.0
0.07 < TT4 <= 0.13
0.12 < FTI <= 0.55
0.0 < T3 <= 0.01
-------------------------------------------------------------------
subrgoup 67
0.0 < TSH <= 0.0
0.03 < TT4 <= 0.07
0.05 < FTI <= 0.07
0.01 < Age <= 0.64
----

In [40]:
'SAVE THE SUBGROUPS PATTERNS'
save_obj(patt_descriptions, path + 'patterns')
save_obj(att_names, path + 'att_names')