In [1]:
import fcalc
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Binarized data

In [2]:
# Column labels for the tic-tac-toe file: the nine board squares followed by
# the outcome column. They are passed to read_csv explicitly via `names`.
column_names = [
    'top-left-square', 'top-middle-square', 'top-right-square',
    'middle-left-square', 'middle-middle-square', 'middle-right-square',
    'bottom-left-square', 'bottom-middle-square', 'bottom-right-square',
    'Class',
]
df = pd.read_csv('data_sets/tic-tac-toe.data', names=column_names)
# Binary target: True where the outcome is 'positive', False otherwise.
df['Class'] = df['Class'].eq('positive')
df.head()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,Class
0,x,x,x,x,o,o,x,o,o,True
1,x,x,x,x,o,o,o,x,o,True
2,x,x,x,x,o,o,o,o,x,True
3,x,x,x,x,o,o,o,b,b,True
4,x,x,x,x,o,o,b,o,b,True


In [3]:
# One-hot encode every board square into boolean indicator columns, prefixed
# with the name of the square they come from.
feature_names = column_names[:-1]
X = pd.get_dummies(df[feature_names], prefix=feature_names).astype(bool)
y = df['Class']
# Hold out 30% of the rows for testing; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)


In [4]:
# Build the binarized classifier from the boolean training context and labels.
bin_cls = fcalc.classifier.BinarizedBinaryClassifier(
    X_train.to_numpy(),
    y_train.to_numpy(),
    method="standard-support",
)

In [5]:
# Classify the held-out rows; the scoring cell reads the results back from
# `bin_cls.predictions`.
bin_cls.predict(X_test.to_numpy())

In [6]:
# Score the binarized classifier on the held-out split.
# accuracy_score / f1_score are imported once in the notebook's import cell.
print(accuracy_score(y_test, bin_cls.predictions))
print(f1_score(y_test, bin_cls.predictions))

0.9965277777777778
0.9974160206718347


# Pattern structures

In [7]:
# Reload the tic-tac-toe data from scratch so the pattern-structure section
# starts from the raw categorical values.
column_names = [
    'top-left-square',
    'top-middle-square',
    'top-right-square',
    'middle-left-square',
    'middle-middle-square',
    'middle-right-square',
    'bottom-left-square',
    'bottom-middle-square',
    'bottom-right-square',
    'Class',
]
df = pd.read_csv('data_sets/tic-tac-toe.data', names=column_names)
# Boolean target: True for rows labelled 'positive'.
df['Class'] = df['Class'] == 'positive'
df.head()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,Class
0,x,x,x,x,o,o,x,o,o,True
1,x,x,x,x,o,o,o,x,o,True
2,x,x,x,x,o,o,o,o,x,True
3,x,x,x,x,o,o,o,b,b,True
4,x,x,x,x,o,o,b,o,b,True


In [8]:
# Use the raw square values as features (no one-hot encoding in this section).
feature_columns = column_names[:-1]
X = df[feature_columns]
y = df['Class']
# Same 70/30 split and seed as the binarized experiment.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [9]:
# Every feature column is declared categorical, so the index array simply
# enumerates all columns of the training frame.
all_column_idx = np.arange(X_train.shape[1])
pat_cls = fcalc.classifier.PatternBinaryClassifier(
    X_train.to_numpy(),
    y_train.to_numpy(),
    categorical=all_column_idx,
)

In [10]:
# Classify the held-out rows; results are read from `pat_cls.predictions`
# by the scoring cell.
pat_cls.predict(X_test.to_numpy())

In [11]:
# Score the pattern-structure classifier on the tic-tac-toe test split.
# accuracy_score / f1_score are imported once in the notebook's import cell.
print(accuracy_score(y_test, pat_cls.predictions))
print(f1_score(y_test, pat_cls.predictions))

0.9930555555555556
0.9948453608247423


In [12]:
# Load the iris measurements; column names are supplied explicitly via `names`.
iris_columns = [
    'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species',
]
df = pd.read_csv('data_sets/iris.data', names=iris_columns)
# Binary target: True for Iris-setosa, False for every other species.
df['species'] = df['species'].eq('Iris-setosa')
# Seed the preview so the displayed rows are the same on every re-run.
df.sample(10, random_state=42)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
102,7.1,3.0,5.9,2.1,False
12,4.8,3.0,1.4,0.1,True
141,6.9,3.1,5.1,2.3,False
119,6.0,2.2,5.0,1.5,False
79,5.7,2.6,3.5,1.0,False
2,4.7,3.2,1.3,0.2,True
140,6.7,3.1,5.6,2.4,False
18,5.7,3.8,1.7,0.3,True
53,5.5,2.3,4.0,1.3,False
68,6.2,2.2,4.5,1.5,False


In [13]:
# Features are every column except the last one (the boolean species flag).
X = df.iloc[:, :-1]
y = df['species']
# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [14]:
# No `categorical` argument is passed for the iris measurements.
# NOTE(review): presumably all columns are then treated as numeric; confirm
# against the fcalc documentation.
pat_cls = fcalc.classifier.PatternBinaryClassifier(
    X_train.to_numpy(),
    y_train.to_numpy(),
)

In [15]:
# Classify the held-out iris rows; the scoring cell reads
# `pat_cls.predictions`.
pat_cls.predict(X_test.to_numpy())

In [16]:
# Score the iris classifier, rounded to four decimals.
# accuracy_score / f1_score are imported once in the notebook's import cell.
iris_predictions = pat_cls.predictions
print("accuracy:", round(accuracy_score(y_test, iris_predictions), 4))
print("f1 score:", round(f1_score(y_test, iris_predictions), 4))

accuracy: 1.0
f1 score: 1.0


In [17]:
# Load the heart-failure clinical records; this CSV is read with its own
# header row (no `names` override).
df = pd.read_csv('data_sets/heart_failure_clinical_records_dataset.csv')
# Seed the preview so the displayed rows are the same on every re-run.
df.sample(5, random_state=42)

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
60,45.0,0,7702,1,25,1,390000.0,1.0,139,1,0,60,1
198,50.0,1,582,1,20,1,279000.0,1.0,134,0,0,186,0
59,72.0,0,364,1,20,1,254000.0,1.3,136,1,1,59,1
297,45.0,0,2413,0,38,0,140000.0,1.4,140,1,1,280,0
289,90.0,1,337,0,38,0,390000.0,0.9,144,0,0,256,0


In [18]:
# Features are every column except the last one (DEATH_EVENT), which is the
# target.
X = df.iloc[:, :-1]
y = df['DEATH_EVENT']
# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [19]:
# Columns to be treated as categorical, identified by name instead of by
# hard-coded position. With the column order shown in the preview above these
# resolve to positions 1, 3, 5, 9 and 10; get_loc raises KeyError if a column
# is missing, so a changed schema fails loudly rather than silently selecting
# the wrong features.
categorical_columns = ['anaemia', 'diabetes', 'high_blood_pressure', 'sex', 'smoking']
categorical_idx = np.array(
    [X_train.columns.get_loc(name) for name in categorical_columns]
)
pat_cls = fcalc.classifier.PatternBinaryClassifier(
    X_train.values,
    y_train.to_numpy(),
    categorical=categorical_idx,
)

In [20]:
# Classify the held-out patients; the scoring cell reads
# `pat_cls.predictions`.
pat_cls.predict(X_test.to_numpy())

In [22]:
# Score the heart-failure classifier, rounded to four decimals.
# accuracy_score / f1_score are imported once in the notebook's import cell.
# F1 is macro-averaged here, unlike the earlier evaluations, which use
# f1_score's default averaging.
heart_predictions = pat_cls.predictions
print("accuracy:", round(accuracy_score(y_test, heart_predictions), 4))
print("f1 score:", round(f1_score(y_test, heart_predictions, average='macro'), 4))

accuracy: 0.6778
f1 score: 0.4268
