In [1]:
import fcalc
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Binarized data

In [2]:
# Column labels for the data file: nine board squares followed by the outcome label.
column_names = [
    'top-left-square',
    'top-middle-square',
    'top-right-square',
    'middle-left-square',
    'middle-middle-square',
    'middle-right-square',
    'bottom-left-square',
    'bottom-middle-square',
    'bottom-right-square',
    'Class',
]
# `names=` supplies the labels, so the first line of the file is read as data.
df = pd.read_csv('data_sets/tic-tac-toe.data', names=column_names)
# Boolean target: True where the label is 'positive', False for anything else.
df['Class'] = df['Class'].eq('positive')
df.head()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,Class
0,x,x,x,x,o,o,x,o,o,True
1,x,x,x,x,o,o,o,x,o,True
2,x,x,x,x,o,o,o,o,x,True
3,x,x,x,x,o,o,o,b,b,True
4,x,x,x,x,o,o,b,o,b,True


In [16]:
# One-hot encode every square column into boolean indicator columns
# (each indicator is named "<square>_<value>").
feature_names = column_names[:-1]
X = pd.get_dummies(df[feature_names], prefix=feature_names).astype(bool)
y = df['Class']
# 70/30 train/test split with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [20]:
# Build the binarized classifier from the boolean training matrix and its labels,
# both passed as plain NumPy arrays.
bin_cls = fcalc.classifier.BinarizedClassifier(
    X_train.to_numpy(),
    y_train.to_numpy(),
)

In [21]:
# Classify the held-out rows; the results are read back from `bin_cls.predictions`.
bin_cls.predict(X_test.to_numpy())

In [22]:
# Score the binarized classifier on the held-out split.
# accuracy_score / f1_score are imported once in the notebook's top import cell.
print(accuracy_score(y_test, bin_cls.predictions))
print(f1_score(y_test, bin_cls.predictions))

0.9965277777777778
0.9974160206718347


# Pattern structures

In [7]:
# Column labels for the data file: nine board squares followed by the outcome label.
column_names = [
    'top-left-square',
    'top-middle-square',
    'top-right-square',
    'middle-left-square',
    'middle-middle-square',
    'middle-right-square',
    'bottom-left-square',
    'bottom-middle-square',
    'bottom-right-square',
    'Class',
]
# `names=` supplies the labels, so the first line of the file is read as data.
df = pd.read_csv('data_sets/tic-tac-toe.data', names=column_names)
# Boolean target: True where the label is 'positive', False for anything else.
df['Class'] = df['Class'].eq('positive')
df.head()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,Class
0,x,x,x,x,o,o,x,o,o,True
1,x,x,x,x,o,o,o,x,o,True
2,x,x,x,x,o,o,o,o,x,True
3,x,x,x,x,o,o,o,b,b,True
4,x,x,x,x,o,o,b,o,b,True


In [8]:
# Keep the raw (un-encoded) square columns as features; the last name is the target.
X = df.loc[:, column_names[:-1]]
y = df['Class']
# 70/30 train/test split with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [9]:
# Every feature column is declared categorical: indices 0 .. n_features-1.
n_features = X_train.shape[1]
pat_cls = fcalc.classifier.PatternClassifier(
    X_train.to_numpy(),
    y_train.to_numpy(),
    categorical=np.arange(n_features),
)

In [11]:
# Classify the held-out rows; the results are read back from `pat_cls.predictions`.
pat_cls.predict(X_test.to_numpy())

categorical case


In [13]:
# Score the pattern-structure classifier on the held-out split.
# accuracy_score / f1_score are imported once in the notebook's top import cell.
print(accuracy_score(y_test, pat_cls.predictions))
print(f1_score(y_test, pat_cls.predictions))

0.9965277777777778
0.9974160206718347


In [2]:
# Load the iris measurements; `names=` supplies the column labels, so the
# first line of the file is read as data.
df = pd.read_csv(
    'data_sets/iris.data',
    names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'],
)
# Boolean one-vs-rest target: True only where the label is 'Iris-setosa'.
df['species'] = df['species'].eq('Iris-setosa')
# Fixed seed so the preview shows the same rows on every run.
df.sample(10, random_state=42)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
112,6.8,3.0,5.5,2.1,False
117,7.7,3.8,6.7,2.2,False
140,6.7,3.1,5.6,2.4,False
54,6.5,2.8,4.6,1.5,False
32,5.2,4.1,1.5,0.1,True
65,6.7,3.1,4.4,1.4,False
144,6.7,3.3,5.7,2.5,False
31,5.4,3.4,1.5,0.4,True
91,6.1,3.0,4.6,1.4,False
105,7.6,3.0,6.6,2.1,False


In [3]:
# Features are every column except the last; the target is the boolean 'species' column.
X, y = df.iloc[:, :-1], df['species']
# 70/30 train/test split with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [4]:
# No `categorical` argument is passed here, so the classifier's default
# handling applies to every column.
pat_cls = fcalc.classifier.PatternClassifier(
    X_train.to_numpy(),
    y_train.to_numpy(),
)

In [5]:
# Classify the held-out rows; the results are read back from `pat_cls.predictions`.
pat_cls.predict(X_test.to_numpy())

In [10]:
# Score the pattern-structure classifier on the held-out iris rows, rounded to 4 decimals.
# accuracy_score / f1_score are imported once in the notebook's top import cell.
print("accuracy:", round(accuracy_score(y_test, pat_cls.predictions), 4))
print("f1 score:", round(f1_score(y_test, pat_cls.predictions), 4))

accuracy: 0.9556
f1 score: 0.9474


In [2]:
# Load the heart-failure clinical records; column labels come from the CSV's header row.
df = pd.read_csv('data_sets/heart_failure_clinical_records_dataset.csv')
# Fixed seed so the preview shows the same rows on every run.
df.sample(5, random_state=42)

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
252,50.0,0,245,0,45,1,274000.0,1.0,133,1,0,215,0
267,56.0,1,135,1,38,0,133000.0,1.7,140,1,0,244,0
109,45.0,0,292,1,35,0,850000.0,1.3,142,1,1,88,0
283,65.0,0,1688,0,38,0,263358.03,1.1,138,1,1,250,0
202,70.0,0,97,0,60,1,220000.0,0.9,138,1,0,186,0


In [3]:
# Features are every column except the last; the target is the 'DEATH_EVENT' column.
X, y = df.iloc[:, :-1], df['DEATH_EVENT']
# 70/30 train/test split with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [4]:
# Columns passed to the classifier as categorical (they hold 0/1 values in
# the data preview); all other columns get the classifier's default handling.
categorical_columns = ['anaemia', 'diabetes', 'high_blood_pressure', 'sex', 'smoking']
# Resolve names to positional indices (1, 3, 5, 9, 10 for the current column
# order). get_loc raises KeyError if a column is missing, so a schema change
# fails loudly instead of silently selecting the wrong feature.
categorical_idx = np.array(
    [X_train.columns.get_loc(name) for name in categorical_columns]
)
pat_cls = fcalc.classifier.PatternClassifier(X_train.values, y_train.to_numpy(),
                                             categorical=categorical_idx)

In [5]:
# Classify the held-out rows; the results are read back from `pat_cls.predictions`.
pat_cls.predict(X_test.to_numpy())

In [6]:
# Score the pattern-structure classifier on the held-out heart-failure rows, rounded to 4 decimals.
# accuracy_score / f1_score are imported once in the notebook's top import cell.
# NOTE(review): the recorded run reports accuracy 0.4222, i.e. below 0.5 on a
# binary target — worth investigating (feature scaling, classifier settings) before
# relying on this result.
print("accuracy:", round(accuracy_score(y_test, pat_cls.predictions), 4))
print("f1 score:", round(f1_score(y_test, pat_cls.predictions), 4))

accuracy: 0.4222
f1 score: 0.5873
