In [142]:
import fcalc
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_score

# Titanic Dataset


In [143]:
data = pd.read_csv(r'https://raw.githubusercontent.com/OscarL7/Big-Homework-OSDA/main/titanic.csv')
data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})
data['Embarked'] = data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})
columns_to_drop = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Fare', 'Embarked']
data = data.drop(columns=columns_to_drop)
mean_age = data['Age'].mean()

# Replace NaN values with the mean age
data['Age'].fillna(mean_age, inplace=True)

# Round the age values in the column
data['Age'] = data['Age'].round()

data['Survived'] = [x == 1 for x in data['Survived']]

data['child'] = data['Age'].apply(lambda x: 1 if 0 <= x <= 18 else 0)
data['adult'] = data['Age'].apply(lambda x: 1 if 19 <= x <= 76 else 0)



data['Class_1'] = data['Pclass'].apply(lambda x: 1 if x == 1 else 0)
data['Class_2'] = data['Pclass'].apply(lambda x: 1 if x == 2 else 0)
data['Class_3'] = data['Pclass'].apply(lambda x: 1 if x == 3 else 0)

bdata = data.drop(['Age', 'Pclass', 'SibSp', 'Parch'], axis=1)
bdata.head()

Unnamed: 0,Survived,Sex,child,adult,Class_1,Class_2,Class_3
0,False,0,0,1,0,0,1
1,True,1,0,1,0,0,1
2,False,0,0,1,0,1,0
3,False,0,0,1,0,0,1
4,True,1,0,1,0,0,1


In [144]:
nbdata = pd.read_csv(r'https://raw.githubusercontent.com/OscarL7/Big-Homework-OSDA/main/titanic.csv')
nbdata['Sex'] = nbdata['Sex'].map({'male': 0, 'female': 1})
nbdata['Embarked'] = nbdata['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})
columns_to_drop = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Fare', 'Embarked', 'Age', 'Pclass', 'SibSp', 'Parch']
nbdata = nbdata.drop(columns=columns_to_drop)


In [145]:
x1 = bdata.drop('Survived', axis=1)
y1 = bdata['Survived']

x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y1, test_size=0.3, random_state=42)

In [146]:
#Binarized Binary Classifier

bin_cls = fcalc.classifier.BinarizedBinaryClassifier(x1_train.values, y1_train.to_numpy(), method="standard", alpha=0)

In [147]:
bin_cls.predict(x1_test.values)
print(bin_cls.predictions)

[0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0.
 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 0. 1. 1. 0. 0. 0. 0. 1. 1.
 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0.
 0. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 1.
 0. 0. 0. 0. 0. 0.]


In [148]:
print(accuracy_score(y1_test, bin_cls.predictions))
print(f1_score(y1_test, bin_cls.predictions)) 

0.9841269841269841
0.9761904761904763


## Pattern Binary Classifier

In [149]:
nbdata = pd.read_csv(r'https://raw.githubusercontent.com/OscarL7/Big-Homework-OSDA/main/titanic.csv')
nbdata['Sex'] = nbdata['Sex'].map({'male': 0, 'female': 1})
nbdata['Embarked'] = nbdata['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})
columns_to_drop = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Fare', 'Embarked', 'Parch']
nbdata = nbdata.drop(columns=columns_to_drop)
mean_age = nbdata['Age'].mean()

# Replace NaN values with the mean age
nbdata['Age'].fillna(mean_age, inplace=True)

# Round the age values in the column
nbdata['Age'] = nbdata['Age'].round()

print(nbdata.head())

   Survived  Pclass  Sex   Age  SibSp
0         0       3    0  34.0      0
1         1       3    1  47.0      1
2         0       2    0  62.0      0
3         0       3    0  27.0      0
4         1       3    1  22.0      1


In [150]:
x1 = nbdata.drop('Survived', axis=1)
y1 = nbdata['Survived']

x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y1, test_size=0.3, random_state=42)        

In [151]:
# Pattern Binary Classifier

pat_cls = fcalc.classifier.PatternBinaryClassifier(x1_train.values, y1_train.to_numpy(), 
                                             categorical=np.arange(x1_train.shape[1]))   

In [152]:
pat_cls.predict(x1_test.values)

In [153]:
print(accuracy_score(y1_test, pat_cls.predictions))
print(f1_score(y1_test, pat_cls.predictions))

1.0
1.0


# Heart Disease Dataset

In [154]:
data2 = pd.read_csv(r'https://raw.githubusercontent.com/OscarL7/Big-Homework-OSDA/main/heart_diseases.csv')
columns_to_remove = ['FastingBS', 'RestingECG', 'ExerciseAngina', 'Oldpeak', 'ST_Slope', 'ChestPainType']
data2 = data2.drop(columns=columns_to_remove)
data2['Sex'] = data2['Sex'].replace({'M': 1, 'F': 0})

data2['RestingBP0_50'] = data2['RestingBP'].apply(lambda x: 1 if 0 <= x <= 50 else 0)
data2['RestingBP51_100'] = data2['RestingBP'].apply(lambda x: 1 if 51 <= x <= 100 else 0)
data2['RestingBP101_150'] = data2['RestingBP'].apply(lambda x: 1 if 101 <= x <= 150 else 0)
data2['RestingBP151_200'] = data2['RestingBP'].apply(lambda x: 1 if 151 <= x <= 200 else 0)

data2['Cholesterol0_200'] = data2['Cholesterol'].apply(lambda x: 1 if 0 <= x <= 200 else 0)
data2['Cholesterol201_240'] = data2['Cholesterol'].apply(lambda x: 1 if 201 <= x <= 240 else 0)
data2['Cholesterol241_603'] = data2['Cholesterol'].apply(lambda x: 1 if 241 <= x <= 603 else 0)

data2['MaxHR60_110'] = data2['MaxHR'].apply(lambda x: 1 if 60 <= x <= 110 else 0)
data2['MaxHR111_120'] = data2['MaxHR'].apply(lambda x: 1 if 111 <= x <= 120 else 0)
data2['MaxHR121_170'] = data2['MaxHR'].apply(lambda x: 1 if 121 <= x <= 170 else 0)
data2['MaxHR171_200'] = data2['MaxHR'].apply(lambda x: 1 if 171 <= x <= 200 else 0)

data2['Age28_44'] = data2['Age'].apply(lambda x: 1 if 28 <= x <= 44 else 0)
data2['Age45_61'] = data2['Age'].apply(lambda x: 1 if 45 <= x <= 61 else 0)
data2['Age61_77'] = data2['Age'].apply(lambda x: 1 if 61 <= x <= 77 else 0)

data2['HeartDisease'] = [x == 1 for x in data2['HeartDisease']]

bdata2 = data2.drop(['RestingBP', 'Cholesterol', 'MaxHR', 'Age'], axis=1)

bdata2.head()

Unnamed: 0,Sex,HeartDisease,RestingBP0_50,RestingBP51_100,RestingBP101_150,RestingBP151_200,Cholesterol0_200,Cholesterol201_240,Cholesterol241_603,MaxHR60_110,MaxHR111_120,MaxHR121_170,MaxHR171_200,Age28_44,Age45_61,Age61_77
0,1,False,0,0,1,0,0,0,1,0,0,0,1,1,0,0
1,0,True,0,0,0,1,1,0,0,0,0,1,0,0,1,0
2,1,False,0,0,1,0,0,0,1,1,0,0,0,1,0,0
3,0,True,0,0,1,0,0,1,0,1,0,0,0,0,1,0
4,1,False,0,0,1,0,1,0,0,0,0,1,0,0,1,0


In [155]:
x2 = bdata2.drop('HeartDisease', axis=1)
y2 = bdata2['HeartDisease']

x2_train, x2_test, y2_train, y2_test = train_test_split(x2, y2, test_size=0.3, random_state=42)

In [156]:
# Binarized Binary Classifier

bin_cls = fcalc.classifier.BinarizedBinaryClassifier(x2_train.values, y2_train.to_numpy(), method="standard",  alpha=0)

In [157]:
bin_cls.predict(x2_test.values)
print(bin_cls.predictions)

[0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1.
 1. 1. 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1.
 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1.
 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1. 1.
 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 1. 1. 0. 1. 0. 0. 1. 1. 1.
 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1.
 1. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 1. 0. 1. 1. 1. 1. 0. 1.
 1. 0. 0. 1. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1.
 1. 1. 1. 0. 0. 1. 1. 1. 0. 1. 1. 0.]


In [158]:
print(accuracy_score(y2_test, bin_cls.predictions))
print(f1_score(y2_test, bin_cls.predictions))

0.6630434782608695
0.7335243553008597


# Pattern Binary Classifier

In [159]:
nbdata2 = pd.read_csv(r'https://raw.githubusercontent.com/OscarL7/Big-Homework-OSDA/main/heart_diseases.csv')
columns_to_remove = ['FastingBS', 'RestingECG', 'ExerciseAngina', 'Oldpeak', 'ST_Slope', 'ChestPainType']
nbdata2 = nbdata2.drop(columns=columns_to_remove)
nbdata2['Sex'] = nbdata2['Sex'].replace({'M': 1, 'F': 0})

x2 = bdata2.drop('HeartDisease', axis=1)
y2 = bdata2['HeartDisease']

x2_train, x2_test, y2_train, y2_test = train_test_split(x2, y2, test_size=0.3, random_state=42)

In [160]:
# Pattern Binary Classifier

pat_cls = fcalc.classifier.PatternBinaryClassifier(x2_train.values, y2_train.to_numpy(), method="ratio-support",  alpha=0)

In [161]:
pat_cls.predict(x2_test.values)
print(pat_cls.predict)

<bound method PatternBinaryClassifier.predict of <fcalc.classifier.PatternBinaryClassifier object at 0x00000262F9922160>>


In [162]:
print("accuracy:",round(accuracy_score(y2_test, pat_cls.predictions),4))
print("f1 score:",round(f1_score(y2_test, pat_cls.predictions, average = 'macro'),4))

accuracy: 0.721
f1 score: 0.7086


# Breast Cancer Dataset

In [163]:
data3 = pd.read_csv(r'https://raw.githubusercontent.com/OscarL7/Big-Homework-OSDA/main/breast_cancer.csv')
data3['y'] = [x == 'M' for x in data3['y']]

data3['area_mean143_600'] = data3['x.area_mean'].apply(lambda x: 1 if 143 <= x <= 600.9 else 0)
data3['area_mean601_1200'] = data3['x.area_mean'].apply(lambda x: 1 if 601 <= x <= 1200.9 else 0)
data3['area_mean1201_1900'] = data3['x.area_mean'].apply(lambda x: 1 if 1201 <= x <= 1900.9 else 0)
data3['area_mean1901_2502'] = data3['x.area_mean'].apply(lambda x: 1 if 1901 <= x <= 2501 else 0)


data3['radius_mean6_14'] = data3['x.radius_mean'].apply(lambda x: 1 if 6.0 <= x <= 14.9 else 0)
data3['radius_mean15_22'] = data3['x.radius_mean'].apply(lambda x: 1 if 15.0 <= x <= 22.9 else 0)
data3['radius_mean23_29'] = data3['x.radius_mean'].apply(lambda x: 1 if 23.0 <= x <= 29.0 else 0)


data3['texture_mean9_19'] = data3['x.texture_mean'].apply(lambda x: 1 if 9.71 <= x <= 19.9 else 0)
data3['texture_mean20_29'] = data3['x.texture_mean'].apply(lambda x: 1 if 20.0 <= x <= 29.9 else 0)
data3['texture_mean30_39'] = data3['x.texture_mean'].apply(lambda x: 1 if 30.0 <= x <= 39.28 else 0)

data3['concavity_mean'] = data3['x.concavity_mean'].apply(lambda x: 1 if 0.0 <= x <= 0.22 else 0)
data3['concavity_mean'] = data3['x.concavity_mean'].apply(lambda x: 1 if 0.23 <= x <= 0.42 else 0)

bdata3 = data3.drop(['Unnamed: 0', 'x.radius_mean', 'x.texture_mean', 'x.perimeter_mean',
       'x.area_mean', 'x.smoothness_mean', 'x.compactness_mean',
       'x.concavity_mean', 'x.concave_pts_mean', 'x.symmetry_mean',
       'x.fractal_dim_mean', 'x.radius_se', 'x.texture_se', 'x.perimeter_se',
       'x.area_se', 'x.smoothness_se', 'x.compactness_se', 'x.concavity_se',
       'x.concave_pts_se', 'x.symmetry_se', 'x.fractal_dim_se',
       'x.radius_worst', 'x.texture_worst', 'x.perimeter_worst',
       'x.area_worst', 'x.smoothness_worst', 'x.compactness_worst',
       'x.concavity_worst', 'x.concave_pts_worst', 'x.symmetry_worst',
       'x.fractal_dim_worst'], axis=1)



In [164]:
x3 = bdata3.drop('y', axis=1)
y3 = bdata3['y']

x3_train, x3_test, y3_train, y3_test = train_test_split(x3, y3, test_size=0.3, random_state=42)

In [192]:
# Binarized Binary Classifier

bin_cls = fcalc.classifier.BinarizedBinaryClassifier(x3_train.values, y3_train.to_numpy(), method="standard",  alpha=0)

In [193]:
bin_cls.predict(x3_test.values)
print(bin_cls.predictions)

[0. 0. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 1. 0. 0. 0.
 1. 0. 1. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1.
 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 0. 1.
 1. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 1.
 0. 1. 0.]


In [194]:
print(accuracy_score(y3_test, bin_cls.predictions))
print(f1_score(y3_test, bin_cls.predictions))

0.8771929824561403
0.8173913043478261


### Pattern Binary Classifier

In [None]:
nbdata3 = pd.read_csv(r'https://raw.githubusercontent.com/OscarL7/Big-Homework-OSDA/main/breast_cancer.csv')
nbdata3['y'] = [x == 'M' for x in nbdata3['y']]

x3 = nbdata3.drop('y', axis=1)
y3 = nbdata3['y']

x3_train, x3_test, y3_train, y3_test = train_test_split(x3, y3, test_size=0.3, random_state=42)

In [None]:
# Pattern Binary Classifier
pat_cls = fcalc.classifier.PatternBinaryClassifier(x3_train.values, y3_train.to_numpy(), 
                                             categorical=np.arange(x3_train.shape[1]), method="ratio-support",  alpha=1)

In [None]:
pat_cls.predict(x3_test.values)

In [None]:
print(accuracy_score(y3_test, pat_cls.predictions))
print(f1_score(y3_test, pat_cls.predictions, average = 'macro'))

0.6842105263157895
0.679375
