In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OrdinalEncoder, Binarizer
from imblearn.over_sampling import SMOTENC, SMOTEN, SMOTE
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV 
from gen_ball_mix import gen_balls
from classGBTSVM import OvO_GBTSVM , MultiLabelGBTSVM, GBTSVM, OVR_GBTSVM
from classLGBTSVM import OvO_LGBTSVM, OvR_LGBTSVM, LGBTSVM
from ucimlrepo import fetch_ucirepo 
from scipy.stats import zscore

In [18]:
def evaluate_base_model(model_class, train_data, test_data, *params):
    model = model_class(*params)
    model.fit(train_data)
    y_pred = model.predict(test_data)
    accuracy = model.score(test_data)
    return accuracy

In [19]:
def evaluate_model(model_class, X_train, y_train, X_test, y_test, *params):
    model = model_class(*params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = model.score(X_test, y_test)
    return accuracy

## Breast Cancer

In [4]:
breast_cancer = fetch_ucirepo(id=17)           
X = breast_cancer.data.features                 
y = breast_cancer.data.targets.squeeze()        

df = pd.concat([X, y], axis=1)

In [5]:
breast_cancer = fetch_ucirepo(id=17)           
X = breast_cancer.data.features                 
y = breast_cancer.data.targets.squeeze()        

df = pd.concat([X, y], axis=1)                  
target_col = y.name

enc_target = LabelEncoder()
df[target_col] = enc_target.fit_transform(df[target_col]).astype(np.float64)

df_decile = df.copy()
for col in X.columns:                         
    df_decile[col] = pd.qcut(
        df_decile[col],
        q=10,
        labels=list(range(10)),
        duplicates="drop"
    )

df_mix = pd.concat(
    [X,                         
     df_decile.add_suffix("_d")
    ],
    axis=1
)
y_mix = df[target_col]

n_num = X.shape[1]               
cat_idx = list(range(n_num, 2*n_num))     

smote = SMOTENC(
    categorical_features=cat_idx,
    random_state=42,
    sampling_strategy="auto"        # cân bằng hoàn toàn
)
X_res, y_res = smote.fit_resample(df_mix, y_mix)

df_balanced = pd.DataFrame(X_res, columns=df_mix.columns)
df_balanced[target_col] = y_res.reset_index(drop=True)


In [6]:
num_cols = X.columns.tolist()                
extra_label_col = f"{target_col}_d"          
cat_cols = [c for c in df_balanced.columns   
            if c.endswith("_d") and c != extra_label_col]
df_cat_only = df_balanced[cat_cols + [target_col]].copy()


In [7]:
data = df_cat_only.to_numpy(dtype=np.float64)
m,n = data.shape

In [8]:
scaler = MinMaxScaler(feature_range=(-1, 1))
data[:, :-1] = scaler.fit_transform(data[:, :-1])

In [9]:
np.random.seed(0)
indices = np.random.permutation(m)
data = data[indices]
A_train=data[0:int(m*(1-0.20))]
A_test=data[int(m * (1-0.20)):]

In [10]:
pur = 1 - (0.015 * 5)                      
num = 4
A_train = gen_balls(A_train, pur=pur, delbals=num)

Radius=[]
for i in A_train:
    Radius.append(i[1])
Center=[]
for ii in A_train:
    Center.append(ii[0])
Label=[]
for iii in A_train:
    Label.append(iii[-1])
Radius=np.array(Radius)
Center=np.array(Center)
Label=np.array(Label)
Z_train=np.hstack((Center,Radius.reshape(Radius.shape[0], 1)))
Lab=Label.reshape(Label.shape[0], 1)
A_train=np.hstack((Z_train,Lab))

In [11]:
X_train = A_train[:,:-1]
X_test = A_test[:,:-1]
y_train = A_train[:,-1]
y_test = A_test[:,-1]

In [12]:
accuracy_gbtsvm = evaluate_base_model(GBTSVM, A_train, A_test, 0.1, 0.1, 0.05, 0.05)
accuracy_lgbtsvm = evaluate_base_model(LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)
accuracy_ovo_gbtsvm = evaluate_model(OvO_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovr_gbtsvm = evaluate_model(OVR_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_multi_gbtsvm = evaluate_model(MultiLabelGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovo_lgbtsvm = evaluate_base_model(OvO_LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)
accuracy_ovr_lgbtsvm = evaluate_base_model(OvR_LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)

In [13]:
print(f"Accuracy of GBTSVM: {accuracy_gbtsvm}")
print(f"Accuracy of LGBTSVM: {accuracy_lgbtsvm}")
print(f"Accuracy of OvO_GBTSVM: {accuracy_ovo_gbtsvm}")
print(f"Accuracy of OVR_GBTSVM: {accuracy_ovr_gbtsvm}")
print(f"Accuracy of MultiLabelGBTSVM: {accuracy_multi_gbtsvm}")
print(f"Accuracy of OvO_LGBTSVM: {accuracy_ovo_lgbtsvm}")
print(f"Accuracy of OvR_LGBTSVM: {accuracy_ovr_lgbtsvm}")

Accuracy of GBTSVM: 53.84615384615385
Accuracy of LGBTSVM: 34.26573426573427
Accuracy of OvO_GBTSVM: 97.9020979020979
Accuracy of OVR_GBTSVM: 97.9020979020979
Accuracy of MultiLabelGBTSVM: 97.9020979020979
Accuracy of OvO_LGBTSVM: 72.72727272727273
Accuracy of OvR_LGBTSVM: 72.72727272727273


## Balance

In [9]:
balance_scale = fetch_ucirepo(id=12) 
X = balance_scale.data.features 
y = balance_scale.data.targets.squeeze()        

In [10]:
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)

In [11]:
df = pd.concat([X_res, y_res], axis=1)
target_col = y.name
enc_target = LabelEncoder()
df[target_col] = enc_target.fit_transform(df[target_col]).astype(np.float64)

In [12]:
data = df.to_numpy(dtype=np.float64)
m,n = data.shape

In [13]:
scaler = MinMaxScaler(feature_range=(-1, 1))
data[:, :-1] = scaler.fit_transform(data[:, :-1])

In [14]:
np.random.seed(0)
indices = np.random.permutation(m)
data = data[indices]
A_train=data[0:int(m*(1-0.20))]
A_test=data[int(m * (1-0.20)):]

In [15]:
pur = 1 - (0.015 * 5)                      
num = 2
A_train = gen_balls(A_train, pur=pur, delbals=num)

Radius=[]
for i in A_train:
    Radius.append(i[1])
Center=[]
for ii in A_train:
    Center.append(ii[0])
Label=[]
for iii in A_train:
    Label.append(iii[-1])
Radius=np.array(Radius)
Center=np.array(Center)
Label=np.array(Label)
Z_train=np.hstack((Center,Radius.reshape(Radius.shape[0], 1)))
Lab=Label.reshape(Label.shape[0], 1)
A_train=np.hstack((Z_train,Lab))

In [16]:
X_train = A_train[:,:-1]
X_test = A_test[:,:-1]
y_train = A_train[:,-1]
y_test = A_test[:,-1]

In [20]:
accuracy_gbtsvm = evaluate_base_model(GBTSVM, A_train, A_test, 0.1, 0.1, 0.05, 0.05)
accuracy_lgbtsvm = evaluate_base_model(LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)
accuracy_ovo_gbtsvm = evaluate_model(OvO_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovr_gbtsvm = evaluate_model(OVR_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_multi_gbtsvm = evaluate_model(MultiLabelGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovo_lgbtsvm = evaluate_base_model(OvO_LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)

In [21]:
print(f"Accuracy of GBTSVM: {accuracy_gbtsvm}")
print(f"Accuracy of LGBTSVM: {accuracy_lgbtsvm}")
print(f"Accuracy of OvO_GBTSVM: {accuracy_ovo_gbtsvm}")
print(f"Accuracy of OVR_GBTSVM: {accuracy_ovr_gbtsvm}")
print(f"Accuracy of MultiLabelGBTSVM: {accuracy_multi_gbtsvm}")
print(f"Accuracy of OvO_LGBTSVM: {accuracy_ovo_lgbtsvm}")

Accuracy of GBTSVM: 14.450867052023122
Accuracy of LGBTSVM: 32.947976878612714
Accuracy of OvO_GBTSVM: 84.97109826589595
Accuracy of OVR_GBTSVM: 18.497109826589593
Accuracy of MultiLabelGBTSVM: 47.97687861271676
Accuracy of OvO_LGBTSVM: 73.98843930635837


## Dermatology

In [None]:
dermatology = fetch_ucirepo(id=33) 
X = dermatology.data.features 
y = dermatology.data.targets.squeeze()

In [None]:
X_df = pd.DataFrame(X)
X_clean = X_df.dropna(subset=['age'])
y_clean = y[X_clean.index]

In [None]:
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_clean, y_clean)

In [None]:
df = pd.concat([X_res, y_res], axis=1)
data = df.to_numpy(dtype=np.float64)
m,n = data.shape

In [None]:
scaler = MinMaxScaler(feature_range=(-1, 1))
data[:, :-1] = scaler.fit_transform(data[:, :-1])

In [None]:
np.random.seed(0)
indices = np.random.permutation(m)
data = data[indices]
A_train=data[0:int(m*(1-0.20))]
A_test=data[int(m * (1-0.20)):]

In [None]:
pur = 1 - (0.015 * 5)                      
num = 2
A_train = gen_balls(A_train, pur=pur, delbals=num)

Radius=[]
for i in A_train:
    Radius.append(i[1])
Center=[]
for ii in A_train:
    Center.append(ii[0])
Label=[]
for iii in A_train:
    Label.append(iii[-1])
Radius=np.array(Radius)
Center=np.array(Center)
Label=np.array(Label)
Z_train=np.hstack((Center,Radius.reshape(Radius.shape[0], 1)))
Lab=Label.reshape(Label.shape[0], 1)
A_train=np.hstack((Z_train,Lab))

In [None]:
X_train = A_train[:,:-1]
X_test = A_test[:,:-1]
y_train = A_train[:,-1]
y_test = A_test[:,-1]

In [None]:
accuracy_gbtsvm = evaluate_base_model(GBTSVM, A_train, A_test, 0.1, 0.1, 0.05, 0.05)
accuracy_lgbtsvm = evaluate_base_model(LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)
accuracy_ovo_gbtsvm = evaluate_model(OvO_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovr_gbtsvm = evaluate_model(OVR_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_multi_gbtsvm = evaluate_model(MultiLabelGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovo_lgbtsvm = evaluate_model(OvO_LGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.1, 0.1)

In [None]:
print(f"Accuracy of GBTSVM: {accuracy_gbtsvm}")
print(f"Accuracy of LGBTSVM: {accuracy_lgbtsvm}")
print(f"Accuracy of OvO_GBTSVM: {accuracy_ovo_gbtsvm}")
print(f"Accuracy of OVR_GBTSVM: {accuracy_ovr_gbtsvm}")
print(f"Accuracy of MultiLabelGBTSVM: {accuracy_multi_gbtsvm}")
print(f"Accuracy of OvO_LGBTSVM: {accuracy_ovo_lgbtsvm}")

## Ecoli

In [None]:
ecoli = fetch_ucirepo(id=39) 
X = ecoli.data.features 
y = ecoli.data.targets.squeeze()

In [None]:
smote = SMOTE(random_state=42, k_neighbors=1)
X_res, y_res = smote.fit_resample(X, y)

In [None]:
df = pd.concat([X_res, y_res], axis=1)
target_col = y.name
enc_target = LabelEncoder()
df[target_col] = enc_target.fit_transform(df[target_col]).astype(np.float64)

In [None]:
data = df.to_numpy(dtype=np.float64)
m,n = data.shape

In [None]:
scaler = MinMaxScaler(feature_range=(-1, 1))
data[:, :-1] = scaler.fit_transform(data[:, :-1])

In [None]:
np.random.seed(0)
indices = np.random.permutation(m)
data = data[indices]
A_train=data[0:int(m*(1-0.20))]
A_test=data[int(m * (1-0.20)):]

In [None]:
pur = 1 - (0.015 * 5)                      
num = 2
A_train = gen_balls(A_train, pur=pur, delbals=num)

Radius=[]
for i in A_train:
    Radius.append(i[1])
Center=[]
for ii in A_train:
    Center.append(ii[0])
Label=[]
for iii in A_train:
    Label.append(iii[-1])
Radius=np.array(Radius)
Center=np.array(Center)
Label=np.array(Label)
Z_train=np.hstack((Center,Radius.reshape(Radius.shape[0], 1)))
Lab=Label.reshape(Label.shape[0], 1)
A_train=np.hstack((Z_train,Lab))

In [None]:
X_train = A_train[:,:-1]
X_test = A_test[:,:-1]
y_train = A_train[:,-1]
y_test = A_test[:,-1]

In [None]:
accuracy_gbtsvm = evaluate_base_model(GBTSVM, A_train, A_test, 0.1, 0.1, 0.05, 0.05)
accuracy_lgbtsvm = evaluate_base_model(LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)
accuracy_ovo_gbtsvm = evaluate_model(OvO_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovr_gbtsvm = evaluate_model(OVR_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_multi_gbtsvm = evaluate_model(MultiLabelGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovo_lgbtsvm = evaluate_model(LGBTSVM_OvO, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.1, 0.1)

In [None]:
print(f"Accuracy of GBTSVM: {accuracy_gbtsvm}")
print(f"Accuracy of LGBTSVM: {accuracy_lgbtsvm}")
print(f"Accuracy of OvO_GBTSVM: {accuracy_ovo_gbtsvm}")
print(f"Accuracy of OVR_GBTSVM: {accuracy_ovr_gbtsvm}")
print(f"Accuracy of MultiLabelGBTSVM: {accuracy_multi_gbtsvm}")
print(f"Accuracy of OvO_LGBTSVM: {accuracy_ovo_lgbtsvm}")

## Glass

In [4]:
glass_identification = fetch_ucirepo(id=42) 
X = glass_identification.data.features 
y = glass_identification.data.targets.squeeze()

In [5]:
X_df = pd.DataFrame(X)
columns = X_df.columns
z_scores = np.abs(zscore(X_df[columns]))
threshold = 3
X_clean = X_df[(z_scores < threshold).all(axis=1)]
y_clean = y[X_clean.index]

In [6]:
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_clean, y_clean)

In [7]:
df = pd.concat([X_res, y_res], axis=1)
data = df.to_numpy(dtype=np.float64)
m,n = data.shape

In [8]:
scaler = MinMaxScaler(feature_range=(-1, 1))
data[:, :-1] = scaler.fit_transform(data[:, :-1])

In [9]:
np.random.seed(0)
indices = np.random.permutation(m)
data = data[indices]
A_train=data[0:int(m*(1-0.20))]
A_test=data[int(m * (1-0.20)):]

In [10]:
pur = 1 - (0.015 * 5)                      
num = 2
A_train = gen_balls(A_train, pur=pur, delbals=num)

Radius=[]
for i in A_train:
    Radius.append(i[1])
Center=[]
for ii in A_train:
    Center.append(ii[0])
Label=[]
for iii in A_train:
    Label.append(iii[-1])
Radius=np.array(Radius)
Center=np.array(Center)
Label=np.array(Label)
Z_train=np.hstack((Center,Radius.reshape(Radius.shape[0], 1)))
Lab=Label.reshape(Label.shape[0], 1)
A_train=np.hstack((Z_train,Lab))

In [11]:
X_train = A_train[:,:-1]
X_test = A_test[:,:-1]
y_train = A_train[:,-1]
y_test = A_test[:,-1]

In [13]:
accuracy_gbtsvm = evaluate_base_model(GBTSVM, A_train, A_test, 0.1, 0.1, 0.05, 0.05)
accuracy_lgbtsvm = evaluate_base_model(LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)
accuracy_ovo_gbtsvm = evaluate_model(OvO_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovr_gbtsvm = evaluate_model(OVR_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_multi_gbtsvm = evaluate_model(MultiLabelGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovo_lgbtsvm = evaluate_base_model(OvO_LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)

In [14]:
print(f"Accuracy of GBTSVM: {accuracy_gbtsvm}")
print(f"Accuracy of LGBTSVM: {accuracy_lgbtsvm}")
print(f"Accuracy of OvO_GBTSVM: {accuracy_ovo_gbtsvm}")
print(f"Accuracy of OVR_GBTSVM: {accuracy_ovr_gbtsvm}")
print(f"Accuracy of MultiLabelGBTSVM: {accuracy_multi_gbtsvm}")
print(f"Accuracy of OvO_LGBTSVM: {accuracy_ovo_lgbtsvm}")

Accuracy of GBTSVM: 2.380952380952381
Accuracy of LGBTSVM: 0.0
Accuracy of OvO_GBTSVM: 80.95238095238095
Accuracy of OVR_GBTSVM: 44.047619047619044
Accuracy of MultiLabelGBTSVM: 55.952380952380956
Accuracy of OvO_LGBTSVM: 60.71428571428571


## Hayes-roth

In [6]:
hayes_roth = fetch_ucirepo(id=44) 
X = hayes_roth.data.features 
y = hayes_roth.data.targets.squeeze()

In [7]:
df = pd.concat([X, y], axis=1)

In [8]:
Q1 = np.percentile(X, 25, axis=0)
Q3 = np.percentile(X, 75, axis=0)
IQR = Q3 - Q1
mask_outlier = np.all((X >= Q1 - 1.5*IQR) & (X <= Q3 + 1.5*IQR), axis=1)

X = X[mask_outlier]
y = y[mask_outlier]

# Loại bỏ NaN trong y
mask_nan = ~np.isnan(y) if y.dtype.kind in ['f', 'i'] else (y != '') & (y != None)
X = X[mask_nan]
y = y[mask_nan]

## Image Segmentation

In [None]:
image_segmentation = fetch_ucirepo(id=50) 
X = image_segmentation.data.features 
y = image_segmentation.data.targets.squeeze()

In [None]:
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)

In [None]:
df = pd.concat([X_res, y_res], axis=1)
target_col = y.name
enc_target = LabelEncoder()
df[target_col] = enc_target.fit_transform(df[target_col]).astype(np.float64)

In [None]:
data = df.to_numpy(dtype=np.float64)
m,n = data.shape

In [None]:
scaler = MinMaxScaler(feature_range=(-1, 1))
data[:, :-1] = scaler.fit_transform(data[:, :-1])

In [None]:
np.random.seed(0)
indices = np.random.permutation(m)
data = data[indices]
A_train=data[0:int(m*(1-0.20))]
A_test=data[int(m * (1-0.20)):]

In [None]:
pur = 1 - (0.015 * 5)                      
num = 2
A_train = gen_balls(A_train, pur=pur, delbals=num)

Radius=[]
for i in A_train:
    Radius.append(i[1])
Center=[]
for ii in A_train:
    Center.append(ii[0])
Label=[]
for iii in A_train:
    Label.append(iii[-1])
Radius=np.array(Radius)
Center=np.array(Center)
Label=np.array(Label)
Z_train=np.hstack((Center,Radius.reshape(Radius.shape[0], 1)))
Lab=Label.reshape(Label.shape[0], 1)
A_train=np.hstack((Z_train,Lab))

In [None]:
X_train = A_train[:,:-1]
X_test = A_test[:,:-1]
y_train = A_train[:,-1]
y_test = A_test[:,-1]

In [None]:
accuracy_gbtsvm = evaluate_base_model(GBTSVM, A_train, A_test, 0.1, 0.1, 0.05, 0.05)
accuracy_lgbtsvm = evaluate_base_model(LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)
accuracy_ovo_gbtsvm = evaluate_model(OvO_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovr_gbtsvm = evaluate_model(OVR_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_multi_gbtsvm = evaluate_model(MultiLabelGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovo_lgbtsvm = evaluate_model(OvO_LGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.1, 0.1)

In [None]:
print(f"Accuracy of GBTSVM: {accuracy_gbtsvm}")
print(f"Accuracy of LGBTSVM: {accuracy_lgbtsvm}")
print(f"Accuracy of OvO_GBTSVM: {accuracy_ovo_gbtsvm}")
print(f"Accuracy of OVR_GBTSVM: {accuracy_ovr_gbtsvm}")
print(f"Accuracy of MultiLabelGBTSVM: {accuracy_multi_gbtsvm}")
print(f"Accuracy of OvO_LGBTSVM: {accuracy_ovo_lgbtsvm}")

## Iris

In [37]:
iris = fetch_ucirepo(id=53) 
X = iris.data.features 
y = iris.data.targets.squeeze()

In [38]:
df = pd.concat([X, y], axis=1)
target_col = y.name
enc_target = LabelEncoder()
df[target_col] = enc_target.fit_transform(df[target_col]).astype(np.float64)

In [None]:
# for col in X.columns:
#     df[col+'_binned'] = pd.qcut(df[col], q=3, labels=list(range(3)), duplicates='drop')
#     df.drop(columns=[col], inplace=True)

In [39]:
data = df.to_numpy(dtype=np.float64)
m,n = data.shape

In [40]:
scaler = MinMaxScaler(feature_range=(-1, 1))
data[:, :-1] = scaler.fit_transform(data[:, :-1])

In [41]:
np.random.seed(0)
indices = np.random.permutation(m)
data = data[indices]
A_train=data[0:int(m*(1-0.20))]
A_test=data[int(m * (1-0.20)):]

In [42]:
pur = 1 - (0.015 * 5)                      
num = 2
A_train = gen_balls(A_train, pur=pur, delbals=num)

Radius=[]
for i in A_train:
    Radius.append(i[1])
Center=[]
for ii in A_train:
    Center.append(ii[0])
Label=[]
for iii in A_train:
    Label.append(iii[-1])
Radius=np.array(Radius)
Center=np.array(Center)
Label=np.array(Label)
Z_train=np.hstack((Center,Radius.reshape(Radius.shape[0], 1)))
Lab=Label.reshape(Label.shape[0], 1)
A_train=np.hstack((Z_train,Lab))

In [43]:
X_train = A_train[:,:-1]
X_test = A_test[:,:-1]
y_train = A_train[:,-1]
y_test = A_test[:,-1]

In [44]:
accuracy_gbtsvm = evaluate_base_model(GBTSVM, A_train, A_test, 0.1, 0.1, 0.05, 0.05)
accuracy_lgbtsvm = evaluate_base_model(LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)
accuracy_ovo_gbtsvm = evaluate_model(OvO_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovr_gbtsvm = evaluate_model(OVR_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_multi_gbtsvm = evaluate_model(MultiLabelGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovo_lgbtsvm = evaluate_base_model(OvO_LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)

In [45]:
print(f"Accuracy of GBTSVM: {accuracy_gbtsvm}")
print(f"Accuracy of LGBTSVM: {accuracy_lgbtsvm}")
print(f"Accuracy of OvO_GBTSVM: {accuracy_ovo_gbtsvm}")
print(f"Accuracy of OVR_GBTSVM: {accuracy_ovr_gbtsvm}")
print(f"Accuracy of MultiLabelGBTSVM: {accuracy_multi_gbtsvm}")
print(f"Accuracy of OvO_LGBTSVM: {accuracy_ovo_lgbtsvm}")

Accuracy of GBTSVM: 36.666666666666664
Accuracy of LGBTSVM: 0.0
Accuracy of OvO_GBTSVM: 96.66666666666667
Accuracy of OVR_GBTSVM: 73.33333333333333
Accuracy of MultiLabelGBTSVM: 73.33333333333333
Accuracy of OvO_LGBTSVM: 93.33333333333333


## Teaching Evaluation

In [122]:
import pandas as pd
tae_data_path = 'Dataset/tae/tae.data'
df = pd.read_csv(tae_data_path, header=None, sep=',', engine='python')
num_columns_in_data = len(df.columns)
column_names = [f"X{i+1}" for i in range(num_columns_in_data)]
df.columns = column_names
# X  = df.iloc[:,:-1]
# y = df.iloc[:,-1]

In [123]:
features = ['X2', 'X3', 'X5']
quantiles = df[features].quantile([0.25, 0.75])
Q1 = quantiles.loc[0.25]
Q3 = quantiles.loc[0.75]
df_cleaned = df[~((df[features] < (Q1 - 1.5*(Q3-Q1))) | 
                  (df[features] > (Q3 + 1.5*(Q3-Q1)))).any(axis=1)]

In [124]:
X = df_cleaned.iloc[:,:-1]
y = df_cleaned.iloc[:,-1]

In [125]:
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)
df = pd.concat([X_res, y_res], axis=1)

In [126]:
data = df.to_numpy(dtype=np.float64)
m,n = data.shape

In [127]:
scaler = MinMaxScaler(feature_range=(-1, 1))
data[:, :-1] = scaler.fit_transform(data[:, :-1])

In [128]:
np.random.seed(0)
indices = np.random.permutation(m)
data = data[indices]
A_train=data[0:int(m*(1-0.20))]
A_test=data[int(m * (1-0.20)):]

In [129]:
pur = 1 - (0.015 * 5)                      
num = 2
A_train = gen_balls(A_train, pur=pur, delbals=num)

Radius=[]
for i in A_train:
    Radius.append(i[1])
Center=[]
for ii in A_train:
    Center.append(ii[0])
Label=[]
for iii in A_train:
    Label.append(iii[-1])
Radius=np.array(Radius)
Center=np.array(Center)
Label=np.array(Label)
Z_train=np.hstack((Center,Radius.reshape(Radius.shape[0], 1)))
Lab=Label.reshape(Label.shape[0], 1)
A_train=np.hstack((Z_train,Lab))

In [130]:
X_train = A_train[:,:-1]
X_test = A_test[:,:-1]
y_train = A_train[:,-1]
y_test = A_test[:,-1]

In [131]:
accuracy_gbtsvm = evaluate_base_model(GBTSVM, A_train, A_test, 0.1, 0.1, 0.05, 0.05)
accuracy_lgbtsvm = evaluate_base_model(LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)
accuracy_ovo_gbtsvm = evaluate_model(OvO_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovr_gbtsvm = evaluate_model(OVR_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_multi_gbtsvm = evaluate_model(MultiLabelGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovo_lgbtsvm = evaluate_base_model(OvO_LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)

In [132]:
print(f"Accuracy of GBTSVM: {accuracy_gbtsvm}")
print(f"Accuracy of LGBTSVM: {accuracy_lgbtsvm}")
print(f"Accuracy of OvO_GBTSVM: {accuracy_ovo_gbtsvm}")
print(f"Accuracy of OVR_GBTSVM: {accuracy_ovr_gbtsvm}")
print(f"Accuracy of MultiLabelGBTSVM: {accuracy_multi_gbtsvm}")
print(f"Accuracy of OvO_LGBTSVM: {accuracy_ovo_lgbtsvm}")

Accuracy of GBTSVM: 21.875
Accuracy of LGBTSVM: 12.5
Accuracy of OvO_GBTSVM: 46.875
Accuracy of OVR_GBTSVM: 34.375
Accuracy of MultiLabelGBTSVM: 65.625
Accuracy of OvO_LGBTSVM: 6.25


## Global  Cancer

In [181]:
# Load dataset
df = pd.read_csv("Dataset/globalcancer.csv", sep=",")
df.drop(columns=['Patient_ID', 'Gender', 'Year', 'Cancer_Type', 'Country_Region'], inplace=True)
cancer_stage_col = df.pop('Cancer_Stage')
df['Cancer_Stage'] = cancer_stage_col
target_col = ['Cancer_Stage']
enc_target = OrdinalEncoder()
df[target_col] = enc_target.fit_transform(df[target_col]).astype(np.float64)
changed_col = ['Age', 'Genetic_Risk', 'Air_Pollution', 'Alcohol_Use', 'Smoking',
               'Obesity_Level', 'Treatment_Cost_USD', 'Survival_Years', 'Target_Severity_Score']

for col in changed_col:
    df[col] = pd.qcut(df[col], q=8, duplicates="drop").cat.codes.astype(np.float64)
df['Cancer_Stage_Binary'] = df['Cancer_Stage'].apply(lambda x: 0 if x <= 1 else 1)
df.drop(columns=['Cancer_Stage'], inplace=True)

In [182]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]
smote = SMOTE(random_state=42)
X_res, y_res = SMOTE().fit_resample(X, y)
df = pd.concat([X_res, y_res], axis=1)

In [183]:
data  = df.to_numpy(dtype=np.float64)
m,n = data.shape

In [184]:
data = data[0:int(m*(1-0.98))]
m,n = data.shape

In [175]:
scaler = MinMaxScaler(feature_range=(-1, 1))
data[:, :-1] = scaler.fit_transform(data[:, :-1])

In [185]:
np.random.seed(0)
indices = np.random.permutation(m)
data = data[indices]
A_train=data[0:int(m*(1-0.20))]
A_test=data[int(m * (1-0.20)):]

In [186]:
pur = 1 - (0.015 * 15)                      
num = 1
A_train = gen_balls(A_train, pur=pur, delbals=num)

Radius=[]
for i in A_train:
    Radius.append(i[1])
Center=[]
for ii in A_train:
    Center.append(ii[0])
Label=[]
for iii in A_train:
    Label.append(iii[-1])
Radius=np.array(Radius)
Center=np.array(Center)
Label=np.array(Label)
Z_train=np.hstack((Center,Radius.reshape(Radius.shape[0], 1)))
Lab=Label.reshape(Label.shape[0], 1)
A_train=np.hstack((Z_train,Lab))

In [187]:
X_train = A_train[:,:-1]
X_test = A_test[:,:-1]
y_train = A_train[:,-1]
y_test = A_test[:,-1]

In [188]:
accuracy_gbtsvm = evaluate_base_model(GBTSVM, A_train, A_test, 0.1, 0.1, 0.05, 0.05)
accuracy_lgbtsvm = evaluate_base_model(LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)
accuracy_ovo_gbtsvm = evaluate_model(OvO_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovr_gbtsvm = evaluate_model(OVR_GBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_multi_gbtsvm = evaluate_model(MultiLabelGBTSVM, X_train, y_train, X_test, y_test, 0.1, 0.1, 0.05, 0.05)
accuracy_ovo_lgbtsvm = evaluate_base_model(OvO_LGBTSVM, A_train, A_test, 0.1, 0.1, 0.1, 0.1)

In [189]:
print(f"Accuracy of GBTSVM: {accuracy_gbtsvm}")
print(f"Accuracy of LGBTSVM: {accuracy_lgbtsvm}")
print(f"Accuracy of OvO_GBTSVM: {accuracy_ovo_gbtsvm}")
print(f"Accuracy of OVR_GBTSVM: {accuracy_ovr_gbtsvm}")
print(f"Accuracy of MultiLabelGBTSVM: {accuracy_multi_gbtsvm}")
print(f"Accuracy of OvO_LGBTSVM: {accuracy_ovo_lgbtsvm}")

Accuracy of GBTSVM: 50.20746887966805
Accuracy of LGBTSVM: 46.88796680497925
Accuracy of OvO_GBTSVM: 61.82572614107884
Accuracy of OVR_GBTSVM: 61.82572614107884
Accuracy of MultiLabelGBTSVM: 61.82572614107884
Accuracy of OvO_LGBTSVM: 55.18672199170125


## Seeds

In [28]:
seed =  70
df = pd.read_csv('Dataset/seeds_dataset.txt', delim_whitespace=True, header=None)
df.columns = [
    'Area', 
    'Perimeter', 
    'Compactness', 
    'KernelLength', 
    'KernelWidth', 
    'AsymmetryCoeff', 
    'KernelGrooveLength', 
    'Class'
]
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1
mask = ~((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)
df_clean = df[mask]

In [29]:
X = df_clean.iloc[:,:-1]
y = df_clean.iloc[:,-1]
smote = SMOTE(random_state=42)
X_res, y_res = SMOTE().fit_resample(X, y)
df = pd.concat([X_res, y_res], axis=1)

In [30]:
data  = df.to_numpy(dtype=np.float64)
m,n = data.shape

In [81]:
data = {
    "Dataset": [
        "Balance", "Dermatology", "Ecoli", "Glass", "Hayes-roth", 
        "Iris", "Image-segmentation", "Seeds", "Teaching Evaluation"
    ],
    "#Instances": [625, 358, 327, 214, 132, 150, 210, 210, 151],
    "#Features": [4, 4, 7, 9, 5, 4, 19, 7, 5],
    "#Classes": [3, 6, 5, 6, 3, 3, 7, 3, 3]
}

df = pd.DataFrame(data)

# Lưu DataFrame thành file Excel
excel_path = "Dataset/datasets_summary.xlsx"
df.to_excel(excel_path, index=False)