In [1]:
from kss import KSS
from data_utils import *
from sklearn.metrics import classification_report, confusion_matrix  
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

In [2]:
MF = ['SEP','COH','WPC','CC']

In [3]:
features = []
labels = []
names = []

datasets = [Ecoli(),Glass(),Haberman(),Ionosphere(),Iris(),Pima(),Sonar(),Thyroid(),Vehicle(),WDBC(),Wine()]
for data in datasets:
    f,l = data.get_data()
    features.append(f)
    labels.append(l)
    names.append(type(data).__name__)

In [19]:
results ={}
j=0

for i in range(len(names)):
    feature = features[i].to_numpy()
    label = labels[i].to_numpy()
    name = names[i]
    
    kf = KFold(n_splits=10, random_state=None, shuffle=True)
    k_fold_result = list(kf.split(feature,label))
    
    for mf_type in MF:
        f1_score = 0
        for train_index, test_index in k_fold_result:
            X_train, X_test = feature[train_index], feature[test_index]
            y_train, y_test = label[train_index], label[test_index]
            
            kss = KSS(7)
            kss.fit(X_train,y_train,mf_type)
            y_pred = kss.predict(X_test)

            f1_score += classification_report(y_test, y_pred,output_dict=True)['macro avg']['f1-score']
        results[j] = [name,mf_type,f1_score/10]
        j+=1

In [85]:
df = pd.DataFrame.from_dict(results,orient='index',columns=['Data sets','MF', "F1-Score"])

In [86]:
df

Unnamed: 0,Data sets,MF,F1-Score
0,Ecoli,SEP,0.741764
1,Ecoli,COH,0.656541
2,Ecoli,WPC,0.703075
3,Ecoli,CC,0.746433
4,Glass,SEP,0.919493
5,Glass,COH,0.902119
6,Glass,WPC,0.902119
7,Glass,CC,0.915938
8,Haberman,SEP,0.544324
9,Haberman,COH,0.569629


In [87]:
df_SEP = df.loc[df['MF']=='SEP']
df_COH = df.loc[df['MF']=='COH']
df_WPC = df.loc[df['MF']=='WPC']
df_CC = df.loc[df['MF']=='CC']

In [88]:
df_SEP = df_SEP.rename(columns={'F1-Score':'SEP-kSS'})
df_SEP = df_SEP.set_index('Data sets')
df_SEP = df_SEP.drop('MF', 1)


df_COH = df_COH.rename(columns={'F1-Score':'COH-kSS'})
df_COH = df_COH.set_index('Data sets')
df_COH = df_COH.drop('MF', 1)

df_WPC = df_WPC.rename(columns={'F1-Score':'WPC-kSS'})
df_WPC = df_WPC.set_index('Data sets')
df_WPC = df_WPC.drop('MF', 1)

df_CC = df_CC.rename(columns={'F1-Score':'CC-kSS'})
df_CC = df_CC.set_index('Data sets')
df_CC = df_CC.drop('MF', 1)


In [89]:
df_organized = pd.concat([df_CC,df_WPC,df_COH,df_SEP],axis=1)

df_organized.loc['MEAN'] = df_organized.mean()

df_organized = df_organized.round(4)

df_organized


Unnamed: 0_level_0,CC-kSS,WPC-kSS,COH-kSS,SEP-kSS
Data sets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ecoli,0.7464,0.7031,0.6565,0.7418
Glass,0.9159,0.9021,0.9021,0.9195
Haberman,0.5665,0.6431,0.5696,0.5443
Ionosphere,0.7549,0.9324,0.8196,0.6655
Iris,0.9532,0.9537,0.9537,0.9537
Pima,0.6764,0.6638,0.6805,0.6834
Sonar,0.7425,0.7886,0.771,0.7547
Thyroid,0.8595,0.8811,0.886,0.8649
Vehicle,0.5949,0.629,0.6254,0.6193
WDBC,0.669,0.7209,0.6869,0.6904


In [90]:
print(df_organized.to_csv(index=True))

Data sets,CC-kSS,WPC-kSS,COH-kSS,SEP-kSS
Ecoli,0.7464,0.7031,0.6565,0.7418
Glass,0.9159,0.9021,0.9021,0.9195
Haberman,0.5665,0.6431,0.5696,0.5443
Ionosphere,0.7549,0.9324,0.8196,0.6655
Iris,0.9532,0.9537,0.9537,0.9537
Pima,0.6764,0.6638,0.6805,0.6834
Sonar,0.7425,0.7886,0.771,0.7547
Thyroid,0.8595,0.8811,0.886,0.8649
Vehicle,0.5949,0.629,0.6254,0.6193
WDBC,0.669,0.7209,0.6869,0.6904
Wine,0.6964,0.7063,0.7155,0.7096
MEAN,0.7432,0.7749,0.7515,0.7406

