## Importing libraries

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.stats import hmean
import math
import seaborn as sn
import random
from scipy import ndarray
import pickle

In [3]:
import smote_variants as sv
from imblearn.over_sampling import SMOTE, BorderlineSMOTE, SVMSMOTE 

Using TensorFlow backend.


In [4]:
%config InlineBackend.figure_format = 'svg'

In [5]:
# Two-colour matplotlib palette.
# NOTE(review): not referenced anywhere in the visible part of this notebook —
# presumably intended for plots defined elsewhere; confirm before removing.
palette = ['paleturquoise','c']

## Defining Necessary functions

In [6]:
def stats(arr):
    """Summarise a collection of result rows column by column.

    Parameters
    ----------
    arr : array-like
        Sequence of equally long rows (e.g. one ``[f1, kappa, aps]`` row per fold).

    Returns
    -------
    tuple
        ``(mean, std)`` where both entries are taken along axis 0 of ``arr``.
    """
    results = np.asarray(arr)
    column_means = results.mean(axis=0)
    column_stds = results.std(axis=0)
    return column_means, column_stds

In [7]:
def unison_shuffled_copies(a, b,seed_perm):
    """Shuffle two equally long arrays with one shared random permutation.

    Parameters
    ----------
    a, b : numpy.ndarray
        Arrays indexed along their first axis (here: feature matrix and labels).
    seed_perm : int
        Seed written to NumPy's global random state before the permutation is drawn.

    Returns
    -------
    tuple
        ``(a[p], b[p])`` for the same permutation ``p``.

    Raises
    ------
    AssertionError
        If ``a`` and ``b`` differ in length (the global seed has already been set by then).
    """
    # Seed first so the permutation depends only on seed_perm.
    np.random.seed(seed_perm)
    assert len(a) == len(b)
    order = np.random.permutation(len(a))
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [8]:
def OVS(training_data,training_labels,neb,random_state=None):
    """Oversample one training fold with several smote_variants samplers.

    Parameters
    ----------
    training_data : numpy.ndarray
        Feature matrix of the training fold.
    training_labels : numpy.ndarray
        Labels of the training fold.
    neb : int
        Neighbour count passed as ``n_neighbors`` to ProWSyn and SMOTE.
    random_state : int, optional
        Seed handed to every sampler. When omitted, the notebook-level variable
        ``seed_perm`` is used, which is what this function always read before
        the parameter existed (so existing three-argument calls behave the same).

    Returns
    -------
    tuple
        Ten arrays, as ``(features, labels)`` pairs in this order:
        SMOTE, polynom_fit_SMOTE, ProWSyn, CURE_SMOTE, SOMO.

    Raises
    ------
    NameError
        If ``random_state`` is omitted and no global ``seed_perm`` exists.
    """
    import warnings
    warnings.filterwarnings("ignore")

    # Make the seed dependency explicit; fall back to the global for old callers.
    seed = seed_perm if random_state is None else random_state

    smote_polynom_fit_SMOTE = sv.polynom_fit_SMOTE(random_state=seed)
    SMOTE_feat_polynom_fit_SMOTE, SMOTE_labels_polynom_fit_SMOTE = smote_polynom_fit_SMOTE.sample(training_data, training_labels)

    smote_ProWSyn = sv.ProWSyn(random_state=seed, n_neighbors=neb)
    SMOTE_feat_ProWSyn, SMOTE_labels_ProWSyn = smote_ProWSyn.sample(training_data, training_labels)

    smote_cure = sv.CURE_SMOTE(random_state=seed)
    SMOTE_feat_cure, SMOTE_labels_cure = smote_cure.sample(training_data, training_labels)

    smote_SOMO = sv.SOMO(random_state=seed)
    SMOTE_feat_SOMO, SMOTE_labels_SOMO = smote_SOMO.sample(training_data, training_labels)

    # Plain SMOTE is the only sampler driven through fit_resample here.
    sm = sv.SMOTE(random_state=seed, n_neighbors=neb)
    SMOTE_feat, SMOTE_labels = sm.fit_resample(training_data,training_labels)

    return(SMOTE_feat, SMOTE_labels, SMOTE_feat_polynom_fit_SMOTE, SMOTE_labels_polynom_fit_SMOTE, SMOTE_feat_ProWSyn, SMOTE_labels_ProWSyn,\
          SMOTE_feat_cure, SMOTE_labels_cure, SMOTE_feat_SOMO, SMOTE_labels_SOMO)

## Defining ML models

Switch on the classifier you want to use (press ctrl+y on the cell). For every classifier, for every dataset we have provided the oversampling scheme we have used. Use the same parameter values to exactly reproduce the results:
- High Global Variance (HGV) => neb_conv=1000, max_conv=2
- High Local Variance (HLV) => neb_conv=5, max_conv=2
- Low Global Variance (LGV) => neb_conv=1000, max_conv=features_x.shape[1]
- Low Local Variance (LLV) => neb_conv=5, max_conv=features_x.shape[1]

|dataset|strategy|
|:-|---|
|abalone9-18|HGV|
|abalone_17_vs_7_8_9_10|HGV|
|car-vgood|LLV|
|car_good|LGV|
|flare_F|HGV|
|hypothyroid|LGV|
|kddcup-guess_passwd_vs_satan|HLV|
|kr-vs-k-three_vs_eleven|LGV|
|kr-vs-k-zero-one_vs_draw|LGV|
|oil|HGV|
|ozone_level|HGV|
|shuttle-2_vs_5|HGV|
|solar_flare_m0|HLV|
|thyroid_sick|LGV|
|wine_quality|HLV|
|winequality-red-4|HLV|
|yeast4|HLV|
|yeast5|HLV|
|yeast6|LGV|
|yeast_me2|LLV|

|dataset|strategy|
|:-|---|
|abalone9-18|HGV|
|abalone_17_vs_7_8_9_10|LLV|
|car-vgood|LGV|
|car_good|LGV|
|flare_F|LGV|
|hypothyroid|LGV|
|kddcup-guess_passwd_vs_satan|LGV|
|kr-vs-k-three_vs_eleven|LGV|
|kr-vs-k-zero-one_vs_draw|LGV|
|oil|LGV|
|ozone_level|LGV|
|shuttle-2_vs_5|LGV|
|solar_flare_m0|LGV|
|thyroid_sick|LLV|
|wine_quality|LGV|
|winequality-red-4|LGV|
|yeast4|LGV|
|yeast5|LLV|
|yeast6|LLV|
|yeast_me2|LGV|

|dataset|strategy|
|:-|---|
|abalone9-18|HGV|
|abalone_17_vs_7_8_9_10|HGV|
|car-vgood|HLV|
|car_good|HLV|
|flare_F|HGV|
|hypothyroid|LGV|
|kddcup-guess_passwd_vs_satan|HGV|
|kr-vs-k-three_vs_eleven|HGV|
|kr-vs-k-zero-one_vs_draw|LGV|
|oil|HGV|
|ozone_level|HGV|
|shuttle-2_vs_5|HGV|
|solar_flare_m0|HLV|
|thyroid_sick|HLV|
|wine_quality|HLV|
|winequality-red-4|HGV|
|yeast4|HGV|
|yeast5|HGV|
|yeast6|HLV|
|yeast_me2|HLV|

In [9]:
def classifier(X_train,y_train,X_test,y_test):
    """Fit a 5-nearest-neighbour classifier and score it on a test fold.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and labels (possibly oversampled).
    X_test, y_test : array-like
        Held-out features and labels.

    Returns
    -------
    tuple
        ``(f1, kappa, aps, con_mat)``: F1 score, Cohen's kappa, average
        precision computed from the second ``predict_proba`` column, and the
        confusion matrix of the hard predictions.
    """
    import warnings
    warnings.filterwarnings("ignore")
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import average_precision_score
    from sklearn.metrics import cohen_kappa_score
    from sklearn.metrics import confusion_matrix
    from sklearn.metrics import f1_score

    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train, y_train)

    y_pred = knn.predict(X_test)
    # Column 1 is the probability of the second class in knn.classes_
    # (the minority label 1 when the training labels are 0/1).
    y_proba = knn.predict_proba(X_test)[:, 1]

    con_mat = confusion_matrix(y_test, y_pred)
    aps = average_precision_score(y_test, y_proba)
    f1 = f1_score(y_test, y_pred)
    kappa = cohen_kappa_score(y_test, y_pred)
    return(f1, kappa, aps, con_mat)
# Classifier tag appended to the row names ('F1-Score_' / 'Kappa_') of the exported results.
classify='K_NN'
# Passed to ProWRAS_gen as `sigma`, the standard deviation of the Gaussian noise
# used when building shadow samples.
sigma_model=0.001

|dataset|strategy|
|:-|---|
|abalone9-18|LLV|
|abalone_17_vs_7_8_9_10|LLV|
|car-vgood|LLV|
|car_good|LLV|
|flare_F|LLV|
|hypothyroid|LLV|
|kddcup-guess_passwd_vs_satan|LLV|
|kr-vs-k-three_vs_eleven|LLV|
|kr-vs-k-zero-one_vs_draw|LLV|
|oil|LLV|
|ozone_level|LLV|
|shuttle-2_vs_5|LLV|
|solar_flare_m0|LLV|
|thyroid_sick|LLV|
|wine_quality|LLV|
|winequality-red-4|LLV|
|yeast4|LLV|
|yeast5|LLV|
|yeast6|LLV|
|yeast_me2|LLV|

## Defining ProWRAS function

In [10]:
def Neb_grps(data,neb_conv,n_jobs):
    """Return, for every row of ``data``, the indices of its ``neb_conv`` nearest rows.

    The neighbour search is fitted on ``data`` and queried with the same ``data``,
    so a point will normally be listed in its own neighbourhood.

    Parameters
    ----------
    data : numpy.ndarray
        Points to index and to query.
    neb_conv : int
        Neighbourhood size (``n_neighbors`` of the sklearn search).
    n_jobs : int
        Parallelism forwarded to sklearn.

    Returns
    -------
    numpy.ndarray
        One row of neighbour indices per input point.
    """
    from sklearn.neighbors import NearestNeighbors
    knn_index = NearestNeighbors(n_neighbors=neb_conv, n_jobs=n_jobs)
    knn_index.fit(data)
    _, neighbour_ids = knn_index.kneighbors(data)
    return np.asarray([row for row in neighbour_ids])

def partition_info(X, y, max_levels, n_neighbours_max, theta, n_jobs):
    """Partition the minority class by proximity to the majority class.

    Minority points (``y == 1``) are peeled off level by level: at each level
    the minority points that are among the nearest minority neighbours of any
    majority point form one cluster and are removed from the pool. Whatever is
    left after the last level becomes a final cluster.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix.
    y : numpy.ndarray
        Labels; ``1`` marks the minority class, everything else is majority.
    max_levels : int
        The level loop runs over ``range(1, max_levels)``.
    n_neighbours_max : int
        Upper bound on the neighbours searched per majority point (capped by
        the number of minority points still in the pool).
    theta : float
        Decay rate of the cluster weights, ``exp(-theta * (level - 1))``.
    n_jobs : int
        Parallelism forwarded to sklearn's ``NearestNeighbors``.

    Returns
    -------
    tuple
        ``(clusters, weights)``. ``clusters`` is a 1-D object array whose
        entries are integer index arrays into ``X`` (clusters may differ in
        size, so a rectangular array cannot be assumed). ``weights`` is the
        normalised sampling probability of each cluster.
    """
    # Step 2: pool of minority indices and the majority feature rows.
    P = np.where(y == 1)[0]
    X_maj = X[np.where(y != 1)]

    Ps = []
    proximity_levels = []
    # Level recorded for the left-over cluster; stays 1 when the loop never
    # runs (max_levels <= 1), which previously raised a NameError.
    last_level = 1

    # Step 3
    for level in range(1, max_levels):
        if len(P) == 0:
            break
        last_level = level

        # Step 3 a: nearest remaining minority points of every majority point.
        # Relies on the notebook-level `from sklearn.neighbors import NearestNeighbors`.
        n_neighbours_max = min([len(P), n_neighbours_max])
        nn = NearestNeighbors(n_neighbors=n_neighbours_max, n_jobs=n_jobs)
        nn.fit(X[P])
        distances, indices = nn.kneighbors(X_maj)

        # Step 3 b: positions (within P) of all minority points that were hit.
        P_i = np.unique(indices)

        # Step 3 c: proximity levels are encoded in the Ps list index.
        Ps.append(P[P_i])
        proximity_levels.append(level + 1)

        # Step 3 d
        P = np.delete(P, P_i)

    # Steps 4 and 5: remaining minority points form the last cluster.
    # NOTE(review): the remainder is given the last loop index as its level,
    # i.e. one less than the level of the cluster created in that iteration —
    # kept as-is to preserve existing results; confirm this is intended.
    if len(P) > 0:
        Ps.append(P)
        proximity_levels.append(last_level)

    # Step 6: weights are the probability distribution of sampling in the
    # clusters identified.
    weights = np.array([np.exp(-theta*(lvl - 1)) for lvl in proximity_levels])
    weights = weights/np.sum(weights)

    # Clusters usually differ in length; build the object array explicitly
    # because np.array() on ragged input raises on NumPy >= 1.24.
    clusters = np.empty(len(Ps), dtype=object)
    for k, members in enumerate(Ps):
        clusters[k] = members
    return (clusters, weights)
    
def high_global_variance(data,num_samples_to_generate):
    """Generate synthetic points as random convex combinations of two rows of ``data``.

    Both rows are drawn uniformly (with replacement) from the whole of ``data``;
    the two mixing weights are integers from 1 to 99 normalised to sum to one.
    NumPy's global random state is reset to seed 42 on every call.

    Parameters
    ----------
    data : numpy.ndarray
        Points to interpolate between, one per row.
    num_samples_to_generate : int or float
        Number of synthetic points; truncated with ``int()``.

    Returns
    -------
    numpy.ndarray
        One synthetic point per row.
    """
    np.random.seed(42)
    n_points = len(data)
    n_new = int(num_samples_to_generate)

    def _draw_one():
        # Draw order matters for reproducibility: both row indices, then both weights.
        pair = np.array([np.random.randint(n_points), np.random.randint(n_points)])
        raw = np.array([np.random.randint(1, 100), np.random.randint(1, 100)])
        return np.dot(raw / raw.sum(), data[pair])

    return np.array([_draw_one() for _ in range(n_new)])



def low_global_variance(data,cluster_size,shadow,sigma,num_samples_to_generate,num_convcom):
    """Generate synthetic points as convex combinations of noisy "shadow" copies.

    Every one of the first ``cluster_size`` rows of ``data`` is replicated
    ``shadow`` times with Gaussian noise; each synthetic point is then a random
    convex combination of ``num_convcom`` shadow points drawn from the whole
    pool. NumPy's global random state is reset to seed 42 on every call.

    Parameters
    ----------
    data : numpy.ndarray
        Cluster points, one per row.
    cluster_size : int
        Number of leading rows of ``data`` to shadow.
    shadow : int
        Shadow copies per point.
    sigma : float
        Standard deviation of the noise.
    num_samples_to_generate : int or float
        Number of synthetic points; truncated with ``int()``.
    num_convcom : int or float
        Shadow points per convex combination; truncated with ``int()``.

    Returns
    -------
    numpy.ndarray
        One synthetic point per row; always finite for finite input.
    """
    np.random.seed(42)

    # NOTE(review): a single scalar noise value is drawn per shadow copy and
    # added to every feature of the point (not independent per-feature noise);
    # kept unchanged so existing results stay reproducible.
    shadow_points = []
    for i in range(cluster_size):
        c = 0
        while c < shadow:
            shadow_points.append(data[i] + np.random.normal(0, sigma))
            c = c + 1
    shadow_points = np.array(shadow_points)

    pool_size = int(shadow*cluster_size)
    n_terms = int(num_convcom)

    generated = []
    for _ in range(int(num_samples_to_generate)):
        idx = np.random.randint(pool_size, size=n_terms)
        raw = np.random.randint(100, size=len(idx))
        total = raw.sum()
        if raw.size and total == 0:
            # All drawn weights are 0 (possible because the range starts at 0):
            # dividing would give NaN samples, so weight the points equally.
            aff_w = np.full(raw.size, 1.0/raw.size)
        else:
            aff_w = raw/total
        generated.append(np.dot(aff_w, shadow_points[idx]))
    return(np.array(generated))



def high_local_variance(data,neb_conv, num_samples_to_generate,n_jobs):
    """Generate synthetic points as convex combinations of two points of one neighbourhood.

    For every synthetic point a random ``neb_conv``-neighbourhood (from
    ``Neb_grps``) is chosen, two of its members are drawn with replacement, and
    they are mixed with integer weights from 1 to 99 normalised to sum to one.
    NumPy's global random state is reset to seed 42 on every call.

    Parameters
    ----------
    data : numpy.ndarray
        Cluster points, one per row.
    neb_conv : int
        Neighbourhood size.
    num_samples_to_generate : int or float
        Number of synthetic points; truncated with ``int()``.
    n_jobs : int
        Parallelism forwarded to ``Neb_grps``.

    Returns
    -------
    numpy.ndarray
        One synthetic point per row.
    """
    np.random.seed(42)
    neighbourhoods = Neb_grps(data, neb_conv, n_jobs)
    n_new = int(num_samples_to_generate)

    synthetic = []
    for _ in range(n_new):
        # Draw order: neighbourhood, first member, second member, then both weights.
        members = data[neighbourhoods[np.random.randint(len(neighbourhoods))]]
        first = np.random.randint(neb_conv)
        second = np.random.randint(neb_conv)
        raw = np.array([np.random.randint(1, 100), np.random.randint(1, 100)])
        endpoints = np.array([members[first], members[second]])
        synthetic.append(np.dot(raw / raw.sum(), endpoints))
    return np.array(synthetic)



def low_local_variance(data, cluster_size, neb_conv, shadow,sigma,num_samples_to_generate,num_convcom,n_jobs):
    """Generate synthetic points from noisy shadow copies of one neighbourhood each.

    For every synthetic point a random ``neb_conv``-neighbourhood (from
    ``Neb_grps``) is chosen, each of its members is replicated ``shadow`` times
    with Gaussian noise, and the result is a random convex combination of
    ``num_convcom`` of those shadow points. NumPy's global random state is
    reset to seed 42 on every call.

    Parameters
    ----------
    data : numpy.ndarray
        Cluster points, one per row.
    cluster_size : int
        Not used by this function; kept for signature compatibility.
    neb_conv : int
        Neighbourhood size.
    shadow : int
        Shadow copies per neighbourhood member.
    sigma : float
        Standard deviation of the noise.
    num_samples_to_generate : int or float
        Number of synthetic points; truncated with ``int()``.
    num_convcom : int or float
        Shadow points per convex combination; truncated with ``int()``.
    n_jobs : int
        Parallelism forwarded to ``Neb_grps``.

    Returns
    -------
    numpy.ndarray
        One synthetic point per row.
    """
    np.random.seed(42)
    neb_list = Neb_grps(data, neb_conv, n_jobs)

    pool_size = int(shadow*neb_conv)
    n_terms = int(num_convcom)

    generated = []
    for _ in range(int(num_samples_to_generate)):
        neighbourhood = data[neb_list[np.random.randint(len(neb_list))]]

        # NOTE(review): one scalar noise value per shadow copy is added to all
        # features of the point; kept unchanged for reproducibility.
        shadow_points = []
        for member in range(neb_conv):
            c = 0
            while c < shadow:
                shadow_points.append(neighbourhood[member] + np.random.normal(0, sigma))
                c = c + 1
        shadow_points = np.array(shadow_points)

        idx = np.random.randint(pool_size, size=n_terms)
        raw = np.random.randint(100, size=len(idx))
        total = raw.sum()
        if raw.size and total == 0:
            # All drawn weights are 0 (the range starts at 0): dividing would
            # give NaN samples, so weight the chosen points equally instead.
            aff_w = np.full(raw.size, 1.0/raw.size)
        else:
            aff_w = raw/total
        generated.append(np.dot(aff_w, shadow_points[idx]))
    return(np.array(generated))


def ProWRAS_gen(data, labels, max_levels, neb_conv, n_neighbours_max, max_conv, num_samples_to_generate, theta, shadow, sigma, n_jobs):
    """Oversample the minority class (label 1) with the ProWRAS scheme.

    The minority class is split into proximity clusters by ``partition_info``.
    Each cluster receives a share of the synthetic samples proportional to its
    weight (rounded up) and a number of convex-combination terms proportional
    to its weight relative to the largest weight (rounded up, scaled by
    ``max_conv``). The generator is then chosen per cluster:

    * cluster larger than ``neb_conv``  -> local (neighbourhood based) generation,
      otherwise global generation over the whole cluster;
    * fewer convex-combination terms than features -> high variance generation,
      otherwise low variance (shadow sample) generation.

    Parameters
    ----------
    data : numpy.ndarray
        Training features.
    labels : numpy.ndarray
        Training labels; ``1`` is the minority class.
    max_levels, n_neighbours_max, theta, n_jobs
        Forwarded to ``partition_info``.
    neb_conv : int
        Neighbourhood size for the local generators.
    max_conv : int
        Maximum number of points per convex combination.
    num_samples_to_generate : int
        Target number of synthetic samples (per-cluster rounding up can yield
        slightly more).
    shadow, sigma
        Shadow-sample count and noise level for the low variance generators.

    Returns
    -------
    tuple
        ``(ProWRAS_train, ProWRAS_labels)``: rows are ordered synthetic,
        original minority, original majority; labels are 1 for the first two
        groups and 0 for the majority rows.
    """
    np.random.seed(42)

    features_1_trn=data[np.where(labels==1)]
    features_0_trn=data[np.where(labels!=1)]

    num_feats=data.shape[1]
    clusters, weights = partition_info(data, labels, max_levels, n_neighbours_max, theta, n_jobs)

    num_samples_each_cluster=np.ceil(num_samples_to_generate*weights)
    num_convcomb_each_cluster=np.ceil((weights/max(weights))*max_conv)
    synth_samples=[]

    for i in range(len(weights)):
        cluster_points = data[clusters[i]]
        is_local = clusters[i].shape[0] > neb_conv
        is_high_variance = num_convcomb_each_cluster[i] < num_feats

        if is_local:
            if is_high_variance:
                print('High Local Variance Generation\n')
                synth=high_local_variance(cluster_points, neb_conv, num_samples_each_cluster[i],n_jobs)
            else:
                print('Low Local Variance Generation\n')
                synth=low_local_variance(cluster_points,cluster_points.shape[0],neb_conv,shadow,sigma,num_samples_each_cluster[i],num_convcomb_each_cluster[i],n_jobs)
        else:
            if is_high_variance:
                print('High Global Variance Generation\n')
                synth=high_global_variance(cluster_points, num_samples_each_cluster[i])
            else:
                print('Low Global Variance Generation\n')
                synth=low_global_variance(cluster_points,cluster_points.shape[0],shadow,sigma,num_samples_each_cluster[i],num_convcomb_each_cluster[i])
        synth_samples.append(synth)

    # Concatenate the per-cluster arrays directly: they generally have
    # different row counts, and wrapping such a ragged list in np.array()
    # first raises on NumPy >= 1.24.
    synth_samples=np.concatenate(synth_samples)

    ProWRAS_train=np.concatenate((synth_samples,features_1_trn,features_0_trn))
    ProWRAS_labels=np.concatenate((np.ones(len(synth_samples)+len(features_1_trn)),np.zeros(len(features_0_trn))))
    return(ProWRAS_train,ProWRAS_labels)
    
    
    

## Import dataset

change the dataset name

Switch on the following cell to import a dataset from `imblearn.datasets`.

In [11]:
# Name of the imblearn benchmark dataset to evaluate; also used as the prefix
# of the result CSV files.
dataset="wine_quality" #example
from imblearn.datasets import fetch_datasets
# fetch_datasets() returns a mapping of dataset name -> bunch with .data / .target
# (datasets are downloaded on first use).
data = fetch_datasets()[dataset]
features_x,labels_x=data.data,data.target

# Number of feature columns (read from the matrix shape rather than from the
# length of one particular row).
n_feat=features_x.shape[1]

- switch on the following cell for importing datasets from https://drive.google.com/file/d/1PKw1vETVUzaToomio1-RGzJ9_-buYjOW/view
- save the dataset in the same directory as the notebook

### Running 5 x 5 fold CV

In [12]:
# Repeated 5-fold cross-validation driver.
# For each of `strata` shuffling seeds the data is shuffled and standardised,
# split per class into 5 contiguous folds, and every fold is evaluated with
# ProWRAS, the un-resampled baseline and the smote_variants oversamplers.
# One CSV with the mean F1 / kappa per method is written per seed.
#
# NOTE(review): `features_x` / `labels_x` are overwritten inside the loop, so
# the shuffle for seed k is applied on top of the shuffles (and scaling) of all
# earlier seeds, and re-running this cell without re-running the data-loading
# cell continues from the already mutated arrays.
np.random.seed(42)
strata=5
for seed_perm in range(strata):
    
    # unison_shuffled_copies re-seeds NumPy's global state with seed_perm.
    features_x,labels_x=unison_shuffled_copies(features_x,labels_x,seed_perm)

    # NOTE(review): the scaler is fitted on the full dataset before the folds
    # are split, so test-fold statistics influence the scaling — confirm this
    # is intended.
    scaler = StandardScaler()
    scaler.fit(features_x)
    features_x=(scaler.transform(features_x))
    
    
    ### Extracting all features and labels
    print('Extracting all features and labels for seed:'+ str(seed_perm)+'\n')
    
    ## Dividing data into training and testing datasets for 5-fold CV
    ## (the printed message below still says "10-fold"; the split built here has 5 folds)
    print('Dividing data into training and testing datasets for 10-fold CV for seed:'+ str(seed_perm)+'\n')
    # Row indices of the minority class (label 1).
    label_1=np.where(labels_x == 1)[0]
    label_1=list(label_1)
    print('minority class samples:'+ str(len(label_1))+'\n')

    features_1=features_x[label_1]
    
    # Neighbour count handed to OVS (SMOTE / ProWSyn) and used as
    # n_neighbours_max for ProWRAS below.
    neb=5


    # Row indices of the majority class (every label other than 1).
    label_0=np.where(labels_x != 1)[0]
    label_0=list(label_0)
    # No-op: the value of this expression is discarded.
    len(label_0)

    print('majority class samples:'+ str(len(label_0))+'\n')


    features_0=features_x[label_0]
    
    # Per-class fold sizes; the remainder of the integer division goes to fold 5.
    a=len(features_1)//5
    b=len(features_0)//5

    # Test folds: contiguous slices of each class, minority rows first.
    fold_1_min=features_1[0:a]
    fold_1_maj=features_0[0:b]
    fold_1_tst=np.concatenate((fold_1_min,fold_1_maj))
    lab_1_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))

    # The label vectors of folds 2-4 are built from fold 1's lengths; folds 1-4
    # all hold `a` minority and `b` majority rows, so the lengths agree.
    fold_2_min=features_1[a:2*a]
    fold_2_maj=features_0[b:2*b]
    fold_2_tst=np.concatenate((fold_2_min,fold_2_maj))
    lab_2_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))

    fold_3_min=features_1[2*a:3*a]
    fold_3_maj=features_0[2*b:3*b]
    fold_3_tst=np.concatenate((fold_3_min,fold_3_maj))
    lab_3_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))

    fold_4_min=features_1[3*a:4*a]
    fold_4_maj=features_0[3*b:4*b]
    fold_4_tst=np.concatenate((fold_4_min,fold_4_maj))
    lab_4_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))


    # Fold 5 takes everything that is left, so it can be larger than a / b.
    fold_5_min=features_1[4*a:]
    fold_5_maj=features_0[4*b:]
    fold_5_tst=np.concatenate((fold_5_min,fold_5_maj))
    lab_5_tst=np.concatenate((np.zeros(len(fold_5_min))+1, np.zeros(len(fold_5_maj))))

    # Training sets: the four other folds, all minority rows first, then all
    # majority rows. Note the fold order inside each concatenation is not
    # always ascending (e.g. 2,1,4,5 for fold 3); row order is part of the
    # input seen by the samplers, so it is left untouched.
    fold_1_trn=np.concatenate((fold_2_min,fold_3_min,fold_4_min,fold_5_min, fold_2_maj,fold_3_maj,fold_4_maj,fold_5_maj))

    lab_1_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))

    fold_2_trn=np.concatenate((fold_1_min,fold_3_min,fold_4_min,fold_5_min,fold_1_maj,fold_3_maj,fold_4_maj,fold_5_maj))

    lab_2_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))

    fold_3_trn=np.concatenate((fold_2_min,fold_1_min,fold_4_min,fold_5_min,fold_2_maj,fold_1_maj,fold_4_maj,fold_5_maj))

    lab_3_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))

    fold_4_trn=np.concatenate((fold_2_min,fold_3_min,fold_1_min,fold_5_min,fold_2_maj,fold_3_maj,fold_1_maj,fold_5_maj))

    lab_4_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))

    fold_5_trn=np.concatenate((fold_2_min,fold_3_min,fold_4_min,fold_1_min,fold_2_maj,fold_3_maj,fold_4_maj,fold_1_maj))

    # Training set 5 consists of folds 1-4 only, hence exactly 4*a / 4*b rows.
    lab_5_trn=np.concatenate((np.zeros(4*a)+1,np.zeros(4*b)))


    training_folds_feats=[fold_1_trn,fold_2_trn,fold_3_trn,fold_4_trn,fold_5_trn]

    testing_folds_feats=[fold_1_tst,fold_2_tst,fold_3_tst,fold_4_tst,fold_5_tst]

    training_folds_labels=[lab_1_trn,lab_2_trn,lab_3_trn,lab_4_trn,lab_5_trn]

    testing_folds_labels=[lab_1_tst,lab_2_tst,lab_3_tst,lab_4_tst,lab_5_tst]
    
    
    # --- ProWRAS: oversample each training fold and score it on its test fold ---
    model_ProWRAS=[]
    for i in range(5):

        features = training_folds_feats[i]
        labels= training_folds_labels[i]
        # label_1 / label_0 are re-bound here to indices within the training fold.
        label_1=np.where(labels == 1)[0]
        label_1=list(label_1)
        features_1_trn=features[label_1]

        label_0=np.where(labels == 0)[0]
        label_0=list(label_0)
        features_0_trn=features[label_0]

        features_trn_fold= np.concatenate((features_1_trn,features_0_trn))
        labels_trn_fold= np.concatenate((np.zeros(len(features_1_trn))+1, np.zeros(len(features_0_trn))))

        # ProWRAS hyper-parameters. neb_conv / max_conv select the generation
        # strategy (HGV / HLV / LGV / LLV) described in the markdown above.
        n_neighbours_max=neb
        max_levels=5
        # Number of synthetic points requested = class size difference of the fold.
        num_samples_to_generate=int((len(features_0_trn)-len(features_1_trn)))
        theta=1
        shadow=100
        neb_conv=5 #combinations: 1000,5
        max_conv=features_x.shape[1] #combinations: features_x.shape[1],2
        n_jobs=1
        
        
        
        print('Running Classifier \n')
        ProWRAS_train,ProWRAS_labels=ProWRAS_gen(features_trn_fold, labels_trn_fold, max_levels,neb_conv, n_neighbours_max,max_conv, num_samples_to_generate, theta, shadow, sigma_model, n_jobs)
        f1_model_ProWRAS,kappa_model_ProWRAS,aps_model_ProWRAS,mat_model_ProWRAS=classifier(ProWRAS_train,ProWRAS_labels,testing_folds_feats[i],testing_folds_labels[i])
        model_ProWRAS.append([f1_model_ProWRAS,kappa_model_ProWRAS,aps_model_ProWRAS]) 

        print('\n')
        print('############################   Fold '+ str(i+1) + ' Finished   ############################\n\n')


    

    ## Using oversampling algorithms to generate and test classification models
    print('Using oversampling algorithms to generate and test classification models for seed:'+ str(seed_perm)+'\n')

    # Per-fold [f1, kappa, aps] rows: baseline, SMOTE, polynom_fit_SMOTE
    # (stored in model_MOT2TLD), ProWSyn, CURE_SMOTE and SOMO.
    model=[] 
    model_SM=[]
    model_MOT2TLD=[] 
    model_ProWSyn=[] 
    model_CURE=[] 
    model_SOMO=[] 



    i=0
    while i<5:
        # OVS reads the current seed_perm of this loop as the samplers' random_state.
        SMOTE_feat, SMOTE_labels, SMOTE_feat_polynom_fit_SMOTE, SMOTE_labels_polynom_fit_SMOTE, SMOTE_feat_ProWSyn, SMOTE_labels_ProWSyn, SMOTE_feat_cure, SMOTE_labels_cure, SMOTE_feat_SOMO, SMOTE_labels_SOMO=OVS(training_folds_feats[i],training_folds_labels[i], neb)

        
        # Baseline: classifier trained on the un-resampled training fold.
        f1_model,kappa_model,aps_model,mat_model=classifier(training_folds_feats[i],training_folds_labels[i],testing_folds_feats[i],testing_folds_labels[i])
        model.append([f1_model,kappa_model,aps_model])

        f1_model_SMOTE,kappa_model_SMOTE, aps_model_SMOTE, mat_model_SMOTE=classifier(SMOTE_feat,SMOTE_labels,testing_folds_feats[i],testing_folds_labels[i])
        model_SM.append([f1_model_SMOTE,kappa_model_SMOTE,aps_model_SMOTE])


        f1_model_SMOTE_polynom_fit_SMOTE,kappa_model_SMOTE_polynom_fit_SMOTE, aps_model_SMOTE_polynom_fit_SMOTE, mat_model_SMOTE_polynom_fit_SMOTE=classifier(SMOTE_feat_polynom_fit_SMOTE,SMOTE_labels_polynom_fit_SMOTE,testing_folds_feats[i],testing_folds_labels[i])
        model_MOT2TLD.append([f1_model_SMOTE_polynom_fit_SMOTE,kappa_model_SMOTE_polynom_fit_SMOTE, aps_model_SMOTE_polynom_fit_SMOTE])


        f1_model_SMOTE_ProWSyn,kappa_model_SMOTE_ProWSyn,aps_model_SMOTE_ProWSyn,mat_model_SMOTE_ProWSyn=classifier(SMOTE_feat_ProWSyn,SMOTE_labels_ProWSyn,testing_folds_feats[i],testing_folds_labels[i])
        model_ProWSyn.append([f1_model_SMOTE_ProWSyn,kappa_model_SMOTE_ProWSyn,aps_model_SMOTE_ProWSyn])

        f1_model_SMOTE_cure,kappa_model_SMOTE_cure,aps_model_SMOTE_cure,mat_model_SMOTE_cure=classifier(SMOTE_feat_cure, SMOTE_labels_cure,testing_folds_feats[i],testing_folds_labels[i])
        model_CURE.append([f1_model_SMOTE_cure,kappa_model_SMOTE_cure,aps_model_SMOTE_cure])


        f1_model_SMOTE_SOMO,kappa_model_SMOTE_SOMO,aps_model_SMOTE_SOMO,mat_model_SMOTE_SOMO=classifier(SMOTE_feat_SOMO, SMOTE_labels_SOMO,testing_folds_feats[i],testing_folds_labels[i])
        model_SOMO.append([f1_model_SMOTE_SOMO,kappa_model_SMOTE_SOMO,aps_model_SMOTE_SOMO])

        print('\n')
        print('############################   Fold '+ str(i+1) + ' Finished   ############################\n\n')
        i=i+1


    ## Using ProWRAS-X to generate and test classification models
    ## (log marker only: the ProWRAS folds were already evaluated in the first fold loop above)
    print('Using ProWRAS-X to generate and test classification models for seed:'+ str(seed_perm)+'\n')



    
    ## Summing up results

    print('Summing up results for seed:'+ str(seed_perm)+'\n')

    # stats() returns (mean, std) over the 5 folds; index 0 = F1, 1 = kappa, 2 = APS.
    model_m, model_sd= stats(model)

    model_SM_m, model_SM_sd=stats(model_SM)

    model_MOT2TLD_m, model_MOT2TLD_sd=stats(model_MOT2TLD)

    model_ProWSyn_m, model_ProWSyn_sd=stats(model_ProWSyn)
    
    model_CURE_m, model_CURE_sd=stats(model_CURE)

    model_SOMO_m, model_SOMO_sd=stats(model_SOMO)

    model_ProWRAS_m, model_ProWRAS_sd=stats(model_ProWRAS)




    # Only the mean F1 and kappa are exported; the APS means and all standard
    # deviations are computed above but not written out.
    # Mixing a string with floats in np.array stores every cell as a string.
    model_F1=np.array(['F1-Score_'+classify, model_m[0], model_SM_m[0], model_MOT2TLD_m[0], model_ProWSyn_m[0], model_CURE_m[0], model_SOMO_m[0], model_ProWRAS_m[0]])
    model_kappa=np.array(['Kappa_'+classify, model_m[1], model_SM_m[1], model_MOT2TLD_m[1], model_ProWSyn_m[1], model_CURE_m[1], model_SOMO_m[1], model_ProWRAS_m[1]])
   

    df=pd.DataFrame(np.array([model_F1,model_kappa]),
                       columns=['Performance Measure', 'Baseline', 'SMOTE', 'polynom_fit_SMOTE','ProWSyn','CURE','SOMO','ProWRASX'])

    results=df

    # One result file per seed, written to the current working directory.
    filename=dataset+'seed_'+str(seed_perm)+'.csv'

    results.to_csv(filename, index=False)

    print('results ready for seed:'+str(seed_perm)+'\n')
    print('\n')
    print('\n')
    print('\n')



Extracting all features and labels for seed:0

Dividing data into training and testing datasets for 10-fold CV for seed:0

minority class samples:183

majority class samples:4715

Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 1 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 2 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 3 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 4 Finished   ############################


Running Classifier 

Low Local Variance Generation



2021-01-31 11:26:10,045:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 0}")


High Global Variance Generation



############################   Fold 5 Finished   ############################


Using oversampling algorithms to generate and test classification models for seed:0



2021-01-31 11:26:10,057:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:10,471:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:10,628:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:10,715:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:11,758:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 0}")
2021-01-31 11:26:11,770:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 0}")




############################   Fold 1 Finished   ############################




2021-01-31 11:26:12,175:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:12,311:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:12,396:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:13,358:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 0}")
2021-01-31 11:26:13,371:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 0}")




############################   Fold 2 Finished   ############################




2021-01-31 11:26:13,808:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:13,946:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:14,043:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:14,990:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 0}")
2021-01-31 11:26:15,001:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 0}")




############################   Fold 3 Finished   ############################




2021-01-31 11:26:15,396:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:15,542:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:15,629:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:16,576:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 0}")
2021-01-31 11:26:16,589:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 0}")




############################   Fold 4 Finished   ############################




2021-01-31 11:26:16,962:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:17,072:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 0}")
2021-01-31 11:26:17,166:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 0}")




############################   Fold 5 Finished   ############################


Using ProWRAS-X to generate and test classification models for seed:0

Summing up results for seed:0

results ready for seed:0







Extracting all features and labels for seed:1

Dividing data into training and testing datasets for 10-fold CV for seed:1

minority class samples:183

majority class samples:4715

Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 1 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 2 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 3 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



################

2021-01-31 11:26:40,723:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 1}")
2021-01-31 11:26:40,736:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 1}")


High Global Variance Generation



############################   Fold 5 Finished   ############################


Using oversampling algorithms to generate and test classification models for seed:1



2021-01-31 11:26:41,098:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:41,243:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:41,340:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:42,365:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 1}")
2021-01-31 11:26:42,377:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 1}")




############################   Fold 1 Finished   ############################




2021-01-31 11:26:42,733:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:42,848:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:42,947:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:43,918:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 1}")
2021-01-31 11:26:43,930:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 1}")




############################   Fold 2 Finished   ############################




2021-01-31 11:26:44,333:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:44,443:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:44,525:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:45,480:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 1}")
2021-01-31 11:26:45,491:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 1}")




############################   Fold 3 Finished   ############################




2021-01-31 11:26:45,895:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:46,034:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:46,125:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:47,089:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 1}")
2021-01-31 11:26:47,101:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 1}")




############################   Fold 4 Finished   ############################




2021-01-31 11:26:47,471:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:47,596:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 1}")
2021-01-31 11:26:47,675:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 1}")




############################   Fold 5 Finished   ############################


Using ProWRAS-X to generate and test classification models for seed:1

Summing up results for seed:1

results ready for seed:1







Extracting all features and labels for seed:2

Dividing data into training and testing datasets for 10-fold CV for seed:2

minority class samples:183

majority class samples:4715

Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 1 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 2 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 3 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



################

2021-01-31 11:27:10,870:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 2}")


High Global Variance Generation



############################   Fold 5 Finished   ############################


Using oversampling algorithms to generate and test classification models for seed:2



2021-01-31 11:27:10,884:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:11,230:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:11,355:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:11,441:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:12,397:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 2}")
2021-01-31 11:27:12,410:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 2}")




############################   Fold 1 Finished   ############################




2021-01-31 11:27:12,792:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:12,946:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:13,188:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:14,156:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 2}")
2021-01-31 11:27:14,167:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 2}")




############################   Fold 2 Finished   ############################




2021-01-31 11:27:14,586:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:14,716:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:14,813:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:15,792:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 2}")
2021-01-31 11:27:15,805:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 2}")




############################   Fold 3 Finished   ############################




2021-01-31 11:27:16,170:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:16,293:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:16,373:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:17,375:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 2}")
2021-01-31 11:27:17,387:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 2}")




############################   Fold 4 Finished   ############################




2021-01-31 11:27:17,803:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:17,949:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 2}")
2021-01-31 11:27:18,032:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 2}")




############################   Fold 5 Finished   ############################


Using ProWRAS-X to generate and test classification models for seed:2

Summing up results for seed:2

results ready for seed:2







Extracting all features and labels for seed:3

Dividing data into training and testing datasets for 10-fold CV for seed:3

minority class samples:183

majority class samples:4715

Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 1 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 2 Finished   ############################


Running Classifier 

Low Local Variance Generation



############################   Fold 3 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 4 Finished   

2021-01-31 11:27:42,794:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 3}")


High Global Variance Generation



############################   Fold 5 Finished   ############################


Using oversampling algorithms to generate and test classification models for seed:3



2021-01-31 11:27:42,808:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:43,177:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:43,299:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:43,384:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:44,387:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 3}")
2021-01-31 11:27:44,400:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 3}")




############################   Fold 1 Finished   ############################




2021-01-31 11:27:44,777:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:44,895:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:44,982:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:45,943:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 3}")
2021-01-31 11:27:45,955:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 3}")




############################   Fold 2 Finished   ############################




2021-01-31 11:27:46,288:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:46,396:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:46,481:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:47,445:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 3}")
2021-01-31 11:27:47,457:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 3}")




############################   Fold 3 Finished   ############################




2021-01-31 11:27:47,848:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:47,986:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:48,069:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:49,017:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 3}")
2021-01-31 11:27:49,029:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 3}")




############################   Fold 4 Finished   ############################




2021-01-31 11:27:49,392:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:49,528:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 3}")
2021-01-31 11:27:49,604:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 3}")




############################   Fold 5 Finished   ############################


Using ProWRAS-X to generate and test classification models for seed:3

Summing up results for seed:3

results ready for seed:3







Extracting all features and labels for seed:4

Dividing data into training and testing datasets for 10-fold CV for seed:4

minority class samples:183

majority class samples:4715

Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 1 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 2 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



############################   Fold 3 Finished   ############################


Running Classifier 

Low Local Variance Generation

High Global Variance Generation



################

2021-01-31 11:28:13,445:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 4}")


High Global Variance Generation



############################   Fold 5 Finished   ############################


Using oversampling algorithms to generate and test classification models for seed:4



2021-01-31 11:28:13,459:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:13,816:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:13,953:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:14,037:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:14,999:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 4}")
2021-01-31 11:28:15,011:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 4}")




############################   Fold 1 Finished   ############################




2021-01-31 11:28:15,382:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:15,506:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:15,600:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:16,555:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 4}")
2021-01-31 11:28:16,568:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 4}")




############################   Fold 2 Finished   ############################




2021-01-31 11:28:16,951:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:17,098:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:17,179:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:18,189:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 4}")
2021-01-31 11:28:18,202:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 4}")




############################   Fold 3 Finished   ############################




2021-01-31 11:28:18,558:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:18,701:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:18,787:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:19,713:INFO:polynom_fit_SMOTE: Running sampling via ('polynom_fit_SMOTE', "{'proportion': 1.0, 'topology': 'star', 'random_state': 4}")
2021-01-31 11:28:19,725:INFO:ProWSyn: Running sampling via ('ProWSyn', "{'proportion': 1.0, 'n_neighbors': 5, 'L': 5, 'theta': 1.0, 'n_jobs': 1, 'random_state': 4}")




############################   Fold 4 Finished   ############################




2021-01-31 11:28:20,118:INFO:CURE_SMOTE: Running sampling via ('CURE_SMOTE', "{'proportion': 1.0, 'n_clusters': 5, 'noise_th': 2, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:20,247:INFO:SOMO: Running sampling via ('SOMO', "{'proportion': 1.0, 'n_grid': 10, 'sigma': 0.2, 'learning_rate': 0.5, 'n_iter': 100, 'n_jobs': 1, 'random_state': 4}")
2021-01-31 11:28:20,345:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'n_jobs': 1, 'random_state': 4}")




############################   Fold 5 Finished   ############################


Using ProWRAS-X to generate and test classification models for seed:4

Summing up results for seed:4

results ready for seed:4









In [13]:
import glob

# Aggregate the per-seed result tables into a single table for the dataset.
#
# Every "*.csv" file in the current working directory is read; for each file
# row 0 and row 1 are taken with their first column skipped, and the values are
# averaged element-wise across files. The two averaged rows are written to
# "<prefix>agrregate.csv", where <prefix> is the first (sorted) input file name
# minus its last 9 characters.
#
# NOTE(review): this assumes the only other CSV files in the working directory
# are the per-seed result files, each with 7 value columns after the label
# column - confirm against the cell that writes them.

# The summary written at the end of this cell also matches "*.csv"; it must be
# filtered out, otherwise a re-run would average the previous summary in as if
# it were another seed.
aggregate_suffix='agrregate.csv'

# glob returns matches in arbitrary order, so sort to make both the averaged
# set and the name derived from file_list[0] reproducible.
file_list=sorted(path for path in glob.glob("*.csv") if not path.endswith(aggregate_suffix))
if not file_list:
    raise FileNotFoundError("No per-seed result CSV files (*.csv) found in the current directory")

# One 2-D array per file, stacked into (n_files, n_rows, n_cols).
whole_info=np.array([np.array(pd.read_csv(path)) for path in file_list])

# Average over however many per-seed files exist instead of indexing exactly
# five of them; column 0 is skipped because it is not a metric value.
F1_Scores_LR_mean=np.mean(whole_info[:,0,1:].astype(float),axis=0)
BAs_LR_mean=np.mean(whole_info[:,1,1:].astype(float),axis=0)

# NOTE(review): the second row is held in a variable named "BAs" but labelled
# 'Kappa' here - confirm which metric the per-seed files actually store.
agrregate_results_for_dataset=pd.DataFrame(np.array([F1_Scores_LR_mean,BAs_LR_mean]),
                       columns=['Baseline', 'SMOTE', 'polynom_fit_SMOTE','ProWSyn','CURE','SOMO','LORASX'], index=np.array(['F1_Scores','Kappa']))

# Drops the last 9 characters of the first input name - presumably the
# per-seed suffix plus ".csv"; verify against the per-seed file naming.
filename=file_list[0][:-9]+'agrregate.csv'
agrregate_results_for_dataset.to_csv(filename,index=True)
