In [2]:
from mcless import *
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import time


In [3]:
data1 = np.loadtxt('data/synthetic1.data', delimiter=',')
X1= data1[:,0:2]
y1=data1[:,2]

data2= np.loadtxt('data/synthetic2.data', delimiter=',')
X2= data2[:,0:2]
y2=data2[:,2]

data3 = load_iris()
X3 = data3.data
y3 = data3.target

data4 = load_wine()
X4 = data4.data
y4 = data4.target

In [4]:
X1.shape,X2.shape,X3.shape , X4.shape

((300, 2), (300, 2), (150, 4), (178, 13))

In [5]:
def scale_factor(X):
    X = X.transpose()
    scale = np.zeros((len(X)))
    for i in range(len(X)):
        row = X[i]
        scale[i] = np.amax(np.abs(row))
    return scale

def scale_data(X,scale):
    X= X.transpose()
    scaled_X = np.zeros((X.shape))
    
    for i in range(len(X)):
        row = X[i]
        scaled_X[i] = X[i]/scale[i]
    
    return scaled_X.transpose()

In [6]:
classifiers = [LogisticRegression(max_iter = 1000),KNeighborsClassifier(5),SVC(kernel="rbf",gamma=2, C=1),
               RandomForestClassifier(max_depth=5, n_estimators=50, max_features=1)]

In [7]:
names = [
"Logistic Regr",
"KNeighbors-5 ",
"RBF SVM ",
"Random Forest"]

In [8]:
N,d = X1.shape; labelset=set(y1)
nclass=len(labelset);
print('N,d,nclass=',N,d,nclass)

rtrain = 0.7e0; run = 100
rtest = 1-rtrain

N,d,nclass= 300 2 3


In [9]:
dataname = "Synthetic data 1"

In [10]:
acc_max=0
for name, clf in zip(names, classifiers):
    Acc = np.zeros([run,1])
    btime = time.time()

    for it in range(run):
        Xtrain, Xtest, ytrain, ytest = train_test_split(X1, y1, test_size=rtest, random_state=it, stratify = y1)

        clf.fit(Xtrain, ytrain);
        Acc[it] = clf.score(Xtest, ytest)

    etime = time.time()-btime
    accmean = np.mean(Acc)*100
    print('%s: %s: Acc.(mean,std) = (%.2f,%.2f)%%; E-time= %.5f'%(dataname,name,accmean,np.std(Acc)*100,etime/run))
    if accmean>acc_max:
        acc_max= accmean; algname = name
print('sklearn classifiers max: %s= %.2f' %(algname,acc_max))

Synthetic data 1: Logistic Regr: Acc.(mean,std) = (96.58,1.84)%; E-time= 0.01780
Synthetic data 1: KNeighbors-5 : Acc.(mean,std) = (95.67,1.80)%; E-time= 0.00806
Synthetic data 1: RBF SVM : Acc.(mean,std) = (94.85,2.03)%; E-time= 0.01396
Synthetic data 1: Random Forest: Acc.(mean,std) = (95.57,2.17)%; E-time= 0.14009
sklearn classifiers max: Logistic Regr= 96.58


In [11]:
data_X = [X1,X2,X3,X4]
data_y = [y1,y2,y3,y4]
data_names = ["Synthetic Data 1","Synthetic Data 2","Iris Data","Wine Data"]

In [12]:
for X,y,dataname in zip(data_X,data_y,data_names):
    acc_max=0
    for name, clf in zip(names, classifiers):
        Acc = np.zeros([run,1])
        btime = time.time()
        
        

        for it in range(run):
            Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=rtest, random_state=it, stratify = y)
            scale = scale_factor(Xtrain)
            Xtrain_scaled = scale_data(Xtrain,scale)
            Xtest_scaled = scale_data(Xtest,scale)

            clf.fit(Xtrain_scaled, ytrain);
            Acc[it] = clf.score(Xtest_scaled, ytest)

        etime = time.time()-btime
        accmean = np.mean(Acc)*100
        print('%s: %s: Acc.(mean,std) = (%.2f,%.2f)%%; E-time= %.5f'%(dataname,name,accmean,np.std(Acc)*100,etime/run))
        if accmean>acc_max:
            acc_max= accmean; algname = name
    print('sklearn classifiers max: %s= %.2f' %(algname,acc_max))
    print("\n")

Synthetic Data 1: Logistic Regr: Acc.(mean,std) = (96.99,1.71)%; E-time= 0.01156
Synthetic Data 1: KNeighbors-5 : Acc.(mean,std) = (95.81,1.96)%; E-time= 0.00897
Synthetic Data 1: RBF SVM : Acc.(mean,std) = (96.64,1.69)%; E-time= 0.00678
Synthetic Data 1: Random Forest: Acc.(mean,std) = (95.57,2.16)%; E-time= 0.14031
sklearn classifiers max: Logistic Regr= 96.99


Synthetic Data 2: Logistic Regr: Acc.(mean,std) = (94.12,2.07)%; E-time= 0.01024
Synthetic Data 2: KNeighbors-5 : Acc.(mean,std) = (93.96,2.24)%; E-time= 0.00820
Synthetic Data 2: RBF SVM : Acc.(mean,std) = (94.64,2.18)%; E-time= 0.00696
Synthetic Data 2: Random Forest: Acc.(mean,std) = (95.22,2.06)%; E-time= 0.19859
sklearn classifiers max: Random Forest= 95.22


Iris Data: Logistic Regr: Acc.(mean,std) = (93.48,3.01)%; E-time= 0.02731
Iris Data: KNeighbors-5 : Acc.(mean,std) = (95.76,2.31)%; E-time= 0.01338
Iris Data: RBF SVM : Acc.(mean,std) = (95.87,2.37)%; E-time= 0.00873


KeyboardInterrupt: 