In [0]:
import random
import numpy as np
import pandas as pd
from sklearn import datasets

In [0]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [0]:
def split_dataset(dataset):
    num_train,_ = dataset.data.shape

    indices = np.random.choice(num_train, num_train)
    train_X = dataset.data[indices[:-30]].astype("float32")
    train_y = dataset.target[indices[:-30]]
    test_X = dataset.data[indices[-30:]].astype("float32")
    test_y = dataset.target[indices[-30:]]

    print(train_X.shape)
    print(train_y.shape)
    print(test_X.shape)
    print(test_y.shape)
    return (train_X,train_y,test_X,test_y)

In [0]:
def train_test(clf,train_X,train_y,test_X,test_y):
    clf.fit(train_X,train_y)
    pred = clf.predict(test_X)
    score = metrics.accuracy_score(test_y,pred)
    return score

In [0]:
knn = KNeighborsClassifier(n_neighbors=3)
svc_lin = SVC(kernel="linear", C=0.25)
svc_gamma = SVC(gamma=2, C=1)
gauss = GaussianProcessClassifier()
dt = DecisionTreeClassifier(max_depth=5)
rf = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
mlp = MLPClassifier(alpha=1)
ada = AdaBoostClassifier()
gauss_nb = GaussianNB()
quad = QuadraticDiscriminantAnalysis()

In [6]:
iris = datasets.load_iris()
train_X, train_y,test_X,test_y = split_dataset(iris)

(120, 4)
(120,)
(30, 4)
(30,)


In [7]:
print("Iris dataset")
print(f"knn : {train_test(knn,train_X,train_y,test_X,test_y)}")
print(f"svc linear: {train_test(svc_lin,train_X,train_y,test_X,test_y)}")
print(f"svc gamma : {train_test(svc_gamma,train_X,train_y,test_X,test_y)}")
print(f"gaussian process : {train_test(gauss,train_X,train_y,test_X,test_y)}")
print(f"decisison tree : {train_test(dt,train_X,train_y,test_X,test_y)}")
print(f"rainforest : {train_test(rf,train_X,train_y,test_X,test_y)}")
print(f"mlp : {train_test(mlp,train_X,train_y,test_X,test_y)}")
print(f"ada boost : {train_test(ada,train_X,train_y,test_X,test_y)}")
print(f"gauss_nb : {train_test(gauss_nb,train_X,train_y,test_X,test_y)}")
print(f"QDA : {train_test(quad,train_X,train_y,test_X,test_y)}")


Iris dataset
knn : 1.0
svc linear: 0.9666666666666667
svc gamma : 1.0
gaussian process : 0.9666666666666667
decisison tree : 1.0
rainforest : 1.0
mlp : 0.9666666666666667
ada boost : 1.0
gauss_nb : 0.9
QDA : 1.0




In [8]:
digits = datasets.load_digits()
train_X, train_y,test_X,test_y = split_dataset(digits)

(1767, 64)
(1767,)
(30, 64)
(30,)


In [9]:
print("Digits dataset")
print(f"knn : {train_test(knn,train_X,train_y,test_X,test_y)}")
print(f"svc linear: {train_test(svc_lin,train_X,train_y,test_X,test_y)}")
print(f"svc gamma : {train_test(svc_gamma,train_X,train_y,test_X,test_y)}")
print(f"gaussian process : {train_test(gauss,train_X,train_y,test_X,test_y)}")
print(f"decisison tree : {train_test(dt,train_X,train_y,test_X,test_y)}")
print(f"rainforest : {train_test(rf,train_X,train_y,test_X,test_y)}")
print(f"mlp : {train_test(mlp,train_X,train_y,test_X,test_y)}")
print(f"ada boost : {train_test(ada,train_X,train_y,test_X,test_y)}")
print(f"gauss_nb : {train_test(gauss_nb,train_X,train_y,test_X,test_y)}")
print(f"QDA : {train_test(quad,train_X,train_y,test_X,test_y)}")

Digits dataset
knn : 1.0
svc linear: 1.0
svc gamma : 0.6333333333333333
gaussian process : 0.6333333333333333
decisison tree : 0.8666666666666667
rainforest : 0.8333333333333334




mlp : 1.0
ada boost : 0.2
gauss_nb : 0.9
QDA : 0.7




In [10]:
boston = datasets.load_boston()
boston.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])

In [11]:
boston.target.shape

(506,)

In [12]:
pd.Series(boston.target).describe()

count    506.000000
mean      22.532806
std        9.197104
min        5.000000
25%       17.025000
50%       21.200000
75%       25.000000
max       50.000000
dtype: float64

In [0]:
#pd.Series(boston.target).describe()
def get_range(val):
    if 5<=val<17:
        return 1
    elif 17<=val<21:
        return 2
    elif 21<=val<25:
        return 3
    elif 25<=val<=50:
        return 4


In [0]:
def discretize(attribute):
    new_attribute = []
    for val in attribute:
        new_attribute.append(get_range(val))
    return new_attribute


In [15]:
boston.target = np.array(discretize(boston.target))
print(set(boston.target))


{1, 2, 3, 4}


In [16]:
train_X,train_y,test_X,test_y = split_dataset(boston)

(476, 13)
(476,)
(30, 13)
(30,)


In [17]:
print("Boston dataset")
print(f"knn : {train_test(knn,train_X,train_y,test_X,test_y)}")
print(f"svc linear: {train_test(svc_lin,train_X,train_y,test_X,test_y)}")
print(f"svc gamma : {train_test(svc_gamma,train_X,train_y,test_X,test_y)}")
print(f"gaussian process : {train_test(gauss,train_X,train_y,test_X,test_y)}")
print(f"decisison tree : {train_test(dt,train_X,train_y,test_X,test_y)}")
print(f"rainforest : {train_test(rf,train_X,train_y,test_X,test_y)}")
print(f"mlp : {train_test(mlp,train_X,train_y,test_X,test_y)}")
print(f"ada boost : {train_test(ada,train_X,train_y,test_X,test_y)}")
print(f"gauss_nb : {train_test(gauss_nb,train_X,train_y,test_X,test_y)}")
print(f"QDA : {train_test(quad,train_X,train_y,test_X,test_y)}")

Boston dataset
knn : 0.7
svc linear: 0.7333333333333333
svc gamma : 0.5666666666666667
gaussian process : 0.6666666666666666
decisison tree : 0.6666666666666666
rainforest : 0.6333333333333333
mlp : 0.6
ada boost : 0.43333333333333335
gauss_nb : 0.5333333333333333
QDA : 0.6333333333333333
