In [1]:
import numpy as np
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,ExtraTreesClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import neighbors
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsOneClassifier
import matplotlib.pyplot as plt
from sklearn import datasets
%matplotlib inline

def visualize_classifier(classifier, Х, y):
    min_x, max_x = Х[:, 0].min() - 1.0, Х[:, 0].max() + 1.0
    min_y, max_y = Х[:, 1] .min() - 1.0, Х[:, 1].max() + 1.0
    mesh_step_size = 0.01
    x_vals, y_vals = np.meshgrid(np.arange(min_x, max_x, mesh_step_size), np.arange(min_y, max_y, mesh_step_size))
    output = classifier.predict(np.c_[x_vals.ravel(),y_vals.ravel()]) #np.c_(np.array([1,2]),np.array([3,4]))=array([[1,3],[2,4]])
    output = output.reshape(x_vals.shape)
    
    plt.figure()
    plt.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray) #output задаёт цвет (класс)
    plt.scatter(X[:, 0], Х[:, 1], c=y, s=50, edgecolors='black', linewidth=1, cmap=plt.cm.Paired) #c - color
    plt.xlim(x_vals.min(), x_vals.max())
    plt.ylim(y_vals.min(), y_vals.max())
    plt.xticks((np.arange(int(X[:, 0].min() - 1), int(X[:, 0].max() + 1), 1.0)))
    plt.yticks((np.arange(int(X[:, 1].min() - 1), int(X[:, 1].max() + 1), 1.0)))
    
    plt.show()

In [2]:
input_file = "data/clast.txt"
data = np.loadtxt(input_file, delimiter=',')
X, y = data[:, :-1], data[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)
get_proba = False
using_grid = False

In [3]:
#Logistic classifier
classifier = linear_model.LogisticRegression(solver='liblinear',C=100)

In [4]:
#Baise classifier
classifier = GaussianNB()

In [5]:
#Machine of vectors (linear core)
classifier = OneVsOneClassifier(LinearSVC())

In [6]:
#Tree classifier
params = {'random_state': 0, 'max_depth': 10}
classifier = DecisionTreeClassifier(**params) 

In [7]:
#Random forest classifier
params = {}
best_score = 0
if using_grid:
    metrics = ['precision_weighted', 'recall_weighted'] 
    parameter_grid = [ {'n_estimators': [i for i in range(20,35)], 'max_depth': [4] }, {'max_depth': [i for i in range(1,10)], 'n_estimators': [4]}] 
    for metric in metrics:
        classifier = GridSearchCV(RandomForestClassifier(random_state=0), parameter_grid, cv=5, scoring=metric)
        classifier.fit(X_train, y_train)
        b = classifier.best_score_
        if b > best_score:
            best_score = b
            params = classifier.best_params_
else:
    params = {'n_estimators': 4, 'max_depth': 4, 'random_state': 0 }
print(params)
classifier = RandomForestClassifier(**params)

{'n_estimators': 4, 'max_depth': 4, 'random_state': 0}


In [34]:
#Extra trees classifier
params = {'n_estimators': 4, 'max_depth': 4, 'random_state': 0}
classifier = ExtraTreesClassifier(**params)

In [35]:
#Neighbors classifier
num_neighbors = 12
classifier = neighbors.KNeighborsClassifier(num_neighbors, weights='distance')

In [41]:
classifier.fit(X_train, y_train)
y_test_pred = []
if get_proba:
    y_test_pred_proba = classifier.predict_proba(X_test)
    for i in range(0, len(y_test_pred_proba)):
        probabilities = y_test_pred_proba[i]
        label = int(str(np.argmax(probabilities)))
        y_test_pred.append(label)
        print("predict:", label, "correct:", int(y_test[i]), "(", probabilities[label]*100,"%)")
else:
    y_test_pred = classifier.predict(X_test)
accuracy = 100.0 * (y_test == y_test_pred).sum()/X_test.shape[0]
print(accuracy,"%")

ValueError: Unknown label type: (array([-0.29,  0.11,  0.14,  0.43,  0.53,  0.62,  0.64,  0.79,  0.8 ,
        0.95,  0.99,  1.  ,  1.02,  1.05,  1.06,  1.07,  1.08,  1.11,
        1.15,  1.18,  1.22,  1.26,  1.3 ,  1.31,  1.34,  1.37,  1.41,
        1.42,  1.47,  1.51,  1.52,  1.54,  1.56,  1.63,  1.64,  1.65,
        1.67,  1.69,  1.77,  1.78,  1.8 ,  1.81,  1.82,  1.86,  1.87,
        1.88,  1.94,  1.96,  1.99,  2.  ,  2.05,  2.06,  2.08,  2.11,
        2.14,  2.15,  2.16,  2.19,  2.2 ,  2.21,  2.23,  2.24,  2.25,
        2.28,  2.29,  2.32,  2.36,  2.41,  2.42,  2.44,  2.46,  2.47,
        2.5 ,  2.52,  2.54,  2.64,  2.67,  2.71,  2.81,  2.88,  2.93,
        2.94,  2.97,  2.99,  3.  ,  3.03,  3.05,  3.06,  3.09,  3.12,
        3.19,  3.25,  3.27,  3.38,  3.39,  3.53,  3.63,  3.69,  3.8 ,
        3.81,  3.84,  3.88,  3.98,  3.99,  4.08,  4.11,  4.33,  4.49,
        4.5 ,  4.54,  4.56,  4.57,  4.64,  4.65,  4.69,  4.78,  4.86,
        5.  ,  5.03,  5.06,  5.12,  5.2 ,  5.25,  5.29,  5.32,  5.35,
        5.36,  5.42,  5.49,  5.51,  5.65,  5.73,  5.74,  5.78,  5.83,
        5.84,  5.9 ,  6.  ,  6.06,  6.07,  6.1 ,  6.13,  6.22,  6.24,
        6.28,  6.3 ,  6.42,  6.55,  6.74,  6.75,  7.08,  7.14,  7.25,
        7.26,  7.35,  7.37,  7.38,  7.41,  7.45,  7.47,  7.53,  7.58,
        7.59,  7.65,  7.7 ,  7.73,  7.74,  7.78,  7.82,  7.85,  7.86,
        7.89,  7.9 ,  7.91,  7.92,  7.95,  7.96,  7.97,  8.  ,  8.02,
        8.03,  8.04,  8.06,  8.08,  8.09,  8.1 ,  8.11,  8.16,  8.19,
        8.23,  8.25,  8.26,  8.28,  8.29,  8.34,  8.35,  8.39,  8.41,
        8.45,  8.46,  8.47,  8.5 ,  8.55,  8.57,  8.66,  8.67,  8.69,
        8.71,  8.72,  8.73,  8.76,  8.78,  8.79,  8.8 ,  8.9 ,  8.96,
        8.97,  8.98,  8.99,  9.06,  9.09,  9.11,  9.13,  9.18,  9.27,
        9.38,  9.39,  9.44,  9.5 ,  9.55,  9.56,  9.65,  9.66,  9.73,
        9.74,  9.98, 10.2 , 10.32]),)

In [38]:
visualize_classifier(classifier, X, y)

IndexError: index 1 is out of bounds for axis 1 with size 1