In [1]:
# ! curl http://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data > ./data/ionosphere.data

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import pandas

In [3]:
data = pandas.read_csv('./data/ionosphere.data', header=None) # Load data
train, test = train_test_split(data, test_size=0.3)
X_train = train[range(0, 34)]
y_train = train[34]
X_test = test[range(0, 34)]
y_test = test[34]

## Построение модели
Строится модель простого классификатора, для того чтобы оценить потом профит от подбора гиперпараметров

In [15]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

def scores(y_true, y_pred):
    return { 
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, labels=['g', 'b'], pos_label='g'),
        "f1": f1_score(y_true, y_pred, labels=['g', 'b'], pos_label='g'),
        "recall": recall_score(y_true, y_pred, labels=['g', 'b'], pos_label='g'),
    }

classifier = SVC(kernel="linear")
model = classifier.fit(X_train, y_train)

y_predicted = classifier.predict(X_test)
scores(y_test, y_predicted)

{'accuracy': 0.9245283018867925,
 'precision': 0.9358974358974359,
 'f1': 0.948051948051948,
 'recall': 0.9605263157894737}

## Подбор гиперпараметров

In [23]:
from sklearn.model_selection import GridSearchCV

parameters = { "kernel": ["linear", "rbf", "poly", "sigmoid"],
               "C": list(map(lambda x: x/10, range(1, 11, 1))) }

classifier = GridSearchCV(SVC(), param_grid=parameters)

model = classifier.fit(X_train, y_train)

print(model.best_params_)
classifier = model.best_estimator_

y_predicted = classifier.predict(X_test)
scores(y_test, y_predicted)

{'C': 0.3, 'kernel': 'rbf'}


{'accuracy': 0.9528301886792453,
 'precision': 0.961038961038961,
 'f1': 0.9673202614379085,
 'recall': 0.9736842105263158}