### データセットのダウンロード
https://github.com/oreilly-japan/ml-security-jp/blob/master/ch03/archive.zip  
を取得して同じディレクトリに格納し、解凍を行う。  

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
import numpy as np
import optuna
from sklearn.model_selection import cross_validate

In [None]:
# Load the Android permission dataset; the CSV fields are separated by ';'.
AndroidDataset = pd.read_csv('train.csv', delimiter=';')

In [None]:
# Class balance: number of samples for each value of the 'type' label.
AndroidDataset['type'].value_counts()

In [None]:
# List every column name in the dataset.
print(AndroidDataset.columns)

In [None]:
# Ten columns with the largest totals among the type == 1 rows
# (presumably the malware samples -- confirm against the dataset description).
# The 'type' label column is removed by name rather than by skipping the first
# sorted entry: a column whose sum ties with (or exceeds) the label's sum would
# make the label land somewhere other than position 0.
AndroidDataset[AndroidDataset.type == 1].drop(columns='type').sum(axis=0).sort_values(ascending=False).head(10)

In [None]:
# Permission columns compared between the two classes below.
# NOTE(review): presumably copied from the type == 1 ranking computed in the
# previous cell -- confirm they still match if the dataset changes.
top10 = [
    'android.permission.INTERNET',
    'android.permission.READ_PHONE_STATE',
    'android.permission.ACCESS_NETWORK_STATE',
    'android.permission.WRITE_EXTERNAL_STORAGE',
    'android.permission.ACCESS_WIFI_STATE',
    'android.permission.READ_SMS',
    'android.permission.WRITE_SMS',
    'android.permission.RECEIVE_BOOT_COMPLETED',
    'android.permission.ACCESS_COARSE_LOCATION',
    'android.permission.CHANGE_WIFI_STATE',
]

# Per-permission totals over the type == 0 rows only.
AndroidDataset[AndroidDataset['type'] == 0][top10].sum()

In [None]:
# Two stacked bar charts sharing one x axis:
# type == 0 totals on top, type == 1 totals (red) underneath.
fig, axs = plt.subplots(2, 1, sharex=True)

AndroidDataset[AndroidDataset['type'] == 0][top10].sum().plot.bar(ax=axs[0])
AndroidDataset[AndroidDataset['type'] == 1][top10].sum().plot.bar(ax=axs[1], color='red')

In [None]:
# Features are every column except the last; the last column is the target.
X, y = AndroidDataset.iloc[:, :-1], AndroidDataset.iloc[:, -1]

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# shuffled split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
    shuffle=True,
)

In [None]:
class Objective_SVM:
    """Optuna objective that scores an SVC configuration by cross-validation.

    An instance is callable: each call samples one set of hyperparameters
    from the given trial and returns the mean cross-validated test score.
    """

    def __init__(self, X, y):
        """Store the features ``X`` and labels ``y`` used to score every trial."""
        self.X = X
        self.y = y

    def __call__(self, trial):
        """Evaluate a single trial.

        Args:
            trial: Optuna trial used to sample ``kernel``, ``C`` and ``gamma``.

        Returns:
            Mean of the ``test_score`` values reported by ``cross_validate``.
        """
        params = {
            'kernel': trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
            # suggest_float(..., log=True) replaces the deprecated
            # suggest_loguniform and samples the same range on a log scale.
            'C': trial.suggest_float('C', 1e-5, 1e2, log=True),
            'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),
        }

        model = SVC(**params)

        # n_jobs=-1 lets scikit-learn evaluate the folds in parallel.
        scores = cross_validate(model, self.X, self.y, n_jobs=-1)

        return scores['test_score'].mean()

In [None]:
# Tune the SVC hyperparameters using the training split only.
objective = Objective_SVM(X_train, y_train)
# 'maximize' because the objective returns a mean cross-validation score.
study = optuna.create_study(direction='maximize')
# Run trials under a time budget (timeout=60) instead of a fixed trial count.
study.optimize(objective, timeout=60)
print(study.best_params)

In [None]:
# Rebuild the classifier from the best trial. The parameter names sampled by
# the objective ('kernel', 'C', 'gamma') are exactly SVC's keyword arguments.
model = SVC(**study.best_params)

# fit() returns the fitted estimator, so training and prediction are chained.
pred = model.fit(X_train, y_train).predict(X_test)

In [None]:
# Report held-out accuracy as a percentage, followed by the confusion matrix.
print('Accuracy: {:.5f}%'.format(accuracy_score(y_true=y_test, y_pred=pred) * 100))
print(confusion_matrix(y_true=y_test, y_pred=pred))