# 1. Scikit-learnのインストール

In [1]:
!pip install scikit-learn

# 2. データの読み込みと前処理

In [2]:
from sklearn import datasets

# Load scikit-learn's breast-cancer dataset and unpack the feature matrix (X)
# and the target vector (y) in a single step.
breast_cancer = datasets.load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target


In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# 3. モデルの選択と訓練

In [4]:
from sklearn.tree import DecisionTreeClassifier

# Seed the tree: scikit-learn randomly permutes the candidate features at every
# split, so an unseeded tree can differ between runs on the same data. Fixing
# random_state (same seed as the train/test split) makes training repeatable.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)


DecisionTreeClassifier()

# 4. モデルの評価

In [5]:
from sklearn.metrics import accuracy_score

# Predict labels for the held-out test set, then compare them with the true
# labels to get the fraction classified correctly.
y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", acc)


Accuracy: 0.9385964912280702


# 5. モデルの保存と読み込み

In [6]:
!pip install joblib

In [7]:
import joblib

# Persist the trained classifier to disk.
joblib.dump(value=clf, filename='model.pkl')

# Restore the classifier from the file that was just written.
# NOTE: joblib.load unpickles the file, so only load files you trust.
loaded_clf = joblib.load(filename='model.pkl')

# Score the restored model on the test set.
y_pred = loaded_clf.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", acc)


Accuracy: 0.9385964912280702


# 6. ハイパーパラメータチューニング

In [8]:
from sklearn.model_selection import GridSearchCV

# Candidate values to try for each hyperparameter (every combination is evaluated).
param_grid = {'max_depth': [2, 3, 4, 5],
              'min_samples_split': [2, 3, 4, 5]}

# Seed the base estimator: an unseeded decision tree can produce different
# splits on each fit, which makes the cross-validation scores (and therefore
# the selected "best" parameters) vary between runs.
grid_search = GridSearchCV(
    DecisionTreeClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring='accuracy',
)

grid_search.fit(X_train, y_train)

# Show the best hyperparameter combination found by the search.
print("Best parameters:", grid_search.best_params_)

# Retrieve the model trained with the best hyperparameters.
best_clf = grid_search.best_estimator_

# Evaluate the tuned model on the held-out test set.
y_pred = best_clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", acc)


Best parameters: {'max_depth': 3, 'min_samples_split': 4}
Accuracy: 0.9473684210526315
