In [1]:
# Linear support-vector classifier
from sklearn.svm import LinearSVC

# Loader for the breast-cancer dataset
from sklearn.datasets import load_breast_cancer

# Helper that splits data into training and test subsets
from sklearn.model_selection import train_test_split

# Load the dataset
cancer = load_breast_cancer()

# Split into training and test data, stratified on the target and seeded
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, stratify = cancer.target, random_state = 0)

# Create the classifier and fit it on the raw (unscaled) features.
# random_state is pinned so repeated runs give the same scores; this matches
# the LinearSVC(random_state = 0) used in the model-comparison cell.
# NOTE(review): scores saved from an earlier unseeded run may differ slightly
# from what a fresh run prints.
model = LinearSVC(random_state = 0)
model.fit(X_train, y_train)

# Accuracy on the training data and on the test data
print('準確度（train）:{:.3f}'.format(model.score(X_train,y_train)))
print('準確度（test）:{:.3f}'.format(model.score(X_test,y_test)))

準確度（train）:0.925
準確度（test）:0.930




In [2]:
# Standardization utility
from sklearn.preprocessing import StandardScaler

# Load the dataset
cancer = load_breast_cancer()

# Split into training and test data, stratified on the target and seeded
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, stratify = cancer.target, random_state = 0)

# Standardize the features. The scaler is fitted on the training split only
# and the same fitted scaler is then applied to the test split.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# Create and fit the linear SVM (not logistic regression) on the standardized
# features. random_state is pinned so repeated runs give the same scores; this
# matches the LinearSVC(random_state = 0) used in the model-comparison cell.
model = LinearSVC(random_state = 0)
model.fit(X_train_std,y_train)

# Accuracy on the standardized training data and test data
print('準確度（train）:{:.3f}'.format(model.score(X_train_std,y_train)))
print('準確度（test）:{:.3f}'.format(model.score(X_test_std,y_test)))

準確度（train）:0.993
準確度（test）:0.951


#### 練習問題8-8

In [5]:
from sklearn.svm import SVC

# Re-split the breast-cancer data (loaded in an earlier cell) with seed 50,
# stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=50,
)

# Standardize: fit the scaler on the training split, then reuse it on the test split.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# Build an RBF-kernel support-vector classifier with C = 2 and fit it
# on the standardized training data.
model = SVC(kernel='rbf', random_state=0, C=2).fit(X_train_std, y_train)

# Report accuracy on both splits.
print('準確度（train）:{:.3f}'.format(model.score(X_train_std, y_train)))
print('準確度（test）:{:.3f}'.format(model.score(X_test_std, y_test)))

準確度（train）:0.988
準確度（test）:0.986


### 第8章 綜合問題

#### 綜合問題8-2 決策樹

In [8]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

# Load the iris dataset.
iris = load_iris()

# Stratified, seeded split into training and test data.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    stratify=iris.target,
    random_state=0,
)

# Decision-tree classifier:
#   criterion='entropy' -> entropy is used as the split criterion
#   max_depth=3         -> the tree may grow at most 3 levels deep
model = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=0,
).fit(X_train, y_train)

# Report accuracy on both splits.
print('準確度（train）:{:.3f}'.format(model.score(X_train, y_train)))
print('準確度（test）:{:.3f}'.format(model.score(X_test, y_test)))

準確度（train）:0.964
準確度（test）:0.947


#### 綜合問題8-3 no-free-lunch

In [10]:
# Imports needed for the comparison
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC,SVC
import pandas as pd

# The breast-cancer data from load_breast_cancer serves as the example here.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=0,
)

# Standardize: fit the scaler on the training split, then reuse it on the test split.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# Candidate models, keyed by a short label.
models = {
    'knn': KNeighborsClassifier(),
    'tree': DecisionTreeClassifier(random_state=0),
    'logistic': LogisticRegression(random_state=0),
    'svc1': LinearSVC(random_state=0),
    'svc2': SVC(random_state=0),
}

# Scores are collected under (label, split) keys.
scores = {}

# Fit every model on the standardized training data and record both accuracies.
for model_name, model in models.items():
    model.fit(X_train_std, y_train)
    scores.update({
        (model_name, 'train'): model.score(X_train_std, y_train),
        (model_name, 'test'): model.score(X_test_std, y_test),
    })

# Show the result as a table: one row per model, one column per split.
pd.Series(scores).unstack()

Unnamed: 0,test,train
knn,0.951049,0.978873
logistic,0.958042,0.99061
svc1,0.951049,0.992958
svc2,0.958042,0.992958
tree,0.902098,1.0
