In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn

In [2]:
from sklearn.datasets import load_breast_cancer
cancer_data = load_breast_cancer()

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(cancer_data.data, cancer_data.target, stratify = cancer_data.target, shuffle =True, random_state =144)

In [4]:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt

DecisionTreeClassifier()

In [5]:
dt =dt.fit(X_train, y_train)

In [6]:
print("Training set score: {:.3f}".format(dt.score(X_train, y_train)))
print("Test set score: {:.3f}".format(dt.score(X_test, y_test)))

Training set score: 1.000
Test set score: 0.909


In [7]:
pred = dt.predict(X_test)

In [8]:
from sklearn.metrics import confusion_matrix
con_mat = confusion_matrix (y_test, pred)
print('Confusion matrix:\n{}'.format(con_mat))

Confusion matrix:
[[48  5]
 [ 8 82]]


In [9]:
from sklearn.metrics import f1_score
f1_score(y_test, pred)

0.9265536723163842

In [10]:
from sklearn.metrics import classification_report
print(classification_report(y_test, pred, target_names = ['malignant','benign']))

              precision    recall  f1-score   support

   malignant       0.86      0.91      0.88        53
      benign       0.94      0.91      0.93        90

    accuracy                           0.91       143
   macro avg       0.90      0.91      0.90       143
weighted avg       0.91      0.91      0.91       143



# RANDOM FOREST

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn

In [12]:
from sklearn.datasets import load_breast_cancer
cancer_data = load_breast_cancer()

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(cancer_data.data, cancer_data.target, stratify = cancer_data.target, shuffle =True, random_state =144)

In [14]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(oob_score = True)
rf

RandomForestClassifier(oob_score=True)

In [15]:
rf = rf.fit (X_train, y_train)

In [16]:
print("Training set score: {:.3f}".format(rf.score(X_train, y_train)))
print("Test set score: {:.3f}".format(rf.score(X_test, y_test)))
print("OOB Score: {:.3f}".format(rf.oob_score_))

Training set score: 1.000
Test set score: 0.944
OOB Score: 0.955


In [17]:
pred_rf = rf.predict(X_test)

In [18]:
from sklearn.metrics import confusion_matrix
con_mat = confusion_matrix (y_test, pred_rf)
print('Confusion matrix:\n{}'.format(con_mat))

Confusion matrix:
[[48  5]
 [ 3 87]]


In [19]:
from sklearn.metrics import f1_score
f1_score(y_test, pred)

0.9265536723163842

In [20]:
from sklearn.metrics import classification_report
print(classification_report(y_test, pred, target_names = ['malignant','benign']))

              precision    recall  f1-score   support

   malignant       0.86      0.91      0.88        53
      benign       0.94      0.91      0.93        90

    accuracy                           0.91       143
   macro avg       0.90      0.91      0.90       143
weighted avg       0.91      0.91      0.91       143



# GRADIENT BOOSTING

In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn

In [22]:
from sklearn.datasets import load_breast_cancer
cancer_data = load_breast_cancer()

In [23]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(cancer_data.data, cancer_data.target, stratify = cancer_data.target, shuffle =True, random_state =144)

In [24]:
from sklearn.ensemble import GradientBoostingClassifier
gb = GradientBoostingClassifier(n_estimators=250)
gb

GradientBoostingClassifier(n_estimators=250)

In [25]:
gb = gb.fit (X_train, y_train)

In [26]:
print("Training set score: {:.3f}".format(gb.score(X_train, y_train)))
print("Test set score: {:.3f}".format(gb.score(X_test, y_test)))


Training set score: 1.000
Test set score: 0.951


In [27]:
pred_gb = gb.predict(X_test)

In [28]:
from sklearn.metrics import confusion_matrix
con_mat = confusion_matrix (y_test, pred_gb)
print('Confusion matrix:\n{}'.format(con_mat))

Confusion matrix:
[[49  4]
 [ 3 87]]


In [29]:
from sklearn.metrics import f1_score
f1_score(y_test, pred_gb)

0.9613259668508287

In [30]:
from sklearn.metrics import classification_report
print(classification_report(y_test, pred_gb, target_names = ['malignant','benign']))

              precision    recall  f1-score   support

   malignant       0.94      0.92      0.93        53
      benign       0.96      0.97      0.96        90

    accuracy                           0.95       143
   macro avg       0.95      0.95      0.95       143
weighted avg       0.95      0.95      0.95       143

