In [26]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [15]:
# Load the breast-cancer dataset bundled with scikit-learn and wrap its
# feature matrix in a DataFrame with one named column per feature.
dataset = load_breast_cancer()
df = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)

In [8]:
# df['target'] = pd.Series(dataset.target)

In [16]:
# Show the class names; position i is the name used for integer label i in dataset.target.
dataset.target_names

array(['malignant', 'benign'], dtype='<U9')

In [17]:
# Attach the label column as a categorical: each integer code in
# dataset.target is mapped to the class name at that position in
# dataset.target_names.
df['target_names'] = pd.Categorical.from_codes(
    codes=dataset.target,
    categories=dataset.target_names,
)

In [18]:
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target_names
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,malignant
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,malignant
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,malignant
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,malignant
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,malignant


In [21]:
# Feature matrix: every column except the label, as a plain NumPy array.
X = df.drop(columns=['target_names']).to_numpy()

In [24]:
# Labels as class names. NOTE(review): because the column is categorical,
# .values presumably yields a pandas Categorical rather than a NumPy array —
# confirm if downstream code depends on the array type or label ordering.
y = df['target_names'].values

In [25]:
# Hold out 30% of the samples for testing. Stratifying on y keeps the class
# balance the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [28]:
# NOTE: despite the `df_` prefix, this is the classifier, not a DataFrame.
# Trees are capped at depth 4; the seed keeps the forest reproducible.
df_train = RandomForestClassifier(
    max_depth=4,
    random_state=42,
    n_jobs=2,
)

In [29]:
# Train the forest on the training split only.
df_train.fit(X=X_train, y=y_train)

In [30]:
# Predicted class name for every held-out sample.
pred = df_train.predict(X=X_test)

In [31]:
# Mean accuracy of the classifier on the held-out split.
df_train.score(X=X_test, y=y_test)

0.9649122807017544

In [32]:
from sklearn.metrics import confusion_matrix

# Count agreements and disagreements between the true test labels and the
# predictions; rows are the actual classes, columns the predicted ones.
cm = confusion_matrix(y_true=y_test, y_pred=pred)

In [33]:
# Display the raw matrix (rows: actual class, columns: predicted class).
cm

array([[107,   0],
       [  6,  58]], dtype=int64)

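    # rows = actual class, columns = predicted class; labels are sorted
    # alphabetically (benign, malignant), with 'malignant' as the positive class
    cm[0][0] = TN
    cm[1][1] = TP
    cm[0][1] = FP
    cm[1][0] = FN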

In [34]:
# Labelled confusion table: actual classes down the rows, predictions across
# the columns.
# NOTE(review): the row and column label orders can differ (y_test looks
# categorical, pred is a plain array) — read the labels, not the positions.
pd.crosstab(
    index=y_test,
    columns=pred,
    rownames=['Actual Result'],
    colnames=['Predicted Result'],
)

Predicted Result,benign,malignant
Actual Result,Unnamed: 1_level_1,Unnamed: 2_level_1
malignant,6,58
benign,107,0


Run the classification report

With data from the confusion matrix, you can interpret the results by looking at the classification report.

In [35]:
from sklearn.metrics import classification_report

# Text summary of per-class precision, recall, F1 and support, plus the
# overall accuracy, for the held-out predictions.
report = classification_report(y_true=y_test, y_pred=pred)
print(report)

              precision    recall  f1-score   support

      benign       0.95      1.00      0.97       107
   malignant       1.00      0.91      0.95        64

    accuracy                           0.96       171
   macro avg       0.97      0.95      0.96       171
weighted avg       0.97      0.96      0.96       171



# Precision

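Precision tells us how accurate the positive predictions are: of all the samples predicted as positive, what fraction really is positive? Precision = TP / (TP + FP).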

![](image-24.png.webp)


# Recall?

Recall, also called sensitivity or hit rate, is the fraction of actual positives that the model correctly identified: Recall = TP / (TP + FN).

What fraction of the samples that are actually True did we predict as True? This number should be as high as possible.

High recall: Predicted most True values correctly.

![](image-23.png.webp)


# f1-score?

The f1-score, or F measure, measures precision and recall at the same time by finding the harmonic mean of the two values.

This score is useful when precision and recall disagree (one is high while the other is low), because it summarizes the trade-off in a single number.

![](image-22.png.webp)


# Accuracy in Classification Report

Accuracy is the fraction of all predictions (positive and negative) that are correct.

Accuracy should be as high as possible.

![](image-25.png.webp)

# Support

The support is the number of occurrences of each class in `y_test`.

In [36]:
# Class-membership probabilities for the first ten held-out samples, one row
# per sample. NOTE(review): columns presumably follow df_train.classes_ — verify
# before reading a column as a specific class.
df_train.predict_proba(X_test)[:10]

array([[0.90242195, 0.09757805],
       [0.99039442, 0.00960558],
       [0.98652397, 0.01347603],
       [0.98998415, 0.01001585],
       [0.99070596, 0.00929404],
       [0.97082112, 0.02917888],
       [0.08968623, 0.91031377],
       [0.7941867 , 0.2058133 ],
       [0.0053012 , 0.9946988 ],
       [0.96426857, 0.03573143]])