In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer

In [4]:
# Load the data
X, y = load_breast_cancer(return_X_y=True)

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Apply Algorithms to this Problem

- Logistic Regression
- Decision Tree
- RandomForest Tree

In [7]:
# Logistic Regression
class LogisticRegression():

    def __init__(self, learning_rate, epochs):

        self.coef_ = None
        self.lr = learning_rate
        self.epochs = epochs
    
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def fit(self, X_train, y_train):

        X_train = np.insert(X_train, 0, 1, axis=1)
        self.coef_ = np.random.randn(X_train.shape[1]) * 0.01

        for i in range(self.epochs):

            y_hat = self.sigmoid(np.dot(X_train, self.coef_))

            error = y_train - y_hat

            coef_slope = np.dot(X_train.T, error) / X_train.shape[0]

            self.coef_ = self.coef_ - (self.lr * coef_slope)
        
        print(f"Coef_: {self.coef_}")
    
    def predict(self, X_test):

        X_test = np.insert(X_test, 0, 1, axis=1)
        y_pred = self.sigmoid(np.dot(X_test, self.coef_))
        return (y_pred >= 0.5).astype(int)



        

In [29]:
lr = LogisticRegression(learning_rate=0.001, epochs=5000)

In [30]:
lr.fit(X_train, y_train)

Coef_: [-1.16283177  3.28185553  1.77205269  3.38425838  3.17141594  1.95048104
  3.13125029  3.4765452   3.76702866  1.8847992   0.40410041  2.53789213
  0.0500135   2.52874634  2.4020512  -0.07540587  1.69992368  1.53980814
  2.18299926  0.24438138  0.76895212  3.49285825  1.98394693  3.56190312
  3.3359361   2.07972581  3.01723379  3.3019723   3.80339204  2.1004598
  1.86304933]


In [31]:
y_pred_lr = lr.predict(X_test)

In [32]:
from sklearn.metrics import accuracy_score, classification_report

acc_lr = accuracy_score(y_test, y_pred_lr)
cr_report = classification_report(y_test, y_pred_lr)

print(f"Accuracy : {acc_lr:.2f}%")
print(f"Classification Report : \n{cr_report}")

Accuracy : 0.04%
Classification Report : 
              precision    recall  f1-score   support

           0       0.03      0.05      0.04        43
           1       0.05      0.03      0.04        71

    accuracy                           0.04       114
   macro avg       0.04      0.04      0.04       114
weighted avg       0.04      0.04      0.04       114



In [38]:
# Logistic Regression with scikit-learn
from sklearn.linear_model import LogisticRegression

lr_sklearn = LogisticRegression()

lr_sklearn.fit(X_train, y_train)

y_pred_lr_sk = lr_sklearn.predict(X_test)

In [39]:
acc_lrsk = accuracy_score(y_test, y_pred_lr_sk)
cr_report_sk = classification_report(y_test, y_pred_lr_sk)

print(f"Accuracy (scikit-learn) : {acc_lrsk:.2f}%")
print(f"Classification Report (scikit-learn) : \n{cr_report_sk}")

Accuracy (scikit-learn) : 0.97%
Classification Report (scikit-learn) : 
              precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



In [33]:
# Decision Tree Classifier 
from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier(max_depth=5, random_state=42)

In [34]:
tree.fit(X_train, y_train)

In [35]:
y_pred = tree.predict(X_test)

In [37]:
acc_tree = accuracy_score(y_test, y_pred)
cr_report_tree = classification_report(y_test, y_pred)

print(f"Accuracy Score Decision Tree : {acc_tree:.2f}%")
print(f"Classification Report Decision Tree:\n{cr_report_tree}")

Accuracy Score Decision Tree : 0.95%
Classification Report Decision Tree:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114



In [40]:
# RandomForest Classifier 
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100, random_state=42)

rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)

In [42]:
acc_rf = accuracy_score(y_test, y_pred_rf)
cr_report_rf = classification_report(y_test, y_pred_rf)

print(f"Accuracy Score Random Forest : {acc_rf:.2f}%")
print(f"Classification Report Random Forest:\n{cr_report_rf}")

Accuracy Score Random Forest : 0.96%
Classification Report Random Forest:
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

