# Problem: Breast Cancer Classification

# Import

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


# Data Understanding

In [None]:
# Load the scikit-learn breast cancer dataset and put the features plus the
# label column ('target') into a single DataFrame for exploration.
data = load_breast_cancer()

df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

In [None]:
# Dimensions of the DataFrame as (rows, columns).
df.shape

In [None]:
# Preview the first rows to check column names and value ranges.
df.head()

In [None]:
# Summary statistics for every numeric column.
df.describe()

In [None]:
# One histogram per column (including 'target'), drawn on a 20x20 inch figure.
df.hist(figsize=(20, 20))
plt.show()

In [None]:
# Number of rows per target value, to check class balance.
df['target'].value_counts()

In [None]:
# Bar chart of the number of rows per target value.
sns.countplot(x='target', data=df)
plt.show()

In [None]:
# True if at least one cell anywhere in the DataFrame is missing.
df.isnull().values.any()

In [None]:
# Pairwise correlation between all columns of df (features and 'target'),
# rendered as a heatmap with each cell annotated to two decimals.
corr_matrix = df.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(corr_matrix, annot=True, fmt='.2f')
plt.show()

# Data preparation

In [None]:
# Reload the dataset and keep the raw arrays: X holds the features,
# y holds the labels.
data = load_breast_cancer()

X, y = data.data, data.target

In [None]:
# Hold out 20% of the samples for validation; random_state fixes the shuffle
# so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Standardize the features. The scaler is fitted on the training split only;
# the validation split is transformed with those same training statistics so
# no information from the validation data leaks into preprocessing.
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)

X_val_scaled = scaler.transform(X_val)

# Update weights and bias manually


## X, weights, bias and Learning Rate

In [None]:
# Learning rate (step size) used for the single manual update below.
lr = 0.01

In [None]:
# Scale the full feature matrix with the scaler that was fitted on the
# training split above.
X_scaled = scaler.transform(X)

In [None]:
# Inspect the first scaled sample (the slice keeps it as a one-row array).
X_scaled[0:1]

In [None]:
# Random starting point for the manual walkthrough: one weight per feature
# column, drawn from a standard normal distribution. The size is taken from
# X_scaled instead of being hard-coded so the dot product below always lines up.
weights = np.random.normal(loc=0, scale=1, size=X_scaled.shape[1])

# Arbitrary small starting bias.
bias = 0.1

In [None]:
# Display the randomly initialised weights.
weights

## z = np.dot(X, weights) + bias

In [None]:
# Linear score for every sample: weighted sum of its scaled features plus bias.
z = np.dot(X_scaled, weights) + bias

In [None]:
# First five linear scores.
z[0:5]

In [None]:
# Number of scores computed (one per row of X_scaled).
len(z)

## Sigmoid and y_prob

In [None]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The naive formula evaluates ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) for large negative ``z``. This version only ever
    exponentiates a non-positive number, so it cannot overflow:

    * z >= 0:  1 / (1 + exp(-z))
    * z <  0:  exp(z) / (1 + exp(z))

    Args:
        z: Scalar or array-like of real-valued scores.

    Returns:
        Values in [0, 1] with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) lies in (0, 1] for every finite z.
    e = np.exp(-np.abs(z))
    # Numerator is 1 on the non-negative branch and exp(z) on the negative one;
    # the denominator 1 + exp(-|z|) is shared by both branches.
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

In [None]:
# Squash the linear scores through the sigmoid to get values between 0 and 1.
y_prob = sigmoid(z)

## Loss

In [None]:
def compute_loss(y, y_pred):
    """Return the mean binary cross-entropy between labels and predictions.

    Args:
        y: Array of true labels (0 or 1).
        y_pred: Array of predicted probabilities, same shape as ``y``.

    Returns:
        The average negative log-likelihood. A small constant is added inside
        each logarithm so that predictions of exactly 0 or 1 do not produce
        ``log(0)``.
    """
    eps = 1e-5
    positive_term = y * np.log(y_pred + eps)
    negative_term = (1 - y) * np.log(1 - y_pred + eps)
    return -np.mean(positive_term + negative_term)

In [None]:
# Loss of the randomly initialised model on the full dataset.
compute_loss(y, y_prob)

## Compute Gradients

In [None]:
def compute_gradients(X, y, y_pred):
    """Return the gradient of the mean log-loss with respect to the weights.

    Args:
        X: Feature matrix with one row per sample.
        y: Array of true labels.
        y_pred: Array of predicted probabilities for the rows of ``X``.

    Returns:
        One gradient entry per feature column: ``X.T @ (y_pred - y)`` averaged
        over the number of samples.
    """
    residual = y_pred - y
    n_samples = len(y)
    return X.T.dot(residual) / n_samples

In [None]:
# Gradient of the loss with respect to the weights. It must be computed from
# the same standardized features (X_scaled) that produced z and y_prob;
# using the raw X here would give a gradient that does not belong to this
# forward pass.
grads = compute_gradients(X_scaled, y, y_prob)

In [None]:
# First five weight-gradient entries.
grads[0:5]

In [None]:
# Gradient with respect to the bias: the mean prediction error.
grads_b = np.mean(y_prob - y)

In [None]:
# One gradient-descent step: move the weights against the gradient, scaled by
# the learning rate.
new_weights = weights - lr * grads

In [None]:
# First five weights after the update step.
new_weights[0:5]

In [None]:
# Same update step applied to the bias.
new_b = bias - lr * grads_b

# Functions

In [None]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The naive formula evaluates ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) for large negative ``z``. This version only ever
    exponentiates a non-positive number, so it cannot overflow:

    * z >= 0:  1 / (1 + exp(-z))
    * z <  0:  exp(z) / (1 + exp(z))

    Args:
        z: Scalar or array-like of real-valued scores.

    Returns:
        Values in [0, 1] with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) lies in (0, 1] for every finite z.
    e = np.exp(-np.abs(z))
    # Numerator is 1 on the non-negative branch and exp(z) on the negative one;
    # the denominator 1 + exp(-|z|) is shared by both branches.
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

def compute_loss(y, y_pred):
    """Return the mean binary cross-entropy between labels and predictions.

    Args:
        y: Array of true labels (0 or 1).
        y_pred: Array of predicted probabilities, same shape as ``y``.

    Returns:
        The average negative log-likelihood. A small constant is added inside
        each logarithm so that predictions of exactly 0 or 1 do not produce
        ``log(0)``.
    """
    eps = 1e-5
    positive_term = y * np.log(y_pred + eps)
    negative_term = (1 - y) * np.log(1 - y_pred + eps)
    return -np.mean(positive_term + negative_term)

def compute_gradients(X, y, y_pred):
    """Return the gradient of the mean log-loss with respect to the weights.

    Args:
        X: Feature matrix with one row per sample.
        y: Array of true labels.
        y_pred: Array of predicted probabilities for the rows of ``X``.

    Returns:
        One gradient entry per feature column: ``X.T @ (y_pred - y)`` averaged
        over the number of samples.
    """
    residual = y_pred - y
    n_samples = len(y)
    return X.T.dot(residual) / n_samples

def gradient_descent(X, y, lr=0.01, epochs=100):
    """Fit logistic-regression weights and bias with batch gradient descent.

    Both parameters start at zero. Every epoch runs a forward pass over all of
    ``X``, measures the loss, and takes one step against the gradient. The loss
    is printed for the first epoch and every tenth epoch; the printed value is
    the loss measured before that epoch's parameter update.

    Args:
        X: Feature matrix with one row per sample.
        y: Array of true labels for the rows of ``X``.
        lr: Learning rate (step size).
        epochs: Number of full passes over the data.

    Returns:
        A ``(weights, bias)`` tuple holding the fitted parameters.
    """
    n_features = X.shape[1]
    weights, bias = np.zeros(n_features), 0

    for epoch in range(epochs):
        # Forward pass: linear scores -> probabilities -> loss.
        y_pred = sigmoid(np.dot(X, weights) + bias)
        loss = compute_loss(y, y_pred)

        # Gradients of the loss with respect to each parameter.
        weight_grad = compute_gradients(X, y, y_pred)
        bias_grad = np.mean(y_pred - y)

        # Step against the gradient.
        weights -= lr * weight_grad
        bias -= lr * bias_grad

        is_first_epoch = epoch == 0
        is_tenth_epoch = (epoch + 1) % 10 == 0
        if is_first_epoch or is_tenth_epoch:
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss}')

    return weights, bias

def predict(X, weights, bias):
    """Classify each row of ``X`` with the given logistic-regression parameters.

    Args:
        X: Feature matrix with one row per sample.
        weights: Weight vector with one entry per feature column.
        bias: Scalar bias term.

    Returns:
        The result of comparing each predicted probability with 0.5: true
        where the probability is at least 0.5, false otherwise.
    """
    probabilities = sigmoid(np.dot(X, weights) + bias)
    return probabilities >= 0.5

# Training

In [None]:
# Train on the scaled training split. This rebinds `weights` and `bias`,
# replacing the values used in the manual walkthrough above.
weights, bias = gradient_descent(X_train_scaled, y_train, lr=0.1, epochs=100)

# Performance Evaluation

In [None]:
# Predict on the scaled validation split with the trained parameters.
y_pred_valid = predict(X_val_scaled, weights, bias)

In [None]:
# Fraction of validation samples whose prediction matches the true label.
accuracy = accuracy_score(y_val, y_pred_valid)

In [None]:
# Report the validation accuracy.
print(f'Validation Accuracy: {accuracy}')