<a href="https://colab.research.google.com/github/KruthikaTS/Breast-Cancer-Detection/blob/main/breast_cancer_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#importing libraries
import pandas as pd  #data processing
import numpy as np   #performing linear algebra
import matplotlib.pyplot as plt #visualising dataset/graphs


In [None]:
# Load the dataset from data.csv and show its first rows as a sanity check
data = pd.read_csv("data.csv")
preview = data.head()
print(preview)

         id diagnosis  radius_mean  texture_mean  perimeter_mean  area_mean  \
0    842302         M        17.99         10.38          122.80     1001.0   
1    842517         M        20.57         17.77          132.90     1326.0   
2  84300903         M        19.69         21.25          130.00     1203.0   
3  84348301         M        11.42         20.38           77.58      386.1   
4  84358402         M        20.29         14.34          135.10     1297.0   

   smoothness_mean  compactness_mean  concavity_mean  concave points_mean  \
0          0.11840           0.27760          0.3001              0.14710   
1          0.08474           0.07864          0.0869              0.07017   
2          0.10960           0.15990          0.1974              0.12790   
3          0.14250           0.28390          0.2414              0.10520   
4          0.10030           0.13280          0.1980              0.10430   

   ...  texture_worst  perimeter_worst  area_worst  smoothness

In [None]:
# Data cleaning: remove the "Unnamed: 32" and "id" columns in place
unwanted_columns = ["Unnamed: 32", "id"]
data.drop(columns=unwanted_columns, inplace=True)

In [None]:
# Encode the diagnosis column as integers: 'M' becomes 1, any other value becomes 0
data["diagnosis"] = [int(label == 'M') for label in data["diagnosis"]]

In [None]:
# Feature and target selection: every column except "diagnosis" is a feature,
# and "diagnosis" itself is the target
feature_frame = data.drop(columns="diagnosis")
x_data = np.array(feature_frame)
y = np.array(data["diagnosis"])

In [None]:
# Normalise (feature scaling) with per-feature min-max scaling:
#     scaled = (value - min) / (max - min)
# The parentheses around (max - min) matter: without them operator precedence
# divides by max alone and then subtracts min, which is not a 0..1 scaling.
# min/max are taken per column (axis=0) so that features with large raw values
# do not squash the small-valued ones towards zero.
# (sklearn.preprocessing.StandardScaler is an alternative scaling approach.)
feature_min = np.min(x_data, axis=0)
feature_range = np.max(x_data, axis=0) - feature_min
# A constant column has zero range; divide by 1 so it maps to 0 instead of NaN
feature_range = np.where(feature_range == 0, 1, feature_range)
x_data = (x_data - feature_min) / feature_range
# The train/test split cells read the scaled features from `x`
x = x_data

Normalisation ensures all feature values are between 0 & 1 → makes ML model work better.


In [None]:
# Hold out 15% of the samples for testing; the fixed random_state makes the split repeatable
from sklearn.model_selection import train_test_split

split_parts = train_test_split(x, y, test_size=0.15, random_state=42)
x_train, x_test, y_train, y_test = split_parts

test size = 15%, training size = 85%

In [None]:
# Transpose: split again with the same seed, then flip every part so the
# feature matrices are laid out as (features, samples)
from sklearn.model_selection import train_test_split

split_parts = train_test_split(x, y, test_size=0.15, random_state=42)
x_train, x_test, y_train, y_test = (part.T for part in split_parts)

# Report the resulting shapes
for label, split_array in (
    ("x train: ", x_train),
    ("x test: ", x_test),
    ("y train: ", y_train),
    ("y test: ", y_test),
):
    print(label, split_array.shape)

x train:  (30, 483)
x test:  (30, 86)
y train:  (483,)
y test:  (86,)


🔹 Why? The hand-written logistic regression below computes np.dot(w.T, x), which expects every sample to be a column.
So after the transpose, each feature matrix is laid out as features × samples.


In [None]:
#initialise weights and bias
def initialize_weights_and_bias(dimension):
    """Return the starting parameters as a ``(weights, bias)`` tuple.

    ``weights`` is a ``(dimension, 1)`` float array in which every entry is
    0.01 and ``bias`` is the float 0.0.
    """
    weights = np.empty((dimension, 1))
    weights.fill(0.01)
    bias = 0.0
    return weights, bias

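- The initialize_weights_and_bias function takes the dimension (the number of features) as input. It must be an integer.
- np.full creates an array. np.full((dimension, 1), 0.01) creates a (dimension, 1) array filled with 0.01.
- w must be a column matrix of value 0.01
- b starts at 0.0. Logistic regression has a single unit, so there is no symmetry to break and constant starting values are fine.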

In [None]:
#define sigmoid activation
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)) of ``z``.

    Works element-wise when ``z`` is a numpy array.
    """
    exp_of_negated = np.exp(-z)
    return 1 / (1 + exp_of_negated)

- Sigmoid converts any number into a probability between 0 & 1.
- Mathematically, g(z) and y_head represent the same thing, but in ML code, we use y_head because it represents predictions explicitly.
- y_head = sigmoid(z) is the probability output (between 0 and 1).
- y_head represents predictions, not just a function.










In [None]:
#forward and backward prediction
def forward_and_backward_prediction(w, b, x_train, y_train):
    """Run one forward and one backward pass of logistic regression.

    The forward pass applies ``sigmoid`` to ``w.T @ x_train + b`` and averages
    the cross-entropy loss over ``x_train.shape[1]``. The backward pass
    computes the gradients of that cost.

    Assumes ``x_train`` is laid out as (features, samples) - confirm with caller.

    Returns:
        ``(cost, gradients)`` where ``gradients`` is a dict with the keys
        ``"derivative_weight"`` and ``"derivative_bias"``.
    """
    sample_count = x_train.shape[1]

    # Forward pass: predicted probabilities and mean cross-entropy cost
    y_head = sigmoid(np.dot(w.T, x_train) + b)
    loss = -(y_train * np.log(y_head) + (1 - y_train) * np.log(1 - y_head))
    cost = np.sum(loss) / sample_count

    # Backward pass: both gradients are built from the prediction error
    error = y_head - y_train
    gradients = {
        "derivative_weight": np.dot(x_train, error.T) / sample_count,
        "derivative_bias": np.sum(error) / sample_count,
    }
    return cost, gradients

In [None]:
def update(w, b, x_train, y_train, learning_rate, number_of_iterarion):
    """Train the weights and bias with gradient descent and plot the cost.

    Args:
        w: Initial weights.
        b: Initial bias.
        x_train: Training features, passed through to
            ``forward_and_backward_prediction``.
        y_train: Training labels, passed through as well.
        learning_rate: Step size applied to each gradient.
        number_of_iterarion: Number of gradient-descent steps to run (the
            misspelled name is kept so keyword callers keep working).

    Returns:
        ``(parameters, gradients, cost_list)``. ``parameters`` holds the
        learned values under ``"weight"`` and ``"bias"``, ``gradients`` is the
        dict from the last step (empty when no step was run) and ``cost_list``
        has one cost per step.
    """
    cost_list = []
    cost_list2 = []  # costs sampled every 10th step, used for the plot
    index = []       # step numbers matching cost_list2
    gradients = {}   # stays empty when number_of_iterarion is 0

    for i in range(number_of_iterarion):
        # forward_and_backward_prediction is the helper whose gradient keys
        # ("derivative_weight" / "derivative_bias") match the lookups below
        cost, gradients = forward_and_backward_prediction(w, b, x_train, y_train)
        cost_list.append(cost)

        # Gradient-descent step
        w = w - learning_rate * gradients["derivative_weight"]
        b = b - learning_rate * gradients["derivative_bias"]
        if i % 10 == 0:
            cost_list2.append(cost)
            index.append(i)
            print ("Cost after iteration % i: % f" %(i, cost))

    # Learned parameters plus a plot of the sampled costs
    parameters = {"weight": w, "bias": b}
    plt.plot(index, cost_list2)
    plt.xticks(index, rotation ='vertical')
    plt.xlabel("Number of Iterarion")
    plt.ylabel("Cost")
    plt.show()
    return parameters, gradients, cost_list


In [None]:
def predict(w, b, x_test):
    """Return hard 0/1 class predictions for the columns of ``x_test``.

    The probabilities come from ``sigmoid(w.T @ x_test + b)``. A probability
    of 0.5 or less becomes 0 and anything else becomes 1.

    Returns:
        A float array of shape ``(1, x_test.shape[1])``.
    """
    probabilities = sigmoid(np.dot(w.T, x_test) + b)
    sample_count = x_test.shape[1]

    # Threshold the first row in one vectorised step instead of looping
    labels = np.where(probabilities[0, :] <= 0.5, 0.0, 1.0)
    return labels.reshape(1, sample_count)


In [None]:
def forward_backward_propagation(w, b, x_train, y_train):
    """
    Compute the forward propagation and backpropagation for logistic regression.

    Args:
        w: Weights; ``w.T`` is multiplied with ``x_train``.
        b: Bias added to every linear output.
        x_train: Feature matrix; ``x_train.shape[1]`` is used as the number
            of samples.
        y_train: Labels; reshaped to a single row before use.

    Returns:
        ``(cost, gradients)`` where ``cost`` is the mean cross-entropy loss and
        ``gradients`` is a dict with the keys ``"dw"`` and ``"db"``.
    """
    # Small constant inside the logs: a prediction of exactly 0 or 1 would
    # otherwise produce log(0) and turn the cost into inf or nan
    epsilon = 1e-9

    # Forward propagation
    z = np.dot(w.T, x_train) + b
    y_head = 1 / (1 + np.exp(-z))  # Sigmoid function

    # Work on single-row arrays so the element-wise operations line up
    y_train = np.asarray(y_train).reshape(1, -1)
    y_head = y_head.reshape(1, -1)

    # Cost function
    m = x_train.shape[1]
    loss = -y_train * np.log(y_head + epsilon) - (1 - y_train) * np.log(1 - y_head + epsilon)
    cost = np.sum(loss) / m

    # Backpropagation
    dw = np.dot(x_train, (y_head - y_train).T) / m
    db = np.sum(y_head - y_train) / m

    gradients = {"dw": dw, "db": db}

    return cost, gradients


In [None]:
def update(w, b, x_train, y_train, learning_rate, number_of_iteration):
    """
    Perform gradient descent updates for logistic regression.

    Args:
        w: Initial weights.
        b: Initial bias.
        x_train: Training features, passed through to
            ``forward_backward_propagation``.
        y_train: Training labels, passed through as well.
        learning_rate: Step size applied to each gradient.
        number_of_iteration: Number of gradient-descent steps to run.

    Returns:
        ``(parameters, gradients, cost_list)``. ``parameters`` holds the
        learned values under ``"weight"`` and ``"bias"``, ``gradients`` is the
        dict from the last step (empty when no step was run) and ``cost_list``
        has one cost per step.
    """
    cost_list = []
    # Defined up front so that zero iterations return an empty dict instead
    # of failing on an unbound name
    gradients = {}

    for i in range(number_of_iteration):
        # Forward and backward propagation
        cost, gradients = forward_backward_propagation(w, b, x_train, y_train)
        cost_list.append(cost)

        # Update weights and bias
        w = w - learning_rate * gradients["dw"]
        b = b - learning_rate * gradients["db"]

        # Print cost at every 10 iterations
        if i % 10 == 0:
            print(f"Cost after iteration {i}: {cost:.6f}")

    parameters = {"weight": w, "bias": b}

    return parameters, gradients, cost_list


In [None]:
# Turn the label vectors into single-row matrices of shape (1, n_samples)
y_train = np.reshape(y_train, (1, -1))  # (483,) -> (1, 483)
y_test = np.reshape(y_test, (1, -1))  # (86,) -> (1, 86)
def initialize_weights_and_bias(dimension):
    """Return a ``(dimension, 1)`` array of zero weights together with a bias of 0."""
    zero_weights = np.zeros(shape=(dimension, 1))
    zero_bias = 0
    return zero_weights, zero_bias

# Forward pass on the training data with freshly initialised parameters.
# w and b have to exist before they are used; x_train is laid out as
# (features, samples), so its first dimension is the number of weights.
w, b = initialize_weights_and_bias(x_train.shape[0])
z = np.dot(w.T, x_train) + b
y_head = 1 / (1 + np.exp(-z))  # Sigmoid function

# Reshape to ensure correct broadcasting
y_head = y_head.reshape(1, -1)



NameError: name 'w' is not defined

In [None]:
def forward_backward_propagation(w, b, x_train, y_train):
    """
    Run one forward and one backward pass of logistic regression.

    Labels and predictions are reshaped to single rows, the cross-entropy
    loss is averaged over ``x_train.shape[1]`` and a small constant is added
    inside each logarithm to avoid log(0).

    Returns:
        ``(cost, gradients)`` where ``gradients`` is a dict with the keys
        ``"dw"`` and ``"db"``.
    """
    eps = 1e-9
    n_samples = x_train.shape[1]

    # Forward pass: sigmoid of the linear output, as a single row
    linear_output = np.dot(w.T, x_train) + b
    activations = (1 / (1 + np.exp(-linear_output))).reshape(1, -1)
    targets = y_train.reshape(1, -1)

    # Mean cross-entropy cost
    positive_term = targets * np.log(activations + eps)
    negative_term = (1 - targets) * np.log(1 - activations + eps)
    cost = np.sum(-positive_term - negative_term) / n_samples

    # Backward pass: both gradients are built from the prediction error
    residual = activations - targets
    gradients = {
        "dw": np.dot(x_train, residual.T) / n_samples,
        "db": np.sum(residual) / n_samples,
    }
    return cost, gradients

# Function to initialize weights
def initialize_weights_and_bias(dimension):
    """Build the starting parameters: all-zero weights of shape ``(dimension, 1)`` and a bias of 0."""
    weight_shape = (dimension, 1)
    return np.zeros(weight_shape), 0

# Logistic Regression Function
def logistic_regression(x_train, y_train, x_test, y_test, learning_rate, num_iterations):
    """Train a logistic regression model and print its train and test accuracy.

    The weights and bias are created for ``x_train.shape[0]`` features, fitted
    with ``update`` and then used by ``predict`` on both data sets. Nothing is
    returned; the accuracies are only printed.
    """
    n_features = x_train.shape[0]
    initial_w, initial_b = initialize_weights_and_bias(n_features)

    parameters, gradients, cost_list = update(
        initial_w, initial_b, x_train, y_train, learning_rate, num_iterations
    )
    weight, bias = parameters["weight"], parameters["bias"]

    y_prediction_test = predict(weight, bias, x_test)
    y_prediction_train = predict(weight, bias, x_train)

    # Accuracy is 100 minus the mean absolute prediction error in percent
    train_error = np.mean(np.abs(y_prediction_train - y_train))
    print("Train accuracy: {} %".format(100 - train_error * 100))
    test_error = np.mean(np.abs(y_prediction_test - y_test))
    print("Test accuracy: {} %".format(100 - test_error * 100))


In [None]:
def forward_backward_propagation(w, b, x_train, y_train):
    """
    Compute the logistic regression cost and its gradients for one pass.

    The sigmoid of ``w.T @ x_train + b`` gives the predictions. Labels and
    predictions are reshaped to single rows, and 1e-9 is added inside each
    logarithm so that log(0) cannot occur.

    Returns:
        A ``(cost, gradients)`` tuple; ``gradients`` maps ``"dw"`` and
        ``"db"`` to the weight and bias gradients.
    """
    tiny = 1e-9
    m = x_train.shape[1]

    # Forward propagation
    scores = np.dot(w.T, x_train) + b
    predicted = 1 / (1 + np.exp(-scores))

    # Single-row views so the element-wise operations line up
    labels = y_train.reshape(1, -1)
    predicted = predicted.reshape(1, -1)

    # Cross-entropy cost averaged over the m samples
    log_likelihood = labels * np.log(predicted + tiny) + (1 - labels) * np.log(1 - predicted + tiny)
    cost = np.sum(-log_likelihood) / m

    # Backpropagation
    difference = predicted - labels
    dw = np.dot(x_train, difference.T) / m
    db = np.sum(difference) / m

    return cost, {"dw": dw, "db": db}

