### The Logistic Function

At its core, logistic regression predicts binary outcomes (Yes/No, 0/1, etc.). The key formula is the **logistic function**, also called the **sigmoid function**, $\sigma(z) = \dfrac{1}{1 + e^{-z}}$, which maps any real number $z$ to a value strictly between 0 and 1, so its output can be interpreted as a probability.


In [27]:
import pandas as pd

# Read the dataset from the CSV in the working directory, then display the
# diagnosis column as the cell's output.
df = pd.read_csv('data.csv')
df['diagnosis']

0      M
1      M
2      M
3      M
4      M
      ..
564    M
565    M
566    M
567    M
568    B
Name: diagnosis, Length: 569, dtype: object

In [2]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [3]:
# List every column label in the loaded frame.
df.columns

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')

In [14]:
# set up sigmoid 
import numpy as np

def sigmoid(z):
    """Logistic function: squash ``z`` element-wise into values between 0 and 1.

    The input is limited to [-500, 500] before exponentiating, which keeps
    ``np.exp`` inside the float64 range for large-magnitude arguments.
    """
    bounded = np.clip(z, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

In [15]:
def log_loss(y, y_hat, eps=1e-15):
    """Mean binary cross-entropy between labels ``y`` and probabilities ``y_hat``.

    Parameters
    ----------
    y : array-like
        True labels (0 or 1).
    y_hat : array-like
        Predicted probabilities, compared with ``y`` position by position.
    eps : float, optional
        Probabilities are clipped to ``[eps, 1 - eps]`` before taking logs.

    Returns
    -------
    float
        ``-mean(y * log(y_hat) + (1 - y) * log(1 - y_hat))``.
    """
    y = np.asarray(y, dtype=float)
    # A prediction that saturates to exactly 0 or 1 would hit log(0) and turn
    # the whole mean into nan/inf, so keep probabilities strictly inside (0, 1).
    y_hat = np.clip(np.asarray(y_hat, dtype=float), eps, 1 - eps)
    return -np.mean(
        y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)
    )

In [16]:
def predict(X, w, b):
    """Return the sigmoid of the linear score ``np.dot(X, w) + b``."""
    return sigmoid(np.dot(X, w) + b)

In [17]:
def gradient_descent(X, y, w, b, learning_rate, epochs):
    """Fit logistic-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : np.ndarray
        Feature matrix; ``X.T`` is multiplied with the per-sample error.
    y : array-like
        Labels, one per row of ``X``.
    w : array-like
        Initial weights. The caller's object is not modified; a float copy
        is updated and returned.
    b : float
        Initial bias.
    learning_rate : float
        Step size applied to both gradients.
    epochs : int
        Number of full passes over the data.

    Returns
    -------
    tuple
        ``(w, b)`` after ``epochs`` updates.

    The log-loss is printed every 100 epochs, computed from the predictions
    made before that epoch's update.
    """
    # Work on a private float copy: the in-place update below would otherwise
    # overwrite the caller's array, and would fail outright on an integer one.
    w = np.array(w, dtype=float)
    m = len(y)
    for epoch in range(epochs):
        # Step 1: Forward pass with the current parameters
        y_hat = predict(X, w, b)

        # Step 2: Gradients of the mean log-loss with respect to w and b
        error = y_hat - y
        dw = np.dot(X.T, error) / m
        db = np.mean(error)

        # Step 3: Update weights and bias
        w -= learning_rate * dw
        b -= learning_rate * db

        # Step 4: Calculate and print the log-loss for monitoring
        if epoch % 100 == 0:
            loss = log_loss(y, y_hat)
            print(f'Epoch {epoch}, Loss: {loss}')

    return w, b


        
        
        

In [18]:
# Toy dataset for a quick sanity check (replace with real data)
X = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5],
])  # four samples, two features each
y = np.array([0, 0, 1, 1])  # one binary label per sample

# Start from all-zero parameters
w = np.zeros(X.shape[1])
b = 0

# Training settings
learning_rate = 0.1
epochs = 1000

# Fit the parameters on the toy data
w, b = gradient_descent(X, y, w, b, learning_rate=learning_rate, epochs=epochs)

# Report the log-loss of the fitted model on the same training data
y_hat_train = predict(X, w, b)
final_loss = log_loss(y, y_hat_train)
print(f'Final training loss: {final_loss}')


Epoch 0, Loss: 0.6931471805599453
Epoch 100, Loss: 0.43544836419731786
Epoch 200, Loss: 0.33167741153496816
Epoch 300, Loss: 0.27285680583235167
Epoch 400, Loss: 0.23497203343834544
Epoch 500, Loss: 0.20828031859877386
Epoch 600, Loss: 0.1882522009069065
Epoch 700, Loss: 0.17252517074513768
Epoch 800, Loss: 0.15975036523409938
Epoch 900, Loss: 0.1491010336869133
Final training loss: 0.14004100260740557


In [28]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Assuming your dataframe is stored in `df`



# Step 2: Convert diagnosis to binary (0 = Benign, 1 = Malignant).
# The mapping is guarded so that re-running this cell is a no-op: pushing
# already-numeric labels through the dict would turn every value into NaN.
if not pd.api.types.is_numeric_dtype(df['diagnosis']):
    df['diagnosis'] = df['diagnosis'].map({'B': 0, 'M': 1})

# Step 3: Select features (the mean values) and the target
X = df[['radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean', 'smoothness_mean',
        'compactness_mean', 'concavity_mean', 'concave points_mean', 'symmetry_mean',
        'fractal_dimension_mean']]

y = df['diagnosis']

# Step 4: Split the data into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Feature scaling (Standardize the data).
# The scaler is fitted on the training split only and then reused on the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


0      1
1      1
2      1
3      1
4      1
      ..
564    1
565    1
566    1
567    1
568    0
Name: diagnosis, Length: 569, dtype: int64

In [29]:

# Step 1: Initialize weights and bias
n_features = X_train.shape[1]  # Number of features
w = np.zeros(n_features)
b = 0

# Step 2: Train the model using gradient descent
learning_rate = 0.01  # You can adjust this
epochs = 1000  # You can adjust the number of epochs
# Pass the variables rather than repeating the literal, so that adjusting
# `learning_rate` above actually changes the training run.
w, b = gradient_descent(X_train, y_train.to_numpy(), w, b, learning_rate, epochs)

# Step 3: Make predictions on the test set
y_test_pred = predict(X_test, w, b)

# Convert predicted probabilities to binary outcomes (0 or 1) at a 0.5 threshold
y_test_pred_labels = np.where(y_test_pred >= 0.5, 1, 0)

def accuracy_score(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``."""
    matches = y_true == y_pred
    n_samples = len(y_true)
    return np.sum(matches) / n_samples

def precision_score(y_true, y_pred):
    """Return the share of predicted positives (label 1) that are truly positive.

    Returns 0 when nothing was predicted positive.
    """
    flagged = y_pred == 1
    n_flagged = np.sum(flagged)
    if n_flagged == 0:
        return 0
    hits = np.sum((y_true == 1) & flagged)
    return hits / n_flagged

def recall_score(y_true, y_pred):
    """Return the share of actual positives (label 1) that were predicted positive.

    Returns 0 when there are no actual positives.
    """
    is_positive = y_true == 1
    n_positive = np.sum(is_positive)
    if n_positive == 0:
        return 0
    hits = np.sum(is_positive & (y_pred == 1))
    return hits / n_positive

def f1_score(y_true, y_pred):
    """Return the harmonic mean of precision and recall.

    Returns 0 when precision and recall sum to zero.
    """
    p = precision_score(y_true, y_pred)
    r = recall_score(y_true, y_pred)
    total = p + r
    if total == 0:
        return 0
    return 2 * (p * r) / total

# Step 4: Score the thresholded test-set predictions with each metric
accuracy, precision, recall, f1 = (
    metric(y_test, y_test_pred_labels)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report the results
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-Score: {f1}')


Epoch 0, Loss: 0.6931471805599453
Epoch 100, Loss: 0.3653157814022778
Epoch 200, Loss: 0.28526122055756853
Epoch 300, Loss: 0.24875296603947514
Epoch 400, Loss: 0.22762535543133128
Epoch 500, Loss: 0.21373782607957373
Epoch 600, Loss: 0.2038541883084372
Epoch 700, Loss: 0.19642666105741455
Epoch 800, Loss: 0.1906190975599504
Epoch 900, Loss: 0.18593896377366204
Accuracy: 0.956140350877193
Precision: 0.975
Recall: 0.9069767441860465
F1-Score: 0.9397590361445783
