<a href="https://colab.research.google.com/github/DhanvinS/ML_Models-Supervised-and-Unsupervised/blob/main/Linear%20and%20Logistic%20Regression/Logistic_Regression_From_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [42]:
import io

import numpy as np
import pandas as pd

from google.colab import files
# Upload the CSV through the Colab file picker. The result gets its own name so
# the imported `files` module is not shadowed by the returned dict.
uploaded = files.upload()

# Load your CSV (change column names if needed).
# Parse the bytes that were just uploaded instead of a hard-coded path: when a
# file with the same name already exists, Colab saves the new upload under a
# suffixed name (e.g. "insurance_data (3).csv"), so reading "insurance_data.csv"
# from disk would silently load an older copy.
if uploaded:
    # Only the first uploaded file is used.
    _uploaded_name, _uploaded_bytes = next(iter(uploaded.items()))
    df = pd.read_csv(io.BytesIO(_uploaded_bytes))
else:
    # Nothing was uploaded (e.g. the dialog was cancelled): fall back to a copy
    # already present in the working directory, as the original cell did.
    df = pd.read_csv("insurance_data.csv")

# Extract columns as row vectors so that samples run along axis 1.
ages = df["age"].values.reshape(1, -1)          # shape (1, m)
insurance = df["bought_insurance"].values.reshape(1, -1)  # shape (1, m)

# Normalize age to zero mean / unit variance (important for good training).
ages_norm = (ages - np.mean(ages)) / np.std(ages)

print("Shapes → X:", ages_norm.shape, "Y:", insurance.shape)


Saving insurance_data.csv to insurance_data (3).csv
Shapes → X: (1, 27) Y: (1, 27)


In [43]:
def sigmoid(z):
    """Compute the logistic function 1 / (1 + exp(-z)) element-wise.

    The naive formula overflows in ``np.exp(-z)`` for large negative ``z``.
    This version only ever exponentiates non-positive numbers, so it never
    overflows and emits no overflow warning.

    Args:
        z: Real-valued scalar or array-like.

    Returns:
        Values in [0, 1] with the same shape as ``z`` (a NumPy scalar when
        ``z`` is a scalar).
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        # Integer / boolean input: promote so the arithmetic below is float.
        z = z.astype(float)

    # exp(-|z|) lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array into a scalar and leaves n-d arrays as-is.
    return result[()]

In [44]:
def initialize_params(n_features):
    """Create the starting parameters for the model.

    Args:
        n_features: Number of input features (rows of the weight vector).

    Returns:
        A tuple ``(w, b)``: ``w`` is an all-zero array of shape
        ``(n_features, 1)`` and ``b`` is the integer ``0``.
    """
    weights = np.zeros(shape=(n_features, 1))  # column vector of zeros
    bias = 0
    return weights, bias


In [45]:
def propagate(X, Y, w, b):
    """Run one forward/backward pass of logistic regression.

    Args:
        X: Input array; samples are indexed along axis 1 (``m = X.shape[1]``).
        Y: Labels, broadcast against the activations (same layout as the
            activations, i.e. one value per sample).
        w: Weight array; ``w.T`` is multiplied with ``X``.
        b: Bias added to every pre-activation.

    Returns:
        A tuple ``(grads, cost)`` where ``grads`` is a dict with keys ``"dw"``
        and ``"db"`` and ``cost`` is the mean binary cross-entropy.
    """
    m = X.shape[1]  # number of samples

    # Forward pass
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)

    # Keep activations strictly inside (0, 1) for the logarithms only. Without
    # this, a saturated activation (exactly 0 or 1) yields 0 * log(0) = NaN and
    # poisons the reported cost. Gradients below still use the unclipped A.
    eps = np.finfo(float).eps
    A_safe = np.clip(A, eps, 1 - eps)
    cost = -(1/m) * np.sum(Y*np.log(A_safe) + (1-Y)*np.log(1-A_safe))

    # Backward pass
    dw = (1/m) * np.dot(X, (A - Y).T)
    db = (1/m) * np.sum(A - Y)

    return {"dw": dw, "db": db}, cost


In [46]:
def optimize(X, Y, w, b, num_iterations, learning_rate, print_every=200):
    """Fit the parameters with plain gradient descent.

    Args:
        X: Input array passed through to ``propagate``.
        Y: Label array passed through to ``propagate``.
        w: Initial weights. The caller's array is not modified.
        b: Initial bias.
        num_iterations: Number of gradient-descent steps to run.
        learning_rate: Step size applied to both gradients.
        print_every: Print the cost every this many iterations (starting at
            iteration 0). Pass ``0`` or ``None`` to disable logging.

    Returns:
        A dict ``{"w": w, "b": b}`` holding the updated parameters.
    """
    # Work on a float copy: the in-place update below would otherwise mutate
    # the caller's array and fail outright for integer-dtype weights.
    w = np.array(w, dtype=float)

    for i in range(num_iterations):
        grads, cost = propagate(X, Y, w, b)

        w -= learning_rate * grads["dw"]
        b = b - learning_rate * grads["db"]

        if print_every and i % print_every == 0:
            print(f"Iteration {i} | Cost: {cost:.4f}")

    return {"w": w, "b": b}


In [47]:
def predict(X, w, b):
    """Classify samples with a fitted logistic-regression model.

    Args:
        X: Input array; ``w.T`` is multiplied with it.
        w: Weight array.
        b: Bias added to every pre-activation.

    Returns:
        Integer array of 0/1 labels; a sample is labelled 1 only when its
        sigmoid activation is strictly greater than 0.5.
    """
    logits = np.dot(w.T, X) + b
    probabilities = sigmoid(logits)
    is_positive = probabilities > 0.5
    return is_positive.astype(int)


In [51]:
def LR(X, Y, num_iterations=2000, learning_rate=0.1):
    """Train logistic regression on (X, Y) and report training accuracy.

    Args:
        X: Input array; its first dimension is taken as the feature count.
        Y: Label array compared element-wise against the predictions.
        num_iterations: Number of gradient-descent steps.
        learning_rate: Gradient-descent step size.

    Returns:
        The parameter dict returned by ``optimize`` (read here via its
        ``"w"`` and ``"b"`` keys).
    """
    initial_w, initial_b = initialize_params(X.shape[0])
    params = optimize(X, Y, initial_w, initial_b, num_iterations, learning_rate)

    # Score the fitted model on the same data it was trained on.
    Y_pred = predict(X, params["w"], params["b"])
    error_rate = np.mean(np.abs(Y_pred - Y))
    accuracy = 100 - error_rate * 100
    print("\nTraining Accuracy:", accuracy, "%")

    return params


In [49]:
# Fit the model on the normalized ages and keep the learned parameters.
params = LR(
    ages_norm,
    insurance,
    num_iterations=3000,
    learning_rate=0.1,
)


Iteration 0 | Cost: 0.6931
Iteration 200 | Cost: 0.3800
Iteration 400 | Cost: 0.3780
Iteration 600 | Cost: 0.3779
Iteration 800 | Cost: 0.3779
Iteration 1000 | Cost: 0.3779
Iteration 1200 | Cost: 0.3779
Iteration 1400 | Cost: 0.3779
Iteration 1600 | Cost: 0.3779
Iteration 1800 | Cost: 0.3779
Iteration 2000 | Cost: 0.3779
Iteration 2200 | Cost: 0.3779
Iteration 2400 | Cost: 0.3779
Iteration 2600 | Cost: 0.3779
Iteration 2800 | Cost: 0.3779

Training Accuracy: 88.88888888888889 %


In [50]:
test_age = np.array([[40]])  # example age as a single-sample (1, 1) input
# Scale with the same statistics that were used to build ages_norm.
test_age_norm = (test_age - np.mean(ages)) / np.std(ages)

pred = predict(test_age_norm, params["w"], params["b"])
# `pred` is a (1, 1) array. Calling int() on an array with ndim > 0 is
# deprecated since NumPy 1.25, so extract the single element explicitly.
print("Prediction for age 40:", int(pred.item()))


Prediction for age 40: 1


  print("Prediction for age 40:", int(pred))
