In [59]:
import pandas as pd
import numpy as np

In [61]:
df = pd.read_csv("Social_Network_Ads.csv")
df.head()


Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [63]:
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [65]:
df['Gender'] = df['Gender'].map({'Male': 1, 'Female': 0})
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,1,19,19000,0
1,15810944,1,35,20000,0
2,15668575,0,26,43000,0
3,15603246,0,27,57000,0
4,15804002,1,19,76000,0


In [75]:
#df = df.drop("User ID",axis = 1)    #  -> single use
df.describe()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
count,400.0,400.0,400.0,400.0
mean,0.49,37.655,69742.5,0.3575
std,0.500526,10.482877,34096.960282,0.479864
min,0.0,18.0,15000.0,0.0
25%,0.0,29.75,43000.0,0.0
50%,0.0,37.0,70000.0,0.0
75%,1.0,46.0,88000.0,1.0
max,1.0,60.0,150000.0,1.0


In [77]:
print(sum(df["Age"].isnull()))
print(sum(df["EstimatedSalary"].isnull()))

0
0


This tells us that the dataset has no null values and the purchased column has already been preprocessed, ie written in terms of 0 and 1 .
Since we're doing logistic regression, first we need to scale the features using the Z-score method, also known as StandardScaling, 

$$
z = \frac{x - \mu}{\sigma}
$$

Where:  
- x  is the original feature value  
- μ is the mean of the feature  
- σ is the standard deviation of the feature  

In [79]:
X = df[['Age','EstimatedSalary']]
y = df['Purchased']

In [81]:
# Calculate means and standard deviations column-wise
means = np.mean(X, axis=0)
stds = np.std(X, axis=0)

# Standardize
X_scaled = (X - means) / stds

# add bias
X_scaled = np.hstack((np.ones((X_scaled.shape[0], 1)), X))

In [107]:
def sigmoid(z):
    # Clip the values of z to avoid overflow
    z = np.clip(z, -500, 500)
    return 1 / (1 + np.exp(-z))


In [109]:
def compute_cost(X, y, weights):
    m = len(y)  # number of training examples
    z = np.dot(X, weights)  # linear combination (z = Xw)
    predictions = sigmoid(z)  # apply sigmoid to get probabilities
    cost = -(1/m) * np.sum(
        y * np.log(predictions + 1e-15) + 
        (1 - y) * np.log(1 - predictions + 1e-15)
    )
    return cost

In [111]:
def gradient_descent(X, y, learning_rate, num_iterations):
    m = len(y)
    weights = np.zeros(X.shape[1])  # ← initialize here
    cost_history = []

    for i in range(num_iterations):
        z = np.dot(X, weights)
        predictions = sigmoid(z)
        gradient = (1 / m) * np.dot(X.T, (predictions - y))
        weights -= learning_rate * gradient
        cost = compute_cost(X, y, weights)
        cost_history.append(cost)

    return weights, cost_history


In [113]:
def predict(X, weights, threshold=0.5):
    # Step 1: Get the raw scores (z)
    z = np.dot(X, weights)

    # Step 2: Apply sigmoid to get probabilities between 0 and 1
    probs = sigmoid(z)

    # Step 3: Convert probabilities to 0 or 1 using the threshold
    predictions = []
    for p in probs:
        if p >= threshold:
            predictions.append(1)
        else:
            predictions.append(0)
    
    return np.array(predictions)


In [121]:
weights, cost_history = gradient_descent(X, y, learning_rate=0.001, num_iterations=1000)
print(weights,cost_history[0])

[   -7.73375 -1855.625  ] 12.34761256118057


In [99]:
from sklearn.metrics import accuracy_score, confusion_matrix
y_pred = predict(X, weights)
print("Accuracy:", accuracy_score(y, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y, y_pred))
print(X_scaled[5,])

Accuracy: 0.3575
Confusion Matrix:
 [[  0 257]
 [  0 143]]
[1.0e+00 2.7e+01 5.8e+04]
