## Building a Simple Perceptron (Logistic Regression)

In this section, we are going to build a **simple Perceptron model** that uses:

- **Sigmoid activation function**  
- **Binary Cross-Entropy (BCE) loss function**

This setup is essentially equivalent to a **Logistic Regression model**.  

We will use the **Titanic** to train and evaluate our model.


In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
import torch

# 1) Dataset

In [2]:
data = sns.load_dataset('titanic')
data.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [3]:
# Selecting useful features

df = data[["survived", "pclass", "sex", "age", "fare", "embarked"]].copy()
df.head()

Unnamed: 0,survived,pclass,sex,age,fare,embarked
0,0,3,male,22.0,7.25,S
1,1,1,female,38.0,71.2833,C
2,1,3,female,26.0,7.925,S
3,1,1,female,35.0,53.1,S
4,0,3,male,35.0,8.05,S


## 1.1) Handling the missing values

In [4]:
df.isnull().sum()

Unnamed: 0,0
survived,0
pclass,0
sex,0
age,177
fare,0
embarked,2


In [5]:
# Filling missing age value with median of age
median_age = df['age'].median()
df['age'] = df['age'].fillna(median_age)

# Filling missing embarked value with mode
mode_embarked = df['embarked'].mode()[0]
df['embarked'] = df['embarked'].fillna(mode_embarked)

## 1.2) Encoding categorical features

- sex → binary encoding (male=0, female=1)

- embarked → one-hot encoding (C, Q, S → separate columns)

In [6]:
# Binary encoding for sex
df['sex'] = df['sex'].map({'male': 0, 'female': 1})

# One-hot encoding for embarked
df = pd.get_dummies(df, columns=['embarked'], dtype = int)

df.head()

Unnamed: 0,survived,pclass,sex,age,fare,embarked_C,embarked_Q,embarked_S
0,0,3,0,22.0,7.25,0,0,1
1,1,1,1,38.0,71.2833,1,0,0
2,1,3,1,26.0,7.925,0,0,1
3,1,1,1,35.0,53.1,0,0,1
4,0,3,0,35.0,8.05,0,0,1


## 1.3) Train test split

In [7]:
# Defining X(input) and Y(output)

x = df.drop('survived', axis=1)
y = df['survived']

In [8]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

## 1.4) Scaling age & fare column

In [9]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train[['age', 'fare']] = scaler.fit_transform(x_train[['age', 'fare']])
x_test[['age', 'fare']] = scaler.transform(x_test[['age', 'fare']])

x_train.head()

Unnamed: 0,pclass,sex,age,fare,embarked_C,embarked_Q,embarked_S
331,1,0,1.253641,-0.078684,0,0,1
733,2,0,-0.477284,-0.377145,0,0,1
382,3,0,0.215086,-0.474867,0,0,1
704,3,0,-0.246494,-0.47623,0,0,1
813,3,1,-1.785093,-0.025249,0,0,1


# 2) Converting Numpy arrays to PyTroch tensors

In [10]:
# Converting DataFrames to NumPy arrays
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

# Converting numpy arrays to tensors
x_train = torch.from_numpy(x_train).float()
x_test = torch.from_numpy(x_test).float()
y_train = torch.from_numpy(y_train).long()                       # Labels are categorial, that's why we use long(integer)
y_test = torch.from_numpy(y_test).long()

print(type(x_train))
print(type(y_train))
print(type(x_test))
print(type(y_test))

<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>


In [11]:
# First 5 data points
print(x_train[:5])
print(y_train[:5])

tensor([[ 1.0000,  0.0000,  1.2536, -0.0787,  0.0000,  0.0000,  1.0000],
        [ 2.0000,  0.0000, -0.4773, -0.3771,  0.0000,  0.0000,  1.0000],
        [ 3.0000,  0.0000,  0.2151, -0.4749,  0.0000,  0.0000,  1.0000],
        [ 3.0000,  0.0000, -0.2465, -0.4762,  0.0000,  0.0000,  1.0000],
        [ 3.0000,  1.0000, -1.7851, -0.0252,  0.0000,  0.0000,  1.0000]])
tensor([0, 0, 0, 0, 0])


# 3) Defining the perceptron model

In [12]:
class perceptron():

  def __init__(self, input_data):

    intput_feature_count = input_data.shape[1]                               # Number of columns in the input data

    self.weight = torch.rand(intput_feature_count, requires_grad = True)     # Number of weights = Number of i/p feature
    self.bias = torch.rand(1, requires_grad = True)                          # 1 bias in 1 perceptron

  def forward(self, input_data):

    z = torch.matmul(input_data, self.weight) + self.bias                    # weighted sum = Weight*input_feature + bias
    y_pred = torch.sigmoid(z)                                                 # Sigmoid activation function
    return y_pred

  def loss_function(self, y_pred, y):                                         # Loss function: Binary Cross Entropy

    # Clamping(restricting value in a certain range) prediction to avoid log(0)
    epsilon = 1e-7
    min = epsilon
    max = 1 - epsilon
    y_pred = torch.clamp(y_pred, min, max)

    # Calculating loss
    loss = -(y * torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred)).mean()     # BCE = -[ylog(y_pred) + (1 - y)log(1 - y_pred)]. We take mean of all loss, because we
    return loss                                                                  # want to return scalar value when called .backward(). And we need to give scalar beacuse
                                                                                 # PyTorch doesn’t know how to backpropagate unless you give it a single scalar value.

In [13]:
# Model Paramameters:
lr = 0.001
num_epoch = 25

# 4) Training model on the dataset(Training pipeline)

In [14]:
# Create a model
model = perceptron(x_train)

# Trainin model
for epoch in range(num_epoch):

  # Forward pass
  y_pred = model.forward(x_train)

  # Loss calculation
  loss = model.loss_function(y_pred, y_train)

  # Backward pass(using Atograd)
  loss.backward()

  # Weights update
  with torch.no_grad():                                   # If torch.no_grad() is not used then PyTorch would start tracking this subtraction as part of the graph —
                                                          # meaning in the next .backward(), gradients would try to flow through the update itself, which is wrong.
    model.weight -= lr * model.weight.grad
    model.bias -= lr * model.bias.grad

  # Zeroing gradients(Reseting gradients for next epoch's backward pass)
  model.weight.grad.zero_()
  model.bias.grad.zero_()

  # print loss in each epoch
  print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')

Epoch: 1, Loss: 0.7997661232948303
Epoch: 2, Loss: 0.7991362810134888
Epoch: 3, Loss: 0.7985085248947144
Epoch: 4, Loss: 0.7978827953338623
Epoch: 5, Loss: 0.7972591519355774
Epoch: 6, Loss: 0.7966373562812805
Epoch: 7, Loss: 0.7960177063941956
Epoch: 8, Loss: 0.7954000234603882
Epoch: 9, Loss: 0.7947843074798584
Epoch: 10, Loss: 0.794170618057251
Epoch: 11, Loss: 0.7935588955879211
Epoch: 12, Loss: 0.7929491996765137
Epoch: 13, Loss: 0.792341411113739
Epoch: 14, Loss: 0.7917357087135315
Epoch: 15, Loss: 0.791131854057312
Epoch: 16, Loss: 0.7905300259590149
Epoch: 17, Loss: 0.7899301052093506
Epoch: 18, Loss: 0.7893320918083191
Epoch: 19, Loss: 0.7887360453605652
Epoch: 20, Loss: 0.7881420254707336
Epoch: 21, Loss: 0.7875497937202454
Epoch: 22, Loss: 0.7869595289230347
Epoch: 23, Loss: 0.7863712310791016
Epoch: 24, Loss: 0.7857848405838013
Epoch: 25, Loss: 0.785200297832489


In [15]:
# Looking at the updates parameters:
print(model.weight)
print(model.bias)

tensor([0.1673, 0.5220, 0.5219, 0.8244, 0.2454, 0.1001, 0.1048],
       requires_grad=True)
tensor([0.0091], requires_grad=True)


# 5) Model Evaluation

In [16]:
with torch.no_grad():
  # Predicting values on testing data
  y_test_pred = model.forward(x_test)
  print(y_test_pred)

  # Converting probability values to predicted class
  y_pred = (y_test_pred > 0.8).float()
  print(y_pred)

tensor([0.6065, 0.5424, 0.4637, 0.5113, 0.5815, 0.8036, 0.6661, 0.4636, 0.5520,
        0.5728, 0.7150, 0.6943, 0.7264, 0.5952, 0.6012, 0.5942, 0.7525, 0.6663,
        0.5123, 0.7865, 0.5037, 0.5934, 0.5831, 0.4689, 0.5696, 0.4428, 0.6852,
        0.5522, 0.3845, 0.6936, 0.4537, 0.6116, 0.5850, 0.6664, 0.4443, 0.5939,
        0.5693, 0.6661, 0.7212, 0.5437, 0.6832, 0.5504, 0.5444, 0.5420, 0.8354,
        0.5285, 0.4547, 0.4989, 0.5004, 0.9458, 0.5237, 0.9362, 0.7261, 0.7187,
        0.8394, 0.7503, 0.5523, 0.9945, 0.7348, 0.5820, 0.4731, 0.5280, 0.6937,
        0.5317, 0.5420, 0.8502, 0.6832, 0.5536, 0.4660, 0.9298, 0.6764, 0.9844,
        0.5627, 0.8947, 0.4935, 0.6498, 0.6880, 0.7157, 0.7335, 0.8045, 0.5494,
        0.6483, 0.9523, 0.5420, 0.8127, 0.9144, 0.9086, 0.7961, 0.5292, 0.5412,
        0.4045, 0.5758, 0.8254, 0.5419, 0.5437, 0.5436, 0.7931, 0.7028, 0.7176,
        0.5338, 0.8283, 0.6093, 0.8119, 0.6373, 0.5691, 0.5133, 0.8184, 0.7483,
        0.5698, 0.6964, 0.7433, 0.4661, 

In [17]:
# Accuracy
num_correct_pred = (y_pred == y_test).sum().item()
accuracy = num_correct_pred / len(y_test)

print("Test Accuracy:", accuracy * 100, "%")

Test Accuracy: 60.33519553072626 %
