In [1]:
import numpy as np
import pandas as pd

In [2]:
# loading the dataset

# since no columns are there in the dataset
columns = [
    "age", "workclass", "fnlwgt", "education", "education-num",
    "marital-status", "occupation", "relationship", "race", "sex",
    "capital-gain", "capital-loss", "hours-per-week", "native-country", "income"
]

train_df = pd.read_csv("FCNN_Lab1_dataset/adult.data",names = columns, sep = ', *')
test_df = pd.read_csv('FCNN_Lab1_dataset/adult.test', names=columns, sep=', *',skiprows=1)

train_df['income'] = train_df['income'].str.replace('.', '', regex=False).str.strip()
test_df['income'] = test_df['income'].str.replace('.', '', regex=False).str.strip()

  train_df = pd.read_csv("FCNN_Lab1_dataset/adult.data",names = columns, sep = ', *')
  test_df = pd.read_csv('FCNN_Lab1_dataset/adult.test', names=columns, sep=', *',skiprows=1)


In [3]:
#dealing with NA values, by filling with Mode

columns_with_na = train_df.columns[train_df.isnull().any()].tolist()

for col in columns_with_na:
    mode_value = train_df[col].mode()[0]
    train_df[col] = train_df[col].fillna(mode_value)
    test_df[col] = test_df[col].fillna(mode_value)

#now separating the output from the input features, in both train and test sets
#train sets
x_t_r = train_df.drop("income",axis=1)
y_t_r = train_df["income"].map({"<=50K":0, ">50K":1})

#test sets
x_te_r = test_df.drop("income",axis =1)
y_te_r = test_df["income"].map({"<=50K":0, ">50K":1})

#one hot encoding
x_train = pd.get_dummies(x_t_r)
x_test = pd.get_dummies(x_te_r)

#handling the case in which if the test set doesnt have a particular feature value the one hot encoded vector wont be aligned with the training data in whihch the particular extra feature did appear.

x_train, x_test = x_train.align(x_test,join='left', axis=1, fill_value=0) # fill in the left values as 0

In [4]:
#converting to np

x_train = x_train.values.astype(np.float32)
y_train = y_t_r.values.astype(np.float32).reshape(-1,1) # reshaping the 1-D vector to a 2-D vector so to calculate the loss yi for each input xi

x_test = x_test.values.astype(np.float32)
y_test = y_te_r.values.astype(np.float32).reshape(-1,1)
print(x_train.shape) # gives the number of input neurons required, here 108 are required



(32561, 108)


## **FCNN implementation**

1. total neurons in input layer = 108 
2. total neurons in 1st hidden layer = 65, total neurons in second hidden layer = 33
3. total neurons in output layer = 1
4. total weights in 1st layer = 108*65 = 7020 + 65 biases = 7085 in layer 1->2
5. total weights and biases in 2nd layer = 65*33 + 33 biases = 2178 in layer 2->3
6. total weights and biases in layer 3-> output = 34
7. total parameters to tune = 9297

In [6]:
def forward_pass(x,theta): # relu for hidden and sigmoid for output
    A1 = np.dot(x,theta["w1"]) + theta["b1"]
    H1 = np.maximum(0,A1) #ReLU activation

    A2 = np.dot(H1,theta["w2"]) + theta["b2"]
    H2 = np.maximum(0,A2)

    A3 = np.dot(H2,theta["w3"]) + theta["b3"]
    A3_clipped = np.clip(A3, -500, 500)
    H3 = 1 / (1 + np.exp(-A3_clipped))

    cache = {"A1":A1, "H1":H1, "A2":A2, "H2":H2, "A3":A3, "H3":H3}
    return cache


In [7]:
def backpropagation(x,y,cache,theta):
    # compute the derivative of loss fn wrt output unit (y_exp (is just H3) then aL), the required loss function is the cross-entropy loss
    dA3 = cache["H3"] - y
    # now we compute the grad wrt w3 and b3 (before out layer)
    m = x.shape[0]
    dw3 = (1/m)*(np.dot(cache["H2"].T,dA3))
    db3 = (1/m)*np.sum(dA3, axis=0, keepdims=True)

    dH2 = np.dot(dA3,theta["w3"].T)
    dA2 = np.array(dH2, copy=True)
    dA2[cache["A2"] <= 0] = 0 # if the contribution is less than 0 deriv is 0 other wise 1 so 1*dh2

    dw2 = (1/m)*(np.dot(cache["H1"].T,dA2))
    db2 = (1/m)*np.sum(dA2, axis=0, keepdims=True)

    dH1 = np.dot(dA2,theta["w2"].T)
    dA1 = np.array(dH1, copy=True)
    dA1[cache["A1"] <= 0] = 0

    dw1 = (1/m)*(np.dot(x.T,dA1))
    db1 = (1/m)*np.sum(dA1, axis=0, keepdims=True)

    grad = {"dw1": dw1, "db1": db1,
    "dw2": dw2, "db2": db2,
    "dw3": dw3, "db3": db3}
    return grad

In [8]:
def update(theta, grad, neta = 0.01):
    theta["w1"] -= neta * grad["dw1"]
    theta["b1"] -= neta * grad["db1"]
    
    theta["w2"] -= neta * grad["dw2"]
    theta["b2"] -= neta * grad["db2"]
    
    theta["w3"] -= neta * grad["dw3"]
    theta["b3"] -= neta * grad["db3"]
    
    return theta

In [None]:
def Loss_calc(y,H3): #calculate the cross entropy loss (as we are focusing on binary classification)
    m = y.shape[0] #total samples
    corr = 1e-8
    H3 = np.clip(H3, corr, 1 - corr) # avoiding the log(0) and log(1) cases
    
    # weighted Cross entropy loss
    loss = - (1/m)*np.sum(y * np.log(H3) + 3*(1 - y)*np.log(1 - H3))
    return loss

In [24]:
def SGD(ep):
    # layer 1 
    w1 = np.random.randn(108,65)*np.sqrt(2/108) #He-initialization of weights
    b1 = np.zeros((1,65))
    # layer 2
    w2 = np.random.randn(65,33)*np.sqrt(2/65)
    b2 = np.zeros((1,33)) 
    # layer 3
    w3 = np.random.randn(33,1)*np.sqrt(2/33) 
    b3 = np.zeros((1,1))
    # applying the forwardPass algorithm with backprop for learning the weights
    theta = {"w1":w1, "b1":b1, "w2":w2, "b2":b2, "w3":w3, "b3":b3}
    iter = 0
    neta = 0.01
    for epoch in range(ep): # outer loop for training different epochs, to stop the network from memorising the training data
        ind = np.arange(x_train.shape[0])
        np.random.shuffle(ind)
        x_shf = x_train[ind]
        y_shf = y_train[ind]
        # inner loop for minibatch grad descent
        for i in range(0,x_shf.shape[0],128):
            x_b = x_shf[i:i+128]
            y_b = y_shf[i:i+128]

            cache = forward_pass(x_b,theta) #cache is the activations + pre-activations
            grad = backpropagation(x_b, y_b, cache, theta)
            theta = update(theta,grad,neta)
            if iter%100 == 0:
            # Calculate current loss on the current batch
                current_loss = Loss_calc(y_b, cache['H3'])
                print(f"Iteration {iter} | Loss: {current_loss:.3f}")
            iter+=1
        neta = neta*0.97
    return theta

In [None]:
# main

theta = SGD(50)
def evaluate_metrics(x_test, y_test, theta):
    # 1. Get predictions
    cache = forward_pass(x_test, theta)
    # Convert probabilities to binary 0 or 1
    predictions = (cache["H3"] > 0.5).astype(int)
    
    # 2. Basic Accuracy
    accuracy = np.mean(predictions == y_test) * 100
    tp = np.sum((predictions == 1) & (y_test == 1))
    
    tn = np.sum((predictions == 0) & (y_test == 0))
    fp = np.sum((predictions == 1) & (y_test == 0))
    fn = np.sum((predictions == 0) & (y_test == 1))
    
    precision = tp / (tp + fp + 1e-15) 
    recall = tp / (tp + fn + 1e-15)
    f1_score = 2 * (precision * recall) / (precision + recall + 1e-15)
    
    # Print a clean report
    print("--- Model Performance Report ---")
    print(f"Accuracy:  {accuracy:.2f}%")
    print(f"Precision: {precision:.4f} (How many predicted high-earners actually are)")
    print(f"Recall:    {recall:.4f} (How many actual high-earners were caught)")
    print(f"F1-Score:  {f1_score:.4f} (Balanced harmonic mean)")
    print("--------------------------------")
    
    return {"acc": accuracy, "p": precision, "r": recall, "f1": f1_score}

# Usage:
results = evaluate_metrics(x_test, y_test, theta)

Iteration 0 | Loss: 41.447
Iteration 100 | Loss: 1.558
Iteration 200 | Loss: 1.382
Iteration 300 | Loss: 1.272
Iteration 400 | Loss: 1.206
Iteration 500 | Loss: 1.152
Iteration 600 | Loss: 1.105
Iteration 700 | Loss: 1.091
Iteration 800 | Loss: 1.055
Iteration 900 | Loss: 1.036
Iteration 1000 | Loss: 1.040
Iteration 1100 | Loss: 1.029
Iteration 1200 | Loss: 1.000
Iteration 1300 | Loss: 1.003
Iteration 1400 | Loss: 0.993
Iteration 1500 | Loss: 0.951
Iteration 1600 | Loss: 1.010
Iteration 1700 | Loss: 0.969
Iteration 1800 | Loss: 1.003
Iteration 1900 | Loss: 0.977
Iteration 2000 | Loss: 0.960
Iteration 2100 | Loss: 0.972
Iteration 2200 | Loss: 0.934
Iteration 2300 | Loss: 0.966
Iteration 2400 | Loss: 0.944
Iteration 2500 | Loss: 0.951
Iteration 2600 | Loss: 1.014
Iteration 2700 | Loss: 0.966
Iteration 2800 | Loss: 0.934
Iteration 2900 | Loss: 1.020
Iteration 3000 | Loss: 0.971
Iteration 3100 | Loss: 0.961
Iteration 3200 | Loss: 0.956
Iteration 3300 | Loss: 0.960
Iteration 3400 | Loss: 0.

In [26]:
def min_max_scaling(df):
    # Identify numerical columns (exclude one-hot encoded ones)
    num_cols = ["age", "fnlwgt", "education-num", "capital-gain", "capital-loss", "hours-per-week"]
    
    # Apply Min-Max formula: (x - min) / (max - min) 
    for col in num_cols:
        col_min = df[col].min()
        col_max = df[col].max()
        df[col] = (df[col] - col_min) / (col_max - col_min)
    return df

x_t_r_scaled = min_max_scaling(x_t_r.copy())
x_te_r_scaled = min_max_scaling(x_te_r.copy())

x_train_scaled = pd.get_dummies(x_t_r_scaled)
x_test_scaled = pd.get_dummies(x_te_r_scaled)
x_train_scaled, x_test_scaled = x_train_scaled.align(x_test_scaled, join='left', axis=1, fill_value=0)

x_train = x_train_scaled.values.astype(np.float32)
x_test = x_test_scaled.values.astype(np.float32)
theta_scaled = SGD(50)

print("--- Results with Min-Max Scaling ---")
evaluate_metrics(x_test, y_test, theta_scaled)

Iteration 0 | Loss: 1.418
Iteration 100 | Loss: 1.039
Iteration 200 | Loss: 0.986
Iteration 300 | Loss: 0.873
Iteration 400 | Loss: 0.835
Iteration 500 | Loss: 0.858
Iteration 600 | Loss: 0.852
Iteration 700 | Loss: 0.706
Iteration 800 | Loss: 0.781
Iteration 900 | Loss: 0.759
Iteration 1000 | Loss: 0.705
Iteration 1100 | Loss: 0.771
Iteration 1200 | Loss: 0.677
Iteration 1300 | Loss: 0.652
Iteration 1400 | Loss: 0.630
Iteration 1500 | Loss: 0.699
Iteration 1600 | Loss: 0.645
Iteration 1700 | Loss: 0.733
Iteration 1800 | Loss: 0.604
Iteration 1900 | Loss: 0.714
Iteration 2000 | Loss: 0.619
Iteration 2100 | Loss: 0.666
Iteration 2200 | Loss: 0.610
Iteration 2300 | Loss: 0.599
Iteration 2400 | Loss: 0.832
Iteration 2500 | Loss: 0.662
Iteration 2600 | Loss: 0.813
Iteration 2700 | Loss: 0.600
Iteration 2800 | Loss: 0.548
Iteration 2900 | Loss: 0.783
Iteration 3000 | Loss: 0.690
Iteration 3100 | Loss: 0.703
Iteration 3200 | Loss: 0.677
Iteration 3300 | Loss: 0.716
Iteration 3400 | Loss: 0.6

{'acc': 84.1901603095633,
 'p': 0.7101784534038335,
 'r': 0.5587623504940198,
 'f1': 0.6254365541327118}