 Learning Activations in NeuralNetworks


In [1]:
import numpy as np
import pandas as pd

In [2]:
#Load the Dataset
df=pd.read_csv('/content/data (1).csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [3]:
df=df.isna().sum()
df

id                           0
diagnosis                    0
radius_mean                  0
texture_mean                 0
perimeter_mean               0
area_mean                    0
smoothness_mean              0
compactness_mean             0
concavity_mean               0
concave points_mean          0
symmetry_mean                0
fractal_dimension_mean       0
radius_se                    0
texture_se                   0
perimeter_se                 0
area_se                      0
smoothness_se                0
compactness_se               0
concavity_se                 0
concave points_se            0
symmetry_se                  0
fractal_dimension_se         0
radius_worst                 0
texture_worst                0
perimeter_worst              0
area_worst                   0
smoothness_worst             0
compactness_worst            0
concavity_worst              0
concave points_worst         0
symmetry_worst               0
fractal_dimension_worst      0
Unnamed:

In [4]:
from sklearn.model_selection import train_test_split
x, y = train_test_split(df, test_size=0.2, random_state=42)

In [5]:
def softmax(x):
    exps = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)

In [6]:
# define cross-entropy loss function
def categorical_crossentropy(y_pred, y_true):
    num_samples = y_pred.shape[0]
    return -np.sum(y_true * np.log(y_pred + 1e-9)) / num_samples

In [7]:
# define forward propagation function
def forward_propagation(X, W1, b1, W2, b2):
    # calculate dot product of input and first layer weights, add bias and apply activation function
    z1 = np.dot(X, W1) + b1
    a1 = softmax(z1)
    # calculate dot product of first layer output and second layer weights, add bias and apply softmax activation function
    z2 = np.dot(a1, W2) + b2
    a2 = softmax(z2)
    return a1, a2

In [8]:
def backward_propagation(X, y_true, a1, a2, W2):
    num_samples = X.shape[0]
    
    # calculate gradients for second layer weights and bias
    dZ2 = a2 - y_true
    dW2 = np.dot(a1.T, dZ2) / num_samples
    db2 = np.sum(dZ2, axis=0, keepdims=True) / num_samples
    
    # calculate gradients for first layer weights and bias
    dZ1 = np.dot(dZ2, W2.T) * (a1 * (1 - a1))
    dW1 = np.dot(X.T, dZ1) / num_samples
    db1 = np.sum(dZ1, axis=0, keepdims=True) / num_samples
    
    return dW1, db1, dW2, db2

In [9]:
def train(X, y_true, n_hidden, n_classes, learning_rate, n_epochs):
    # initialize weights and biases
    W1 = np.random.randn(X.shape[1], n_hidden)
    b1 = np.zeros((1, n_hidden))
    W2 = np.random.randn(n_hidden, n_classes)
    b2 = np.zeros((1, n_classes))
    
    # iterate over epochs
    for i in range(n_epochs):
        # perform forward propagation
        a1, a2 = forward_propagation(X, W1, b1, W2, b2)
        
        # calculate loss
        loss = categorical_crossentropy(a2, y_true)
        
        # perform backward propagation
        dW1, db1, dW2, db2 = backward_propagation(X, y_true, a1, a2, W2)
        
        # update weights and biases
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2
        
        # print loss every 100 epochs
        if i % 100 == 0:
            print(f'Epoch {i}, Loss: {loss:.4f}')

In [15]:
# generate some sample data
X = np.random.randn(100, 10)
y_true = np.random.randint(0, 3, (100, 1))
y_true = np.eye(3)[y_true.reshape(-1)]

# train the neural network
n_hidden = 5
n_classes = 3
learning_rate = 0.01
n_epochs = 500

train(X, y_true, n_hidden, n_classes, learning_rate, n_epochs)

Epoch 0, Loss: 1.2587
Epoch 100, Loss: 1.1907
Epoch 200, Loss: 1.1536
Epoch 300, Loss: 1.1325
Epoch 400, Loss: 1.1193
