In [1]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from google.colab import drive

In [2]:
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [3]:
# Load the iris dataset
data = load_iris()
X = data['data']
T = data['target']

def one_hot_encoding(targets):
    # Get unique categories
    categories = np.unique(targets)
    # print('categories:',categories)
    # Get the number of categories
    number_of_categories = categories.size
    # Initialize an array to store the encoded target values
    encoded_T = np.array([list(np.zeros(number_of_categories, dtype=int)) for i in range(targets.size)])

    # Iterate through each target value and encode it
    for (i, t) in enumerate(targets):
        # Get the index of the category in the unique categories array
        category_index = list(categories).index(t)
        # Set the corresponding element in the encoded target array to 1
        encoded_T[i][category_index] = 1
    return encoded_T

In [4]:
XNAME = 'x1'; XLABEL = r'$x_1$'
YNAME = 'x2'; YLABEL = r'$x_2$'
RANGE = (-6, 6); STEP = 0.1
def plot_scatter(sig, bkg, ds=None, xname=XNAME, xlabel=XLABEL, yname=YNAME, ylabel=YLABEL, range=RANGE, step=STEP, title="Scatter plot"):

  fig, ax = plt.subplots()

  # Decision surface
  if ds:
    (xx, yy, Z) = ds # unpack contour data
    cs = plt.contourf(xx, yy, Z, levels=[0,0.5,1], colors=['orange','dodgerblue'], alpha=0.3)

  # Scatter signal and background:
  ax.scatter(sig[xname], sig[yname], marker='o', s=10, c='dodgerblue', alpha=1, label='Positive class')
  ax.scatter(bkg[xname], bkg[yname], marker='o', s=10, c='orange',     alpha=1, label='Negative class')

  # Axes, legend and plot:
  ax.set_xlim(range); ax.set_xlabel(xlabel)
  ax.set_ylim(range); ax.set_ylabel(ylabel)

  ax.legend(bbox_to_anchor=(1.04, 0.5), loc="center left", frameon=False)
  ax.set_title(title)
  plt.show()

## Weighted Sum function

In [5]:
def weighted_sum(x, w, b):
    """
    Compute the weighted sum of inputs.

    Parameters:
    x (numpy.ndarray): Input data.
    w (numpy.ndarray): Weights matrix.
    b (numpy.ndarray): Bias vector.

    Returns:
    numpy.ndarray or None: Weighted sum of inputs, or None if an error occurs.
    """
    try:
        # print(w.shape,x.shape)
        w = w.transpose()
        # print(w.shape,x.shape)
        z = np.dot(w, x) + b

        return z
    except Exception as e:
        # Handle exceptions, if any, and print an error message.
        print("An error occurred:", e)
        return


## Activation functions

In [6]:
def tanh(z):
    """
    Compute the hyperbolic tangent function.

    Parameters:
    z (numpy.ndarray): Input array.

    Returns:
    numpy.ndarray: Output of the hyperbolic tangent function.
    """
    return (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))

def sigmoid(z):
    """
    Compute the sigmoid function.

    Parameters:
    z (numpy.ndarray): Input array.

    Returns:
    numpy.ndarray: Output of the sigmoid function.
    """
    # print('z received',z)
    return 1 / (1 + np.exp(-z))

def relu(z):
    """
    Compute the ReLU (Rectified Linear Unit) function.

    Parameters:
    z (numpy.ndarray): Input array.

    Returns:
    numpy.ndarray: Output of the ReLU function.
    """
    z[z <= 0] = 0
    return z

def softmax(z):
    exp_z=np.exp(z)
    return  exp_z/ np.sum(exp_z)

## Get activation function

In [7]:
def feedforward_activation(input_x, weights, biases, activation='sigmoid'):
    """
    Perform a feedforward operation with an optional activation function.

    Parameters:
    input_x (numpy.ndarray): Input data.
    weights (numpy.ndarray): Weights for the feedforward operation.
    biases (numpy.ndarray): Biases for the feedforward operation.
    activation (str): Activation function to use ('sigmoid', 'tanh', or 'relu'). Default is 'sigmoid'.

    Returns:
    numpy.ndarray: Output of the feedforward operation with optional activation.
    """
    z = weighted_sum(input_x, weights, biases)
    # print('z',z)
    if activation == 'tanh':
        nodes = tanh(z)
    elif activation == 'sigmoid':
        nodes = sigmoid(z)
    elif activation == 'relu':
        nodes = relu(z)
    elif activation=='softmax':

          nodes =softmax(z)
    return nodes,z


## Cost function

In [8]:
def cost(y,t):
   return np.sum((y-t)**2)/2

## Output layer delta function

In [9]:
def delta_n(a_n,t_n):
     return (a_n-t_n)*a_n*(1-a_n)

## Hidden layer m delta function

In [10]:
def delta_m(a_m,w,s_n):

    try:
        # print(w.shape,s_n.shape)
        # w = w.transpose()
        # print(w.shape,s_n.shape)

        return np.dot(w,s_n)
    except Exception as e:
        # Handle exceptions, if any, and print an error message.
        print("An error occurred:", e)
        return


## Derivative functions

In [11]:
def tanh_derivative(z):
    return 1 - np.tanh(z)**2

def sigmoid_derivative(z):
    return sigmoid(z) * (1 - sigmoid(z))

def relu_derivative(z):
    return np.where(z < 0, 0, 1)

def softmax_derivative(z):

    return  softmax_derivative(z)*(1-softmax_derivative(z))
def cost_derivative():
    print()

## Get prime function

In [12]:
def get_prime(z_n,func_name='sigmoid'):

    if func_name == 'tanh':
        derivative_z = tanh_derivative(z_n)
    elif func_name == 'sigmoid':
        derivative_z = sigmoid_derivative(z_n)
    elif func_name == 'relu':
        derivative_z = relu_derivative(z_n)
    elif func_name=='softmax':
          derivative_z =softmax_derivative(z_n)
    return derivative_z


## Feedforward function

In [13]:
def feedforward(input_x, weights, biases,number_of_layers_excluding_input=1, hidden_layer_activation='sigmoid',
                output_activation='sigmoid'):
    """
    Perform a feedforward operation through the neural network.

    Parameters:
    input_x (numpy.ndarray): Input data.
    weights (list of numpy.ndarray): List of weight matrices for each layer.
    biases (list of numpy.ndarray): List of bias vectors for each layer.
    hidden_layer_activation (str): Activation function for hidden layers. Default is 'sigmoid'.
    output_activation (str): Activation function for the output layer. Default is 'sigmoid'.
    number_of_layers_excluding_input (int): Number of layers excluding input in the network. Default is 1.
    """
    print()
    # List to store the outputs of each layer
    a0,z0=feedforward_activation(input_x[0], weights[0], biases[0], hidden_layer_activation)
    layer_outputs = [a0]
    weight_sums=[z0]

    # Loop through each hidden layer
    if number_of_layers_excluding_input > 1:
        for num_layer in range(1, number_of_layers_excluding_input):

            # If not the last hidden layer
            if num_layer != number_of_layers_excluding_input-1:

                # Perform feedforward with hidden layer activation
                a,z=feedforward_activation(layer_outputs[-1], weights[num_layer], biases[num_layer], hidden_layer_activation)
                layer_outputs.append(a)
                weight_sums.append(z)

            else:
                # Perform feedforward with output layer activation for the last layer
                # print('last layer num=',num_layer)
                a,z=feedforward_activation(layer_outputs[-1], weights[num_layer], biases[num_layer], output_activation)
                layer_outputs.append(a)
                weight_sums.append(z)

    return layer_outputs,weight_sums

Create a neural network with the required number of layers
and the required number of nodes in each layer.
Create matrices for the edge weights between each pair of successive layers
and fill the matrices with random values.
Create arrays for the bias values for each layer and fill with random values.
Choose an activation function for each layer.
(Usually the random values are chosen from a fixed range such as [−1, 1].)
(We use W to denote the current values of edge weights and biases.)
Choose a learning rate η (usually between 0.001 and 0.1).

The number of hidden neurons should be 2/3 the size of the input layer, plus the size of the output layer.

In [14]:
range(5)[::-1]

range(4, -1, -1)

In [15]:
def neural_training(input_x,T,number_of_nodes_per_hidden_layer=[],learning_rate=0.01,hidden_layer_activation='sigmoid',output_activation='sigmoid',nodes_inc=3 ):
    number_of_features=input_x[0].size

    if len(number_of_nodes_per_hidden_layer)==0:
          number_of_layers_excluding_input=int(number_of_features*2/3)+1
          number_of_nodes_per_hidden_layer=[number_of_features+nodes_inc for num in range(number_of_layers_excluding_input-1) ]
    else:
          number_of_layers_excluding_input=len(number_of_nodes_per_hidden_layer)+1
    # print(T)
    number_of_nodes_per_hidden_layer.append(len(T[0]))
    print('len(T[0]=',len(T[0]))

    print('number_of_nodes_per_hidden_layer+otput: ',number_of_nodes_per_hidden_layer)
    print('number_of_nodes_per_hidden_layer: ',number_of_nodes_per_hidden_layer[:-1])
    print('number of layers in the network: ',len(number_of_nodes_per_hidden_layer)+1)
    w=[np.random.uniform(low=-1, high=1, size=(number_of_features,number_of_nodes_per_hidden_layer[0]))]

    b=np.array([np.random.uniform(low=-1, high=1) for num in range(number_of_layers_excluding_input)])
    for i in range(0,number_of_layers_excluding_input-1):
        w.append(np.random.uniform(low=-1, high=1, size=(number_of_nodes_per_hidden_layer[i],number_of_nodes_per_hidden_layer[i+1])))

    # w=np.array(w)
    # b=np.array(b)
    print('w=',w)
    print('b',b)
    # Create a dataset array combining inputs and targets
    D = np.array(list(zip(input_x, T)), dtype=object)
    epoch=0
    while True:
       # Shuffle the dataset
        np.random.shuffle(D)

        # Lists to store predicted and actual targets
        pred_target = []

        for (x,t) in D:
               # FORWARD PROPAGATION
                output_activations,z=feedforward([x], w, b,number_of_layers_excluding_input, hidden_layer_activation=hidden_layer_activation,output_activation=output_activation)
                pred_target.append(output_activations[-1])
                errors = []
                # for each output node n, where an is the output value, compute δn = (an−tn)an(1−an)
                s_n=delta_n(output_activations[-1],t)
                errors.append(s_n)
                for (a_m,w_mn) in zip(output_activations[:-1][::-1],w[::-1]):

                    s_n=errors[-1]
                    s_m=delta_m(a_m,w_mn,s_n)
                    errors.append(s_m)
                for (a_m,w_mn,s_n) in zip(output_activations[:-1][::-1],w,b):
                    # w=w-learning_rate*s_n*a_m
                    # print('s_n*a_m=',s_n*a_m)
                    b=b-learning_rate*s_n
                    print(b)
                epoch+=1
                if epoch>=1:
                  break
        print('b updte=',b)
        break

    return errors


In [16]:
input_X=data['data']
targets=one_hot_encoding(T)

neural_training(input_X,targets,number_of_nodes_per_hidden_layer=[5,4,3],learning_rate=0.01,hidden_layer_activation='sigmoid',output_activation='sigmoid')
# a1,z1=neural_training(np.array([[5.1,3.5,1.4,0.2]]),targets,number_of_nodes_per_hidden_layer=[5,6,6,5,4],learning_rate=0.01,hidden_layer_activation='softmax',output_activation='softmax')


len(T[0]= 3
number_of_nodes_per_hidden_layer+otput:  [5, 4, 3, 3]
number_of_nodes_per_hidden_layer:  [5, 4, 3]
number of layers in the network:  5
w= [array([[-0.17362962, -0.82801628,  0.88304859, -0.7044858 , -0.32727179],
       [-0.51354489, -0.58427348, -0.41898675,  0.55431867, -0.97566944],
       [-0.35062456, -0.07155512,  0.20984045, -0.19132949, -0.38126885],
       [-0.36878893, -0.19485914, -0.95727087,  0.86563991, -0.43248876]]), array([[ 0.00847081,  0.88457307, -0.43434259,  0.12875564],
       [ 0.46395141,  0.26954269, -0.8733722 ,  0.74448716],
       [-0.53202787,  0.81309421,  0.69986955, -0.13916759],
       [-0.29134085,  0.92243092, -0.03823083, -0.47824586],
       [ 0.85570754, -0.52028646, -0.36633767, -0.91428207]]), array([[ 0.39914691, -0.07756788, -0.22790611],
       [ 0.69880544,  0.85205137,  0.10957426],
       [ 0.61682413, -0.93697394, -0.59480784],
       [ 0.66705151, -0.32789738, -0.11406423]]), array([[-0.2550513 ,  0.13946419, -0.43523986],
  

[array([-0.0678013 ,  0.14639324,  0.1476967 ]),
 array([-0.02657407, -0.06185051,  0.04540119]),
 array([-0.01615655, -0.06629511,  0.01455581, -0.0026243 ]),
 array([-0.06543983, -0.04003161, -0.03475605, -0.05574701,  0.01773418])]

In [17]:
# # Specifying the file path where the dataset is stored on Google Drive
# file_path = '/content/gdrive/MyDrive/MSc e-Science/ACML/datasets/xor_data_train.csv'
# file_path_one = '/content/gdrive/MyDrive/MSc e-Science/ACML/datasets/xor_data_test.csv'
# # Reading the dataset from the specified csv file into a Pandas DataFrame
# train_data = pd.read_csv(file_path)
# test_data= pd.read_csv(file_path_one)
# # Create separate dataframes for signal and background
# sig = pd.concat([train_data[train_data['class'] == 1], test_data[test_data['class'] == 1]])
# bkg = pd.concat([train_data[train_data['class'] == 0], test_data[test_data['class'] == 0]])
# T1=np.reshape(train_data['class'].to_numpy(),(320,1))
# neural_training(train_data[['x1','x2']].to_numpy(),T1,learning_rate=0.01,hidden_layer_activation='sigmoid',output_activation='sigmoid',nodes_inc=3 )