In [None]:
# import relevant libraries
import torch
from torch import nn
import numpy as np

In [None]:
# define the BTU activation function
class BTU(torch.nn.Module):
  """Sigmoid activation with a temperature: ``1 / (1 + exp(-input / T))``.

  The smaller ``T`` is, the steeper the curve around zero, so for small
  ``T`` the output is effectively a 0/1 step of the sign of the input.

  Args:
    T: temperature (slope control) the input is divided by.
  """

  def __init__(self, T=0.2):
    super(BTU, self).__init__()
    self.T = T    # slope control: smaller T -> steeper sigmoid

  def forward(self, input: torch.Tensor) -> torch.Tensor:
    # torch.sigmoid computes the same function, but without evaluating
    # exp() on a huge argument. With a small T the explicit formula
    # overflows exp() to inf, which still yields the right forward value
    # but produces 0 * inf = NaN in the backward pass.
    return torch.sigmoid(input / self.T)

In [None]:
# Linear layer that operates on tensors.
# It receives the matrix sizes (in/out features) needed for the linear calculation of a layer.
# The same math logic applies to both the hidden layer and the output layer.

class Linear2(torch.nn.Module):
  """Fully connected layer computing ``input @ weight + bias``.

  The weight is stored with shape ``(in_features, out_features)`` so the
  forward pass is a plain ``torch.matmul(input, weight)``.

  Args:
    in_features: size of the last dimension of the input.
    out_features: size of the last dimension of the output.
    bias: when False the layer has no bias term.
    device: device the parameters are created on.
    dtype: dtype the parameters are created with.
  """

  def __init__(self, in_features: int, out_features: int, bias: bool = True, device=None, dtype=None) -> None:
    factory_kwargs = {'device': device, 'dtype': dtype}
    super(Linear2, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.weight = nn.Parameter(torch.empty((in_features, out_features), **factory_kwargs))
    if bias:
      self.bias = nn.Parameter(torch.empty(out_features, **factory_kwargs))
    else:
      self.register_parameter('bias', None)
    self.reset_parameters()

  def reset_parameters(self) -> None:
    """Re-initialise weight and bias with samples from U[0, 1).

    The existing parameters are filled in place, so the device and dtype
    requested in ``__init__`` are kept.
    """
    nn.init.uniform_(self.weight, 0.0, 1.0)
    if self.bias is not None:
      nn.init.uniform_(self.bias, 0.0, 1.0)

  def set_weights(self, w, b):
    """Overwrite the layer's parameters with explicit values.

    Args:
      w: values of shape ``(in_features, out_features)`` (nested list or tensor).
      b: values of shape ``(out_features,)``; may be None only for a layer
        created with ``bias=False``.

    Raises:
      ValueError: if a shape does not match the layer, or if ``b`` is None
        while the layer has a bias.
    """
    # Convert to the layer's own dtype/device so integer input or tensors
    # living elsewhere do not produce an unusable parameter.
    new_weight = torch.as_tensor(w, dtype=self.weight.dtype, device=self.weight.device)
    if new_weight.shape != self.weight.shape:
      raise ValueError('weight shape {} does not match expected {}'.format(
          tuple(new_weight.shape), tuple(self.weight.shape)))

    new_bias = None
    if b is None:
      if self.bias is not None:
        raise ValueError('bias values are required for a layer that has a bias')
    else:
      new_bias = torch.as_tensor(b, dtype=self.weight.dtype, device=self.weight.device)
      if new_bias.shape != (self.out_features,):
        raise ValueError('bias shape {} does not match expected {}'.format(
            tuple(new_bias.shape), (self.out_features,)))

    # Copy in place (shapes were checked above, so copy_ cannot broadcast):
    # the Parameter objects stay the same, which keeps any optimizer that
    # already references them valid.
    with torch.no_grad():
      self.weight.copy_(new_weight)
      if new_bias is not None and self.bias is not None:
        self.bias.copy_(new_bias)
    if new_bias is not None and self.bias is None:
      # Layer was created without a bias: attach one, as assigning a bias did before.
      self.bias = nn.Parameter(new_bias.clone())

  def forward(self, input: torch.Tensor) -> torch.Tensor:
    out = torch.matmul(input, self.weight)  # matrix product; * would be elementwise
    if self.bias is not None:
      out = out + self.bias
    return out

  def extra_repr(self) -> str:
    return 'in_features={}, out_features={}, bias={}'.format(
        self.in_features, self.out_features, self.bias is not None
      )
  

In [None]:
###### ex1 input runs ######

# Module-level settings read by the cells below.
n = 2
k = [1, 3, 4]  # hidden layer sizes; the run loop builds one network per entry
# set the data
input_dim = n  # input size
out_dim = 1   # output size
bypass = True   # bypass flag (NOTE(review): the visible run loop passes bypass=False explicitly, so this is not read there)
Temp = 0.001  # sigmoid slope control: the T handed to BTU by Network


In [None]:
class Network(nn.Module):
  """Network with one hidden layer: ``Linear2`` -> ``BTU`` -> ``Linear2`` -> ``BTU``.

  Layer sizes and the BTU temperature are read from the module-level
  ``input_dim``, ``out_dim`` and ``Temp`` at construction time.

  Args:
    num_hidden: number of units in the hidden layer.
    bypass: when True the output layer also receives the raw inputs,
      concatenated in front of the hidden activations. A network with a
      single hidden unit always uses the bypass.
  """

  def __init__(self, num_hidden, bypass=True):    #initialize network
    super().__init__()
    # the bypass is forced on for k = 1
    self.bypass = bool(bypass or num_hidden == 1)
    self.hidden = Linear2(input_dim, num_hidden)      # hidden layer linear function
    # with a bypass the output layer sees the inputs as well as the hidden units
    output_in = num_hidden + input_dim if self.bypass else num_hidden
    self.output = Linear2(output_in, out_dim)         # output layer linear function
    self.BTU = BTU(Temp)                              # activation shared by both layers

  def set_weights(self, w, b, layer): # set weights and biases
    """Set the weights ``w`` and biases ``b`` of the 'hidden' or 'output' layer.

    Problems are reported by printing a message instead of raising.
    """
    if layer == 'output':
      target = self.output
    elif layer == 'hidden':
      target = self.hidden
    else:
      # previously an unknown name was silently ignored
      print("Exception thrown: unknown layer name {!r}".format(layer))
      return
    try:
      target.set_weights(w, b)
    except Exception as exc:   # not a bare except: KeyboardInterrupt/SystemExit must propagate
      print("Exception thrown: sizes not matched")    # if given a not valid mat proportions
      print(exc)                                       # show the underlying error instead of hiding it

  def forward(self, input):
    """Return the network output for ``input`` (features on the last axis)."""
    hidden_out = self.BTU(self.hidden(input))   # hidden layer, then BTU
    if self.bypass:
      # Join inputs and hidden activations along the feature (last) axis.
      # The axis must not depend on the hidden size or on any global state.
      hidden_out = torch.cat((input, hidden_out), dim=-1)
    return self.BTU(self.output(hidden_out))    # output layer, then BTU


In [None]:
## difference between the model results and the expected results (sum of squared errors)
def Loss(x, t, print_deltas=False, model=None):
  """Sum of squared errors (SSE) between a model's outputs and the targets.

  Args:
    x: inputs passed to the model.
    t: targets, subtracted element-wise from the model output.
    print_deltas: when True, print the per-element squared differences.
    model: callable applied to ``x``. Defaults to the module-level
      ``my_model``, so existing ``Loss(x, t)`` calls behave as before.

  Returns:
    Scalar tensor holding the sum of the squared differences.
  """
  net = my_model if model is None else model
  squared_deltas = torch.square(net(x) - t)
  if print_deltas:
    print("the squared_deltas:")
    print(squared_deltas)
  return torch.sum(squared_deltas)    # return the sum of SSE

In [None]:
# Truth-table inputs: all 2^n binary combinations.
xor_train = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)

# Hand-set parameters for every supported hidden-layer size, listed as
# (layer name, weights, biases) in the order in which they are applied.
xor_params = {
    1: (
        ('hidden', [[1.], [1.]], [-1.5]),
        ('output', [[1.], [1.], [-2.]], [-0.5]),
    ),
    3: (
        ('hidden', [[-1., 1., 1.], [-1., 1., 1.]], [0.5, -0.5, -1.5]),
        ('output', [[-2.], [1.], [-2.]], [-0.5]),
    ),
    4: (
        ('hidden', [[-1., -1., 1., 1.], [-1., 1., -1., 1.]], [1.5, -0.5, -0.5, -1.5]),
        ('output', [[0.], [1.], [1.], [0.]], [-0.5]),
    ),
}

for i in k:  # build and check one model per hidden-layer size
  my_model = Network(i, bypass = False)
  num_hidden = i  # kept as a module-level name; other cells may read it
  if num_hidden not in xor_params:
    print("the k input is not valid...")
    break

  # report and install the parameters, hidden layer first
  for layer_name, w, b in xor_params[num_hidden]:
    print(layer_name + " layer:")
    print("the weights: " + str(w))
    print("the baises: " + str(b))
    my_model.set_weights(w, b, layer_name)

  # run the whole truth table through the model
  res = my_model(xor_train)
  print("modle with k = " + str(i) + " for input = ", xor_train, ": ")
  print("output = ", res)

  # compare against the XOR truth table; t holds the target for every row of xor_train
  t = torch.tensor([[0.], [1.], [1.], [0.]], dtype=torch.float32)
  print("the expected results for xor:")
  print(t)
  lost_val = Loss(xor_train, t, True)
  print("the Loss:")
  print(lost_val)
  print("-----------------------------------------------------")


hidden layer:
the weights: [[1.0], [1.0]]
the baises: [-1.5]
output layer:
the weights: [[1.0], [1.0], [-2.0]]
the baises: [-0.5]
y1_concat
tensor([[0., 0., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 1., 1.]], grad_fn=<CatBackward0>)
modle with k = 1 for input =  tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]]) : 
output =  tensor([[0.],
        [1.],
        [1.],
        [0.]], grad_fn=<MulBackward0>)
the expected results for xor:
tensor([[0.],
        [1.],
        [1.],
        [0.]])
y1_concat
tensor([[0., 0., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 1., 1.]], grad_fn=<CatBackward0>)
the squared_deltas:
tensor([[0.],
        [0.],
        [0.],
        [0.]], grad_fn=<PowBackward0>)
the Loss:
tensor(0., grad_fn=<SumBackward0>)
-----------------------------------------------------
hidden layer:
the weights: [[-1.0, 1.0, 1.0], [-1.0, 1.0, 1.0]]
the baises: [0.5, -0.5, -1.5]
output layer:
the weights: [[-2.0], [1.0], [-2

Summary:

The main idea is to create each layer with suitable matrix dimensions so that each layer can be calculated; for this process we must supply the right data (weights and biases) with the right matrix sizes.

In each layer's calculation we get a new matrix of values and then run them through the BTU function, so that the "inputs" to each following layer are binary.

For cases with a bypass, the calculation of the output also includes the original inputs of the network, and of course this changes the size of the matrix used in the output calculation.

To show that the networks work, the code runs all input cases through each network and we compare each output to the expected results in the XOR truth table.
Furthermore, we can see that the loss function, which does the actual comparison by an SSE calculation, returns a value of zero.