<a href="https://colab.research.google.com/github/Asnvir/ANN/blob/main/ANN_HW_2_Artur_Kopytin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Import packages


In [58]:
%matplotlib inline
import torch 
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


#Device

In [59]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(f"Device - {device}")

Device - cpu


#Linear class declaration

In [60]:
class Linear(torch.nn.Module):
  """Fully connected layer computing ``input @ weight.T (+ bias)``.

  Hand-written counterpart of ``torch.nn.Linear``. The weight has shape
  ``(out_features, in_features)`` and the optional bias has shape
  ``(out_features,)``; both are initialised uniformly in ``[0, 1)``.

  Args:
    in_features: size of the last dimension of the input.
    out_features: size of the last dimension of the output.
    bias: when False the layer has no additive bias (``self.bias`` is None).
    device: device on which the parameters are allocated.
    dtype: dtype of the parameters.
  """

  def __init__(self, in_features: int, out_features: int, bias: bool = True, device=None, dtype=None) -> None:
    factory_kwargs = {'device': device, 'dtype': dtype}
    super().__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.weight = nn.Parameter(torch.empty((out_features, in_features), **factory_kwargs))
    if bias:
      self.bias = nn.Parameter(torch.empty(out_features, **factory_kwargs))
    else:
      self.register_parameter('bias', None)
    self.reset_parameters()

  def reset_parameters(self) -> None:
    """Re-draw weight (and bias, if present) from the uniform distribution on [0, 1).

    The existing parameters are filled in place so that their device, dtype
    and identity are preserved; an optimizer that already references them
    keeps working after a reset.
    """
    with torch.no_grad():
      self.weight.uniform_(0.0, 1.0)
      if self.bias is not None:
        self.bias.uniform_(0.0, 1.0)

  def forward(self, input: torch.Tensor) -> torch.Tensor:
    """Apply the affine map to ``input`` (last dimension must be ``in_features``)."""
    output = torch.matmul(input, self.weight.t())
    # The bias is optional: skip the addition for layers built with bias=False.
    if self.bias is not None:
      output = output + self.bias
    return output

  def extra_repr(self) -> str:
    """Return the layer summary shown inside ``repr(module)``."""
    return 'in_features={}, out_features={}, bias={}'.format(self.in_features, self.out_features, self.bias is not None)

#Init variables

In [61]:
# Accumulators shared by all experiments (one entry appended per triesFunc call).
general_mean_epocs = []
general_std_epocs = []
output_hidden_neyron = []

# Network input/output sizes and the default learning rate.
dim, out_dim = 2, 1
l_rate = 0.01

# Training inputs: the four corners of the unit square.
x_train = torch.tensor(
    [[0, 0], [0, 1], [1, 0], [1, 1]],
    dtype=torch.float32,
    requires_grad=True,
)
# Extra validation-only inputs lying close to the corners.
x_addition = torch.tensor(
    [[1, 0.1], [1, 0.9], [0.9, 0.9], [0.1, 0.9]],
    dtype=torch.float32,
    requires_grad=True,
)
# The validation set is the training set followed by the extra points.
x_validation = torch.cat([x_train, x_addition], dim=0)

# XOR targets, row-aligned with the inputs above.
t_train = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)
t_addition = torch.tensor([[1], [0], [0], [1]], dtype=torch.float32)
t_validation = torch.cat([t_train, t_addition], dim=0)

print(f"x_train: \n{x_train}\n--------------------------------------------------------\n")
print(f"x_validation: \n{x_validation}\n--------------------------------------------------------\n")
print(f"t_train: \n{t_train}\n--------------------------------------------------------\n")
print(f"t_validation: \n{t_validation}\n--------------------------------------------------------\n")



x_train: 
tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]], requires_grad=True)
--------------------------------------------------------

x_validation: 
tensor([[0.0000, 0.0000],
        [0.0000, 1.0000],
        [1.0000, 0.0000],
        [1.0000, 1.0000],
        [1.0000, 0.1000],
        [1.0000, 0.9000],
        [0.9000, 0.9000],
        [0.1000, 0.9000]], grad_fn=<CatBackward0>)
--------------------------------------------------------

t_train: 
tensor([[0.],
        [1.],
        [1.],
        [0.]])
--------------------------------------------------------

t_validation: 
tensor([[0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.]])
--------------------------------------------------------



#Network class (XOR) declaration

In [62]:
class XOR_Net_Model(nn.Module):
  """Two-layer XOR network with an optional input-to-output bypass.

  The hidden layer maps ``dim`` inputs to ``num_hidden`` units. With
  ``bypass`` enabled the output layer sees the raw input concatenated with
  the hidden activations; otherwise it sees the hidden activations only.
  Both layers are followed by a ``BTU`` activation built with T=0.5.

  Args:
    num_hidden: number of hidden units.
    bypass: whether the raw input is also fed to the output layer.

  Attributes:
    y1: hidden activations from the most recent forward pass whose input
      equalled the module-level ``x_train``; None until such a pass happens.
  """

  def __init__(self,num_hidden = 4, bypass = True):
    super().__init__()
    self.bypass = bypass
    self.hidden = Linear(dim, num_hidden)
    # With the bypass the output layer also receives the `dim` raw inputs.
    output_inputs = num_hidden + dim if self.bypass else num_hidden
    self.output = Linear(output_inputs, out_dim)
    self.BTU = BTU(0.5)
    # Defined up front so that reading `model.y1` before the first forward
    # pass on x_train yields None instead of raising AttributeError.
    self.y1 = None

  def forward(self, input):
    """Return the network output for ``input`` (rows of ``dim`` features)."""
    y1 = self.BTU(self.hidden(input))
    # Remember the hidden activations only for the training inputs, so that
    # evaluation on other data does not overwrite them.
    if torch.equal(input, x_train):
      self.y1 = y1

    output_in = torch.cat((input, y1), 1) if self.bypass else y1
    return self.BTU(self.output(output_in))

#BTU func declaration


In [63]:
class BTU(torch.nn.Module):
  """Sigmoid activation with temperature ``T``: ``1 / (1 + exp(-x / T))``.

  Args:
    T: temperature; smaller values make the sigmoid steeper.
    inplace: accepted for signature compatibility; it is not used.
  """

  def __init__(self, T=0.2, inplace: bool = False):
    super().__init__()
    self.T = T

  def forward(self, input: torch.Tensor) -> torch.Tensor:
    """Apply the temperature-scaled sigmoid element-wise."""
    # torch.sigmoid is mathematically the same function as the explicit
    # formula, but it stays finite in the backward pass: the explicit
    # 1 / (1 + exp(-x / T)) overflows exp() for large negative x and then
    # produces NaN gradients (0 * inf).
    return torch.sigmoid(input / self.T)

# Cross Entropy loss function declaration

In [64]:
def Loss(out, t_train):
  """Return the summed binary cross-entropy between predictions and targets.

  Args:
    out: floating-point tensor of predicted probabilities in [0, 1].
    t_train: target tensor broadcastable against ``out``.

  Returns:
    A 0-dim tensor: ``-sum(t * log(out) + (1 - t) * log(1 - out))``.
  """
  # Keep predictions strictly inside (0, 1). A saturated prediction of exactly
  # 0 or 1 would otherwise give log(0) = -inf, and 0 * -inf = NaN would poison
  # both the loss and its gradients. Interior values are left untouched.
  eps = torch.finfo(out.dtype).eps
  safe_out = out.clamp(min=eps, max=1.0 - eps)
  return -torch.sum(t_train * torch.log(safe_out) + (1.0 - t_train) * torch.log(1.0 - safe_out))  # Cross Entropy loss function

#Train func declaration

In [65]:
def train(model, x_train, t_train, optimizer):
  """Perform one full-batch optimisation step.

  Returns the loss tensor computed on (x_train, t_train) before the
  parameter update.
  """
  # Clear gradients left over from the previous step before accumulating new ones.
  optimizer.zero_grad()

  # Forward pass and loss on the whole training batch.
  batch_loss = Loss(model(x_train), t_train)

  # Back-propagate, then let the optimizer apply the update.
  batch_loss.backward()
  optimizer.step()
  return batch_loss

#Test func declaration

In [66]:
# define test step operation:
def test(model, x_test, t_test):
  """Evaluate the model on (x_test, t_test) and return the loss tensor.

  No parameters are updated. The forward pass runs under ``torch.no_grad()``
  because the result is only inspected, never back-propagated, so there is
  no reason to build and keep an autograd graph for every evaluation.
  """
  with torch.no_grad():
    return Loss(model(x_test), t_test)

#Executing train on XOR model

In [67]:
def _mean_and_std(values):
  """Return the mean and standard deviation of ``values`` as 0-dim float32 tensors."""
  samples = torch.tensor(values, dtype=torch.float32)
  return torch.mean(samples), torch.std(samples)


def triesFunc(general_mean_epocs, general_std_epocs, l_rate,num_hidden, bypass, *,
              validation_boundary=0.2, max_validation_loss_improve=0.0001,
              max_num_good_epocs=10, max_num_epocs=40000, num_of_good_tries=10):
  """Train fresh XOR models until enough of them converge, then report statistics.

  Each try builds a new ``XOR_Net_Model`` and trains it with SGD on the
  module-level ``x_train``/``t_train``, validating on
  ``x_validation``/``t_validation`` after every epoch. An epoch is "good"
  when the validation loss is below ``validation_boundary`` and improved by
  at most ``max_validation_loss_improve`` since the previous epoch. A try
  succeeds after ``max_num_good_epocs`` consecutive good epochs; a try that
  reaches epoch index ``max_num_epocs`` without succeeding is counted as bad.
  Tries repeat, with no upper bound, until ``num_of_good_tries`` have succeeded.

  Args:
    general_mean_epocs: list that receives the mean epoch count (0-dim tensor).
    general_std_epocs: list that receives the std of the epoch counts (0-dim tensor).
    l_rate: SGD learning rate.
    num_hidden: number of hidden units of the model.
    bypass: whether the model feeds the raw input to the output layer.
    validation_boundary: validation loss must be below this for a good epoch.
    max_validation_loss_improve: largest per-epoch improvement still
      considered a plateau.
    max_num_good_epocs: consecutive good epochs required to stop a try.
    max_num_epocs: index of the last epoch of a try.
    num_of_good_tries: number of successful tries to collect.

  Returns:
    None. Results are appended to the two lists and printed.
  """
  counter_of_good_tries = 0
  counter_of_bad_tries = 0

  # Per successful try: epochs needed and the final validation/train losses.
  arr_num_epocs = []
  arr_num_val_loss = []
  arr_num_train_loss = []

  while counter_of_good_tries != num_of_good_tries:
    model = XOR_Net_Model(num_hidden, bypass)
    optimizer = torch.optim.SGD(model.parameters(), lr=l_rate)
    counter_good_epocs = 0
    prev_validation_loss = None
    y1 = None

    for i in range(max_num_epocs + 1):
      # Store plain floats so the loss tensors' autograd graphs are not kept alive.
      train_loss = train(model, x_train, t_train, optimizer).item()
      validation_loss = test(model, x_validation, t_validation).item()

      # The first epoch only provides the baseline validation loss.
      if i > 0:
        improvement = prev_validation_loss - validation_loss
        if improvement <= max_validation_loss_improve and validation_loss < validation_boundary:
          counter_good_epocs += 1
        else:
          counter_good_epocs = 0

        if counter_good_epocs == max_num_good_epocs:
          counter_of_good_tries += 1
          arr_num_epocs.append(i + 1)
          arr_num_val_loss.append(validation_loss)
          arr_num_train_loss.append(train_loss)
          break

        if i == max_num_epocs:
          # Ran out of epochs without converging.
          counter_of_bad_tries += 1

        # Hidden activations recorded by the model during this epoch's training pass.
        y1 = model.y1

      prev_validation_loss = validation_loss

    if bypass and num_hidden == 1:
      # NOTE(review): the original assigned y1 to a *local* named
      # output_hidden_neyron here, which never reached the module-level list
      # of the same name; the dead assignment was dropped. Confirm whether
      # the activations were meant to be stored globally.
      print(f"Input:")
      print(f"{x_train}\n")
      print(f"Y hidden:")
      print(f"{y1}\n")
      print(f"------------------------------------")

  mean_epocs, std_epocs = _mean_and_std(arr_num_epocs)
  mean_validation_loss, std_validation_loss = _mean_and_std(arr_num_val_loss)
  mean_train_loss, std_train_loss = _mean_and_std(arr_num_train_loss)

  general_mean_epocs.append(mean_epocs)
  general_std_epocs.append(std_epocs)

  print(f"Mean of epocs for this experement = {mean_epocs} \nStandard deviation of epocs for this experement= {std_epocs}\n")
  print(f"Mean of validation loss for this experement= {mean_validation_loss} \nStandard deviation of validation loss for this experement= {std_validation_loss}\n")
  print(f"Mean of train  loss for this experement= {mean_train_loss} \nStandard deviation of train loss for this experement= {std_train_loss}\n")
  print(f"Bad training count for this experement= {counter_of_bad_tries}\n")


#Test 1 
#Learning rate = 0.01 , Num hidden = 2, Bypass = True

In [68]:

# Test 1: learning rate 0.01, 2 hidden units, bypass enabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.01,
    num_hidden=2,
    bypass=True,
)


Mean of epocs for this experement = 11851.7001953125 
Standard deviation of epocs for this experement= 2920.7119140625

Mean of validation loss for this experement= 0.19981828331947327 
Standard deviation of validation loss for this experement= 7.222982821986079e-05

Mean of train  loss for this experement= 0.04905339702963829 
Standard deviation of train loss for this experement= 0.005615175236016512

Bad training count for this experement= 0



#Test 2
#Learning rate = 0.01 , Num hidden = 2, Bypass = False

In [69]:
# Test 2: learning rate 0.01, 2 hidden units, bypass disabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.01,
    num_hidden=2,
    bypass=False,
)


Mean of epocs for this experement = 7193.0 
Standard deviation of epocs for this experement= 1661.6097412109375

Mean of validation loss for this experement= 0.19948795437812805 
Standard deviation of validation loss for this experement= 1.919268470373936e-05

Mean of train  loss for this experement= 0.06919687241315842 
Standard deviation of train loss for this experement= 0.0012208158150315285

Bad training count for this experement= 13



#Test 3
#Learning rate = 0.01 , Num hidden = 4, Bypass = True

In [70]:
# Test 3: learning rate 0.01, 4 hidden units, bypass enabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.01,
    num_hidden=4,
    bypass=True,
)


Mean of epocs for this experement = 7240.60009765625 
Standard deviation of epocs for this experement= 1157.052734375

Mean of validation loss for this experement= 0.1996794044971466 
Standard deviation of validation loss for this experement= 8.619247819297016e-05

Mean of train  loss for this experement= 0.05496741458773613 
Standard deviation of train loss for this experement= 0.005072572268545628

Bad training count for this experement= 0



#Test 4
#Learning rate = 0.01 , Num hidden = 4, Bypass = False

In [71]:
# Test 4: learning rate 0.01, 4 hidden units, bypass disabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.01,
    num_hidden=4,
    bypass=False,
)


Mean of epocs for this experement = 6246.2001953125 
Standard deviation of epocs for this experement= 731.1707153320312

Mean of validation loss for this experement= 0.1995137631893158 
Standard deviation of validation loss for this experement= 7.916201138868928e-05

Mean of train  loss for this experement= 0.06841523945331573 
Standard deviation of train loss for this experement= 0.0025545929092913866

Bad training count for this experement= 1



#Test 5 
#Learning rate = 0.1 , Num hidden = 2, Bypass = True

In [72]:
# Test 5: learning rate 0.1, 2 hidden units, bypass enabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.1,
    num_hidden=2,
    bypass=True,
)


Mean of epocs for this experement = 1547.5999755859375 
Standard deviation of epocs for this experement= 148.451416015625

Mean of validation loss for this experement= 0.15996132791042328 
Standard deviation of validation loss for this experement= 0.023604683578014374

Mean of train  loss for this experement= 0.035571951419115067 
Standard deviation of train loss for this experement= 0.0041445898823440075

Bad training count for this experement= 0



#Test 6
#Learning rate = 0.1 , Num hidden = 2, Bypass = False

In [None]:
# Test 6: learning rate 0.1, 2 hidden units, bypass disabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.1,
    num_hidden=2,
    bypass=False,
)


#Test 7
#Learning rate = 0.1 , Num hidden = 4, Bypass = True

In [None]:
# Test 7: learning rate 0.1, 4 hidden units, bypass enabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.1,
    num_hidden=4,
    bypass=True,
)


#Test 8
#Learning rate = 0.1 , Num hidden = 4, Bypass = False

In [None]:
# Test 8: learning rate 0.1, 4 hidden units, bypass disabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.1,
    num_hidden=4,
    bypass=False,
)


#Test 9
#Learning rate = 0.1 , Num hidden = 1, Bypass = True
#If we look at y1 (the value of the activation function in the hidden layer), we can see that for our inputs it behaves like a logical OR function.

In [None]:
# Test 9: learning rate 0.1, a single hidden unit, bypass enabled.
# This configuration also makes triesFunc print the hidden activations.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.1,
    num_hidden=1,
    bypass=True,
)


#Test 10 - Rshut #1
#Learning rate = 0.1 , Num hidden = 3, Bypass = True

In [None]:
# Test 10: learning rate 0.1, 3 hidden units, bypass enabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.1,
    num_hidden=3,
    bypass=True,
)


#Test 11 - Rshut #2
#Learning rate = 0.1 , Num hidden = 3, Bypass = False

In [None]:
# Test 11: learning rate 0.1, 3 hidden units, bypass disabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.1,
    num_hidden=3,
    bypass=False,
)


#Test 12 - Rshut #3
#Learning rate = 0.01 , Num hidden = 3, Bypass = True

In [None]:
# Test 12: learning rate 0.01, 3 hidden units, bypass enabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.01,
    num_hidden=3,
    bypass=True,
)


#Test 13 - Rshut #4
#Learning rate = 0.01 , Num hidden = 3, Bypass = False

In [None]:
# Test 13: learning rate 0.01, 3 hidden units, bypass disabled.
triesFunc(
    general_mean_epocs,
    general_std_epocs,
    l_rate=0.01,
    num_hidden=3,
    bypass=False,
)


#Graphs

In [None]:
# Hidden-unit count of each experiment, in the order the tests were run:
# tests 1-8 cycle through 2, 2, 4, 4 twice, test 9 uses 1, tests 10-13 use 3.
num_hidden = [2, 2, 4, 4] * 2 + [1] + [3] * 4
num_hidden
#


In [None]:
# Show the mean epoch counts that the triesFunc calls above appended (one entry per call).
general_mean_epocs

#Graphs

#First graph

In [None]:

# Unwrap the 0-dim tensors collected by triesFunc into plain Python numbers.
mean_epocs = [entry.item() for entry in general_mean_epocs]

# Scatter: mean epochs to converge against the number of hidden units.
data_plot = pd.DataFrame({"Num hidden neyrons": num_hidden, "Mean epocs": mean_epocs})
sns.scatterplot(x="Num hidden neyrons", y="Mean epocs", data=data_plot)
plt.show()
data_plot

#Second graph

In [None]:
# Bypass flag of each experiment (1 = bypass enabled), in the order the tests were run.
bridge = [
    1, 0, 1, 0,  # tests 1-4
    1, 0, 1, 0,  # tests 5-8
    1,           # test 9
    1, 0, 1, 0,  # tests 10-13
]

# Scatter: mean epochs to converge against the bypass flag.
data_plot = pd.DataFrame({"Bridge": bridge, "Mean epocs": mean_epocs})
sns.scatterplot(x="Bridge", y="Mean epocs", data=data_plot)
plt.show()
data_plot

#Third graph

In [None]:
# Unwrap the 0-dim tensors collected by triesFunc into plain Python numbers.
std_epocs = [entry.item() for entry in general_std_epocs]

# Learning rate of each experiment, in the order the tests were run.
# Note: this rebinds the module-level name `l_rate` from a float to a list.
l_rate = [0.01] * 4 + [0.1] * 7 + [0.01] * 2

# Scatter: standard deviation of epochs to converge against the learning rate.
data_plot = pd.DataFrame({"Learning rate": l_rate, "STD epocs": std_epocs})
sns.scatterplot(x="Learning rate", y="STD epocs", data=data_plot)
plt.show()
data_plot

#Fourth graph