# Common Tools

In [None]:
import sys, os
import torch
import torchvision.models as models
import numpy as np

# Resolve the shared helper directory relative to the current working
# directory (the notebook is expected to be started from its own folder).
directory = os.path.abspath('../../Prod/Python/')

# Make the helper modules importable; the membership check keeps re-running
# this cell from appending the same entry to sys.path again.
if directory not in sys.path:
    sys.path.append(directory)

# These imports only resolve after the sys.path update above.
from pytorch_utils import pytorch_utils
from pytorch_to_sharpnet import pytorch_to_sharpnet
# Short aliases used by every test cell below.
Train = pytorch_utils.Train
numpy_array_for_tests = pytorch_utils.numpy_array_for_tests
y_numpy_array_for_tests = pytorch_utils.y_numpy_array_for_tests
save_sharpnet = pytorch_to_sharpnet.save_sharpnet

# Seed both RNGs once up front (each model constructor re-seeds them as well).
torch.manual_seed(0)
np.random.seed(0)

# Show where the helper writes its log and which PyTorch build is in use.
print(f'log_file_name: {pytorch_utils.log_file_name}')
print(f'PyTorch version: {torch.__version__}')


# Example (kept disabled): exporting a trained model in the sharpnet format.
# `model`, `optimizer` and `rows` must already be defined by a test cell.
#save_sharpnet(model, 'model_pytorch', os.getcwd(), optimizer, torch.nn.HuberLoss(delta=0.5), input_shape=[rows,3,2,2], verbose=True)


# Compute Gradient in PyTorch

In [None]:
# Gradient of the mean-squared-error loss with respect to the predictions.
# The tensor is named `predictions` rather than `input` so that the Python
# builtin `input()` is not shadowed for the rest of the kernel session.
loss = torch.nn.MSELoss()
predictions = torch.tensor(
    [[0.01995766907930374, 0.05039015784859657, 0.04157894104719162]],
    requires_grad=True,
)
target = torch.tensor([[1.0, 0, 0]])
output = loss(predictions, target)
# Back-propagate; with the default 'mean' reduction the gradient is
# 2 * (predictions - target) / number_of_elements.
output.backward()
predictions.grad

# Compute Gradient in TensorFlow

In [None]:
# The TensorFlow equivalent of the PyTorch gradient cell is kept below inside a
# string literal, so none of it is executed (TensorFlow is not imported).
'''
import tensorflow as tf
# Define your predictions and ground truth
predictions = tf.constant([[0.01995766907930374, 0.05039015784859657, 0.04157894104719162]])
ground_truth = tf.constant([[1.0, 0, 0]])

# Compute the Mean Squared Error (MSE) loss
mse_loss = tf.reduce_mean(tf.square(predictions - ground_truth))

# Use GradientTape to compute the gradient
with tf.GradientTape() as tape:
    # Watch the variables we want to compute gradients with respect to
    tape.watch(predictions)
    tape.watch(ground_truth)
    
    # Compute the loss
    loss = tf.reduce_mean(tf.square(predictions - ground_truth))

# Compute the gradient of the loss with respect to the predictions
gradients = tape.gradient(loss, predictions)
gradients
'''

# TestParallelRunWithPyTorch_Mse

In [None]:
import torch
import numpy as np

class TestParallelRunWithPyTorch_Mse(torch.nn.Module):
    """Linear(5 -> 3) -> ReLU -> Flatten -> Linear(36 -> 3).

    The constructor re-seeds the torch and numpy RNGs, so every instance
    starts from the same weights; both linear biases are zeroed.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Layers are created in forward order; do not reorder, since the
        # creation order determines the random draws each layer receives.
        self.dense = torch.nn.Linear(5, 3)
        self.relu = torch.nn.ReLU()
        self.flatten_dense_1 = torch.nn.Flatten(1)
        self.dense_1 = torch.nn.Linear(3*4*3, 3)
        for layer in (self.dense, self.dense_1):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the first dense layer on the last axis, then flatten and project to 3 outputs."""
        activated = self.relu(self.dense(x))
        return self.dense_1(self.flatten_dense_1(activated))

# Train on the GPU with SGD + momentum, then compare the two losses returned
# by Train against the stored reference values.
device = 'cuda'
model = TestParallelRunWithPyTorch_Mse().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([2, 3, 4, 5]),
    y_numpy_array_for_tests(2, 3),
    device=device,
    loss_criterion=torch.nn.MSELoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 0.297953724861145) < 1e-6
assert abs(loss_after - 0.09050967544317245) < 1e-6


# TestParallelRunWithPyTorch_Mse_AdamW

In [None]:
import torch
import numpy as np
class TestParallelRunWithPyTorch_Mse_AdamW(torch.nn.Module):
    """Flatten -> Linear(1 -> 2) with a zero-initialised bias.

    The torch and numpy RNGs are re-seeded in the constructor so the initial
    weights are the same for every instance.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        self.flatten_dense = torch.nn.Flatten(1)
        self.dense = torch.nn.Linear(in_features=1*1*1, out_features=2, bias=True)
        torch.nn.init.zeros_(self.dense.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten everything but the batch axis and apply the dense layer."""
        return self.dense(self.flatten_dense(x))

# Train on the GPU with AdamW for a single epoch and compare the two losses
# returned by Train against the stored reference values.
# `device` is assigned before its first use so this cell does not depend on a
# value left behind by an earlier cell; the model is built exactly once.
device = 'cuda'
model = TestParallelRunWithPyTorch_Mse_AdamW().to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr = 1.0, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.1)

loss_before, loss_after = Train(model,
      numpy_array_for_tests([1, 1, 1, 1]),
      y_numpy_array_for_tests(1, 2),
      device = device,
      loss_criterion = torch.nn.MSELoss(),
      optimizer = optimizer,
      num_epochs = 1
      )

assert abs(loss_before-0.6364270448684692) < 1e-6
assert abs(loss_after-1.6577095985412598) < 1e-6


# TestParallelRunWithPyTorch_Convolution

In [None]:
import torch
import numpy as np

    
import torch
import numpy as np

class TestParallelRunWithPyTorch_Convolution(torch.nn.Module):
    """Two stacked 1x1 convolutions with a pooled multiplicative gate.

    The second convolution's output is average-pooled to 1x1 and multiplied
    (by broadcasting) with the first convolution's output, then flattened and
    fed to Linear(40 -> 2). RNGs are re-seeded in the constructor and all
    biases start at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.conv2d = torch.nn.Conv2d(in_channels=3, out_channels=2, kernel_size=(1,1), stride=1, padding='same', bias=True)
        self.conv2d_1 = torch.nn.Conv2d(in_channels=2, out_channels=2, kernel_size=(1,1), stride=1, padding='same', bias=True)
        self.average_pooling2d_ = torch.nn.AdaptiveAvgPool2d(output_size=1)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=40, out_features=2, bias=True)
        for layer in (self.conv2d, self.conv2d_1, self.dense):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Gate the first feature map with the pooled second feature map, then classify."""
        features = self.conv2d(x)
        gate = self.average_pooling2d_(self.conv2d_1(features))
        return self.dense(self.flatten(features * gate))



# Train on the GPU with SGD + momentum using a cross-entropy loss, then
# compare the two losses returned by Train against the stored references.
device = 'cuda'
model = TestParallelRunWithPyTorch_Convolution().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([2, 3, 4, 5]),
    y_numpy_array_for_tests(2, 2),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 0.6931346654891968) < 1e-6
assert abs(loss_after - 0.6924928426742554) < 1e-6

# TestParallelRunWithPyTorch_Convolution_AdamW

In [None]:
import torch
import numpy as np

    
import torch
import numpy as np

class TestParallelRunWithPyTorch_Convolution_AdamW(torch.nn.Module):
    """Two stacked 1x1 convolutions with a pooled multiplicative gate.

    The output of the second convolution is average-pooled to 1x1 and
    broadcast-multiplied with the output of the first one; the product is
    flattened and passed to Linear(40 -> 2). The constructor re-seeds the
    RNGs and zeroes every bias.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.conv2d = torch.nn.Conv2d(in_channels=3, out_channels=2, kernel_size=(1,1), stride=1, padding='same', bias=True)
        self.conv2d_1 = torch.nn.Conv2d(in_channels=2, out_channels=2, kernel_size=(1,1), stride=1, padding='same', bias=True)
        self.average_pooling2d_ = torch.nn.AdaptiveAvgPool2d(output_size=1)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=40, out_features=2, bias=True)
        for layer in (self.conv2d, self.conv2d_1, self.dense):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Gate the first feature map with the pooled second feature map, then classify."""
        features = self.conv2d(x)
        gate = self.average_pooling2d_(self.conv2d_1(features))
        return self.dense(self.flatten(features * gate))



# Train on the GPU with AdamW using a cross-entropy loss, then compare the
# two losses returned by Train against the stored reference values.
device = 'cuda'
model = TestParallelRunWithPyTorch_Convolution_AdamW().to(device)
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=0.01,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0.005,
)

loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([2, 3, 4, 5]),
    y_numpy_array_for_tests(2, 2),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 0.6931346654891968) < 1e-6
assert abs(loss_after - 0.6222155690193176) < 1e-6

# Test_Convolution_With_Asymmetric_Padding

In [None]:
import torch
import numpy as np

class Test_Convolution_With_Asymmetric_Padding(torch.nn.Module):
    """Bias-free 3x3 stride-2 convolution -> Flatten -> Linear(6 -> 2).

    The convolution uses padding=(1,1). RNGs are re-seeded in the constructor
    and the dense bias starts at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        self.conv2d = torch.nn.Conv2d(in_channels=3, out_channels=1, kernel_size=(3,3), stride=2, padding=(1,1), bias=False)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=6, out_features=2, bias=True)
        torch.nn.init.zeros_(self.dense.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Convolve, flatten per sample and project to two outputs."""
        return self.dense(self.flatten(self.conv2d(x)))

# Train on the GPU with SGD (momentum + weight decay) using a cross-entropy
# loss, then compare the losses returned by Train with the stored references.
device = 'cuda'
model = Test_Convolution_With_Asymmetric_Padding().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.15)
rows = 2  # number of samples in the generated batch
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([rows, 3, 4, 5]),
    y_numpy_array_for_tests(rows, 2),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 0.6809013485908508) < 1e-6
# The final loss is only checked to 1e-4; the stricter 1e-6 check is disabled.
assert abs(loss_after - 0.456775605678558354) < 1e-4

# TestParallelRunWithPyTorch_Conv1D

In [None]:
import torch
import numpy as np

class TestParallelRunWithPyTorch_Conv1D(torch.nn.Module):
    """Conv1d('same', stride 1) -> Conv1d('valid', stride 2) -> Flatten -> Linear(4 -> 3).

    RNGs are re-seeded in the constructor and every bias starts at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.conv1d = torch.nn.Conv1d(in_channels=4, out_channels=2, kernel_size=3, stride=1, padding='same', bias=True)
        self.conv1d_1 = torch.nn.Conv1d(in_channels=2, out_channels=2, kernel_size=3, stride=2, padding='valid', bias=True)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=4, out_features=3, bias=True)
        for layer in (self.conv1d, self.conv1d_1, self.dense):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run both convolutions in sequence, flatten and project to three outputs."""
        convolved = self.conv1d_1(self.conv1d(x))
        return self.dense(self.flatten(convolved))



# Train on the GPU with SGD + momentum using a cross-entropy loss, then
# compare the two losses returned by Train against the stored references.
device = 'cuda'
model = TestParallelRunWithPyTorch_Conv1D().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([3, 4, 5]),
    y_numpy_array_for_tests(3, 3),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 1.0818352699279785) < 1e-6
assert abs(loss_after - 0.7389676570892334) < 1e-6

# TestParallelRunWithPyTorch_LayerNormalizationNchw

In [None]:
import torch
import numpy as np


class TestParallelRunWithPyTorch_LayerNormalizationNchw(torch.nn.Module):
    """Bias-free 5x5 stride-2 convolution -> LayerNorm -> Flatten -> Linear(12 -> 2).

    LayerNorm is built with normalized_shape 3, i.e. it normalises the last
    axis of the convolution output, which must have size 3. RNGs are
    re-seeded in the constructor and the dense bias starts at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        self.conv2d = torch.nn.Conv2d(in_channels=3, out_channels=2, kernel_size=(5,5), stride=2, padding=(2,2), bias=False)
        # `(3)` is just the integer 3 (not a tuple), so it is written plainly.
        self.layer_normalization = torch.nn.LayerNorm(normalized_shape=3, eps=0.001)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=12, out_features=2, bias=True)
        torch.nn.init.zeros_(self.dense.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Convolve, layer-normalise, flatten per sample and project to two outputs."""
        normalised = self.layer_normalization(self.conv2d(x))
        return self.dense(self.flatten(normalised))


# Train on the GPU with SGD + momentum using a cross-entropy loss, then
# compare the two losses returned by Train against the stored references.
device = 'cuda'
model = TestParallelRunWithPyTorch_LayerNormalizationNchw().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([2, 3, 4, 5]),
    y_numpy_array_for_tests(2, 2),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 0.8300741910934448) < 1e-6
assert abs(loss_after - 0.5169422626495361) < 1e-6

# TestParallelRunWithPyTorch_RMSNormNchw

In [None]:
import torch
import numpy as np


class TestParallelRunWithPyTorch_RMSNormNchw(torch.nn.Module):
    """Bias-free 5x5 stride-2 convolution -> RMSNorm -> Flatten -> Linear(12 -> 2).

    RMSNorm is built with normalized_shape 3, i.e. it normalises the last
    axis of the convolution output, which must have size 3. RNGs are
    re-seeded in the constructor and the dense bias starts at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        self.conv2d = torch.nn.Conv2d(in_channels=3, out_channels=2, kernel_size=(5,5), stride=2, padding=(2,2), bias=False)
        # `(3)` is just the integer 3 (not a tuple), so it is written plainly.
        self.rms_normalization = torch.nn.RMSNorm(normalized_shape=3, eps=0.001)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=12, out_features=2, bias=True)
        torch.nn.init.zeros_(self.dense.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Convolve, RMS-normalise, flatten per sample and project to two outputs."""
        normalised = self.rms_normalization(self.conv2d(x))
        return self.dense(self.flatten(normalised))


# Train on the GPU with SGD + momentum using a cross-entropy loss, then
# compare the two losses returned by Train against the stored references.
device = 'cuda'
model = TestParallelRunWithPyTorch_RMSNormNchw().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([2, 3, 4, 5]),
    y_numpy_array_for_tests(2, 2),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 0.8518396019935608) < 1e-6
assert abs(loss_after - 0.6556870937347412) < 1e-6

# TestParallelRunWithPyTorch_BatchNormalizationNchw2345

In [None]:
import torch
import numpy as np
import torch.nn.functional as F

class TestParallelRunWithPyTorch_BatchNormalizationNchw2345(torch.nn.Module):
    """ReLU -> BatchNorm2d over 3 channels -> Flatten.

    The only trainable parameters are the batch-norm scale and shift. RNGs
    are re-seeded in the constructor.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds keep the run reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        self.relu = torch.nn.ReLU()
        self.batch_normalization = torch.nn.BatchNorm2d(num_features=3, eps=0.001, momentum=0.010000000000000009)
        self.flatten = torch.nn.Flatten()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Rectify, batch-normalise and flatten everything but the batch axis."""
        rectified = self.relu(x)
        return self.flatten(self.batch_normalization(rectified))

# Train on the GPU with SGD + momentum in mini-batches of 2 using an L1 loss,
# then compare the losses returned by Train against the stored references.
device = 'cuda'
model = TestParallelRunWithPyTorch_BatchNormalizationNchw2345().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([10, 3, 4, 5]),
    y_numpy_array_for_tests(10, 60),
    device=device,
    loss_criterion=torch.nn.L1Loss(),
    optimizer=optimizer,
    num_epochs=10,
    batch_size=2,
)

assert abs(loss_before - 0.2588789165019989) < 1e-6
assert abs(loss_after - 0.035503968596458435) < 1e-6

# TestResNet_Shortcut_Same_Dimension_NCHW_2_1_4_4

In [None]:
import torch
import numpy as np

class TestResNet_Shortcut_Same_Dimension_NCHW_2_1_4_4(torch.nn.Module):
    """Residual block: conv -> conv, added to the first conv's output, then Flatten -> Linear(16 -> 3).

    Both convolutions are 1x1 with a single channel. RNGs are re-seeded in
    the constructor and every bias starts at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.conv2d = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1,1), stride=1, padding='valid', bias=True)
        self.conv2d_1 = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1,1), stride=1, padding='valid', bias=True)
        self.flatten_dense = torch.nn.Flatten(1)
        self.dense = torch.nn.Linear(in_features=1*4*4, out_features=3, bias=True)
        for layer in (self.conv2d, self.conv2d_1, self.dense):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Add the identity shortcut to the second convolution and classify."""
        shortcut = self.conv2d(x)
        residual_sum = self.conv2d_1(shortcut) + shortcut
        return self.dense(self.flatten_dense(residual_sum))
    

# Train on the GPU with SGD + momentum using a cross-entropy loss, then
# compare the two losses returned by Train against the stored references.
device = 'cuda'
model = TestResNet_Shortcut_Same_Dimension_NCHW_2_1_4_4().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([2, 1, 4, 4]),
    y_numpy_array_for_tests(2, 3),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 1.0986199378967285) < 1e-6
assert abs(loss_after - 0.7018476724624634) < 1e-6

# TestResNet_Shortcut_Different_Dimension_With_Conv_1x1_to_change_Dimension_NCHW_2_1_4_4


In [None]:
import torch
import numpy as np

class TestResNet_Shortcut_Different_Dimension_With_Conv_1x1_to_change_Dimension_NCHW_2_1_4_4(torch.nn.Module):
    """Residual block whose shortcut passes through its own 1x1 convolution.

    A stem convolution feeds two parallel 1x1 convolutions; their outputs are
    summed, flattened and passed to Linear(16 -> 3). RNGs are re-seeded in
    the constructor and every bias starts at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.conv2d = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1,1), stride=1, padding='valid', bias=True)
        self.conv2d_1 = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1,1), stride=1, padding='valid', bias=True)
        self.conv2d_2 = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1,1), stride=1, padding='valid', bias=True)
        self.flatten_dense = torch.nn.Flatten(1)
        self.dense = torch.nn.Linear(in_features=1*4*4, out_features=3, bias=True)
        for layer in (self.conv2d, self.conv2d_1, self.conv2d_2, self.dense):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Sum the two parallel branches computed from the stem and classify."""
        stem = self.conv2d(x)
        main_branch = self.conv2d_1(stem)
        shortcut_branch = self.conv2d_2(stem)
        merged = shortcut_branch + main_branch
        return self.dense(self.flatten_dense(merged))
    
    
# Train on the GPU with SGD + momentum using a cross-entropy loss, then
# compare the two losses returned by Train against the stored references.
# No weight decay is passed to the optimizer (lambdaL2Regularization = 0.00).
device = 'cuda'
model = TestResNet_Shortcut_Different_Dimension_With_Conv_1x1_to_change_Dimension_NCHW_2_1_4_4().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([2, 1, 4, 4]),
    y_numpy_array_for_tests(2, 3),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 1.0974769592285156) < 1e-6
assert abs(loss_after - 0.5784467458724976) < 1e-6

# TestL2Regularization_ConvolutionLayer_SGDVanilla_NCHW_2_1_4_4

In [None]:
import torch
import numpy as np

class TestL2Regularization_ConvolutionLayer_SGDVanilla_NCHW_2_1_4_4(torch.nn.Module):
    """Single-channel 1x1 convolution -> Flatten -> Linear(16 -> 3).

    RNGs are re-seeded in the constructor and both biases start at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.conv2d = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1,1), stride=1, padding='valid', bias=True)
        self.flatten_dense = torch.nn.Flatten(1)
        self.dense = torch.nn.Linear(in_features=1*4*4, out_features=3, bias=True)
        for layer in (self.conv2d, self.dense):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Convolve, flatten per sample and project to three outputs."""
        return self.dense(self.flatten_dense(self.conv2d(x)))

# Train on the GPU with SGD (momentum + weight decay) using a cross-entropy
# loss, then compare the losses returned by Train with the stored references.
device = 'cuda'
model = TestL2Regularization_ConvolutionLayer_SGDVanilla_NCHW_2_1_4_4().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.6, weight_decay=0.9)
rows = 2  # number of samples in the generated batch
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([rows, 1, 4, 4]),
    y_numpy_array_for_tests(rows, 3),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 1.099147081375122) < 1e-6
assert abs(loss_after - 0.9695441722869873) < 1e-6

# TestReluActivation_NCHW_2_1_4_4

In [None]:
import torch
import numpy as np

class TestReluActivation_NCHW_2_1_4_4(torch.nn.Module):
    """Flatten -> Linear(16 -> 3) -> ReLU -> Linear(3 -> 3) -> Sigmoid.

    RNGs are re-seeded in the constructor and both linear biases start at
    zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.flatten_dense = torch.nn.Flatten(1)
        self.dense = torch.nn.Linear(in_features=1*4*4, out_features=3, bias=True)
        self.relu = torch.nn.ReLU()
        self.dense_1 = torch.nn.Linear(in_features=3, out_features=3, bias=True)
        self.sigmoid = torch.nn.Sigmoid()
        for layer in (self.dense, self.dense_1):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return sigmoid activations of the two-layer perceptron."""
        hidden = self.relu(self.dense(self.flatten_dense(x)))
        return self.sigmoid(self.dense_1(hidden))

# Train on the GPU with SGD (momentum + weight decay) using a binary
# cross-entropy loss, then compare the losses returned by Train with the
# stored reference values.
device = 'cuda'
model = TestReluActivation_NCHW_2_1_4_4().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.03)
rows = 2  # number of samples in the generated batch
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([rows, 1, 4, 4]),
    y_numpy_array_for_tests(rows, 3),
    device=device,
    loss_criterion=torch.nn.BCELoss(),
    optimizer=optimizer,
    num_epochs=10,
)

assert abs(loss_before - 0.6726791262626648) < 1e-6
assert abs(loss_after - 0.5357139110565186) < 1e-6

# Test_Huber

In [None]:
import torch
import numpy as np

class Test_Huber(torch.nn.Module):
    """Flatten -> Linear(12 -> 3) -> ReLU -> Linear(3 -> 1) regressor.

    RNGs are re-seeded in the constructor and both linear biases start at
    zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.flatten_dense = torch.nn.Flatten(1)
        self.dense = torch.nn.Linear(in_features=3*2*2, out_features=3, bias=True)
        self.relu = torch.nn.ReLU()
        self.dense_1 = torch.nn.Linear(in_features=3, out_features=1, bias=True)
        for layer in (self.dense, self.dense_1):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return one scalar prediction per sample."""
        hidden = self.relu(self.dense(self.flatten_dense(x)))
        return self.dense_1(hidden)

# Train on the GPU with SGD (momentum + weight decay) in mini-batches of 2
# using a Huber loss (delta 0.5), then compare the losses returned by Train
# with the stored reference values.
device = 'cuda'
model = Test_Huber().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.05)
rows = 4  # number of samples in the generated data set
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([rows, 3, 2, 2]),
    y_numpy_array_for_tests(rows, 1),
    device=device,
    loss_criterion=torch.nn.HuberLoss(delta=0.5),
    optimizer=optimizer,
    num_epochs=10,
    batch_size=2,
)

assert abs(loss_before - 0.32030075788497925) < 1e-6
assert abs(loss_after - 0.0027381146792322397) < 1e-6

# Test_DepthwiseConvolution

In [None]:
import torch
import numpy as np

class Test_DepthwiseConvolution(torch.nn.Module):
    """Depthwise 3x3 convolution (groups == channels == 2) -> Flatten -> Linear(8 -> 2).

    RNGs are re-seeded in the constructor and both biases start at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.depthwiseconv2d = torch.nn.Conv2d(in_channels=2, out_channels=2, kernel_size=(3,3), stride=1, padding='same', groups=2, bias=True)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=8, out_features=2, bias=True)
        for layer in (self.depthwiseconv2d, self.dense):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Convolve each channel separately, flatten and project to two outputs."""
        return self.dense(self.flatten(self.depthwiseconv2d(x)))

# Train on the GPU with SGD (momentum + weight decay) in mini-batches of 4
# using a cross-entropy loss, then compare the losses returned by Train with
# the stored reference values.
device = 'cuda'
model = Test_DepthwiseConvolution().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.01)
rows = 12  # number of samples in the generated data set
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([rows, 2, 2, 2]),
    y_numpy_array_for_tests(rows, 2),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
    batch_size=4,
)

assert abs(loss_before - 0.700163722038269) < 1e-6
assert abs(loss_after - 0.6944987177848816) < 1e-6

# TestConcatenate_NCHW_9_1_1_1

In [None]:
import torch
import numpy as np

class TestConcatenate_NCHW_9_1_1_1(torch.nn.Module):
    """Two chained 1x1 convolutions whose outputs are concatenated on the channel axis.

    The concatenation is flattened and passed to Linear(2 -> 2). RNGs are
    re-seeded in the constructor and every bias starts at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.conv2d = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1,1), stride=1, padding='valid', groups=1, bias=True)
        self.conv2d_1 = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1,1), stride=1, padding='valid', groups=1, bias=True)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=2, out_features=2, bias=True)
        for layer in (self.conv2d, self.conv2d_1, self.dense):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Concatenate both convolution outputs along dim 1 and classify."""
        first = self.conv2d(x)
        second = self.conv2d_1(first)
        stacked = torch.cat((first, second), dim=1)
        return self.dense(self.flatten(stacked))

# Train on the GPU with Nesterov SGD (momentum + weight decay) in
# mini-batches of 3 using a cross-entropy loss, then compare the losses
# returned by Train with the stored reference values.
device = 'cuda'
model = TestConcatenate_NCHW_9_1_1_1().to(device)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.05,
    momentum=0.75,
    weight_decay=0.07,
    nesterov=True,
)
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([9, 1, 1, 1]),
    y_numpy_array_for_tests(9, 2),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
    batch_size=3,
)

assert abs(loss_before - 0.6930493116378784) < 1e-6
assert abs(loss_after - 0.6860392093658447) < 1e-6

# TestLeakyReluActivation_NCHW_10_1_4_4

In [None]:
import torch
import numpy as np

class TestLeakyReluActivation_NCHW_10_1_4_4(torch.nn.Module):
    """Flatten -> Linear(16 -> 3) -> LeakyReLU(0.1) -> Linear(3 -> 3).

    RNGs are re-seeded in the constructor and both linear biases start at
    zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.flatten_dense = torch.nn.Flatten(1)
        self.dense = torch.nn.Linear(in_features=1*4*4, out_features=3, bias=True)
        self.leakyrelu = torch.nn.LeakyReLU(negative_slope=0.1)
        self.dense_1 = torch.nn.Linear(in_features=3, out_features=3, bias=True)
        for layer in (self.dense, self.dense_1):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw (pre-softmax) outputs of the two-layer perceptron."""
        hidden = self.leakyrelu(self.dense(self.flatten_dense(x)))
        return self.dense_1(hidden)

# Train on the GPU with Nesterov SGD (momentum + weight decay) in
# mini-batches of 2 using a cross-entropy loss, then compare the losses
# returned by Train with the stored reference values.
device = 'cuda'
model = TestLeakyReluActivation_NCHW_10_1_4_4().to(device)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.1,
    momentum=0.66,
    weight_decay=0.17,
    nesterov=True,
)
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([10, 1, 4, 4]),
    y_numpy_array_for_tests(10, 3),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
    batch_size=2,
)

assert abs(loss_before - 1.1183210611343384) < 1e-6
assert abs(loss_after - 1.0901305675506592) < 1e-6

# TestMultiply_NCHW_2_3_4_5_different_dimension

In [None]:
import torch
import numpy as np

class TestMultiply_NCHW_2_3_4_5_different_dimension(torch.nn.Module):
    """Two stacked 1x1 convolutions with a max-pooled multiplicative gate.

    The second convolution's output is max-pooled with a (4,5) window and
    broadcast-multiplied with the first convolution's output; the product is
    flattened and passed to Linear(40 -> 2). RNGs are re-seeded in the
    constructor and every bias starts at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.conv2d = torch.nn.Conv2d(in_channels=3, out_channels=2, kernel_size=(1,1), stride=1, padding='same', groups=1, bias=True)
        self.conv2d_1 = torch.nn.Conv2d(in_channels=2, out_channels=2, kernel_size=(1,1), stride=1, padding='same', groups=1, bias=True)
        self.max_pooling2d_ = torch.nn.MaxPool2d(kernel_size=(4,5))
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=40, out_features=2, bias=True)
        for layer in (self.conv2d, self.conv2d_1, self.dense):
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Gate the first feature map with the pooled second feature map, then classify."""
        features = self.conv2d(x)
        gate = self.max_pooling2d_(self.conv2d_1(features))
        return self.dense(self.flatten(features * gate))

# Train on the GPU with SGD (momentum + weight decay) using a cross-entropy
# loss, then compare the losses returned by Train with the stored references.
device = 'cuda'
model = TestMultiply_NCHW_2_3_4_5_different_dimension().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.07)
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([2, 3, 4, 5]),
    y_numpy_array_for_tests(2, 2),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
    batch_size=2,
)

assert abs(loss_before - 0.6887038946151733) < 1e-6
assert abs(loss_after - 0.6871695518493652) < 1e-6

# Test_SimpleRNN

In [None]:
import torch
import numpy as np

class Test_SimpleRNN(torch.nn.Module):
    """Two-layer bidirectional tanh RNN followed by a unidirectional tanh RNN.

    The model returns the last time step of the second RNN's output
    sequence. RNGs are re-seeded in the constructor and every RNN bias
    starts at zero.
    """

    def __init__(self):
        super().__init__()
        # Fixed seeds make the weight initialisation reproducible.
        torch.manual_seed(0)
        np.random.seed(0)
        # Creation order is kept as-is: it fixes the random draws per layer.
        self.bidirectional = torch.nn.RNN(input_size=1, hidden_size=2, num_layers=2, nonlinearity='tanh', bias=True, batch_first=True, dropout=0, bidirectional=True)
        # input_size 4 = 2 directions * hidden_size 2 of the layer above.
        self.simple_rnn_1 = torch.nn.RNN(input_size=4, hidden_size=2, num_layers=1, nonlinearity='tanh', bias=True, batch_first=True, dropout=0, bidirectional=False)
        # Zero every bias_* parameter (both layers and directions).
        for recurrent in (self.bidirectional, self.simple_rnn_1):
            for param_name, param in recurrent.named_parameters():
                if param_name.startswith('bias'):
                    torch.nn.init.zeros_(param)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run both RNNs and return the final time step of the second one."""
        sequence, _ = self.bidirectional(x)
        outputs, _ = self.simple_rnn_1(sequence)
        return outputs[:, -1, :]

# Train on the GPU with SGD + momentum in mini-batches of 2 using an MSE
# loss, then compare the losses returned by Train with the stored references.
device = 'cuda'
model = Test_SimpleRNN().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([4, 2, 1]),
    y_numpy_array_for_tests(4, 2),
    device=device,
    loss_criterion=torch.nn.MSELoss(),
    optimizer=optimizer,
    num_epochs=10,
    batch_size=2,
)

assert abs(loss_before - 0.48370790481567383) < 1e-6
assert abs(loss_after - 0.2722862660884857) < 1e-6

# Test_LSTM

In [None]:
import torch
import numpy as np

class Test_LSTM(torch.nn.Module):
    """Stack of a 2-layer bidirectional LSTM (dropout 0.25) and a single-layer LSTM.

    ``forward`` returns the last time step (index -1 on axis 1) of the second
    LSTM's output sequence. The torch and numpy global seeds are reset to 0 in
    the constructor and every bias vector is zeroed.
    """

    def __init__(self):
        super().__init__()
        # Re-seed before any layer exists: layer construction draws the initial
        # weights from the global torch RNG.
        torch.manual_seed(0)
        np.random.seed(0)

        # NOTE: keep the creation order (bidirectional, then lstm_1); swapping
        # it would change which random numbers each layer receives.
        self.bidirectional = torch.nn.LSTM(
            input_size=1,
            hidden_size=1,
            num_layers=2,
            bias=True,
            batch_first=True,
            dropout=0.25,
            bidirectional=True,
        )
        # input_size=2: two directions x hidden_size=1 coming out of the block above.
        self.lstm_1 = torch.nn.LSTM(
            input_size=2,
            hidden_size=1,
            num_layers=1,
            bias=True,
            batch_first=True,
            dropout=0,
            bidirectional=False,
        )

        # Zero every bias (ih/hh, each layer, forward and reverse direction).
        # zeros_ does not consume random numbers, so doing it last is harmless.
        for recurrent_block in (self.bidirectional, self.lstm_1):
            for param_name, param in recurrent_block.named_parameters():
                if param_name.startswith('bias_'):
                    torch.nn.init.zeros_(param)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run both LSTM blocks and return the final time step of the second one."""
        sequence_bidirectional, _state_bidirectional = self.bidirectional(x)
        sequence_lstm, _state_lstm = self.lstm_1(sequence_bidirectional)
        # batch_first=True, so axis 1 is the time axis; keep only its last entry.
        return sequence_lstm[:,-1,:]

# Train Test_LSTM on the GPU for a single epoch.
# Unlike the neighbouring cells, no reference losses are asserted here.
device = 'cuda'  # hard-coded: this cell needs a CUDA device
model = Test_LSTM().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Train presumably returns the loss measured before and after training
# (inferred from the names it is unpacked into) -- confirm in pytorch_utils.
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([4, 2, 1]),
    y_numpy_array_for_tests(4, 1),
    device=device,
    loss_criterion=torch.nn.MSELoss(),
    optimizer=optimizer,
    num_epochs=1,
    batch_size=2,
)


# Test_DotProductAttention

In [None]:
import torch
import numpy as np
import torch.nn.functional as F

class Test_DotProductAttention(torch.nn.Module):
    def __init__(self):
        super().__init__()
        torch.manual_seed(0)
        np.random.seed(0)
        self.conv1D_Q = torch.nn.Conv1d(in_channels=4, out_channels=2, kernel_size=1, stride=1, padding='same', bias=True)
        torch.nn.init.zeros_(self.conv1D_Q.bias)
        self.conv1D_K = torch.nn.Conv1d(in_channels=4, out_channels=2, kernel_size=1, stride=1, padding='same', bias=True)
        torch.nn.init.zeros_(self.conv1D_K.bias)
        self.conv1D_V = torch.nn.Conv1d(in_channels=4, out_channels=2, kernel_size=1, stride=1, padding='same', bias=True)
        torch.nn.init.zeros_(self.conv1D_V.bias)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=10, out_features=3, bias=True)
        torch.nn.init.zeros_(self.dense.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        y_conv1D_Q = self.conv1D_Q(x)
        y_conv1D_K = self.conv1D_K(x)
        y_conv1D_V = self.conv1D_V(x)
        y_scaleddotproductattention = F.scaled_dot_product_attention(y_conv1D_Q, y_conv1D_K, y_conv1D_V, attn_mask=None, dropout_p=0.0, is_causal=False, scale=1)
        y_flatten = self.flatten(y_scaleddotproductattention)
        y_dense = self.dense(y_flatten)
        return y_dense

# Train Test_DotProductAttention on the GPU and compare the losses with reference values.
device = 'cuda'  # hard-coded: this cell needs a CUDA device
model = Test_DotProductAttention().to(device)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=0.07,
)

# Train presumably returns the loss measured before and after training
# (inferred from the names it is unpacked into) -- confirm in pytorch_utils.
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([6, 4, 5]),
    y_numpy_array_for_tests(6, 3),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
    batch_size=2,
)

# Reference losses; both must match to within 1e-6.
assert abs(loss_before - 1.0957681735356648) < 1e-6
assert abs(loss_after - 1.0050253868103027) < 1e-6

# Test_MultiHeadAttention

In [None]:
import torch
import numpy as np
import torch.nn.functional as F

class Test_MultiHeadAttention(torch.nn.Module):
    def __init__(self):
        super().__init__()
        torch.manual_seed(0)
        np.random.seed(0)
        self.conv1D_Q = torch.nn.Conv1d(in_channels=2, out_channels=2, kernel_size=1, stride=1, padding='same', bias=True)
        torch.nn.init.zeros_(self.conv1D_Q.bias)
        self.conv1D_K = torch.nn.Conv1d(in_channels=2, out_channels=2, kernel_size=1, stride=1, padding='same', bias=True)
        torch.nn.init.zeros_(self.conv1D_K.bias)
        self.conv1D_V = torch.nn.Conv1d(in_channels=2, out_channels=2, kernel_size=1, stride=1, padding='same', bias=True)
        torch.nn.init.zeros_(self.conv1D_V.bias)
        self.attn_mask_dict = dict()
        self.multi_head_attention = torch.nn.MultiheadAttention(embed_dim=6, num_heads=3, dropout=0, bias=False, add_bias_kv=False, add_zero_attn=False, batch_first=True)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=12, out_features=3, bias=True)
        torch.nn.init.zeros_(self.dense.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        y_conv1D_Q = self.conv1D_Q(x)
        y_conv1D_K = self.conv1D_K(x)
        y_conv1D_V = self.conv1D_V(x)
        attn_mask = None
        
        # We build the attention mask 'attn_mask'
        if not isinstance(y_conv1D_Q, torch.fx.proxy.Proxy):
            sz = y_conv1D_Q.size(1)  # L: Target sequence Length
            if sz not in self.attn_mask_dict: self.attn_mask_dict[sz] = torch.nn.Transformer.generate_square_subsequent_mask(sz, device = x.device, dtype = x.dtype)
            attn_mask = self.attn_mask_dict[sz]
        
        y_multi_head_attention, _ = self.multi_head_attention(y_conv1D_Q, y_conv1D_K, y_conv1D_V, key_padding_mask=None, need_weights=False, attn_mask=attn_mask, average_attn_weights=False, is_causal=True)
        y_flatten = self.flatten(y_multi_head_attention)
        y_dense = self.dense(y_flatten)
        return y_dense

# Train Test_MultiHeadAttention on the GPU and compare the losses with reference values.
device = 'cuda'  # hard-coded: this cell needs a CUDA device
model = Test_MultiHeadAttention().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Train presumably returns the loss measured before and after training
# (inferred from the names it is unpacked into) -- confirm in pytorch_utils.
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([10, 2, 6]),
    y_numpy_array_for_tests(10, 3),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
    batch_size=2,
)

# Reference losses; both must match to within 1e-6.
assert abs(loss_before - 1.103563904762268) < 1e-6
assert abs(loss_after - 1.0815070867538452) < 1e-6

# Test_MultiHeadAttention_with_bias

In [None]:
import torch
import numpy as np
import torch.nn.functional as F

class Test_MultiHeadAttention_with_bias(torch.nn.Module):
    def __init__(self):
        super().__init__()
        torch.manual_seed(0)
        np.random.seed(0)
        self.conv1D_Q = torch.nn.Conv1d(in_channels=2, out_channels=2, kernel_size=1, stride=1, padding='same', bias=True)
        torch.nn.init.zeros_(self.conv1D_Q.bias)
        self.conv1D_K = torch.nn.Conv1d(in_channels=2, out_channels=2, kernel_size=1, stride=1, padding='same', bias=True)
        torch.nn.init.zeros_(self.conv1D_K.bias)
        self.conv1D_V = torch.nn.Conv1d(in_channels=2, out_channels=2, kernel_size=1, stride=1, padding='same', bias=True)
        torch.nn.init.zeros_(self.conv1D_V.bias)
        self.attn_mask_dict = dict()
        self.multi_head_attention = torch.nn.MultiheadAttention(embed_dim=6, num_heads=3, dropout=0, bias=True, add_bias_kv=False, add_zero_attn=False, batch_first=True)
        self.flatten = torch.nn.Flatten()
        self.dense = torch.nn.Linear(in_features=12, out_features=3, bias=True)
        torch.nn.init.zeros_(self.dense.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        y_conv1D_Q = self.conv1D_Q(x)
        y_conv1D_K = self.conv1D_K(x)
        y_conv1D_V = self.conv1D_V(x)
        attn_mask = None
        
        # We build the attention mask 'attn_mask'
        if not isinstance(y_conv1D_Q, torch.fx.proxy.Proxy):
            sz = y_conv1D_Q.size(1)  # L: Target sequence Length
            if sz not in self.attn_mask_dict: self.attn_mask_dict[sz] = torch.nn.Transformer.generate_square_subsequent_mask(sz, device = x.device, dtype = x.dtype)
            attn_mask = self.attn_mask_dict[sz]
        
        y_multi_head_attention, _ = self.multi_head_attention(y_conv1D_Q, y_conv1D_K, y_conv1D_V, key_padding_mask=None, need_weights=False, attn_mask=attn_mask, average_attn_weights=False, is_causal=True)
        y_flatten = self.flatten(y_multi_head_attention)
        y_dense = self.dense(y_flatten)
        return y_dense

# Train Test_MultiHeadAttention_with_bias on the GPU and compare the losses with reference values.
device = 'cuda'  # hard-coded: this cell needs a CUDA device
model = Test_MultiHeadAttention_with_bias().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Train presumably returns the loss measured before and after training
# (inferred from the names it is unpacked into) -- confirm in pytorch_utils.
loss_before, loss_after = Train(
    model,
    numpy_array_for_tests([10, 2, 6]),
    y_numpy_array_for_tests(10, 3),
    device=device,
    loss_criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=10,
    batch_size=2,
)

# Reference losses; both must match to within 1e-6.
assert abs(loss_before - 1.1090373992919922) < 1e-6
assert abs(loss_after - 1.0816659927368164) < 1e-6