In [28]:
!git clone https://github.com/SluKate/GVS-3.git


Cloning into 'GVS-3'...
remote: Enumerating objects: 6, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 6 (delta 0), reused 3 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (6/6), 11.46 KiB | 2.29 MiB/s, done.


In [29]:
!apt-get install -y nvidia-cuda-toolkit

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
nvidia-cuda-toolkit is already the newest version (11.5.1-1ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 50 not upgraded.


In [30]:
%ls

[0m[01;34mGVS-3[0m/  GVS-3.2.ipynb  linearlayer.cu


In [31]:
%cd GVS-3/

/content/GVS-3/GVS-3


In [32]:
%ls

GVS-3.2.ipynb  linearlayer.cu


In [33]:
!apt-get install ninja-build
!apt-get install build-essential

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ninja-build is already the newest version (1.10.1-1).
0 upgraded, 0 newly installed, 0 to remove and 50 not upgraded.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
build-essential is already the newest version (12.9ubuntu3).
0 upgraded, 0 newly installed, 0 to remove and 50 not upgraded.


In [34]:
!nvcc --version
!gcc --version


nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
gcc (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
Copyright (C) 2019 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [35]:
!sudo apt update
!sudo apt install gcc-9 g++-9


[33m0% [Working][0m            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:4 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:6 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:7 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Ign:9 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:10 https://r2u.stat.illinois.edu/ubuntu jammy Release
Hit:12 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Fetched 257 kB in 2s (148 kB/s)
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
50 packages can be upgraded. Run 'apt list --upg

In [36]:
!sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100
!sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 100

!sudo update-alternatives --config gcc
!sudo update-alternatives --config g++

!gcc --version
!g++ --version


There is only one alternative in link group gcc (providing /usr/bin/gcc): /usr/bin/gcc-9
Nothing to configure.
There is only one alternative in link group g++ (providing /usr/bin/g++): /usr/bin/g++-9
Nothing to configure.
gcc (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
Copyright (C) 2019 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

g++ (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
Copyright (C) 2019 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [37]:
import linearlayer

In [38]:
import os
import torch
import torch.utils.cpp_extension as cpp_extension
import unittest

# Архитектура CUDA
os.environ['TORCH_CUDA_ARCH_LIST'] = '7.5;8.0'

# Компиляция CUDA-расширения
linearlayer = cpp_extension.load(
    name='linearlayer',
    sources=['linearlayer.cu'],
    extra_cuda_cflags=['-gencode', 'arch=compute_75,code=sm_75']
)

class TestDotProductCuda(unittest.TestCase):
    def __init__(self, num_tests=1, *args, **kwargs):
        super(TestDotProductCuda, self).__init__(*args, **kwargs)
        self.num_tests = num_tests

    def runTest(self):
        results = []
        for _ in range(self.num_tests):
            m = 5  # количество примеров
            k = 3  # количество входных признаков
            n = 4  # количество выходных признаков

            # Создайте тензоры
            x = torch.randn(m, k).cuda()  # Входной тензор
            w = torch.randn(n, k).cuda()  # Тензор весов
            b = torch.randn(n).cuda()      # Тензор смещений

            # Вычисление с помощью расширения
            cuda_result = linearlayer.my_forward_linear(x, w, b)

            linear_layer = torch.nn.Linear(k, n).cuda()
            with torch.no_grad():
                linear_layer.weight.copy_(w)  # Устанавливаем веса
                linear_layer.bias.copy_(b)     # Устанавливаем смещения
                y_builtin = linear_layer(x)     # Вызов библиотечной функции

            # Проверка совпадения результатов с приемлемой точностью
            self.assertTrue(torch.allclose(cuda_result, y_builtin, atol=1e-6),
                            f"Results do not match: CUDA result = {cuda_result}, PyTorch result = {y_builtin}")

            # Преобразование тензоров в NumPy
            results.append((cuda_result.detach().cpu().numpy(), y_builtin.detach().cpu().numpy()))

        for i, (cuda_result, torch_result) in enumerate(results):
            print(f"Test {i + 1}: CUDA result = \n{cuda_result},\n PyTorch result = \n{torch_result}\n")

if __name__ == '__main__':
    num_tests = int(input("Введите количество тестов: "))  # Ввод количества тестов
    # Создаем тестовый набор
    suite = unittest.TestSuite()
    suite.addTest(TestDotProductCuda(num_tests=num_tests))
    unittest.TextTestRunner().run(suite)


Введите количество тестов: 100


.
----------------------------------------------------------------------
Ran 1 test in 0.362s

OK


Test 1: CUDA result = 
[[ 1.9112868   1.1171244  -1.3567338  -1.3373904 ]
 [ 1.4314034   1.1907101  -0.8126314  -1.2536358 ]
 [ 1.1036934   1.0439898  -0.48358998 -1.0921097 ]
 [-1.8847437   1.3292809   1.5620627   4.116872  ]
 [ 0.9215004   0.06917584 -0.33757943 -1.0781134 ]],
 PyTorch result = 
[[ 1.911287    1.1171244  -1.3567338  -1.3373905 ]
 [ 1.4314034   1.1907101  -0.8126314  -1.2536358 ]
 [ 1.1036934   1.0439897  -0.48358998 -1.0921096 ]
 [-1.8847437   1.3292809   1.5620627   4.116872  ]
 [ 0.92150044  0.06917584 -0.33757943 -1.0781134 ]]

Test 2: CUDA result = 
[[-0.14660183 -1.5228994   0.4673218  -0.78254354]
 [-0.6611709  -0.18384695 -1.2055745   0.22811282]
 [ 1.0994222  -1.4229728   1.7196178  -2.2288678 ]
 [ 1.1223315  -2.3550603   3.1103303  -3.099592  ]
 [ 0.37547863 -1.8824222   0.28672877 -0.505186  ]],
 PyTorch result = 
[[-0.1466018  -1.5228994   0.4673218  -0.78254354]
 [-0.66117084 -0.18384695 -1.2055745   0.22811288]
 [ 1.0994221  -1.4229728   1.7196178  -2.22