# Mount Drive, open the main directory and install all the necessary tools

In [None]:
# Attach Google Drive to the Colab runtime's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Enter the project directory on Drive. The folder is spelled "CommonAssignmentCUDA",
# exactly as in the compile/run cells of this notebook.
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/

In [None]:
# Install the nvcc Jupyter plugin straight from GitHub.
# HTTPS is used because GitHub no longer serves the unauthenticated git:// protocol;
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): this installs the repository's current HEAD; if the extension name
# `nvcc_plugin` or the `%%cu` magic is not found afterwards, pin a compatible revision.
%pip install git+https://github.com/andreinechaev/nvcc4jupyter.git

In [None]:
# Load the nvcc plugin extension; the %%cu cell further down relies on it.
%load_ext nvcc_plugin

# To do if the GPU is a K80

In [None]:
# Remove the pre-installed CUDA / NVIDIA packages before installing CUDA 9.2.
# -y answers the confirmation prompts so the cell also works under "Run all";
# the globs are quoted so that apt, not the shell, interprets them;
# xargs -r skips dpkg entirely when no cuda-* package is left to purge.
!apt-get -y --purge remove cuda 'nvidia*' 'libnvidia-*'
!dpkg -l | grep cuda- | awk '{print $2}' | xargs -r -n1 dpkg --purge
!apt-get -y remove 'cuda-*'
!apt-get -y autoremove
!apt-get update

In [None]:
# Download NVIDIA's local CUDA 9.2 repository package, register it together with
# its signing key, refresh the package index and install the toolkit.
# -y keeps the install from blocking on the confirmation prompt.
!wget https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1710-9-2-local_9.2.88-1_amd64 -O cuda-repo-ubuntu1710-9-2-local_9.2.88-1_amd64.deb
!dpkg -i cuda-repo-ubuntu1710-9-2-local_9.2.88-1_amd64.deb
!apt-key add /var/cuda-repo-9-2-local/7fa2af80.pub
!apt-get update
!apt-get -y install cuda-9.2


# All GPU specs

In [None]:
# Report the installed CUDA compiler version and the GPU status as seen by the NVIDIA driver.
!nvcc --version
!nvidia-smi

In [None]:
%%cu

#include <stdio.h>
#include <stdlib.h>

/*
 * Print the hardware properties of every CUDA device visible to this process.
 *
 * For each device the report lists its name and compute capability, per-SM and
 * global resources (clocks, SM count, shared memory, registers, L2, global memory),
 * the block and grid dimension limits, and the per-block resources.
 * Everything is written to stdout; nothing is returned.
 *
 * If the device count cannot be obtained the error is printed and the function
 * returns. If the properties of one device cannot be read, the error is printed
 * for that device and the remaining devices are still reported.
 */
void deviceQuery()
{
  cudaDeviceProp prop;
  int nDevices = 0, i;
  cudaError_t ierr;

  ierr = cudaGetDeviceCount(&nDevices);
  if (ierr != cudaSuccess)
  {
    printf("Sync error: %s\n", cudaGetErrorString(ierr));
    return;  /* without a valid device count there is nothing to enumerate */
  }

  for (i = 0; i < nDevices; ++i)
  {
    ierr = cudaGetDeviceProperties(&prop, i);
    printf("Device number: %d\n", i);
    if (ierr != cudaSuccess)
    {
      /* prop is not valid here: report the failure instead of printing garbage */
      printf("  Error reading device properties: %s\n\n", cudaGetErrorString(ierr));
      continue;
    }

    printf("  Device name: %s\n", prop.name);
    printf("  Compute capability: %d.%d\n\n", prop.major, prop.minor);

    /* Per-SM and device-wide resources. size_t fields are printed with %zu. */
    printf("  Clock Rate: %d kHz\n", prop.clockRate);
    printf("  Total SMs: %d \n", prop.multiProcessorCount);
    printf("  Shared Memory Per SM: %zu bytes\n", prop.sharedMemPerMultiprocessor);
    printf("  Registers Per SM: %d 32-bit\n", prop.regsPerMultiprocessor);
    printf("  Max threads per SM: %d\n", prop.maxThreadsPerMultiProcessor);
    printf("  L2 Cache Size: %d bytes\n", prop.l2CacheSize);
    printf("  Total Global Memory: %zu bytes\n", prop.totalGlobalMem);
    printf("  Memory Clock Rate: %d kHz\n\n", prop.memoryClockRate);

    /* Block dimension limits. */
    printf("  Max threads per block: %d\n", prop.maxThreadsPerBlock);
    printf("  Max threads in X-dimension of block: %d\n", prop.maxThreadsDim[0]);
    printf("  Max threads in Y-dimension of block: %d\n", prop.maxThreadsDim[1]);
    printf("  Max threads in Z-dimension of block: %d\n\n", prop.maxThreadsDim[2]);

    /* Grid dimension limits. */
    printf("  Max blocks in X-dimension of grid: %d\n", prop.maxGridSize[0]);
    printf("  Max blocks in Y-dimension of grid: %d\n", prop.maxGridSize[1]);
    printf("  Max blocks in Z-dimension of grid: %d\n\n", prop.maxGridSize[2]);

    /* Per-block resources. */
    printf("  Shared Memory Per Block: %zu bytes\n", prop.sharedMemPerBlock);
    printf("  Registers Per Block: %d 32-bit\n", prop.regsPerBlock);
    printf("  Warp size: %d\n\n", prop.warpSize);
  }
}

/* Entry point: print the report produced by deviceQuery() and exit with status 0. */
int main()
{
    deviceQuery();
    return 0;
}

# GPU Bandwidth

In [None]:
# Build the bandwidthTest sample inside the CUDA samples tree and run it in range
# mode with the --start/--end/--increment values given below
# (presumably transfer sizes in bytes — confirm with `./bandwidthTest --help`).
%cd /usr/local/cuda/samples
%cd 1_Utilities/bandwidthTest/
!make
!./bandwidthTest --mode-range --start=1000 --end=1000000 --increment=100000

# CPU INFO

In [None]:
# Print the kernel's description of the host CPU(s).
!cat /proc/cpuinfo

# MEMORY INFO

In [None]:
# Print the kernel's memory usage statistics for the host.
!cat /proc/meminfo

# DISK INFO

In [None]:
# Show free and used space of the mounted filesystems in human-readable units.
!df -h

# Counting Sort - Global Memory

In [10]:
# Compile the global-memory counting sort and run it once.
# NOTE(review): the three arguments are presumably <number of elements> <blocks>
# <threads per block> — confirm against CountingSort_Global.cu.
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Global CountingSort_Global.cu
!./CountingSort_Global 33554432 52 512

/content/drive/MyDrive/CUDA/CommonAssignmentCUDA
Kernel elapsed time 2.073844s 

occorrenze trovate: 33554432, occorrenze calcolate: 33554432
ordinato = 1


# TEST Counting Sort - Global Memory - K80

In [None]:
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Global CountingSort_Global.cu

# Pairs of (second, third) command-line arguments to benchmark; presumably
# (blocks, threads per block) — confirm against CountingSort_Global.cu.
# The product is 26624 for every pair. Each pair is run 20 times in a row.
launch_configs = [(52, 512), (104, 256), (208, 128), (416, 64), (832, 32),
                  (1664, 16), (3328, 8), (6656, 4), (13312, 2)]
for grid_size, block_size in launch_configs:
  for _ in range(20):
    !./CountingSort_Global 33554432 {grid_size} {block_size}

# TEST Counting Sort - Global Memory - T4

In [None]:
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Global CountingSort_Global.cu

# Pairs of (second, third) command-line arguments to benchmark; presumably
# (blocks, threads per block) — confirm against CountingSort_Global.cu.
# The product is 40960 for every pair. Each pair is run 20 times in a row.
launch_configs = [(80, 512), (160, 256), (320, 128), (640, 64), (1280, 32),
                  (2560, 16), (5120, 8), (10240, 4), (20480, 2)]
for grid_size, block_size in launch_configs:
  for _ in range(20):
    !./CountingSort_Global 33554432 {grid_size} {block_size}

# Counting Sort - Shared Memory

In [11]:
# Compile the shared-memory counting sort and run it once.
# NOTE(review): the three arguments are presumably <number of elements> <blocks>
# <threads per block> — confirm against CountingSort_Shared.cu.
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Shared CountingSort_Shared.cu
!./CountingSort_Shared 33554432 52 512

/content/drive/MyDrive/CUDA/CommonAssignmentCUDA
Kernel elapsed time 0.220519s 

occorrenze trovate: 33554432, occorrenze calcolate: 33554432
ordinato = 1


# TEST Counting Sort - Shared Memory - K80

In [None]:
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Shared CountingSort_Shared.cu

# Pairs of (second, third) command-line arguments to benchmark; presumably
# (blocks, threads per block) — confirm against CountingSort_Shared.cu.
# The product is 26624 for every pair. Each pair is run 20 times in a row.
launch_configs = [(52, 512), (104, 256), (208, 128), (416, 64), (832, 32),
                  (1664, 16), (3328, 8), (6656, 4), (13312, 2)]
for grid_size, block_size in launch_configs:
  for _ in range(20):
    !./CountingSort_Shared 33554432 {grid_size} {block_size}

# TEST Counting Sort - Shared Memory - T4

In [None]:
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Shared CountingSort_Shared.cu

# Pairs of (second, third) command-line arguments to benchmark; presumably
# (blocks, threads per block) — confirm against CountingSort_Shared.cu.
# The product is 40960 for every pair. Each pair is run 20 times in a row.
launch_configs = [(80, 512), (160, 256), (320, 128), (640, 64), (1280, 32),
                  (2560, 16), (5120, 8), (10240, 4), (20480, 2)]
for grid_size, block_size in launch_configs:
  for _ in range(20):
    !./CountingSort_Shared 33554432 {grid_size} {block_size}

# Counting Sort - Texture Memory

In [12]:
# Compile the texture-memory counting sort and run it once.
# NOTE(review): the three arguments are presumably <number of elements> <blocks>
# <threads per block> — confirm against CountingSort_Texture.cu.
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Texture CountingSort_Texture.cu
!./CountingSort_Texture 33554432 52 512

/content/drive/MyDrive/CUDA/CommonAssignmentCUDA
Kernel elapsed time 0.543635s 

occorrenze trovate: 33554432, occorrenze calcolate: 33554432
ordinato = 1


# TEST Counting Sort - Texture Memory - K80

In [None]:
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Texture CountingSort_Texture.cu

# Pairs of (second, third) command-line arguments to benchmark; presumably
# (blocks, threads per block) — confirm against CountingSort_Texture.cu.
# The product is 26624 for every pair. Each pair is run 20 times in a row.
launch_configs = [(52, 512), (104, 256), (208, 128), (416, 64), (832, 32),
                  (1664, 16), (3328, 8), (6656, 4), (13312, 2)]
for grid_size, block_size in launch_configs:
  for _ in range(20):
    !./CountingSort_Texture 33554432 {grid_size} {block_size}

# TEST Counting Sort - Texture Memory - T4

In [None]:
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Texture CountingSort_Texture.cu

# Pairs of (second, third) command-line arguments to benchmark; presumably
# (blocks, threads per block) — confirm against CountingSort_Texture.cu.
# The product is 40960 for every pair. Each pair is run 20 times in a row.
launch_configs = [(80, 512), (160, 256), (320, 128), (640, 64), (1280, 32),
                  (2560, 16), (5120, 8), (10240, 4), (20480, 2)]
for grid_size, block_size in launch_configs:
  for _ in range(20):
    !./CountingSort_Texture 33554432 {grid_size} {block_size}

# Counting Sort - Texture and Shared Memory

In [13]:
# Compile the texture + shared memory counting sort and run it once.
# NOTE(review): the three arguments are presumably <number of elements> <blocks>
# <threads per block> — confirm against CountingSort_Shared_Texture.cu.
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Shared_Texture CountingSort_Shared_Texture.cu
!./CountingSort_Shared_Texture 33554432 52 512

/content/drive/MyDrive/CUDA/CommonAssignmentCUDA
Kernel elapsed time 0.230444s 

occorrenze trovate: 33554432, occorrenze calcolate: 33554432
ordinato = 1


# TEST Counting Sort - Texture Memory and Shared Memory - K80

In [None]:
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Shared_Texture CountingSort_Shared_Texture.cu

# Pairs of (second, third) command-line arguments to benchmark; presumably
# (blocks, threads per block) — confirm against CountingSort_Shared_Texture.cu.
# The product is 26624 for every pair. Each pair is run 20 times in a row.
launch_configs = [(52, 512), (104, 256), (208, 128), (416, 64), (832, 32),
                  (1664, 16), (3328, 8), (6656, 4), (13312, 2)]
for grid_size, block_size in launch_configs:
  for _ in range(20):
    !./CountingSort_Shared_Texture 33554432 {grid_size} {block_size}

# TEST Counting Sort - Texture Memory and Shared Memory - T4

In [None]:
%cd /content/drive/MyDrive/CUDA/CommonAssignmentCUDA/
!nvcc -o CountingSort_Shared_Texture CountingSort_Shared_Texture.cu

# Pairs of (second, third) command-line arguments to benchmark; presumably
# (blocks, threads per block) — confirm against CountingSort_Shared_Texture.cu.
# The product is 40960 for every pair. Each pair is run 20 times in a row.
launch_configs = [(80, 512), (160, 256), (320, 128), (640, 64), (1280, 32),
                  (2560, 16), (5120, 8), (10240, 4), (20480, 2)]
for grid_size, block_size in launch_configs:
  for _ in range(20):
    !./CountingSort_Shared_Texture 33554432 {grid_size} {block_size}