# Sprawozdanie z zajęć nr 9
### Mateusz Stelmaszek
### 80275

In [None]:
!nvcc --version
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin

# GPU 


In [None]:
%%cu
#include <stdio.h>
// Width and height, in pixels, of the square image that is rendered and saved.
#define DIM 6000
// Pseudo-random float in the range [0, x], driven by rand().
// The argument is parenthesised so that a compound argument such as
// rnd( a + b ) scales the whole expression rather than only its last term.
#define rnd( x ) ((x) * rand() / RAND_MAX)
// Sentinel depth: Sphere::hit returns -INF when the ray misses the sphere.
#define INF     2e10f
// One sphere of the scene: colour, radius and centre coordinates.
struct Sphere {
    float   r,b,g;      // colour components (declared in the order r, b, g)
    float   radius;
    float   x,y,z;      // centre of the sphere

    // Intersects the ray that runs parallel to the z axis through (ox, oy)
    // with this sphere.
    //   ox, oy  ray position in the image plane
    //   n       out: on a hit receives dz / radius, which the caller uses as
    //           a shading factor; left untouched on a miss
    // Returns the z coordinate of the hit point (dz + z), or -INF on a miss.
    __device__ float hit( float ox, float oy, float *n ) {
        const float offX = ox - x;
        const float offY = oy - y;
        const float radiusSq = radius*radius;

        // Guard clause: the ray passes outside the sphere's silhouette.
        if (!(offX*offX + offY*offY < radiusSq))
            return -INF;

        const float depth = sqrtf( radiusSq - offX*offX - offY*offY );
        *n = depth / sqrtf( radiusSq );
        return depth + z;
    }
};
// Number of spheres in the scene (length of the array passed to the kernel).
#define SPHERES 20

// Renders the scene, one thread per pixel.
//   s    device array of SPHERES spheres
//   ptr  device RGBA buffer of DIM * DIM * 4 bytes, rows of DIM pixels
// Launch layout: a 2D grid of 2D blocks covering at least DIM x DIM threads.
// Threads that fall outside the image return without writing, so the grid may
// be rounded up to a whole number of blocks.
__global__ void kernel( Sphere *s, unsigned char *ptr ) {
    // map from threadIdx/BlockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    // A ceil-div grid overshoots whenever DIM is not a multiple of the block
    // size; those surplus threads must not touch the bitmap.
    if (x >= DIM || y >= DIM)
        return;

    // Row pitch is the image width (what save_to_file assumes), not the grid
    // width. size_t keeps the byte offset from overflowing for large DIM.
    size_t offset = (size_t)y * DIM + x;

    // Shift the origin to the image centre.
    float   x1 = (x - DIM/2);
    float   y1 = (y - DIM/2);
    float   r=0, g=0, b=0;
    float   maxz = -INF;
    // Keep the colour of the sphere whose hit point has the largest z.
    for(int i=0; i<SPHERES; i++) {
        float   n;
        float   t = s[i].hit( x1, y1, &n );
        if (t > maxz) {
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
        }
    }
    ptr[offset*4 + 0] = (int)(r * 255);
    ptr[offset*4 + 1] = (int)(g * 255);
    ptr[offset*4 + 2] = (int)(b * 255);
    ptr[offset*4 + 3] = 255;
}
// Holder for the bitmap pointer; main stores the cudaMalloc'ed image buffer
// in dev_bitmap.
struct DataBlock{
    unsigned char *dev_bitmap;
};
// Writes the rendered image as a plain-text PPM (P3) file.
//   ptr  host buffer of DIM * DIM pixels, 4 bytes per pixel; only the first
//        three bytes of each pixel are written, the fourth is skipped
// If the buffer is NULL or the file cannot be opened, a diagnostic is printed
// and nothing is written.
// NOTE(review): this is the CUDA version of the program, yet the output file
// is called "RTXCPU.ppm" - the name is kept unchanged; confirm it is intended.
void save_to_file(unsigned char *ptr){
  if(ptr==NULL){
    fprintf(stderr,"save_to_file: no bitmap to write\n");
    return;
  }
  FILE *fp=fopen("RTXCPU.ppm","w");
  if(fp==NULL){
    // Previously a failed fopen led straight to fprintf(NULL, ...).
    perror("RTXCPU.ppm");
    return;
  }
  fprintf(fp,"P3\n%d %d\n255\n", DIM, DIM);
  for(int y=0;y<DIM;y++){
    for(int x=0;x<DIM;x++){
      int offset=x+y*DIM;
      fprintf(fp,"\n%d %d %d", ptr[offset*4+0], ptr[offset*4+1], ptr[offset*4+2]);
    }
  }
  fclose(fp);
}
// Prints a diagnostic for a failed CUDA runtime call.
// Returns true when err is cudaSuccess, false otherwise.
static bool cuda_ok( cudaError_t err, const char *what ) {
  if (err == cudaSuccess)
    return true;
  fprintf( stderr, "%s failed: %s\n", what, cudaGetErrorString( err ) );
  return false;
}

// Renders the scene on the GPU, times the kernel with CUDA events, appends
// nothing but the elapsed milliseconds to "czascpu.txt" and saves the image.
// Returns 0 on success and 1 if any allocation, file or CUDA call fails.
// NOTE(review): the timing file is named "czascpu.txt" although it holds the
// GPU measurement; the name is kept because the plotting cell reads it under
// that name.
int main(void) {
  const size_t image_size = (size_t)DIM * DIM * 4;
  const size_t spheres_size = sizeof(Sphere) * SPHERES;

  // Every resource starts out null so the single cleanup section below can
  // release whatever was actually acquired.
  FILE *fp = NULL;
  unsigned char *bitmap = NULL;
  unsigned char *dev_bitmap = NULL;
  Sphere *s = NULL;
  Sphere *temp_s = NULL;
  cudaEvent_t start = NULL;
  cudaEvent_t stop = NULL;
  bool ok = false;

  do {
    fp = fopen("czascpu.txt", "w");
    if (fp == NULL) {
      perror("czascpu.txt");
      break;
    }

    bitmap = (unsigned char*)malloc( image_size );
    temp_s = (Sphere*)malloc( spheres_size );
    if (bitmap == NULL || temp_s == NULL) {
      fprintf( stderr, "host memory allocation failed\n" );
      break;
    }

    if (!cuda_ok( cudaEventCreate( &start ), "cudaEventCreate(start)" )) break;
    if (!cuda_ok( cudaEventCreate( &stop ), "cudaEventCreate(stop)" )) break;
    if (!cuda_ok( cudaMalloc( (void**)&dev_bitmap, image_size ), "cudaMalloc(bitmap)" )) break;
    // allocate memory for the Sphere dataset
    if (!cuda_ok( cudaMalloc( (void**)&s, spheres_size ), "cudaMalloc(spheres)" )) break;

    // Fill the temporary host array. Every field is indexed by i; the
    // previous code wrote b and z to element 1 on each iteration, leaving
    // those fields uninitialised for all other spheres.
    for (int i=0; i<SPHERES; i++) {
      temp_s[i].r = rnd( 1.0f );
      temp_s[i].g = rnd( 1.0f );
      temp_s[i].b = rnd( 1.0f );
      temp_s[i].x = rnd( 1000.0f ) - 500;
      temp_s[i].y = rnd( 1000.0f ) - 500;
      temp_s[i].z = rnd( 1000.0f ) - 500;
      temp_s[i].radius = rnd( 100.0f ) + 20;
    }
    // Copy once, after the whole array is initialised, and only then release
    // the host copy (doing this inside the loop freed it after sphere 0).
    if (!cuda_ok( cudaMemcpy( s, temp_s, spheres_size, cudaMemcpyHostToDevice ),
                  "cudaMemcpy(spheres)" )) break;
    free( temp_s );
    temp_s = NULL;

    dim3 block(16, 16);
    dim3 grid((DIM + block.x - 1) / block.x, (DIM + block.y - 1) / block.y);

    // record the start time
    if (!cuda_ok( cudaEventRecord( start, 0 ), "cudaEventRecord(start)" )) break;
    // kernel
    kernel<<<grid, block>>>(s, dev_bitmap);
    // A launch does not return a status; a bad configuration shows up here.
    if (!cuda_ok( cudaGetLastError(), "kernel launch" )) break;
    // record the stop time and wait until the kernel has really finished
    if (!cuda_ok( cudaEventRecord( stop, 0 ), "cudaEventRecord(stop)" )) break;
    if (!cuda_ok( cudaEventSynchronize( stop ), "cudaEventSynchronize" )) break;

    float elapsedTime = 0.0f;   // milliseconds
    if (!cuda_ok( cudaEventElapsedTime( &elapsedTime, start, stop ),
                  "cudaEventElapsedTime" )) break;
    if (!cuda_ok( cudaMemcpy( bitmap, dev_bitmap, image_size, cudaMemcpyDeviceToHost ),
                  "cudaMemcpy(bitmap)" )) break;

    // cudaEventElapsedTime reports milliseconds, so convert for the line
    // that is labelled in seconds.
    printf("czas: %f s\n", elapsedTime / 1000.0f);
    printf("czas: %.3f ms\n", elapsedTime);
    fprintf(fp, "%f\n", elapsedTime);

    save_to_file(bitmap);
    ok = true;
  } while (0);

  // Release everything that was acquired, on success and on failure alike.
  if (fp != NULL) fclose(fp);
  free( temp_s );
  free( bitmap );
  if (s != NULL) cudaFree( s );
  if (dev_bitmap != NULL) cudaFree( dev_bitmap );
  if (start != NULL) cudaEventDestroy( start );
  if (stop != NULL) cudaEventDestroy( stop );

  return ok ? 0 : 1;
}

# CPU

In [None]:
%%cu
#include <stdio.h>
// Width and height, in pixels, of the square image that is rendered and saved.
#define DIM 6000
// Pseudo-random float in the range [0, x], driven by rand().
// The argument is parenthesised so that a compound argument such as
// rnd( a + b ) scales the whole expression rather than only its last term.
#define rnd( x ) ((x) * rand() / RAND_MAX)
// Sentinel depth: Sphere::hit returns -INF when the ray misses the sphere.
#define INF     2e10f
// One sphere of the scene: colour, radius and centre coordinates.
struct Sphere {
    float   r,b,g;      // colour components (declared in the order r, b, g)
    float   radius;
    float   x,y,z;      // centre of the sphere

    // Intersects the ray that runs parallel to the z axis through (ox, oy)
    // with this sphere.
    //   ox, oy  ray position in the image plane
    //   n       out: on a hit receives dz / radius, which the caller uses as
    //           a shading factor; left untouched on a miss
    // Returns the z coordinate of the hit point (dz + z), or -INF on a miss.
    float hit( float ox, float oy, float *n ) {
        const float offX = ox - x;
        const float offY = oy - y;
        const float radiusSq = radius*radius;

        // Guard clause: the ray passes outside the sphere's silhouette.
        if (!(offX*offX + offY*offY < radiusSq))
            return -INF;

        const float depth = sqrtf( radiusSq - offX*offX - offY*offY );
        *n = depth / sqrtf( radiusSq );
        return depth + z;
    }
};
// Number of spheres in the scene (length of the array passed to kernel).
#define SPHERES 20

// Renders the scene sequentially on the host, pixel by pixel.
//   s    array of SPHERES spheres
//   ptr  RGBA output buffer of DIM * DIM * 4 bytes, rows of DIM pixels
void kernel(Sphere *s, unsigned char *ptr) {
  const int half = DIM / 2;   // shifts the origin to the image centre

  for (int row = 0; row < DIM; ++row) {
    const float oy = (row - half);

    for (int col = 0; col < DIM; ++col) {
      const float ox = (col - half);

      float red = 0, green = 0, blue = 0;
      float nearest = -INF;

      // Keep the colour of the sphere whose hit point has the largest z.
      for (Sphere *sp = s; sp != s + SPHERES; ++sp) {
        float shade;
        const float depth = sp->hit(ox, oy, &shade);
        if (!(depth > nearest))
          continue;
        red = sp->r * shade;
        green = sp->g * shade;
        blue = sp->b * shade;
        nearest = depth;
      }

      unsigned char *pixel = ptr + (col + row * DIM) * 4;
      pixel[0] = (int)(red * 255);
      pixel[1] = (int)(green * 255);
      pixel[2] = (int)(blue * 255);
      pixel[3] = 255;
    }
  }
}
// Holder for a bitmap pointer.
// NOTE(review): nothing in the visible code of this cell uses this struct -
// confirm whether it can be removed.
struct DataBlock{
    unsigned char *dev_bitmap;
};
// Writes the rendered image as a plain-text PPM (P3) file.
//   ptr  buffer of DIM * DIM pixels, 4 bytes per pixel; only the first three
//        bytes of each pixel are written, the fourth is skipped
// If the buffer is NULL or the file cannot be opened, a diagnostic is printed
// and nothing is written.
// NOTE(review): this is the host-only version of the program, yet the output
// file is called "RTXGPU.ppm" - the name is kept unchanged; confirm it is
// intended.
void save_to_file(unsigned char *ptr){
  if(ptr==NULL){
    fprintf(stderr,"save_to_file: no bitmap to write\n");
    return;
  }
  FILE *fp=fopen("RTXGPU.ppm","w");
  if(fp==NULL){
    // Previously a failed fopen led straight to fprintf(NULL, ...).
    perror("RTXGPU.ppm");
    return;
  }
  fprintf(fp,"P3\n%d %d\n255\n", DIM, DIM);
  for(int y=0;y<DIM;y++){
    for(int x=0;x<DIM;x++){
      int offset=x+y*DIM;
      fprintf(fp,"\n%d %d %d", ptr[offset*4+0], ptr[offset*4+1], ptr[offset*4+2]);
    }
  }
  fclose(fp);
}
// Renders the scene on the host, measures the processor time used with
// clock(), writes the elapsed milliseconds to "czasgpu6000.txt" and saves the
// image. Returns 0 on success and 1 if a file or allocation fails.
// NOTE(review): the timing file is named "czasgpu6000.txt" although it holds
// the host measurement; the name is kept because the plotting cell reads it
// under that name.
int main(void) {
  FILE *fp = fopen("czasgpu6000.txt", "w");
  if (fp == NULL) {
    perror("czasgpu6000.txt");
    return 1;
  }

  // take the start time (the measured span covers allocation, scene set-up
  // and rendering, as before)
  clock_t start, stop;
  start = clock();

  // allocate the sphere array and the output bitmap
  Sphere *s = (Sphere*)malloc(sizeof(Sphere)* SPHERES );
  unsigned char *bitmap = (unsigned char*)malloc(DIM* DIM*4* sizeof(unsigned char));
  if (s == NULL || bitmap == NULL) {
    fprintf(stderr, "host memory allocation failed\n");
    free(s);
    free(bitmap);
    fclose(fp);
    return 1;
  }

  // Initialise the spheres in place; a separate temporary array followed by
  // a memcpy is unnecessary on the host. Every field is indexed by i - the
  // previous code wrote b and z to element 1 on each iteration, leaving those
  // fields uninitialised for all other spheres.
  for (int i=0; i<SPHERES; i++) {
    s[i].r = rnd( 1.0f);
    s[i].g = rnd( 1.0f);
    s[i].b = rnd( 1.0f);
    s[i].x = rnd( 1000.0f) - 500;
    s[i].y = rnd( 1000.0f) - 500;
    s[i].z = rnd( 1000.0f) - 500;
    s[i].radius = rnd( 100.0f) + 20;
  }

  kernel(s, bitmap);

  // take the stop time
  stop = clock();
  double sekundy = ((double)(stop-start))/CLOCKS_PER_SEC;
  double czas = sekundy*1000;   // milliseconds
  printf("Time: %.7f seconds\n", sekundy);
  printf("Time: %.3f milliseconds\n", czas);
  fprintf(fp, "%f\n", czas);
  fclose(fp);

  // write the image to disk
  save_to_file(bitmap);

  // release memory (the bitmap used to be leaked)
  free(s);
  free(bitmap);
  return 0;
}

# Porównanie

In [None]:
import matplotlib.pyplot as plt


def read_times(paths):
    """Collect the timings stored in the given text files.

    Each file is expected to hold one floating-point number per line.
    Blank lines are skipped.

    Args:
        paths: file names to read, in plotting order.

    Returns:
        A list that starts with 0 (the point plotted at DIM = 0) followed by
        every value read, file by file.
    """
    times = [0, ]
    for path in paths:
        with open(path, "r") as file:
            for line in file:
                text = line.strip()
                if text:
                    times.append(float(text))
    return times


# Image sizes the measurements belong to; the leading 0 matches the leading 0
# that read_times puts in front of the timings.
DIMGPU = [0, 500, 1000, 2000, 6000]
# Defined unconditionally: it used to be assigned inside the innermost read
# loop, so an empty timing file left it undefined and plt.plot raised NameError.
DIMCPU = [0, 500, 1000, 2000, 6000]

czasgpu = read_times([
    "czasgpu500.txt",
    "czasgpu1000.txt",
    "czasgpu2000.txt",
    "czasgpu6000.txt",
])

# NOTE(review): the same file is read four times, exactly as before, so all
# four points carry the value of the most recent run. Presumably one file per
# DIM was intended (as for the other series) - confirm and rename the outputs.
czascpu = read_times(["czascpu.txt"] * 4)

# The labels look swapped but follow the data: "czascpu.txt" is written by the
# CUDA program and the "czasgpu*.txt" files by the host-only program.
plt.plot(DIMCPU, czascpu, label="GPU")
plt.plot(DIMGPU, czasgpu, label="CPU")
plt.xlabel("DIM")
plt.ylabel("Czas wykonywania(ms)")
plt.legend()
plt.show()