In [None]:
# Sprawozdanie z zajęć nr 10
### Mateusz Stelmaszek
### 80275

In [None]:
# Show which CUDA compiler (nvcc) is available in this environment.
!nvcc --version
# Install the nvcc4jupyter plugin straight from its GitHub repository (no version is pinned).
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
# Load the plugin as an IPython extension.
# NOTE(review): presumably this is what provides the `%%cu` cell magic used by the CUDA cells below;
# confirm that the installed plugin version still exposes `nvcc_plugin` / `%%cu`.
%load_ext nvcc_plugin

In [None]:
!apt-get install gnuplot
!pip install matplotlib

# CPU - Prostokąty

In [None]:
%%cu
#include<stdio.h>
#include<time.h>

/*
 * Approximates the integral of f(x) = x*x + x + 2 over [0, 1] with the
 * rectangle rule, sampling f at the left edge of each of N equal subintervals.
 *
 * N: number of subintervals.
 * Returns the approximation, or 0 when N <= 0 (there is nothing to sum).
 */
float dokladnosc(int N)
{
    /* Without this guard N == 0 would evaluate 0 * (1 / 0) and return NaN. */
    if (N <= 0)
        return 0.0f;

    float sum = 0;
    for (int i = 0; i < N; ++i)
    {
        float x = (float) i / (float) N;  /* left edge of the i-th subinterval */
        float fx = (x*x+x+2);
        sum += fx;
    }
    /* Every rectangle has the same width 1/N, so scale once at the end. */
    sum *= 1.0f / (float)N;
    return sum;
}
/*
 * Times the CPU rectangle-rule integration for every problem size read by the
 * comparison cell and stores each timing (in seconds, one value per file) in
 * "prostokatcpu<N>.txt".
 *
 * Returns 0 on success, 1 if any timing file could not be written.
 */
int main(){
    /* The comparison plot reads the files for exactly these sizes. */
    const int sizes[] = {100, 200, 500, 1000};
    const int count = (int)(sizeof(sizes) / sizeof(sizes[0]));
    int status = 0;

    for (int k = 0; k < count; ++k)
    {
        char path[64];
        snprintf(path, sizeof(path), "prostokatcpu%d.txt", sizes[k]);

        clock_t start = clock();
        float calka = dokladnosc(sizes[k]);
        clock_t stop = clock();
        double seconds = (double)(stop - start) / CLOCKS_PER_SEC;

        printf("N = %d\n", sizes[k]);
        printf("wartość całki: %f\n", calka);
        printf("czas wykonywania: %lf s\n", seconds);

        FILE *fp = fopen(path, "w");
        if (fp == NULL)
        {
            /* Report the failure instead of passing NULL to fprintf. */
            perror(path);
            status = 1;
            continue;
        }
        fprintf(fp, "%f\n", seconds);
        fclose(fp);
    }
    return status;
}

# GPU - Prostokąty

In [None]:
%%cu
#include <cuda_runtime.h>
#include<stdio.h>
/*
 * CUDA kernel with one thread per sample: the thread with global index i
 * stores f(i / N), where f(x) = x*x + x + 2, into a[i]. Threads whose global
 * index is N or larger write nothing.
 *
 * a: output buffer; elements 0 .. N-1 are written.
 * N: number of samples.
 */
__global__ void integratorKernel(float *a, int N)
{
    const int i = threadIdx.x + blockDim.x * blockIdx.x;
    if (i >= N)
        return;  /* surplus thread of the last block */

    const float x = (float)i / (float)N;
    a[i] = (x*x+x+2);
}
/* Aborts the program with a readable message when a CUDA runtime call failed. */
static void checkCuda(cudaError_t err, const char *what)
{
    if (err != cudaSuccess)
    {
        fprintf(stderr, "%s: %s\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

/*
 * Approximates the integral of f(x) = x*x + x + 2 over [0, 1] with the
 * rectangle rule: the kernel evaluates f at the N left edges i / N on the
 * GPU, the host copies the samples back, adds them up and scales by 1 / N.
 *
 * N: number of subintervals (one GPU thread per subinterval).
 * Returns the approximation, or 0 when N <= 0. Terminates the program if a
 * memory allocation or a CUDA call fails.
 */
float licz(int N)
{
    /* N == 0 would launch an empty grid and then compute 0 * (1 / 0). */
    if (N <= 0)
        return 0.0f;

    size_t size = (size_t)N * sizeof(float);
    float *a_h = (float *)malloc(size);
    if (a_h == NULL)
    {
        fprintf(stderr, "malloc: out of memory\n");
        exit(EXIT_FAILURE);
    }
    float *a_d = NULL;
    checkCuda(cudaMalloc((void **) &a_d, size), "cudaMalloc");

    int block_size = 256;
    /* Round up so that a partially filled last block covers the tail. */
    int n_blocks = N / block_size + (N % block_size == 0 ? 0 : 1);
    integratorKernel <<< n_blocks, block_size >>> (a_d, N);
    /* A kernel launch returns nothing; launch errors are fetched explicitly. */
    checkCuda(cudaGetLastError(), "integratorKernel launch");
    checkCuda(cudaMemcpy(a_h, a_d, size, cudaMemcpyDeviceToHost), "cudaMemcpy");

    float sum = 0;
    for (int i = 0; i < N; i++) sum += a_h[i];
    sum *= 1.0f / (float)N;

    free(a_h);
    checkCuda(cudaFree(a_d), "cudaFree");
    return sum;
}

/*
 * Times the GPU rectangle-rule integration (licz) with CUDA events for every
 * problem size read by the comparison cell and stores each timing (in
 * seconds, one value per file) in "prostokatgpu<N>.txt".
 *
 * Returns 0 on success, 1 if any timing file could not be written.
 */
int main(void){
    /* The comparison plot reads the files for exactly these sizes. */
    const int sizes[] = {100, 200, 500, 1000};
    const int count = (int)(sizeof(sizes) / sizeof(sizes[0]));
    int status = 0;

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    /*
     * Untimed warm-up run: the first CUDA work in a process typically carries
     * one-time start-up cost, which would otherwise be charged only to the
     * first measured size and distort the comparison between sizes.
     */
    (void)licz(sizes[0]);

    for (int k = 0; k < count; ++k)
    {
        char path[64];
        snprintf(path, sizeof(path), "prostokatgpu%d.txt", sizes[k]);

        cudaEventRecord(start, 0);
        float x = licz(sizes[k]);
        cudaEventRecord(stop, 0);
        cudaEventSynchronize(stop);

        float milliseconds = 0;
        cudaEventElapsedTime(&milliseconds, start, stop);
        float seconds = milliseconds / 1000;

        printf("N = %d\n", sizes[k]);
        printf("czas wykonywania: %lf s\n", seconds);
        printf("wartość całki: %f\n", x);

        FILE *fp = fopen(path, "w");
        if (fp == NULL)
        {
            /* Report the failure instead of passing NULL to fprintf. */
            perror(path);
            status = 1;
            continue;
        }
        fprintf(fp, "%f\n", seconds);
        fclose(fp);
    }

    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    return status;
}

# Porównanie

In [None]:
import matplotlib.pyplot as plt

# Problem sizes whose timing files ("prostokat<device><N>.txt") are read below.
rozmiary = [100, 200, 500, 1000]

czasy = {}
for urzadzenie in ("gpu", "cpu"):
    # Every series starts with an artificial 0 s entry that pairs with the
    # leading 0 on the x axis.
    seria = [0]
    for n in rozmiary:
        with open(f"prostokat{urzadzenie}{n}.txt", "r") as file:
            # Skip blank lines so that a stray empty line cannot break float().
            seria.extend(float(line) for line in file if line.strip())
    czasy[urzadzenie] = seria

czasgpu = czasy["gpu"]
czascpu = czasy["cpu"]
dokladnosc = [0] + rozmiary

fig, ax = plt.subplots()
ax.plot(dokladnosc, czasgpu, label="GPU")
ax.plot(dokladnosc, czascpu, label="CPU")
ax.set_xlabel("dokladnosc")
ax.set_ylabel("Czas wykonywania(s)")
ax.set_title("Metoda prostokątów: CPU vs GPU")
ax.legend()
plt.show()

# CPU - trapezy

In [None]:
%%cu
#include<time.h>
#include<stdio.h>

/*
 * Approximates the integral of f(x) = x*x + x + 2 over [0, 1] with the
 * composite trapezoidal rule on N equal subintervals.
 *
 * N: number of subintervals.
 * Returns the approximation, or 0 when N <= 0 (there is nothing to sum).
 */
float dokladnosc(int N)
{
    /* Without this guard N == 0 would evaluate 0 * (1 / 0) and return NaN. */
    if (N <= 0)
        return 0.0f;

    float sum = 0;
    /*
     * One trapezoid per subinterval [i/N, (i+1)/N] for i = 0 .. N-1.
     * Stopping at N-2 would leave out the last subinterval, which ends at 1.
     */
    for(int i = 0; i < N; ++i)
    {
        float x = (float)i/(float) N;
        float nastepny = (float)(i+1)/(float) N;
        float fx = (x*x+x+2);
        float fx_nastepny = ((nastepny*nastepny+nastepny+2) );
        sum += (fx + fx_nastepny)/2;
    }
    /* Every trapezoid has the same width 1/N, so scale once at the end. */
    sum *= 1.0f / (float)N;
    return sum;
}
/*
 * Times the CPU trapezoidal integration for every problem size read by the
 * comparison cell and stores each timing (in seconds, one value per file) in
 * "trapezycpu<N>.txt".
 *
 * Returns 0 on success, 1 if any timing file could not be written.
 */
int main(){
    /* The comparison plot reads the files for exactly these sizes. */
    const int sizes[] = {100, 200, 500, 1000};
    const int count = (int)(sizeof(sizes) / sizeof(sizes[0]));
    int status = 0;

    for (int k = 0; k < count; ++k)
    {
        char path[64];
        snprintf(path, sizeof(path), "trapezycpu%d.txt", sizes[k]);

        clock_t start = clock();
        float calka = dokladnosc(sizes[k]);
        clock_t stop = clock();
        double seconds = (double)(stop - start) / CLOCKS_PER_SEC;

        printf("N = %d\n", sizes[k]);
        printf("wartość całki: %f\n", calka);
        printf("czas wykonywania: %lf s\n", seconds);

        FILE *fp = fopen(path, "w");
        if (fp == NULL)
        {
            /* Report the failure instead of passing NULL to fprintf. */
            perror(path);
            status = 1;
            continue;
        }
        fprintf(fp, "%f\n", seconds);
        fclose(fp);
    }
    return status;
}

# GPU - trapezy

In [None]:
%%cu 
#include<stdio.h>
#include <cuda_runtime.h>
/* Aborts the program with a readable message when a CUDA runtime call failed. */
static void checkCuda(cudaError_t err, const char *what)
{
    if (err != cudaSuccess)
    {
        fprintf(stderr, "%s: %s\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

/*
 * CUDA kernel with one thread per sample: the thread with global index idx
 * stores f(idx * h), where f(x) = x*x + x + 2, into a[idx]. Threads whose
 * global index is N or larger write nothing.
 *
 * a: output buffer; elements 0 .. N-1 are written.
 * N: number of samples.
 * h: distance between two neighbouring sample points.
 */
__global__ void integratorKernel(float *a, int N, float h)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if(idx < N)
    {
        float x = (float)idx * h;
        a[idx] = (x*x+x+2);
    }
}

/*
 * Approximates the integral of f(x) = x*x + x + 2 over [0, 1] with the
 * composite trapezoidal rule on N equal subintervals. The kernel evaluates f
 * at the N + 1 nodes 0, h, ..., N*h (h = 1 / N); the host copies the samples
 * back and applies the trapezoid weights.
 *
 * N: number of subintervals.
 * Returns the approximation, or 0 when N <= 0. Terminates the program if a
 * memory allocation or a CUDA call fails.
 */
float licz(int N)
{
    if (N <= 0)
        return 0.0f;

    /* N subintervals need N + 1 nodes, so that the last node is x = 1. */
    int samples = N + 1;
    size_t size = (size_t)samples * sizeof(float);
    float* a_h = (float *)malloc(size);
    if (a_h == NULL)
    {
        fprintf(stderr, "malloc: out of memory\n");
        exit(EXIT_FAILURE);
    }
    float* a_d = NULL;
    checkCuda(cudaMalloc((void **) &a_d, size), "cudaMalloc");

    int block_size = 256;
    /* Round up so that a partially filled last block covers the tail. */
    int n_blocks = samples/block_size + (samples % block_size == 0 ? 0:1);
    float h = 1.0f / (float)N;
    integratorKernel <<< n_blocks, block_size >>> (a_d, samples, h);
    /* A kernel launch returns nothing; launch errors are fetched explicitly. */
    checkCuda(cudaGetLastError(), "integratorKernel launch");
    checkCuda(cudaMemcpy(a_h, a_d, size, cudaMemcpyDeviceToHost), "cudaMemcpy");

    /* Trapezoid weights: 1/2 for the two end nodes, 1 for interior nodes. */
    float sum = (a_h[0] + a_h[N]) / 2.0f;
    for (int i=1; i < N; i++) sum += a_h[i];
    sum *= h;

    free(a_h);
    checkCuda(cudaFree(a_d), "cudaFree");
    return sum;
}
/*
 * Times the GPU trapezoidal integration (licz) with CUDA events for every
 * problem size read by the comparison cell and stores each timing (in
 * seconds, one value per file) in "trapezygpu<N>.txt".
 *
 * Returns 0 on success, 1 if any timing file could not be written.
 */
int main(void){
    /* The comparison plot reads the files for exactly these sizes. */
    const int sizes[] = {100, 200, 500, 1000};
    const int count = (int)(sizeof(sizes) / sizeof(sizes[0]));
    int status = 0;

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    /*
     * Untimed warm-up run: the first CUDA work in a process typically carries
     * one-time start-up cost, which would otherwise be charged only to the
     * first measured size and distort the comparison between sizes.
     */
    (void)licz(sizes[0]);

    for (int k = 0; k < count; ++k)
    {
        char path[64];
        snprintf(path, sizeof(path), "trapezygpu%d.txt", sizes[k]);

        cudaEventRecord(start, 0);
        float x = licz(sizes[k]);
        cudaEventRecord(stop, 0);
        cudaEventSynchronize(stop);

        float milliseconds = 0;
        cudaEventElapsedTime(&milliseconds, start, stop);
        float seconds = milliseconds / 1000;

        printf("N = %d\n", sizes[k]);
        printf("czas wykonywania: %lf s\n", seconds);
        printf("wartość całki: %f\n", x);

        FILE *fp = fopen(path, "w");
        if (fp == NULL)
        {
            /* Report the failure instead of passing NULL to fprintf. */
            perror(path);
            status = 1;
            continue;
        }
        fprintf(fp, "%f\n", seconds);
        fclose(fp);
    }

    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    return status;
}

# Porównanie

In [None]:
import matplotlib.pyplot as plt

# Problem sizes whose timing files ("trapezy<device><N>.txt") are read below.
rozmiary = [100, 200, 500, 1000]

czasy = {}
for urzadzenie in ("gpu", "cpu"):
    # Every series starts with an artificial 0 s entry that pairs with the
    # leading 0 on the x axis.
    seria = [0]
    for n in rozmiary:
        with open(f"trapezy{urzadzenie}{n}.txt", "r") as file:
            # Skip blank lines so that a stray empty line cannot break float().
            seria.extend(float(line) for line in file if line.strip())
    czasy[urzadzenie] = seria

czasgpu = czasy["gpu"]
czascpu = czasy["cpu"]
dokladnosc = [0] + rozmiary

fig, ax = plt.subplots()
ax.plot(dokladnosc, czasgpu, label="GPU")
ax.plot(dokladnosc, czascpu, label="CPU")
ax.set_xlabel("dokladnosc")
ax.set_ylabel("Czas wykonywania(s)")
ax.set_title("Metoda trapezów: CPU vs GPU")
ax.legend()
plt.show()

# CPU - Simpson

In [None]:
%%cu
#include<time.h>
#include<stdio.h>
/*
 * Approximates the integral of f(x) = x*x + x + 2 over [0, 1] with the
 * composite Simpson rule: each of the N equal subintervals contributes
 * (h / 6) * (f(left) + 4 * f(midpoint) + f(right)) with h = 1 / N.
 *
 * N: number of subintervals (any positive value, odd or even).
 * Returns the approximation, or 0 when N <= 0 (there is nothing to sum).
 */
float dokladnosc(int N)
{
    /* Without this guard N == 0 would evaluate 0 * (1 / 0) and return NaN. */
    if (N <= 0)
        return 0.0f;

    float sum = 0;
    /*
     * One Simpson panel per subinterval [i/N, (i+1)/N] for i = 0 .. N-1.
     * The panels must not overlap: using the nodes i, i+1, i+2 with the 1/6
     * weight counts the interior twice at half weight and loses part of the
     * first and last subinterval.
     */
    for(int i = 0; i < N; ++i)
    {
        float x = (float)i/(float) N;
        float srodkowy = ((float)i + 0.5f)/(float) N;  /* midpoint of the panel */
        float nastepny = (float)(i+1)/(float) N;
        float fx = (x*x+x+2);
        float fx_srodkowy = (srodkowy*srodkowy+srodkowy+2);
        float fx_nastepny = (nastepny*nastepny+nastepny+2);
        sum +=(fx + 4*fx_srodkowy + fx_nastepny)/6;
    }
    /* Every panel has the same width 1/N, so scale once at the end. */
    sum *= 1.0f/(float)N;
    return sum;
}

/*
 * Times the CPU Simpson integration for every problem size read by the
 * comparison cell and stores each timing (in seconds, one value per file) in
 * "simpsoncpu<N>.txt".
 *
 * Returns 0 on success, 1 if any timing file could not be written.
 */
int main(){
    /* The comparison plot reads the files for exactly these sizes. */
    const int sizes[] = {100, 200, 500, 1000};
    const int count = (int)(sizeof(sizes) / sizeof(sizes[0]));
    int status = 0;

    for (int k = 0; k < count; ++k)
    {
        char path[64];
        snprintf(path, sizeof(path), "simpsoncpu%d.txt", sizes[k]);

        clock_t start = clock();
        float calka = dokladnosc(sizes[k]);
        clock_t stop = clock();
        double seconds = (double)(stop - start) / CLOCKS_PER_SEC;

        printf("N = %d\n", sizes[k]);
        printf("wartość całki: %f\n", calka);
        printf("czas wykonywania: %lf s\n", seconds);

        FILE *fp = fopen(path, "w");
        if (fp == NULL)
        {
            /* Report the failure instead of passing NULL to fprintf. */
            perror(path);
            status = 1;
            continue;
        }
        fprintf(fp, "%f\n", seconds);
        fclose(fp);
    }
    return status;
}


# GPU - Simpson

In [None]:
%%cu 
#include<stdio.h>
#include <cuda_runtime.h>
/* Aborts the program with a readable message when a CUDA runtime call failed. */
static void checkCuda(cudaError_t err, const char *what)
{
    if (err != cudaSuccess)
    {
        fprintf(stderr, "%s: %s\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

/*
 * CUDA kernel with one thread per sample: the thread with global index idx
 * stores f(idx * h), where f(x) = x*x + x + 2, into a[idx]. Threads whose
 * global index is N or larger write nothing.
 *
 * a: output buffer; elements 0 .. N-1 are written.
 * N: number of samples.
 * h: distance between two neighbouring sample points.
 */
__global__ void integratorKernel(float *a, int N, float h)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if(idx < N)
    {
        float x = (float)idx * h;
        a[idx] = (x*x+x+2);
    }
}

/*
 * Approximates the integral of f(x) = x*x + x + 2 over [0, 1] with the
 * composite Simpson 1/3 rule. The kernel evaluates f at the n + 1 nodes
 * 0, h, ..., n*h (h = 1 / n); the host copies the samples back and applies
 * the weights 1, 4, 2, 4, ..., 2, 4, 1.
 *
 * N: requested number of subintervals. The rule needs an even count, so an
 *    odd N is rounded up to N + 1.
 * Returns the approximation, or 0 when N <= 0. Terminates the program if a
 * memory allocation or a CUDA call fails.
 */
float licz(int N)
{
    if (N <= 0)
        return 0.0f;

    /* Simpson pairs up subintervals, so their number has to be even. */
    int n = N + (N % 2);
    /* n subintervals need n + 1 nodes, so that the last node is x = 1. */
    int samples = n + 1;
    size_t size = (size_t)samples * sizeof(float);
    float* a_h = (float *)malloc(size);
    if (a_h == NULL)
    {
        fprintf(stderr, "malloc: out of memory\n");
        exit(EXIT_FAILURE);
    }
    float* a_d = NULL;
    checkCuda(cudaMalloc((void **) &a_d, size), "cudaMalloc");

    int block_size = 256;
    /* Round up so that a partially filled last block covers the tail. */
    int n_blocks = samples/block_size + (samples % block_size == 0 ? 0:1);
    float h = 1.0f / (float)n;
    integratorKernel <<< n_blocks, block_size >>> (a_d, samples, h);
    /* A kernel launch returns nothing; launch errors are fetched explicitly. */
    checkCuda(cudaGetLastError(), "integratorKernel launch");
    checkCuda(cudaMemcpy(a_h, a_d, size, cudaMemcpyDeviceToHost), "cudaMemcpy");

    /* End nodes have weight 1; interior nodes alternate 4 (odd) and 2 (even). */
    float sum = a_h[0] + a_h[n];
    for (int i=1; i < n; i++)
    {
        if (i % 2 == 0)
            sum += 2 * a_h[i];
        else
            sum += 4 * a_h[i];
    }
    sum *= h / 3.0f;

    free(a_h);
    checkCuda(cudaFree(a_d), "cudaFree");
    return sum;
}
/*
 * Times the GPU Simpson integration (licz) with CUDA events for every problem
 * size read by the comparison cell and stores each timing (in seconds, one
 * value per file) in "simpsongpu<N>.txt".
 *
 * Returns 0 on success, 1 if any timing file could not be written.
 */
int main(void){
    /* The comparison plot reads the files for exactly these sizes. */
    const int sizes[] = {100, 200, 500, 1000};
    const int count = (int)(sizeof(sizes) / sizeof(sizes[0]));
    int status = 0;

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    /*
     * Untimed warm-up run: the first CUDA work in a process typically carries
     * one-time start-up cost, which would otherwise be charged only to the
     * first measured size and distort the comparison between sizes.
     */
    (void)licz(sizes[0]);

    for (int k = 0; k < count; ++k)
    {
        char path[64];
        snprintf(path, sizeof(path), "simpsongpu%d.txt", sizes[k]);

        cudaEventRecord(start, 0);
        float x = licz(sizes[k]);
        cudaEventRecord(stop, 0);
        cudaEventSynchronize(stop);

        float milliseconds = 0;
        cudaEventElapsedTime(&milliseconds, start, stop);
        float seconds = milliseconds / 1000;

        printf("N = %d\n", sizes[k]);
        printf("czas wykonywania: %f s\n", seconds);
        printf("wartość całki: %f\n", x);

        FILE *fp = fopen(path, "w");
        if (fp == NULL)
        {
            /* Report the failure instead of passing NULL to fprintf. */
            perror(path);
            status = 1;
            continue;
        }
        fprintf(fp, "%f\n", seconds);
        fclose(fp);
    }

    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    return status;
}

# Porównanie

In [None]:
import matplotlib.pyplot as plt

# Problem sizes whose timing files ("simpson<device><N>.txt") are read below.
rozmiary = [100, 200, 500, 1000]

czasy = {}
for urzadzenie in ("gpu", "cpu"):
    # Every series starts with an artificial 0 s entry that pairs with the
    # leading 0 on the x axis.
    seria = [0]
    for n in rozmiary:
        with open(f"simpson{urzadzenie}{n}.txt", "r") as file:
            # Skip blank lines so that a stray empty line cannot break float().
            seria.extend(float(line) for line in file if line.strip())
    czasy[urzadzenie] = seria

czasgpu = czasy["gpu"]
czascpu = czasy["cpu"]
dokladnosc = [0] + rozmiary

fig, ax = plt.subplots()
ax.plot(dokladnosc, czasgpu, label="GPU")
ax.plot(dokladnosc, czascpu, label="CPU")
ax.set_xlabel("dokladnosc")
ax.set_ylabel("Czas wykonywania(s)")
ax.set_title("Metoda Simpsona: CPU vs GPU")
ax.legend()
plt.show()