<a href="https://colab.research.google.com/github/Andres8bit/parallel-computing/blob/main/Bezier_curve.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install git+git://github.com/andreinechaev/nvcc4jupyter.git

Collecting git+git://github.com/andreinechaev/nvcc4jupyter.git
  Cloning git://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-mu2iozq8
  Running command git clone -q git://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-mu2iozq8
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... [?25l[?25hdone
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-cp36-none-any.whl size=4308 sha256=b0ee1ad8af52126a3d2418da40d47f73a89d4defb72b6e96d8bdf74311208fd7
  Stored in directory: /tmp/pip-ephem-wheel-cache-o2p3mfc2/wheels/10/c2/05/ca241da37bff77d60d31a9174f988109c61ba989e4d4650516
Successfully built NVCCPlugin
Installing collected packages: NVCCPlugin
Successfully installed NVCCPlugin-0.0.2


In [3]:
%load_ext nvcc_plugin

created output directory at /content/src
Out bin /content/result.out


Bezier Curves:
    A Bezier curve is defined by a set of control points P0 through Pn,
   where n denotes the order of the curve, i.e. n = 1 is linear, n = 2 is
   quadratic, etc.
   They are often used in computer graphics to draw smooth curves.

In [19]:
%%cu
#include <iostream>
#include <stdio.h>

#define MAX_TESS_POINTS 32
#define N_LINES 256
#define BLOCK_DIM 32

// One quadratic Bezier line together with the buffer holding its tessellated
// vertices.
struct BezierLine{
    // The three control points of the curve (indices 0, 1 and 2).
    float2 control_points[3];
    // Tessellated vertex positions. The buffer is allocated from device code
    // in compute_bezier_parent and released in free_vertex_mem.
    float2 *vertex_pos;
    // Number of tessellation points for this line; init() sets it to 0 and
    // compute_bezier_parent overwrites it with the chosen point count.
    int n_vertices;    
};

// Component-wise sum of two 2D vectors (device only).
__forceinline__ __device__ float2 operator + (float2 a,float2 b){
    return make_float2(a.x + b.x, a.y + b.y);
}

// Component-wise difference (a - b) of two 2D vectors (device only).
__forceinline__ __device__ float2 operator - (float2 a,float2 b){
    return make_float2(a.x - b.x, a.y - b.y);
}

// Component-wise (Hadamard) product of two 2D vectors (device only).
// Note: this is neither a dot product nor a scalar multiplication.
__forceinline__ __device__ float2 operator * (float2 a,float2 b){
    return make_float2(a.x * b.x, a.y * b.y);
}

// Euclidean norm of a 2D vector (device only).
__forceinline__ __device__ float length(float2 a){
    const float squared_norm = a.x * a.x + a.y * a.y;
    return sqrtf(squared_norm);
}

/**
 * Estimates how strongly the quadratic Bezier line owned by the calling
 * thread bends.
 *
 * The estimate is the distance of the middle control point from the midpoint
 * of the chord P0-P2, divided by the chord length.
 *
 * The line is selected with the global 1D thread index
 * (threadIdx.x + blockDim.x * blockIdx.x), i.e. one thread per line. The
 * caller must make sure that index is a valid position in b_lines.
 *
 * @param b_lines  array of Bezier lines in device-accessible memory
 * @return         the curvature estimate; not finite when P0 == P2 because
 *                 the chord length is then zero (not guarded here)
 */
__device__ float compute_curvature(BezierLine *b_lines){
    // Use the global thread index, not blockIdx.x: with one thread per line,
    // indexing by block would make every thread of a block measure the same
    // line instead of its own.
    int line_id = threadIdx.x + blockDim.x * blockIdx.x;

    const float2 p0 = b_lines[line_id].control_points[0];
    const float2 p1 = b_lines[line_id].control_points[1];
    const float2 p2 = b_lines[line_id].control_points[2];

    // Midpoint of the chord; the 0.5 factor is what turns (P0 + P2) into a
    // point on the chord rather than an unrelated vector sum.
    float2 chord_mid;
    chord_mid.x = 0.5f * (p0.x + p2.x);
    chord_mid.y = 0.5f * (p0.y + p2.y);

    float curvature = length(p1 - chord_mid) / length(p2 - p0);
    return curvature;
}

/**
 * Fills N_LINES Bezier lines on the host with random control points.
 *
 * The lines form a chain: control point 0 of each line is control point 2 of
 * the previous line (the first line starts at the origin). Control points 1
 * and 2 are drawn with rand() and scaled into [0, 1].
 *
 * @param b_lines  host array with room for at least N_LINES entries
 */
void init(BezierLine *b_lines){
    float2 last = {0,0};

    for (int i = 0; i < N_LINES; i++){
        b_lines[i].control_points[0] = last;
        for (int j = 1; j < 3; j++){
            b_lines[i].control_points[j].x = (float)rand() / (float)RAND_MAX;
            b_lines[i].control_points[j].y = (float)rand() / (float) RAND_MAX;
        }
        last = b_lines[i].control_points[2];

        // No vertices exist yet. Clear the pointer as well as the count so
        // that an indeterminate pointer value is never copied to the device.
        b_lines[i].vertex_pos = nullptr;
        b_lines[i].n_vertices = 0;
    }
}

/**
 * Evaluates one quadratic Bezier line at n_tess_points evenly spaced
 * parameter values and stores the positions in the line's vertex buffer.
 *
 * Launch layout: 1D grid of 1D blocks with at least n_tess_points threads in
 * total; thread idx writes vertex idx. No shared memory is used.
 *
 * @param line_id        index of the line in b_lines to tessellate
 * @param b_lines        array of Bezier lines in device memory;
 *                       b_lines[line_id].vertex_pos must hold at least
 *                       n_tess_points elements
 * @param n_tess_points  number of vertices to produce
 */
__global__ void compute_bezier_child(int line_id,BezierLine* b_lines,int n_tess_points){
    int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= n_tess_points)
        return;

    // Nothing to write into if the vertex buffer was never allocated.
    float2 *vertices = b_lines[line_id].vertex_pos;
    if (vertices == nullptr)
        return;

    // Curve parameter in [0, 1]. With a single tessellation point the
    // denominator would be zero (0/0 -> NaN), so sample the start instead.
    float u = (n_tess_points > 1)
                  ? (float)idx/(float)(n_tess_points - 1)
                  : 0.0f;
    float omu = 1.0f - u;

    // Quadratic Bernstein weights for the three control points.
    float quad_coef[3];
    quad_coef[0] = omu*omu;
    quad_coef[1] = 2.0f * u * omu;
    quad_coef[2] = u*u;

    float2 pos = {0,0};
    for (int i = 0; i < 3; i++){
        // operator* is component-wise, so broadcast the scalar weight.
        float2 coef = {quad_coef[i],quad_coef[i]};
        pos = pos + coef * b_lines[line_id].control_points[i];
    }
    vertices[idx] = pos;
}

/**
 * Picks a tessellation density for each Bezier line, allocates its vertex
 * buffer from device code and launches compute_bezier_child to fill it.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per line; threads whose
 * global index is >= n_lines do nothing.
 *
 * Launching a kernel from a kernel requires compute capability 3.5 or newer
 * (and relocatable device code when compiling).
 *
 * On allocation failure the line is left with vertex_pos == NULL and
 * n_vertices == 0 and no child kernel is launched for it.
 *
 * @param b_lines  array of Bezier lines in device memory
 * @param n_lines  number of entries in b_lines
 */
__global__ void compute_bezier_parent(BezierLine *b_lines, int n_lines){
    int line_id = threadIdx.x + blockDim.x * blockIdx.x;
    if (line_id >= n_lines)
        return;

    // More curvature -> more vertices, clamped to [4, MAX_TESS_POINTS].
    float curvature = compute_curvature(b_lines);
    int n_tess_points = min(max((int)(curvature*16.0f),4),MAX_TESS_POINTS);

    // Device-side allocation can fail (the device heap is limited), so
    // publish the buffer only after it is known to be valid.
    float2 *vertices = NULL;
    if (cudaMalloc((void**) &vertices, n_tess_points * sizeof(float2)) != cudaSuccess
        || vertices == NULL){
        b_lines[line_id].vertex_pos = NULL;
        b_lines[line_id].n_vertices = 0;
        return;
    }
    b_lines[line_id].vertex_pos = vertices;
    b_lines[line_id].n_vertices = n_tess_points;

    // A private non-blocking stream lets child launches from different
    // threads of this block run concurrently. Create it only for threads
    // that launch work, and fall back to the default stream if creation fails.
    cudaStream_t stream;
    bool own_stream =
        (cudaStreamCreateWithFlags(&stream,cudaStreamNonBlocking) == cudaSuccess);
    if (!own_stream)
        stream = 0;

    const int child_block = 32;
    const int child_grid = (n_tess_points + child_block - 1) / child_block;
    compute_bezier_child<<<child_grid,child_block,0,stream>>>
                        (line_id,b_lines,n_tess_points);

    // Release the stream handle; without this every thread leaks one stream.
    if (own_stream)
        cudaStreamDestroy(stream);
}

/**
 * Releases the vertex buffer of every Bezier line from device code.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per line; threads whose
 * global index is >= n_lines do nothing.
 *
 * After the call each line has vertex_pos == NULL and n_vertices == 0, so the
 * kernel can safely be launched again and no dangling pointer is left behind.
 *
 * @param b_lines  array of Bezier lines in device memory
 * @param n_lines  number of entries in b_lines
 */
__global__ void free_vertex_mem(BezierLine *b_lines,int n_lines){
    int line_id = threadIdx.x + blockDim.x * blockIdx.x;
    if (line_id >= n_lines)
        return;

    float2 *vertices = b_lines[line_id].vertex_pos;
    if (vertices != NULL)
        cudaFree(vertices);

    // Mark the line as having no vertices so the freed buffer cannot be
    // used or freed a second time.
    b_lines[line_id].vertex_pos = NULL;
    b_lines[line_id].n_vertices = 0;
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char *what){
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/**
 * Builds N_LINES random Bezier lines on the host, copies them to the device,
 * tessellates them with compute_bezier_parent and then releases the
 * per-line vertex buffers with free_vertex_mem.
 *
 * Every CUDA call and kernel launch is checked; the first failure is
 * reported on stderr and the remaining GPU work is skipped.
 *
 * @return 0 on success, 1 if any CUDA call or kernel failed
 */
int main(){
    // Integer ceil-division: enough blocks to give every line one thread.
    const int n_blocks = (N_LINES + BLOCK_DIM - 1) / BLOCK_DIM;

    BezierLine *host_bezier = new BezierLine[N_LINES];
    BezierLine *device_bezier = nullptr;
    init(host_bezier);

    bool ok = cuda_ok(cudaMalloc((void **)&device_bezier,N_LINES * sizeof(BezierLine)),
                      "cudaMalloc(device_bezier)");
    ok = ok && cuda_ok(cudaMemcpy(device_bezier,host_bezier,N_LINES * sizeof(BezierLine),
                                  cudaMemcpyHostToDevice),
                       "cudaMemcpy(host -> device)");

    if (ok){
        compute_bezier_parent<<<n_blocks, BLOCK_DIM>>>(device_bezier,N_LINES);
        // Launch errors show up in cudaGetLastError(); faults during
        // execution only surface at the next synchronising call.
        ok = cuda_ok(cudaGetLastError(), "compute_bezier_parent launch")
          && cuda_ok(cudaDeviceSynchronize(), "compute_bezier_parent execution");
    }

    if (ok){
        // Buffers allocated from device code must be freed from device code.
        free_vertex_mem<<<n_blocks,BLOCK_DIM>>>(device_bezier,N_LINES);
        ok = cuda_ok(cudaGetLastError(), "free_vertex_mem launch")
          && cuda_ok(cudaDeviceSynchronize(), "free_vertex_mem execution");
    }

    // Release the line array itself (a null pointer is accepted).
    if (!cuda_ok(cudaFree(device_bezier), "cudaFree(device_bezier)"))
        ok = false;

    delete[] host_bezier;

    return ok ? 0 : 1;
}

/tmp/tmpz1ryro77/4fe7dacf-2a0c-45e3-b961-6e20d53fac35.cu(100): error: calling a __global__ function("compute_bezier_child") from a __global__ function("compute_bezier_parent") is only allowed on the compute_35 architecture or above

1 error detected in the compilation of "/tmp/tmpxft_00000318_00000000-8_4fe7dacf-2a0c-45e3-b961-6e20d53fac35.cpp1.ii".

