<a href="https://colab.research.google.com/github/ItsFreakinDay/hybridCompSys/blob/task1/first.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
%%writefile copy.cu
#include <math.h>
#include <stdlib.h>
#include <time.h>

#include <cuda_runtime.h>
#include <curand_kernel.h>

extern "C" {
#include <stdio.h>

/*
 * CPU reference implementation: writes a[k] / scalar into c[k] for the
 * first n elements, in increasing index order.
 *
 *   a      - input vector (read only by this function)
 *   scalar - divisor applied to every element
 *   c      - output vector, must have room for n floats
 *   n      - number of elements to process; nothing is done when n <= 0
 */
__host__ void h_divide(float* a, float scalar, float* c, int n) {
    const float* src = a;
    float* dst = c;

    for (int remaining = n; remaining > 0; --remaining) {
        *dst++ = *src++ / scalar;
    }
}

/*
 * GPU kernel: each thread divides at most one element, c[idx] = a[idx] / scalar.
 *
 * The element index is derived from the x components of the block and thread
 * indices only, so the kernel expects a 1D launch providing at least n threads
 * in total. Threads whose index falls at or beyond n exit without touching
 * memory.
 *
 *   a      - input vector
 *   scalar - divisor applied to every element
 *   c      - output vector
 *   n      - number of elements to process
 */
__global__ void d_divide(float* a, float scalar, float* c, int n) {
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // Surplus threads in the last block have nothing to do.
    if (idx >= n) {
        return;
    }

    c[idx] = a[idx] / scalar;
}

/*
 * GPU kernel: fills a[idx] and b[idx] with pseudo-random values obtained by
 * scaling curand_uniform() output with r (value = 2 * r * u - r).
 *
 * Every thread initialises its own cuRAND state from the shared seed, using
 * its element index as the sequence number and offset 0, then draws two
 * numbers: the first goes to a, the second to b.
 *
 * The element index uses only the x components of the block and thread
 * indices; threads whose index is at or beyond n do nothing.
 *
 *   a, b - output vectors
 *   n    - number of elements to fill in each vector
 *   r    - scale of the generated values
 *   seed - cuRAND seed shared by all threads
 */
__global__ void d_fill_uniform(
    float* a, float* b, int n, float r, unsigned long long seed) {
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;

    if (idx >= n) {
        return;
    }

    curandState_t rng;
    curand_init(seed, idx, 0, &rng);

    // Draw order matters for reproducibility: a first, then b.
    a[idx] = 2 * r * curand_uniform(&rng) - r;
    b[idx] = 2 * r * curand_uniform(&rng) - r;
}

/*
 * Element-wise comparison of two float vectors with an absolute tolerance.
 *
 * Scans the first n elements in order and returns the absolute difference of
 * the first pair that is NOT strictly below eps. A NaN difference (e.g. one
 * side is NaN) also counts as a mismatch and is returned as NaN, so callers
 * should test the result with a NaN-aware comparison such as
 * !(result < eps).
 *
 * If every pair is within tolerance, the absolute difference of the last
 * pair is returned (0 when n <= 0); that value is always below eps.
 *
 *   a, b - vectors to compare (not modified)
 *   n    - number of elements to compare
 *   eps  - absolute tolerance
 */
float compare(float* a, float* b, int n, float eps) {
    float diff = 0;

    for (int i = 0; i < n; i++) {
        diff = fabsf(a[i] - b[i]);
        // Negated "<" instead of ">=": every comparison with NaN is false,
        // so ">=" would let a NaN difference slip through unnoticed.
        if (!(diff < eps)) {
            return diff;
        }
    }

    return diff;
}
}

// Default benchmark configuration. The whole group is skipped when REDEFINE
// is defined; presumably the values are then supplied externally (e.g. via
// compiler -D flags) -- confirm against the build setup.
#ifndef REDEFINE
// Number of float elements in each full-length vector.
#define VEC_LEN 51200000
// Start value and step of the vector length swept by the timing loop.
#define VEC_LEN_INC 512000
// Number of leading elements used for the CPU-vs-GPU correctness check.
#define CHECK_FIRST 51200
// Threads per block used for every kernel launch.
#define BLOCK_SIZE 128
// Output file receiving the comma-separated timing table.
#define FNAME_STAMPS "timings.stmp"
// Absolute tolerance for the correctness check. Note: 10e-10 equals 1e-9.
#define PRECISION 10e-10
// Seed passed to the random fill kernel.
#define SEED 27
// Scale factor r passed to the random fill kernel.
#define VEC_MAX_ABS_VAL 101
#endif

// Size in bytes of one full-length vector of floats.
#define VEC_MEM_SIZE ((VEC_LEN) * sizeof(float))
// Converts a struct timespec to milliseconds (as a double):
// seconds * 1e3 + nanoseconds * 1e-6. The previous factors (10e3 and 10e-6)
// were each ten times too large, inflating every CPU timing by 10x.
#define ts_to_ms(ts) ((ts).tv_sec * 1e3 + (ts).tv_nsec * 1e-6)
// Number of BLOCK_SIZE-thread blocks needed to cover m elements (ceiling
// division). The argument is parenthesised so expressions can be passed safely.
#define calc_grid_size(m) (((m) + BLOCK_SIZE - 1) / BLOCK_SIZE)

// Prints a diagnostic and terminates the process when a CUDA runtime call fails.
static void check_cuda(cudaError_t err, const char* what) {
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

// Benchmark driver.
//
// 1. Fills two device vectors with random values and copies the first one to
//    the host.
// 2. Verifies on the first CHECK_FIRST elements that the GPU kernel and the
//    CPU reference agree within PRECISION; prints "Panic!" and returns -1
//    otherwise.
// 3. For vector lengths VEC_LEN_INC, 2*VEC_LEN_INC, ..., VEC_LEN measures the
//    CPU time (process CPU clock) and the GPU kernel time (CUDA events) and
//    writes one "length, cpu_ms, gpu_ms" row per length to FNAME_STAMPS.
//
// Returns 0 on success, -1 on a verification, allocation or file error. CUDA
// runtime failures terminate the process via check_cuda().
int main() {
    // Host buffers: input vector, CPU result, GPU result copied back.
    // (The former aligned(64) attributes were dropped: they applied to the
    // pointer variables themselves, not to the malloc'ed memory.)
    float* h_a = (float*)malloc(VEC_MEM_SIZE);
    float* h_c = (float*)malloc(VEC_MEM_SIZE);
    float* h_d = (float*)malloc(VEC_MEM_SIZE);
    if (h_a == NULL || h_c == NULL || h_d == NULL) {
        fprintf(stderr, "Failed to allocate host buffers\n");
        free(h_a);
        free(h_c);
        free(h_d);
        return -1;
    }

    float *d_a, *d_b, *d_c;
    check_cuda(cudaMalloc((void**)&d_a, VEC_MEM_SIZE), "cudaMalloc d_a");
    check_cuda(cudaMalloc((void**)&d_b, VEC_MEM_SIZE), "cudaMalloc d_b");
    check_cuda(cudaMalloc((void**)&d_c, VEC_MEM_SIZE), "cudaMalloc d_c");

    // d_b is filled because the kernel always produces two vectors; only d_a
    // is used below, so only d_a is copied back to the host.
    d_fill_uniform<<<calc_grid_size(VEC_LEN), BLOCK_SIZE>>>(
        d_a, d_b, VEC_LEN, VEC_MAX_ABS_VAL, SEED);
    check_cuda(cudaGetLastError(), "d_fill_uniform launch");
    check_cuda(cudaMemcpy(h_a, d_a, VEC_MEM_SIZE, cudaMemcpyDeviceToHost),
               "copy d_a to host");

    float scalar = 2.0f; // Scalar divisor

    // Correctness check on a prefix of the data before any timing is done.
    h_divide(h_a, scalar, h_c, CHECK_FIRST);
    d_divide<<<calc_grid_size(CHECK_FIRST), BLOCK_SIZE>>>(d_a, scalar, d_c, CHECK_FIRST);
    check_cuda(cudaGetLastError(), "d_divide launch (verification)");
    check_cuda(cudaMemcpy(h_d, d_c, CHECK_FIRST * sizeof(float), cudaMemcpyDeviceToHost),
               "copy d_c to host");

    int status = 0;

    // Negated "<=" keeps the original "> PRECISION" threshold for ordinary
    // values while also treating a NaN result as a failure.
    float mismatch = compare(h_c, h_d, CHECK_FIRST, PRECISION);
    if (!(mismatch <= PRECISION)) {
        printf("Panic!\n");
        status = -1;
    }

    FILE* file = NULL;
    if (status == 0) {
        file = fopen(FNAME_STAMPS, "w");
        if (file == NULL) {
            perror(FNAME_STAMPS);
            status = -1;
        }
    }

    if (status == 0) {
        float h_time;
        timespec h_start, h_stop;

        float d_time;
        cudaEvent_t d_start, d_stop;
        check_cuda(cudaEventCreate(&d_start), "cudaEventCreate start");
        check_cuda(cudaEventCreate(&d_stop), "cudaEventCreate stop");

        fprintf(file, "Vector Length, CPU Time, GPU Time\n");

        for (int m = VEC_LEN_INC; m <= VEC_LEN; m += VEC_LEN_INC) {
            clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &h_start);
            h_divide(h_a, scalar, h_c, m);
            clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &h_stop);
            h_time = (ts_to_ms(h_stop) - ts_to_ms(h_start)); // time in ms

            check_cuda(cudaEventRecord(d_start), "cudaEventRecord start");
            d_divide<<<calc_grid_size(m), BLOCK_SIZE>>>(d_a, scalar, d_c, m);
            check_cuda(cudaGetLastError(), "d_divide launch (timing)");
            check_cuda(cudaEventRecord(d_stop), "cudaEventRecord stop");
            // Wait for the kernel to finish so the elapsed time is valid.
            check_cuda(cudaEventSynchronize(d_stop), "cudaEventSynchronize");
            check_cuda(cudaEventElapsedTime(&d_time, d_start, d_stop),
                       "cudaEventElapsedTime"); // time in ms

            fprintf(file, "%d, %f, %f\n", m, h_time, d_time);
        }

        check_cuda(cudaEventDestroy(d_start), "cudaEventDestroy start");
        check_cuda(cudaEventDestroy(d_stop), "cudaEventDestroy stop");

        if (fclose(file) != 0) {
            perror(FNAME_STAMPS);
            status = -1;
        }
    }

    // Release everything on both the success and the failure path.
    free(h_a);
    free(h_c);
    free(h_d);

    check_cuda(cudaFree(d_a), "cudaFree d_a");
    check_cuda(cudaFree(d_b), "cudaFree d_b");
    check_cuda(cudaFree(d_c), "cudaFree d_c");

    return status;
}


Overwriting copy.cu


In [4]:
!nvcc -o qwert copy.cu

In [5]:
!./qwert

In [6]:
import pandas as pd
import plotly.graph_objects as go
import argparse


def make_plot(df):
    """Show CPU and GPU timings against vector length as an interactive figure.

    Args:
        df: table with the columns "Vector Length", "CPU Time" and
            "GPU Time"; each timing column becomes one line in the plot.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    lengths = df["Vector Length"]

    # (column in df, line colour, legend label) for each plotted series.
    series_specs = (
        ("CPU Time", 'indianred', "CPU"),
        ("GPU Time", 'limegreen', "GPU"),
    )
    traces = [
        go.Scatter(
            x=lengths,
            y=df[column],
            mode="lines+markers",
            line=dict(dash='solid', color=colour),
            name=label,
        )
        for column, colour, label in series_specs
    ]

    x_axis = dict(
        range=[lengths.min(), lengths.max()],
        autorange=True,
        title_text="Vector Length",
    )
    y_axis = dict(autorange=True, title_text="Time, ms")
    layout = go.Layout(
        xaxis=x_axis,
        yaxis=y_axis,
        title_text="Real complexity of operation",
        hovermode="closest",
        title_x=0.5,
        template="plotly_dark",
    )

    figure = go.Figure(data=traces, layout=layout)
    figure.show()

# Load the timing table written by the benchmark binary. The file separates
# fields with ", ", so skipinitialspace=True strips the blank after each comma
# and the column names match the ones make_plot looks up.
df = pd.read_csv("timings.stmp", skipinitialspace=True)
make_plot(df)