In [32]:
%%writefile input.txt
2
100000000000 0.0 0.0 0.0 0.0 0.0 0.0
10 10.0 0.0 0.0 0.0 1.0 0.0

Overwriting input.txt


In [33]:
%%writefile timer.h
#ifndef _TIMER_H_
#define _TIMER_H_

#include <sys/time.h>

#define GET_TIME(now) { \
   struct timeval t; \
   gettimeofday(&t, NULL); \
   now = t.tv_sec + t.tv_usec/1000000.0; \
}

#endif


Overwriting timer.h


In [34]:
%%writefile task2_cuda.cu
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda_runtime.h>
#include "timer.h"

const double G = 6.67430e-11;
const double delta_t = 0.01;

void checkCuda(cudaError_t result) {
    if (result != cudaSuccess) {
        fprintf(stderr, "CUDA Error: %s (Code: %d)\n",
                cudaGetErrorString(result), result);
        exit(1);
    }
}

__global__ void reset_forces(double* fx, double* fy, double* fz, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        fx[i] = 0.0;
        fy[i] = 0.0;
        fz[i] = 0.0;
    }
}

__global__ void compute_forces(const double* m, const double* x, const double* y, const double* z,
                               double* fx, double* fy, double* fz, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    if (i >= n) return;

    double pos_xi = x[i];
    double pos_yi = y[i];
    double pos_zi = z[i];
    double mass_i = m[i];

    double f_ix = 0.0;
    double f_iy = 0.0;
    double f_iz = 0.0;

    for (int j = i + 1; j < n; ++j) {
        double dx = x[j] - pos_xi;
        double dy = y[j] - pos_yi;
        double dz = z[j] - pos_zi;

        double distSq = dx*dx + dy*dy + dz*dz;
        double dist = sqrt(distSq);

        if (dist < 1e-9) continue;

        double distCubed = dist * dist * dist;
        double f_mag = G * mass_i * m[j] / distCubed;

        double force_x = f_mag * dx;
        double force_y = f_mag * dy;
        double force_z = f_mag * dz;

        f_ix += force_x;
        f_iy += force_y;
        f_iz += force_z;

        atomicAdd(&fx[j], -force_x);
        atomicAdd(&fy[j], -force_y);
        atomicAdd(&fz[j], -force_z);
    }

    atomicAdd(&fx[i], f_ix);
    atomicAdd(&fy[i], f_iy);
    atomicAdd(&fz[i], f_iz);
}

__global__ void update_particles(double* m, double* x, double* y, double* z,
                                 double* vx, double* vy, double* vz,
                                 const double* fx, const double* fy, const double* fz,
                                 int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        double mass = m[i];

        double ax = fx[i] / mass;
        double ay = fy[i] / mass;
        double az = fz[i] / mass;

        x[i] += vx[i] * delta_t;
        y[i] += vy[i] * delta_t;
        z[i] += vz[i] * delta_t;

        vx[i] += ax * delta_t;
        vy[i] += ay * delta_t;
        vz[i] += az * delta_t;
    }
}

int main(int argc, char* argv[]) {
    double start_time, end_time;

    if (argc < 3) {
        fprintf(stderr, "Usage: %s <tend> <filename>\n", argv[0]);
        return 1;
    }

    double t_end = atof(argv[1]);
    char* filename = argv[2];
    int n;

    FILE* file = fopen(filename, "r");
    if (!file) {
        fprintf(stderr, "Error: Cannot open file %s\n", filename);
        return 1;
    }
    if (fscanf(file, "%d", &n) != 1) {
        fprintf(stderr, "Error reading number of particles\n");
        return 1;
    }

    double *h_m = (double*)malloc(n * sizeof(double));
    double *h_x = (double*)malloc(n * sizeof(double));
    double *h_y = (double*)malloc(n * sizeof(double));
    double *h_z = (double*)malloc(n * sizeof(double));
    double *h_vx = (double*)malloc(n * sizeof(double));
    double *h_vy = (double*)malloc(n * sizeof(double));
    double *h_vz = (double*)malloc(n * sizeof(double));

    for (int i = 0; i < n; ++i) {
        fscanf(file, "%lf %lf %lf %lf %lf %lf %lf",
               &h_m[i],
               &h_x[i],
               &h_y[i],
               &h_z[i],
               &h_vx[i],
               &h_vy[i],
               &h_vz[i]);
    }
    fclose(file);

    FILE* outFile = fopen("output.csv", "w");
    if (!outFile) {
        fprintf(stderr, "Error creating output file\n");
        return 1;
    }

    double *d_m, *d_x, *d_y, *d_z, *d_vx, *d_vy, *d_vz, *d_fx, *d_fy, *d_fz;
    checkCuda(cudaMalloc(&d_m, n * sizeof(double)));
    checkCuda(cudaMalloc(&d_x, n * sizeof(double)));
    checkCuda(cudaMalloc(&d_y, n * sizeof(double)));
    checkCuda(cudaMalloc(&d_z, n * sizeof(double)));
    checkCuda(cudaMalloc(&d_vx, n * sizeof(double)));
    checkCuda(cudaMalloc(&d_vy, n * sizeof(double)));
    checkCuda(cudaMalloc(&d_vz, n * sizeof(double)));
    checkCuda(cudaMalloc(&d_fx, n * sizeof(double)));
    checkCuda(cudaMalloc(&d_fy, n * sizeof(double)));
    checkCuda(cudaMalloc(&d_fz, n * sizeof(double)));

    checkCuda(cudaMemcpy(d_m, h_m, n * sizeof(double), cudaMemcpyHostToDevice));
    checkCuda(cudaMemcpy(d_x, h_x, n * sizeof(double), cudaMemcpyHostToDevice));
    checkCuda(cudaMemcpy(d_y, h_y, n * sizeof(double), cudaMemcpyHostToDevice));
    checkCuda(cudaMemcpy(d_z, h_z, n * sizeof(double), cudaMemcpyHostToDevice));
    checkCuda(cudaMemcpy(d_vx, h_vx, n * sizeof(double), cudaMemcpyHostToDevice));
    checkCuda(cudaMemcpy(d_vy, h_vy, n * sizeof(double), cudaMemcpyHostToDevice));
    checkCuda(cudaMemcpy(d_vz, h_vz, n * sizeof(double), cudaMemcpyHostToDevice));

    int threadsPerBlock = 256;
    int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock;

    double t = 0.0;

    GET_TIME(start_time);
    while (t <= t_end) {
        checkCuda(cudaMemcpy(h_x, d_x, n * sizeof(double), cudaMemcpyDeviceToHost));
        checkCuda(cudaMemcpy(h_y, d_y, n * sizeof(double), cudaMemcpyDeviceToHost));
        cudaDeviceSynchronize();

        fprintf(outFile, "%.6f", t);
        for (int i = 0; i < n; ++i) {
            fprintf(outFile, " %.6f %.6f", h_x[i], h_y[i]);
        }
        fprintf(outFile, "\n");

        if (t >= t_end) break;

        reset_forces<<<blocksPerGrid, threadsPerBlock>>>(d_fx, d_fy, d_fz, n);
        checkCuda(cudaGetLastError());

        compute_forces<<<blocksPerGrid, threadsPerBlock>>>(d_m, d_x, d_y, d_z, d_fx, d_fy, d_fz, n);
        checkCuda(cudaGetLastError());

        update_particles<<<blocksPerGrid, threadsPerBlock>>>(d_m, d_x, d_y, d_z, d_vx, d_vy, d_vz, d_fx, d_fy, d_fz, n);
        checkCuda(cudaGetLastError());

        t += delta_t;
    }

    GET_TIME(end_time);

    printf("%lf\n", end_time - start_time);

    fclose(outFile);

    free(h_m);
    free(h_x);
    free(h_y);
    free(h_z);
    free(h_vx);
    free(h_vy);
    free(h_vz);

    cudaFree(d_m);
    cudaFree(d_x);
    cudaFree(d_y);
    cudaFree(d_z);
    cudaFree(d_vx);
    cudaFree(d_vy);
    cudaFree(d_vz);
    cudaFree(d_fx);
    cudaFree(d_fy);
    cudaFree(d_fz);

    return 0;
}

Overwriting task2_cuda.cu


In [35]:
!nvcc task2_cuda.cu -o task2_cuda -arch=sm_75

In [36]:
!./task2_cuda 10.0 input.txt

0.047533


In [37]:
!head output.csv

0.000000 0.000000 0.000000 10.000000 0.000000
0.010000 0.000000 0.000000 10.000000 0.010000
0.020000 0.000000 0.000000 9.999993 0.020000
0.030000 0.000000 0.000000 9.999980 0.030000
0.040000 0.000000 0.000000 9.999960 0.040000
0.050000 0.000000 0.000000 9.999933 0.050000
0.060000 0.000000 0.000000 9.999900 0.060000
0.070000 0.000000 0.000000 9.999860 0.070000
0.080000 0.000000 0.000000 9.999813 0.080000
0.090000 0.000000 0.000000 9.999760 0.089999


In [38]:
%%writefile runner.sh
#!/bin/bash

PROGRAM="./task2_cuda"
ARGS="10.0 input.txt"
RUNS=10

if [ ! -f "$PROGRAM" ]; then
    echo "Error: program not found"
    exit 1
fi

total_time=0

for ((i=1; i<=RUNS; i++)); do
    val=$($PROGRAM $ARGS)
    echo "Run $i: $val"

    total_time=$(echo "$total_time + $val" | bc -l)
done

avg=$(echo "scale=6; $total_time / $RUNS" | bc -l)

echo "----------------"
echo "Average: $avg"

Overwriting runner.sh


In [39]:
!chmod +x runner.sh

!./runner.sh

Run 1: 0.048045
Run 2: 0.048597
Run 3: 0.053817
Run 4: 0.053443
Run 5: 0.047976
Run 6: 0.047580
Run 7: 0.047807
Run 8: 0.047728
Run 9: 0.046985
Run 10: 0.051596
----------------
Average: .049357


In [40]:
import random

counts = [16, 64, 128, 256, 512, 1024, 2048, 4096, 8192]

for n in counts:
    filename = f"input_{n}.txt"
    with open(filename, "w") as f:
        f.write(f"{n}\n")

        for _ in range(n):
            m = random.uniform(1000.0, 100000.0)

            x = random.uniform(-100.0, 100.0)
            y = random.uniform(-100.0, 100.0)
            z = random.uniform(-100.0, 100.0)

            vx = random.uniform(-2.0, 2.0)
            vy = random.uniform(-2.0, 2.0)
            vz = random.uniform(-2.0, 2.0)

            f.write(f"{m:.2f} {x:.2f} {y:.2f} {z:.2f} {vx:.2f} {vy:.2f} {vz:.2f}\n")

    print(f"Создан файл: {filename}")

Создан файл: input_16.txt
Создан файл: input_64.txt
Создан файл: input_128.txt
Создан файл: input_256.txt
Создан файл: input_512.txt
Создан файл: input_1024.txt
Создан файл: input_2048.txt
Создан файл: input_4096.txt
Создан файл: input_8192.txt


In [41]:
%%writefile benchmark.sh
#!/bin/bash

PROGRAM="./task2_cuda"

T_END=5.0
RUNS=10

if [ ! -f "$PROGRAM" ]; then
    echo "Ошибка: Программа $PROGRAM не найдена."
    exit 1
fi

echo "=========================================================="
echo " ЗАПУСК ТЕСТОВ ПРОИЗВОДИТЕЛЬНОСТИ (CUDA)"
echo " Усреднение по $RUNS запускам для каждого размера."
echo "=========================================================="

printf "%-10s | %-15s\n" "N (Тел)" "Avg Time (sec)"
echo "------------------------------"

for N in 16 64 128 256 512 1024 2048 4096 8192; do
    INPUT_FILE="input_$N.txt"

    if [ ! -f "$INPUT_FILE" ]; then
        echo "Файл $INPUT_FILE не найден, пропускаем..."
        continue
    fi

    total_time=0

    # Цикл запусков для усреднения
    for ((i=1; i<=RUNS; i++)); do
        time_val=$($PROGRAM $T_END $INPUT_FILE)


        total_time=$(echo "$total_time + $time_val" | bc -l)
    done

    avg_time=$(echo "scale=6; $total_time / $RUNS" | bc -l)

    printf "%-10d | %-15s\n" "$N" "$avg_time"
done

echo "=========================================================="

Overwriting benchmark.sh


In [42]:
!chmod +x benchmark.sh
!./benchmark.sh

 ЗАПУСК ТЕСТОВ ПРОИЗВОДИТЕЛЬНОСТИ (CUDA)
 Усреднение по 10 запускам для каждого размера.
N (Тел) | Avg Time (sec) 
------------------------------
16         | .055158        
64         | .128925        
128        | .181683        
256        | .453285        
512        | .991621        
1024       | 2.119525       
2048       | 4.414196       
4096       | 8.952852       
8192       | 17.945219      
