In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin

Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-zgf4383x
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-zgf4383x
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit 0a71d56e5dce3ff1f0dd2c47c29367629262f527
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... [?25l[?25hdone
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4295 sha256=ec10e475d63d52eee341337989527887b927d4d2d2785f748582acc2c1e0479e
  Stored in directory: /tmp/pip-ephem-wheel-cache-_g63n38b/wheels/a8/b9/18/23f8ef71ceb0f63297dd1903aedd067e6243a68ea756d6feea
Successfully built NVCCPlugin
Installing collected packages: NVCCPlugin
Successfully installed NVCCPlugin-0.0.2
created output directory at /content

In [44]:
%%cuda --name SDL.cu

#include "/content/drive/MyDrive/graph/coloring.h"
#include "/content/drive/MyDrive/graph/graph_d.h"

#define THREADxBLOCK 128

using namespace std;

/**
 * Raises *flag when at least one of the first n entries of unweightedNodes is false.
 *
 * One thread inspects one entry; threads whose index is >= n do nothing.
 * The kernel only ever writes `true` to *flag, so the caller has to clear
 * the flag before the launch if it wants a fresh answer.
 *
 * @param n                number of entries to inspect
 * @param unweightedNodes  per-node boolean array with at least n entries
 * @param flag             single output boolean, set to true on the first false entry seen
 */
__global__ void checkUnweightedNodes(uint n, bool* unweightedNodes, bool * flag) {
    const uint tid = blockIdx.x * blockDim.x + threadIdx.x;

    // The bounds test comes first so that out-of-range threads never touch the array.
    if (tid < n && !unweightedNodes[tid]) {
        *flag = true;
    }
}

/**
 * One peeling step of the degree ordering: every node that has no weight yet
 * and whose induced degree (number of neighbours still without a weight) is
 * at most k receives `weight`.
 *
 * Launch layout: 1D grid, one thread per node, at least str->nodeSize threads.
 *
 * @param str            graph; cumDegs[i]..cumDegs[i+1] delimits node i's slice of neighs
 * @param degrees        per-node output; degrees[idx] is set to `weight` when idx is selected
 * @param weightedNodes  per-node flag, set to true once a node has received its weight
 * @param k              induced-degree threshold of this step
 * @param weight         value stored for the nodes selected in this step
 *
 * NOTE(review): a neighbour's flag may be set by its own thread during this
 * same launch, so the induced degree observed here can depend on thread
 * scheduling. Removing that needs a second flag buffer, i.e. a signature change.
 */
__global__ void setDegrees(GraphStruct *str, uint * degrees, bool * weightedNodes, uint k, uint weight){
    uint idx = threadIdx.x + blockDim.x * blockIdx.x;
    uint n = str->nodeSize;

    if(idx >= n)
        return;

    // weightedNodes[idx] is written only by this thread, so a node that already
    // has its weight can never be selected again: skip its neighbour scan.
    if(weightedNodes[idx])
        return;

    uint offset = str->cumDegs[idx];
    uint originalDeg = str->cumDegs[idx + 1] - offset;
    uint inducedDeg = 0;

    // Count the neighbours that are still waiting for a weight.
    for (uint i = 0; i < originalDeg; i++){
        uint neighID = str->neighs[offset + i];
        if(!weightedNodes[neighID])
            inducedDeg += 1;
    }

    if (inducedDeg <= k){
        degrees[idx] = weight;
        weightedNodes[idx] = true;
    }
}

/**
 * Assigns a degree-based weight to every node by running setDegrees with a
 * growing threshold: step s (s = 1, 2, ...) gives weight s to the still
 * unweighted nodes whose induced degree is at most s.
 *
 * The loop stops as soon as every node has a weight. Later steps could not
 * change anything, because setDegrees never touches a weighted node, so the
 * result is the same as running all str->nodeSize steps.
 *
 * @param str            graph; read on the host for nodeSize, then passed to the kernels
 * @param degrees        per-node output array (str->nodeSize entries) filled by setDegrees
 * @param weightedNodes  per-node flags, expected to be all false on entry
 */
void initDegrees(GraphStruct *str, uint * degrees, bool * weightedNodes){
    const uint n = str->nodeSize;
    if (n == 0)
        return;     // nothing to weigh; a launch with zero blocks would be rejected

    dim3 threads (THREADxBLOCK);
    dim3 blocks ((n + threads.x - 1) / threads.x, 1, 1);

    // Single managed flag: "some node is still without a weight".
    bool* pending;
    CHECK(cudaMallocManaged((void**) &pending, sizeof(bool)));

    for (uint step = 1; step <= n; step++){
        // Cleared on the host before any kernel of this step is in flight.
        *pending = false;

        setDegrees <<< blocks, threads >>> (str, degrees, weightedNodes, step, step);
        CHECK(cudaGetLastError());

        // Launched after setDegrees on the same stream, so it sees this step's flags.
        checkUnweightedNodes <<< blocks, threads >>> (n, weightedNodes, pending);
        CHECK(cudaGetLastError());
        CHECK(cudaDeviceSynchronize());

        if (!*pending)
            break;
    }

    CHECK(cudaFree(pending));
}

/**
 * Marks the uncoloured nodes that win against all of their uncoloured neighbours.
 *
 * A node loses to an uncoloured neighbour when the neighbour has a larger
 * entry in `degrees`, or an equal entry in `degrees` and a larger entry in
 * `weigths`. A node that loses to nobody gets candidateNodes[node] = true;
 * the array is left untouched for every other node.
 *
 * Launch layout: 1D grid, one thread per node, at least str->nodeSize threads.
 *
 * @param col             current colouring; colour 0 means "not coloured yet"
 * @param str             graph; cumDegs[i]..cumDegs[i+1] delimits node i's slice of neighs
 * @param degrees         per-node primary priority
 * @param weigths         per-node tie-breaking priority
 * @param candidateNodes  per-node output flags
 */
__global__ void findCandidates (Coloring* col, GraphStruct *str, uint* degrees, uint* weigths, bool* candidateNodes) {
    const uint node = blockIdx.x * blockDim.x + threadIdx.x;

    // Padding threads have no node to work on.
    if (node >= str->nodeSize)
        return;

    // Nodes that already have a colour are out of the competition.
    if (col->coloring[node] != 0)
        return;

    const uint first = str->cumDegs[node];
    const uint count = str->cumDegs[node + 1] - str->cumDegs[node];

    bool beaten = false;
    for (uint k = 0; k != count; ++k) {
        const uint other = str->neighs[first + k];

        // Coloured neighbours no longer compete.
        if (col->coloring[other] != 0)
            continue;

        const bool lowerDegree = degrees[node] < degrees[other];
        const bool tieLost = (degrees[node] == degrees[other]) && (weigths[node] < weigths[other]);
        if (lowerDegree || tieLost)
            beaten = true;
    }

    if (!beaten)
        candidateNodes[node] = true;
}

/**
 * Gives every candidate node the smallest colour (>= 1) that none of its
 * neighbours currently has; any uncoloured node that is not a candidate
 * sets col->uncoloredNodes so the host knows another round is needed.
 *
 * Launch layout: 1D grid, one thread per node, at least str->nodeSize threads.
 *
 * The search uses no scratch memory: for each colour it rescans the
 * neighbour list. A node with `deg` neighbours sees at most `deg` distinct
 * colours, so a free colour always exists in [1, deg + 1] and the search
 * cannot run past that bound.
 *
 * NOTE(review): this reads neighbours' colours while other threads may be
 * writing their own. It presumably relies on findCandidates never marking
 * two adjacent uncoloured nodes in the same round — confirm.
 *
 * @param col             colouring to update; colour 0 means "not coloured yet"
 * @param str             graph; cumDegs[i]..cumDegs[i+1] delimits node i's slice of neighs
 * @param candidateNodes  per-node flags telling which nodes may be coloured now
 */
__global__ void colorer (Coloring* col, GraphStruct *str, bool* candidateNodes) {
    uint idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= str->nodeSize)
        return;

    if (col->coloring[idx] != 0)
        return;

    if (!candidateNodes[idx]) {
        // Still uncoloured and not allowed to pick a colour yet: ask for another round.
        col->uncoloredNodes = true;
        return;
    }

    uint offset = str->cumDegs[idx];
    uint deg = str->cumDegs[idx + 1] - offset;

    // First fit: try colours in increasing order and take the first unused one.
    for (uint color = 1; color <= deg + 1; color++) {
        bool used = false;
        for (uint j = 0; j < deg; j++) {
            uint neighID = str->neighs[offset + j];
            if (col->coloring[neighID] == color) {
                used = true;
                break;
            }
        }

        if (!used) {
            col->coloring[idx] = color;
            return;
        }
    }
}

/**
 * Exchanges array[idx_a] and array[idx_b] in place (host side).
 *
 * @param array  array holding both elements
 * @param idx_a  index of the first element
 * @param idx_b  index of the second element; may be equal to idx_a
 */
void h_swap(uint* array, int idx_a, int idx_b){
    // The temporary has the element type, so values above INT_MAX are not
    // passed through a signed int on their way between the two slots.
    uint tmp = array[idx_a];
    array[idx_a] = array[idx_b];
    array[idx_b] = tmp;
}

/**
 * Shuffles the first n entries of weights in place (host side).
 *
 * Walks the array from the front; at each position it draws one index with
 * rand() from the part of the array that starts at that position and swaps
 * the two entries. Exactly one rand() call is made per position.
 *
 * @param weights  array to permute
 * @param n        number of entries to shuffle
 */
void FYshuffle(uint * weights, uint n){
    int pos = 0;
    while (pos < n) {
        // Number of entries from `pos` to the end, `pos` included.
        const uint remaining = n - pos;
        const int pick = (rand() % remaining) + pos;

        h_swap(weights, pos, pick);
        ++pos;
    }
}

/**
 * Colours the graph on the GPU.
 *
 * Steps:
 *   1. give every node a distinct tie-breaking weight (a shuffled 0..n-1),
 *   2. compute the degree-based priorities with initDegrees,
 *   3. repeat findCandidates + colorer until no node reports itself as
 *      still uncoloured.
 *
 * The weights and priorities are printed on stdout ("Pesi" / "Gradi").
 *
 * @param str  graph to colour; it is read on the host and passed to kernels
 * @return     a Coloring allocated with cudaMallocManaged. `coloring` holds one
 *             colour (>= 1) per node and `numOfColors` the largest colour
 *             assigned. The caller releases `coloring` and the struct itself
 *             with cudaFree. For an empty graph `coloring` is a null pointer.
 */
Coloring* graphColoring(GraphStruct *str) {
    const uint n = str->nodeSize;

    Coloring* col;
    CHECK(cudaMallocManaged(&col, sizeof(Coloring)));
    col->coloring = nullptr;
    col->numOfColors = 0;
    col->uncoloredNodes = false;

    // Empty graph: nothing to allocate, and a launch with zero blocks would be rejected.
    if (n == 0)
        return col;

    CHECK(cudaMallocManaged(&(col->coloring), n * sizeof(uint)));
    // Colour 0 means "uncoloured"; the size is in bytes, so every uint is cleared.
    memset(col->coloring, 0, n * sizeof(uint));
    col->uncoloredNodes = true;

    uint* weigths;
    uint* degrees;
    bool* weightedNodes;
    bool* candidateNodes;
    CHECK(cudaMallocManaged((void**) &weightedNodes, n * sizeof(bool)));
    CHECK(cudaMallocManaged((void**) &weigths, n * sizeof(uint)));
    CHECK(cudaMallocManaged((void**) &degrees, n * sizeof(uint)));
    CHECK(cudaMallocManaged((void**) &candidateNodes, n * sizeof(bool)));

    memset(weightedNodes, false, n * sizeof(bool));
    CHECK(cudaMemset(candidateNodes, false, n * sizeof(bool)));

    // Distinct tie-breaking weights: a random permutation of 0..n-1.
    for (uint i = 0; i < n; i++){
        weigths[i] = i;
    }
    FYshuffle(weigths, n);

    initDegrees(str, degrees, weightedNodes);

    printf("Pesi: ");
    for (uint i = 0; i < n; i++){
        printf("%u ", weigths[i]);
    }
    printf("\n");

    printf("Gradi: ");
    for (uint i = 0; i < n; i++){
        printf("%u ", degrees[i]);
    }
    printf("\n");

    dim3 threads (THREADxBLOCK);
    dim3 blocks ((n + threads.x - 1) / threads.x, 1, 1);

    while (col->uncoloredNodes) {
        // Cleared on the host; colorer raises it again if some node is left over.
        col->uncoloredNodes = false;

        findCandidates <<< blocks, threads >>> (col, str, degrees, weigths, candidateNodes);
        CHECK(cudaGetLastError());
        CHECK(cudaDeviceSynchronize());

        colorer <<< blocks, threads >>> (col, str, candidateNodes);
        CHECK(cudaGetLastError());
        // The flag is read by the loop condition, so the kernel must have finished.
        CHECK(cudaDeviceSynchronize());
    }

    // The number of rounds is not the number of colours: report the largest
    // colour that was actually assigned.
    uint maxColor = 0;
    for (uint i = 0; i < n; i++){
        if (col->coloring[i] > maxColor)
            maxColor = col->coloring[i];
    }
    col->numOfColors = maxColor;

    // Work buffers are private to this function.
    CHECK(cudaFree(candidateNodes));
    CHECK(cudaFree(degrees));
    CHECK(cudaFree(weigths));
    CHECK(cudaFree(weightedNodes));

    return col;
}

'File written in /content/src/SDL.cu'

In [45]:
%%cuda --name test_SDL.cu

#include "/content/drive/MyDrive/graph/coloring.h"
#include "/content/drive/MyDrive/graph/graph_d.h"


/**
 * Test driver: builds a small random graph, prints it from the device,
 * colours it with graphColoring, and prints the elapsed time and the
 * colour of every node.
 */
int main(void) {
    unsigned int n = 7;                 // number of nodes for random graphs
    float prob = 0.5f;                  // density (percentage) for random graphs
    std::default_random_engine eng{0};  // fixed seed

    srand(time(0));

    cudaEvent_t start, stop;
    CHECK(cudaEventCreate(&start));
    CHECK(cudaEventCreate(&stop));

    // new graph with n nodes
    Graph graph(n,1);

    // generate a random graph
    graph.randGraph(prob,eng);

    // get the graph struct
    GraphStruct *str = graph.getStruct();
    print_d<<<1,1>>>(str, true);
    CHECK(cudaGetLastError());
    // Wait for the printing kernel before the host works on the graph again.
    CHECK(cudaDeviceSynchronize());

    CHECK(cudaEventRecord(start));

    Coloring* col = graphColoring(str);
    CHECK(cudaDeviceSynchronize());

    CHECK(cudaEventRecord(stop));
    CHECK(cudaEventSynchronize(stop));

    // Print how many milliseconds it took to colour the graph.
    float milliseconds = 0;
    CHECK(cudaEventElapsedTime(&milliseconds, start, stop));
    printf("%f ms\n", milliseconds);

    //printColoring(col, str, 1);
    printf("Coloratura trovata: ");
    for(uint i = 0; i < str->nodeSize; i++){
        printf("%u ", col->coloring[i]);
    }

    // Release what this function created or received ownership of.
    CHECK(cudaEventDestroy(start));
    CHECK(cudaEventDestroy(stop));
    CHECK(cudaFree(col->coloring));
    CHECK(cudaFree(col));

    return EXIT_SUCCESS;
}

'File written in /content/src/test_SDL.cu'

In [46]:
!nvcc -dc src/test_SDL.cu /content/src/SDL.cu /content/drive/MyDrive/graph/graph.cpp /content/drive/MyDrive/graph/graph_d.cu
!nvcc test_SDL.o SDL.o graph.o graph_d.o -o test_SDL
!./test_SDL

** Graph (num node: 7, num edges: 9)
  node(0)[3]-> 1 2 3 
  node(1)[4]-> 0 2 4 5 
  node(2)[2]-> 0 1 
  node(3)[3]-> 0 5 6 
  node(4)[2]-> 1 6 
  node(5)[2]-> 1 3 
  node(6)[2]-> 3 4 

Pesi: 0 3 5 2 1 6 4 
Gradi: 3 3 2 3 2 2 2 
6.350848 ms
Coloratura trovata: 2 1 3 1 3 2 2 