In [1]:
# Mount Google Drive so that files under /content/drive (e.g. the graph
# headers and sources referenced by the CUDA cells) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install the nvcc4jupyter plugin straight from GitHub and load its extension.
# NOTE(review): the %%cuda cell magic used by the later cells presumably comes
# from this extension - confirm against the plugin documentation.
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin

Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-l2_bahk2
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-l2_bahk2
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit 0a71d56e5dce3ff1f0dd2c47c29367629262f527
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... [?25l[?25hdone
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4295 sha256=73fa773c11d5a049ea231f7655a8fcf5416c9e2ab2c8cf9f97a602518f30511f
  Stored in directory: /tmp/pip-ephem-wheel-cache-ibu8is0m/wheels/a8/b9/18/23f8ef71ceb0f63297dd1903aedd067e6243a68ea756d6feea
Successfully built NVCCPlugin
Installing collected packages: NVCCPlugin
Successfully installed NVCCPlugin-0.0.2
created output directory at /content

In [92]:
%%cuda --name MIS.cu

#include "/content/drive/MyDrive/graph/coloring.h"
#include "/content/drive/MyDrive/graph/graph_d.h"

// Block size (threads per block) used for the per-vertex kernel launches.
#define THREADxBLOCK 128

using namespace std;

// Device-global flag shared by maximalIS and checkX: maximalIS clears it before
// each round and checkX sets it when some entry of X is still false.
__device__ bool result = true;

/*
 * One round of the independent-set search, one thread per vertex.
 *
 * A vertex that is not yet in X and not yet coloured becomes a member of the
 * current independent set when none of its still-active neighbours (not in X,
 * not coloured) beats it, where "beats" means a strictly larger weight or an
 * equal weight with a larger vertex index. A new member marks itself and all
 * of its neighbours in X so that they are not considered again.
 *
 * Launch layout: 1-D grid of 1-D blocks with at least str->nodeSize threads.
 *
 * col        colouring state; a non-zero col->coloring[v] means v is coloured
 * str        graph in CSR form (cumDegs / neighs), nodeSize vertices
 * weights    per-vertex priority
 * currentIS  out: set to true for vertices that join the set
 * X          in/out: vertices already decided (member, neighbour of a member,
 *            or already coloured)
 *
 * NOTE(review): X is read and written by different threads of the same launch
 * without any synchronisation, so a thread may observe X[neigh] set by a
 * neighbour that joined the set in this very round. Confirm whether a
 * two-phase (select, then mark) scheme is needed.
 */
__global__ void findIS (Coloring* col, GraphStruct *str, uint* weights, bool *currentIS, bool *X) {
	uint idx = threadIdx.x + blockDim.x * blockIdx.x;

	// Bounds check must come first: threads in the grid tail have no X[idx].
	if (idx >= str->nodeSize)
		return;

	// Already decided in a previous round.
	if (X[idx])
		return;

	// Already coloured in a previous colouring pass: exclude it from this search.
	if (col->coloring[idx]) {
		X[idx] = true;
		return;
	}

	uint offset = str->cumDegs[idx];
	uint deg = str->cumDegs[idx + 1] - offset;

	bool candidate = true;
	for (uint j = 0; j < deg; j++) {
		uint neighID = str->neighs[offset + j];
		if (!X[neighID] && !col->coloring[neighID] &&
				((weights[idx] < weights[neighID]) ||
				((weights[idx] == weights[neighID]) && idx < neighID))) {
			// An active neighbour has priority; no need to look further.
			candidate = false;
			break;
		}
	}

	if (candidate) {
		currentIS[idx] = true;
		X[idx] = true;
		// Neighbours of a member can no longer join the set.
		for (uint j = 0; j < deg; j++) {
			uint neighID = str->neighs[offset + j];
			X[neighID] = true;
		}
	}
}

/*
 * Raises the device-global `result` flag when at least one vertex is still
 * missing from X. One thread per vertex; threads past str->nodeSize do nothing.
 * Every writer stores the same value, so the flag is only ever set here and
 * never cleared.
 */
__global__ void checkX(GraphStruct *str, bool* X) {
	const uint vertex = blockIdx.x * blockDim.x + threadIdx.x;

	if (vertex < str->nodeSize && !X[vertex]) {
		result = true;
	}
}

/*
 * Assigns the current colour (col->numOfColors) to every uncoloured vertex
 * flagged in currentIS. An uncoloured vertex that is not flagged raises
 * col->uncoloredNodes instead. One thread per vertex; threads past
 * str->nodeSize and threads of already-coloured vertices do nothing.
 */
__global__ void colorer(Coloring* col, GraphStruct *str, bool *currentIS){
	const uint vertex = blockIdx.x * blockDim.x + threadIdx.x;

	// Skip the grid tail and vertices that already carry a (non-zero) colour.
	if (vertex >= str->nodeSize || col->coloring[vertex])
		return;

	if (!currentIS[vertex]) {
		// Still uncoloured after this pass: another colour is required.
		col->uncoloredNodes = true;
		return;
	}

	col->coloring[vertex] = col->numOfColors;
}

// Select how the driver waits for its child grids before looking at `result`.
// CUDA 12 dynamic parallelism (CDP2) has no device-side cudaDeviceSynchronize();
// the tail-launch stream is used there instead. Older toolkits (and CUDA 12
// builds that force CDP1) keep the explicit device-side synchronisation.
#if defined(CUDART_VERSION) && CUDART_VERSION >= 12000 && !defined(CUDA_FORCE_CDP1_IF_SUPPORTED)
#define MAXIMAL_IS_USE_TAIL_LAUNCH 1
#else
#define MAXIMAL_IS_USE_TAIL_LAUNCH 0
#endif

#if MAXIMAL_IS_USE_TAIL_LAUNCH
/*
 * Helper for maximalIS: runs one findIS/checkX round if the previous round
 * left `result` set, then re-enqueues itself on the tail-launch stream so that
 * the next invocation starts only after this round's child grids finished.
 * Launch with <<<1, 1>>>.
 */
__global__ void maximalISRound(Coloring * col, GraphStruct *str, uint * weigths, bool * currentIS, bool * X){
		// Previous round found every vertex in X: the set is maximal, stop.
		if (!result)
				return;

		dim3 threads ( THREADxBLOCK);
		dim3 blocks ((str->nodeSize + threads.x - 1) / threads.x, 1, 1 );

		result = false;
		findIS <<< blocks, threads >>> (col, str, weigths, currentIS, X);
		checkX <<< blocks, threads >>> (str, X);

		// Runs after this grid and the two children above have completed.
		maximalISRound <<< 1, 1, 0, cudaStreamTailLaunch >>> (col, str, weigths, currentIS, X);
}
#endif

/*
 * Device-side driver that repeats findIS + checkX until checkX no longer finds
 * a vertex outside X, i.e. until the independent set in currentIS cannot be
 * extended. Launch with <<<1, 1>>>; child grids use THREADxBLOCK threads per
 * block. Requires relocatable device code (dynamic parallelism).
 *
 * The flag `result` is written by the child grid checkX, so it must only be
 * read after the children have finished; reading it right after the launch
 * races with them.
 */
__global__ void maximalIS(Coloring * col, GraphStruct *str, uint * weigths, bool * currentIS, bool * X){
		// Nothing to do on an empty graph (and a zero-block launch would fail).
		if (str->nodeSize == 0)
				return;

#if MAXIMAL_IS_USE_TAIL_LAUNCH
		// Force the first round, then let the helper chain itself.
		result = true;
		maximalISRound <<< 1, 1, 0, cudaStreamTailLaunch >>> (col, str, weigths, currentIS, X);
#else
		dim3 threads ( THREADxBLOCK);
		dim3 blocks ((str->nodeSize + threads.x - 1) / threads.x, 1, 1 );

		do {
				result = false;
				findIS <<< blocks, threads >>> (col, str, weigths, currentIS, X);
				// Same thread, same stream: checkX is ordered after findIS.
				checkX <<< blocks, threads >>> (str, X);
				// Wait for the children so that their write to `result` is visible.
				cudaDeviceSynchronize();
		} while (result);
#endif
}

/*
 * Colours the graph by repeatedly extracting an independent set on the GPU
 * and giving all of its vertices the next colour.
 *
 * str     graph description; must be readable from both host and device
 *         (it is dereferenced here and passed to kernels).
 * return  a Coloring allocated with cudaMallocManaged. col->coloring holds one
 *         colour (1..numOfColors) per vertex. For an empty graph col->coloring
 *         is NULL and numOfColors is 0. The caller releases col->coloring and
 *         col with cudaFree.
 */
Coloring* graphColoring(GraphStruct *str){
	Coloring* col;
	CHECK(cudaMallocManaged(&col, sizeof(Coloring)));
	uint n = str->nodeSize;

	// Initialise every field on the host before any kernel is in flight.
	col->coloring = NULL;
	col->numOfColors = 0;
	col->uncoloredNodes = (n > 0);

	// Empty graph: a zero-size allocation or a zero-block launch would fail.
	if (n == 0)
		return col;

	// Managed array with one colour per vertex; 0 means "not coloured yet".
	CHECK(cudaMallocManaged( &(col->coloring), n * sizeof(uint)));
	memset(col->coloring, 0, n * sizeof(uint));   // size in bytes, not elements

	// Device buffers for the random states and the per-vertex weights.
	curandState_t* states;
	uint* weigths;
	CHECK(cudaMalloc((void**) &states, n * sizeof(curandState_t)));
	CHECK(cudaMalloc((void**) &weigths, n * sizeof(uint)));

	dim3 threads ( THREADxBLOCK);
	dim3 blocks ((n + threads.x - 1) / threads.x, 1, 1 );
	uint seed = 0;
	init <<< blocks, threads >>> (seed, states, weigths, n);
	CHECK(cudaGetLastError());

	// Per-pass scratch flags; both are cleared at the start of every pass.
	bool* currentMIS;
	CHECK(cudaMalloc((void**) &currentMIS, n * sizeof(bool)));

	bool* X;
	CHECK(cudaMalloc((void**) &X, n * sizeof(bool)));

	print_d <<< 1, 1 >>> (str, true);
	CHECK(cudaGetLastError());

	// The loop below touches managed memory from the host, so make sure no
	// kernel is still running.
	CHECK(cudaDeviceSynchronize());

	while (col->uncoloredNodes) {
		col->uncoloredNodes = false;
		col->numOfColors++;
		CHECK(cudaMemset(currentMIS, 0, n * sizeof(bool)));
		CHECK(cudaMemset(X, 0, n * sizeof(bool)));

		maximalIS <<< 1, 1 >>> (col, str, weigths, currentMIS, X);
		CHECK(cudaGetLastError());

		colorer <<< blocks, threads >>> (col, str, currentMIS);
		CHECK(cudaGetLastError());

		// colorer may raise col->uncoloredNodes; wait before re-reading it.
		CHECK(cudaDeviceSynchronize());
	}

	CHECK(cudaFree(X));
	CHECK(cudaFree(currentMIS));
	CHECK(cudaFree(states));
	CHECK(cudaFree(weigths));
	return col;
}

/*
 * Seeds one cuRAND state per vertex and draws that vertex's weight.
 * Launch layout: 1-D grid of 1-D blocks with at least n threads.
 *
 * seed     seed shared by all threads; the thread index is the sequence number
 * states   out: n cuRAND states
 * numbers  out: n weights
 * n        number of vertices
 */
__global__ void init (uint seed, curandState_t* states, uint* numbers, uint n) {
	uint idx = blockIdx.x * blockDim.x + threadIdx.x;
	// Valid indices are 0..n-1; idx == n would write past both arrays.
	if (idx >= n)
			return;
	curand_init(seed, idx, 0, &states[idx]);
	// NOTE(review): % and * have equal precedence, so this is (r % n) * n,
	// i.e. a multiple of n. If r % (n * n) was intended, add parentheses.
	numbers[idx] = curand(&states[idx])%n*n;
}


'File written in /content/src/MIS.cu'

In [89]:
%%cuda --name test_MIS.cu

#include "/content/drive/MyDrive/graph/coloring.h"

using namespace std;

/*
 * Test driver: builds a random graph, colours it with graphColoring, prints
 * the elapsed time in milliseconds and then the colour of every vertex.
 */
int main(void) {
	unsigned int n = 10;		 // number of nodes for random graphs
	float prob = 0.5;				    // density (percentage) for random graphs
	std::default_random_engine eng{0};  // fixed seed

	srand(time(0));
	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);

	// new graph with n nodes
	Graph graph(n,1);

	// generate a random graph
	graph.randGraph(prob,eng);

	// get the graph struct
	GraphStruct *str = graph.getStruct();

	cudaEventRecord(start);

	Coloring* col = graphColoring(str);
	cudaDeviceSynchronize();

	cudaEventRecord(stop);
	cudaEventSynchronize(stop);

	// Print how long colouring the graph took, in milliseconds.
	float milliseconds = 0;
	cudaEventElapsedTime(&milliseconds, start, stop);
	printf("%f ms\n", milliseconds);

	// Colours are unsigned: use an unsigned index and the %u conversion.
	for (unsigned int i = 0; i < n; i++) {
			printf("%u ", col->coloring[i]);
	}

	//printColoring(col, str, 1);

	// Release the timing events and the result, which graphColoring allocates
	// with cudaMallocManaged.
	cudaEventDestroy(start);
	cudaEventDestroy(stop);
	cudaFree(col->coloring);
	cudaFree(col);

	return EXIT_SUCCESS;
}

'File written in /content/src/test_MIS.cu'

In [94]:
# Build in two steps: compile every source to an object with relocatable
# device code (-dc; MIS.cu launches kernels from inside a kernel), then link
# the objects into the test_MIS executable and run it.
!nvcc -dc -D__CDPRT_SUPPRESS_SYNC_DEPRECATION_WARNING src/test_MIS.cu /content/src/MIS.cu /content/drive/MyDrive/graph/graph.cpp /content/drive/MyDrive/graph/graph_d.cu
!nvcc test_MIS.o MIS.o graph.o graph_d.o -o test_MIS
!./test_MIS

** Graph (num node: 10, num edges: 23)
  node(0)[5]-> 1 2 3 7 9 
  node(1)[3]-> 0 2 9 
  node(2)[5]-> 0 1 3 5 8 
  node(3)[7]-> 0 2 4 5 6 7 9 
  node(4)[5]-> 3 6 7 8 9 
  node(5)[5]-> 2 3 6 8 9 
  node(6)[4]-> 3 4 5 8 
  node(7)[3]-> 0 3 4 
  node(8)[4]-> 2 4 5 6 
  node(9)[5]-> 0 1 3 4 5 

6.398272 ms
2 1 4 1 3 3 2 4 1 4 