In [None]:
%%bash
# Install the CUDA toolchain and build tools, then build and install Kokkos
# (CUDA backend) and Kokkos Kernels from source under /usr/local.
# Re-running the cell discards any previous checkout and starts from a fresh clone.

# Stop at the first failing command so a broken clone/configure step cannot
# cascade into building or installing from the wrong directory.
set -euo pipefail

# Keep apt from blocking on interactive prompts inside the notebook.
export DEBIAN_FRONTEND=noninteractive

apt-get update
apt-get install -y nvidia-cuda-toolkit
nvcc --version
# g++ is installed here, so no separate fallback install is needed afterwards.
apt-get install -y cmake gcc g++ mpich git

CXX_COMPILER="$(command -v g++)"
BUILD_JOBS="$(nproc)"

#kokkos install
if [ -d "kokkos" ]; then
    echo "dir already there"
    rm -rf kokkos
fi

git clone --depth 1 https://github.com/kokkos/kokkos.git

# A fresh clone contains no build directory, so it is configured directly.
# -S/-B keep the working directory unchanged instead of relying on a cd chain.
# NOTE: Kokkos_ARCH_VOLTA70 targets compute capability 7.0; code linked against
# this install has to be compiled for a compatible architecture.
cmake -S kokkos -B kokkos/build \
    -DKokkos_ENABLE_CUDA=ON \
    -DKokkos_ENABLE_CUDA_LAMBDA=ON \
    -DKokkos_ARCH_VOLTA70=ON \
    -DCMAKE_CXX_COMPILER="${CXX_COMPILER}" \
    -DCMAKE_INSTALL_PREFIX=/usr/local/kokkos

cmake --build kokkos/build --parallel "${BUILD_JOBS}"
cmake --install kokkos/build

#kokkos-kernels install
if [ -d "kokkos-kernels" ]; then
    echo "deleting previous copy..."
    rm -rf kokkos-kernels
fi

git clone --depth 1 https://github.com/kokkos/kokkos-kernels.git

# Kokkos_ROOT points the configure step at the Kokkos install created above.
cmake -S kokkos-kernels -B kokkos-kernels/build \
    -DKokkos_ROOT=/usr/local/kokkos \
    -DKokkosKernels_ENABLE_ALL_COMPONENTS=ON \
    -DCMAKE_CXX_COMPILER="${CXX_COMPILER}" \
    -DCMAKE_INSTALL_PREFIX=/usr/local/kokkos-kernels

#Finally installing kokkoskernels.
cmake --build kokkos-kernels/build --parallel "${BUILD_JOBS}"
cmake --install kokkos-kernels/build


In [None]:
# Report disk usage of every mounted filesystem in human-readable units.
!df --human-readable

In [None]:
# Print the kernel's description of each processor on this machine.
!cat < /proc/cpuinfo

In [None]:
# Print the kernel's memory statistics for this machine.
!cat < /proc/meminfo

In [None]:
import os

# Set the variables on the kernel process itself: `export` is not valid in a
# Python cell, and variables exported in a subshell do not survive that cell.
# Shell commands started later from this kernel inherit these values.
os.environ["CUDA_PATH"] = "/usr/local/cuda"
# Lower-case path: matches the CMAKE_INSTALL_PREFIX used by the setup cell
# (the filesystem is case-sensitive, so "/usr/local/Kokkos" does not exist).
os.environ["KOKKOS_PATH"] = "/usr/local/kokkos"
# Kokkos Kernels is installed under its own prefix by the setup cell.
os.environ["KOKKOSKERNELS_PATH"] = "/usr/local/kokkos-kernels"

_kokkos_lib_dirs = [
    os.path.join(os.environ["KOKKOS_PATH"], "lib"),
    os.path.join(os.environ["KOKKOSKERNELS_PATH"], "lib"),
]
# Prepend the library directories, dropping empty entries (an empty entry means
# "current directory" to the loader) and duplicates so re-running the cell is
# idempotent.
_previous_dirs = [
    entry
    for entry in os.environ.get("LD_LIBRARY_PATH", "").split(":")
    if entry and entry not in _kokkos_lib_dirs
]
os.environ["LD_LIBRARY_PATH"] = ":".join(_kokkos_lib_dirs + _previous_dirs)

In [None]:
from scipy.sparse import random as sparse_random
from scipy.sparse import coo_matrix
import networkx as nx

from scipy.io import mmread

# Matrix Market file, expected in the notebook's working directory.
filename = "Queen4147.mtx"
# `load_sparse_matrix` is not defined in the visible notebook, so the file is
# read with SciPy's Matrix Market reader; coo_matrix() pins the result type.
sparse_matrix = coo_matrix(mmread(filename))


def partition(graph, num_partitions, seed=None):
    """Randomly distribute the edges of ``graph`` over ``num_partitions`` buckets.

    The edge list is shuffled and then dealt out round-robin, so bucket sizes
    differ by at most one edge.

    Args:
        graph: Any object exposing ``edges()`` that yields the edges to split
            (e.g. a NetworkX graph).
        num_partitions: Number of buckets to create.
        seed: Optional seed for a private random generator, making the split
            reproducible. When ``None`` the module-level ``random`` state is
            used, as before.

    Returns:
        dict mapping each partition index ``0..num_partitions-1`` to the list
        of edges assigned to it.
    """
    # Local import: the notebook only imports SciPy's sparse `random` under the
    # alias `sparse_random`, so the stdlib module is not otherwise in scope.
    import random

    edge_list = list(graph.edges())
    if seed is None:
        random.shuffle(edge_list)
    else:
        random.Random(seed).shuffle(edge_list)

    partitions = {i: [] for i in range(num_partitions)}
    for i, edge in enumerate(edge_list):
        partitions[i % num_partitions].append(edge)

    return partitions

# Number of edge buckets to simulate.
# NOTE(review): the original cell never defined `num_partitions`; 4 is a
# placeholder — confirm the intended value.
num_partitions = 4

# `load_queen4147` is not defined in the visible notebook; the graph is built
# from the sparse matrix loaded above instead.
G_queen4147 = nx.from_scipy_sparse_array(sparse_matrix)
# nx.info() was removed in NetworkX 3.0, so the summary is printed explicitly.
print(
    f"Graph with {G_queen4147.number_of_nodes()} nodes "
    f"and {G_queen4147.number_of_edges()} edges"
)

# `simulate_partition` does not exist; the helper defined in this cell is `partition`.
partitions_queen4147 = partition(G_queen4147, num_partitions)

In [None]:
// Queen4147_D1Coloring.cu
#include <KokkosKernels_default_types.hpp>
#include <KokkosKernels_Handle.hpp>
#include <KokkosKernels_GraphColor.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <cuda_runtime.h>
#include <algorithm>
#include <cctype>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <limits>
#include <string>
#include <vector>

namespace KokkosSparse {
namespace Experimental {

template<class KernelHandle, class lno_row_view_t_, class lno_nnz_view_t_>
void graph_color_d1(KernelHandle *handle,
                    typename KernelHandle::const_nnz_lno_t num_verts,
                    typename KernelHandle::const_nnz_lno_t num_edges,
                    lno_row_view_t_ row_map,
                    lno_nnz_view_t_ entries) {
    using execution_space = typename KernelHandle::execution_space;
    using memory_space = typename KernelHandle::memory_space;
    using size_type = typename KernelHandle::size_type;
    using color_type = typename KernelHandle::color_type;
    using device_type = Kokkos::Device<execution_space, memory_space>;
    
    handle->create_graph_coloring_handle(COLORING_D1);
    auto coloring_handle = handle->get_graph_coloring_handle();
    coloring_handle->set_vertex_count(num_verts);
    coloring_handle->set_edge_count(num_edges);
    coloring_handle->set_vb_edge_ratio(4);
    
    Kokkos::View<color_type*, device_type> colors("Colors", num_verts);
    
    Kokkos::parallel_for("D1Coloring",
        Kokkos::RangePolicy<execution_space>(0, num_verts),
        KOKKOS_LAMBDA(const size_type vertex) {
            const size_type my_row_start = row_map(vertex);
            const size_type my_row_end = row_map(vertex + 1);
            Kokkos::View<char*, device_type> forbidden_colors("Forbidden", 256);
            for (size_type edge = my_row_start; edge < my_row_end; ++edge) {
                const size_type neighbor = entries(edge);
                if (neighbor != vertex) {
                    const color_type neighbor_color = colors(neighbor);
                    if (neighbor_color != 0) forbidden_colors(neighbor_color) = 1;
                }
            }
            color_type my_color = 1;
            while (forbidden_colors(my_color) != 0) ++my_color;
            colors(vertex) = my_color;
        });

    size_type num_conflicts = 0;
    Kokkos::parallel_reduce("VerifyColoring",
        Kokkos::RangePolicy<execution_space>(0, num_verts),
        KOKKOS_LAMBDA(const size_type vertex, size_type& update) {
            const size_type my_row_start = row_map(vertex);
            const size_type my_row_end = row_map(vertex + 1);
            const color_type my_color = colors(vertex);
            for (size_type edge = my_row_start; edge < my_row_end; ++edge) {
                const size_type neighbor = entries(edge);
                if (neighbor != vertex && my_color == colors(neighbor)) update++;
            }
        }, num_conflicts);

    coloring_handle->set_color_view(colors);
    coloring_handle->set_num_colors(256);
    coloring_handle->set_num_conflicts(num_conflicts);
}
}
}

// Host-side CSR adjacency structure: row offsets plus neighbor indices.
struct HostCsrGraph {
    std::vector<int> row_map;  // size = number of vertices + 1
    std::vector<int> entries;  // size = row_map.back()
};

// Reads the sparsity pattern of a square Matrix Market "coordinate" file into
// CSR form. Numerical values are ignored and diagonal entries are dropped.
// Files whose banner is not "general" store one triangle only, so each such
// entry is inserted in both directions; "general" files are taken as listed.
// Returns false when the file cannot be opened or parsed, or when the sizes do
// not fit the 32-bit index types used on the device.
static bool read_matrix_market_graph(const std::string &path, HostCsrGraph &graph) {
    std::ifstream input(path);
    if (!input) return false;

    std::string line;
    if (!std::getline(input, line) || line.compare(0, 14, "%%MatrixMarket") != 0) return false;
    std::transform(line.begin(), line.end(), line.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    if (line.find("coordinate") == std::string::npos) return false;
    const bool mirror_entries = line.find("general") == std::string::npos;

    // Skip comment and blank lines; the first remaining line holds "rows cols nnz".
    bool have_size_line = false;
    while (std::getline(input, line)) {
        if (!line.empty() && line[0] != '%') {
            have_size_line = true;
            break;
        }
    }
    if (!have_size_line) return false;

    long long num_rows = 0, num_cols = 0, num_stored = 0;
    if (std::sscanf(line.c_str(), "%lld %lld %lld", &num_rows, &num_cols, &num_stored) != 3) return false;
    if (num_rows <= 0 || num_rows != num_cols || num_stored < 0) return false;
    if (num_rows >= std::numeric_limits<int>::max()) return false;

    std::vector<int> src, dst;
    src.reserve(static_cast<std::size_t>(num_stored));
    dst.reserve(static_cast<std::size_t>(num_stored));
    // offsets[r + 1] first counts the degree of row r, then becomes a prefix sum.
    std::vector<long long> offsets(static_cast<std::size_t>(num_rows) + 1, 0);

    while (std::getline(input, line)) {
        const char *text = line.c_str();
        char *after_row = nullptr;
        const long long row = std::strtoll(text, &after_row, 10);
        if (after_row == text) continue;  // blank / whitespace-only line
        char *after_col = nullptr;
        const long long col = std::strtoll(after_row, &after_col, 10);
        if (after_col == after_row) return false;
        // Matrix Market indices are 1-based.
        if (row < 1 || row > num_rows || col < 1 || col > num_rows) return false;
        if (row == col) continue;

        src.push_back(static_cast<int>(row - 1));
        dst.push_back(static_cast<int>(col - 1));
        ++offsets[row];
        if (mirror_entries) ++offsets[col];
    }

    for (std::size_t r = 1; r < offsets.size(); ++r) offsets[r] += offsets[r - 1];
    if (offsets.back() > std::numeric_limits<int>::max()) return false;

    graph.row_map.assign(offsets.begin(), offsets.end());
    graph.entries.resize(static_cast<std::size_t>(offsets.back()));

    // next_slot[r] is the position of the next free entry in row r.
    std::vector<int> next_slot(graph.row_map.begin(), graph.row_map.end() - 1);
    for (std::size_t k = 0; k < src.size(); ++k) {
        graph.entries[next_slot[src[k]]++] = dst[k];
        if (mirror_entries) graph.entries[next_slot[dst[k]]++] = src[k];
    }
    return true;
}

// Loads the graph from a Matrix Market file (first non-Kokkos command-line
// argument, default "Queen4147.mtx"), copies it to the GPU, runs
// graph_color_d1 and prints color count, conflict count and elapsed time.
// Returns 1 when the graph cannot be loaded, 0 otherwise.
int main(int argc, char* argv[]) {
    // Kokkos::initialize strips the Kokkos-specific flags from argc/argv.
    Kokkos::initialize(argc, argv);
    int exit_code = 0;
    {
        using ExecutionSpace = Kokkos::Cuda;
        using MemorySpace = Kokkos::CudaSpace;
        using device_type = Kokkos::Device<ExecutionSpace, MemorySpace>;

        using size_type = int;
        using lno_t = int;
        using scalar_t = double;

        const std::string matrix_path = (argc > 1) ? argv[1] : "Queen4147.mtx";

        // The graph dimensions come from the file rather than hard-coded
        // values, and the device views are filled with the real adjacency data
        // (previously they were left zero-initialized, i.e. an empty graph).
        HostCsrGraph host_graph;
        if (!read_matrix_market_graph(matrix_path, host_graph)) {
            std::fprintf(stderr, "Failed to read Matrix Market graph from '%s'\n", matrix_path.c_str());
            exit_code = 1;
        } else {
            KokkosKernels::Experimental::KokkosKernelsHandle
                <size_type, lno_t, scalar_t, ExecutionSpace, MemorySpace, MemorySpace> handle;

            const size_type num_rows = static_cast<size_type>(host_graph.row_map.size()) - 1;
            const size_type num_nnz = static_cast<size_type>(host_graph.entries.size());

            Kokkos::View<size_type*, device_type> row_map("row_map", num_rows + 1);
            Kokkos::View<lno_t*, device_type> entries("entries", num_nnz);

            // Stage through host mirrors, then copy to device memory.
            auto row_map_host = Kokkos::create_mirror_view(row_map);
            auto entries_host = Kokkos::create_mirror_view(entries);
            std::copy(host_graph.row_map.begin(), host_graph.row_map.end(), row_map_host.data());
            std::copy(host_graph.entries.begin(), host_graph.entries.end(), entries_host.data());
            Kokkos::deep_copy(row_map, row_map_host);
            Kokkos::deep_copy(entries, entries_host);
            host_graph = HostCsrGraph();  // release the staging vectors

            cudaEvent_t start, stop;
            cudaEventCreate(&start);
            cudaEventCreate(&stop);

            // Only the coloring call itself is timed, not file I/O or transfers.
            cudaEventRecord(start);
            KokkosSparse::Experimental::graph_color_d1(&handle, num_rows, num_nnz, row_map, entries);
            cudaEventRecord(stop);

            cudaEventSynchronize(stop);
            float milliseconds = 0;
            cudaEventElapsedTime(&milliseconds, start, stop);

            auto coloring_handle = handle.get_graph_coloring_handle();
            printf("Queen4147 D1 Coloring Results:\n");
            printf("Colors used: %d\n", coloring_handle->get_num_colors());
            printf("Conflicts: %d\n", coloring_handle->get_num_conflicts());
            printf("Execution time: %f ms\n", milliseconds);

            cudaEventDestroy(start);
            cudaEventDestroy(stop);
        }
    }
    Kokkos::finalize();
    return exit_code;
}

In [None]:
%%bash
# Compile the coloring benchmark against the installed Kokkos / Kokkos Kernels
# libraries and run it.
set -euo pipefail

# Default to the install prefixes used by the setup cell when the variables
# are not present in the environment.
KOKKOS_PATH="${KOKKOS_PATH:-/usr/local/kokkos}"
KOKKOSKERNELS_PATH="${KOKKOSKERNELS_PATH:-/usr/local/kokkos-kernels}"

# -arch=sm_70 matches the Kokkos_ARCH_VOLTA70 setting the libraries were built with.
# --extended-lambda is required for KOKKOS_LAMBDA in device code; current Kokkos needs C++17.
# Kokkos Kernels lives under its own prefix, and the CMake install provides
# libkokkoscore / libkokkoscontainers rather than a single libkokkos.
# NOTE(review): the trailing CUDA libraries assume the default Kokkos Kernels
# TPL configuration — confirm against the installed CMake package files.
nvcc -O3 -std=c++17 --extended-lambda -arch=sm_70 Queen4147_D1Coloring.cu \
    -I"${KOKKOSKERNELS_PATH}/include" -I"${KOKKOS_PATH}/include" \
    -L"${KOKKOSKERNELS_PATH}/lib" -L"${KOKKOS_PATH}/lib" \
    -lkokkoskernels -lkokkoscontainers -lkokkoscore \
    -lcusparse -lcublas -lcusolver -lcuda -ldl \
    -o queen4147_coloring

./queen4147_coloring