In [8]:
import os
import rasterio as rio
from osgeo import gdal
import matplotlib.pyplot as plt
import numpy as np

# Directory containing the DEM/DSM rasters.
demroot = 'C:/Users/Max/Documents/Werk/test_gpu/input'

# Use the first directory entry as the preview raster. Fail with a clear
# message when the directory is empty instead of hitting a NameError on
# `demfile` further down.
dem_entries = os.listdir(demroot)
if not dem_entries:
    raise FileNotFoundError('No raster files found in ' + demroot)
file = dem_entries[0]
demfile = os.path.join(demroot, file)

# Read the bounding box and band 1 once with rasterio.
with rio.open(demfile) as dataset:
    lu_bounds = dataset.bounds
    lu_img = dataset.read(1)
print('The lu_bounds is:', lu_bounds)

# `dsmimg` holds the same band-1 data; copy instead of re-reading the file so
# the two arrays remain independent objects.
dsmimg = lu_img.copy()

# Read the same raster through GDAL as float64; the GDAL dataset also carries
# the geotransform used below.
gdal_dsm = gdal.Open(demfile)
if gdal_dsm is None:
    raise RuntimeError('GDAL could not open ' + demfile)
dsm = gdal_dsm.ReadAsArray().astype(np.float64)

# geotransform[1] is the pixel width; `scale` is its reciprocal, or None when
# the raster reports a zero pixel width.
geotransform = gdal_dsm.GetGeoTransform()
scale = 1 / geotransform[1] if geotransform[1] != 0 else None

# Render the raster with a colour bar and save it as dem.png.
fig, ax = plt.subplots(figsize=(10, 10))
image = ax.imshow(dsm, cmap='viridis')
fig.colorbar(image, ax=ax)
fig.savefig('dem.png', dpi=300)
plt.close(fig)  # release the figure; nothing is shown inline

The lu_bounds is: BoundingBox(left=92800.0, bottom=434800.0, right=94200.0, top=436450.0)


In [9]:
from pycuda.compiler import SourceModule
import pycuda
from pycuda import gpuarray
from pycuda import compiler
import pycuda.driver as cuda
import pycuda.autoinit                 # PyCuda autoinit
import pycuda.driver as cuda           # PyCuda In, Out helpers
import matplotlib.pyplot as plot       # Library to plot
import matplotlib.cm as colormap       # Library to plot
import numpy                           # Fast math library
import time
import numpy as np                     # numeric python lib
import matplotlib.image as mpimg       # reading images to numpy arrays
import matplotlib.pyplot as plt        # to plot any graph
import matplotlib.patches as mpatches  # to draw a circle at the mean contour
import scipy.ndimage as ndi            # to determine shape centrality
# matplotlib setup
%matplotlib inline
from pylab import rcParams
rcParams['figure.figsize'] = (8, 8)    # setting default size of plots


# Report how many CUDA devices the driver exposes, then print each device's
# ordinal and name, and finally the driver module itself.
device_total = cuda.Device.count()
print("%d device(s) found." % device_total)
for ordinal, dev in enumerate(map(cuda.Device, range(device_total))):
    print("Device #%d: %s" % (ordinal, dev.name()))
print(cuda)

# PyCUDA imports
from pycuda.compiler import SourceModule
import pycuda
from pycuda import gpuarray
from pycuda import compiler
import pycuda.driver as cuda
import pycuda.autoinit  # Zorg ervoor dat CUDA goed wordt geïnitialiseerd
import numpy as np
import time

# Create a CUDA context on device 0 and make it the current context.
# NOTE(review): `pycuda.autoinit` is imported above and, per the PyCUDA
# documentation, already creates a context on import; this call pushes a
# further context on top of it. Every make_context() needs its own matching
# cuda.Context.pop() - confirm the extra context is intended.
cuda.Device(0).make_context()

1 device(s) found.
Device #0: NVIDIA GeForce RTX 3050 Laptop GPU
<module 'pycuda.driver' from 'c:\\Users\\Max\\.conda\\envs\\svfscalc\\Lib\\site-packages\\pycuda\\driver.py'>


<pycuda._driver.Context at 0x1b4457a2dc0>

In [10]:
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule

# No make_context() here: a CUDA context is already current at this point
# (`pycuda.autoinit` creates one on import). Every extra make_context() pushes
# another context onto the stack that has to be popped individually, so calling
# it again in this cell would leak one context per execution.

# List the CUDA devices visible to the driver.
device_count = cuda.Device.count()
print("%d device(s) found." % device_count)
for ordinal in range(device_count):
    dev = cuda.Device(ordinal)
    print("Device #%d: %s" % (ordinal, dev.name()))
print(cuda)

# Query free and total device memory (bytes) for the current context and
# report both in MB.
free_memory, total_memory = cuda.mem_get_info()
print(f"Free memory: {free_memory / (1024 ** 2):.2f} MB")
print(f"Total memory: {total_memory / (1024 ** 2):.2f} MB")

1 device(s) found.
Device #0: NVIDIA GeForce RTX 3050 Laptop GPU
<module 'pycuda.driver' from 'c:\\Users\\Max\\.conda\\envs\\svfscalc\\Lib\\site-packages\\pycuda\\driver.py'>
Free memory: 3104.25 MB
Total memory: 4095.56 MB


In [11]:
 #Kernel text
# Fixed-size variant of the sky-view-factor kernel: the raster dimensions are
# hard-coded to 2000 x 2000 inside the CUDA source, so it is only valid for
# rasters of exactly that size.
# NOTE: a later cell rebinds `mod` to a kernel that takes the raster width and
# height as arguments.
kernel = """
#include <math.h>

#define PI 3.1415926

// Thread layout adapted from
// https://github.com/AlainPaillou/PyCuda_Denoise_Filters/blob/master/PyCuda_KNN_Denoise_Mono.py
__global__ void svfcalculator(float * lattice_out, float * lattice, float scale)
{
    // Raster size this kernel is hard-wired for, and the ray length in pixels.
    const int imageW = 2000;
    const int imageH = 2000;
    const int rangeDist = 200;

    // One thread per pixel; threads that fall outside the raster do nothing.
    const int ix = blockDim.x * blockIdx.x + threadIdx.x;
    const int iy = blockDim.y * blockIdx.y + threadIdx.y;
    if (ix >= imageW || iy >= imageH) return;

    // Index the thread's own pixel (no +1 offset), so row 0 / column 0 are
    // computed and the last row / column stay inside the buffers.
    const int index = ix + iy * imageW;
    const float clr00 = lattice[index];

    float SVF_res = 0.0f;

    for (int thetaN = 0; thetaN < 360; thetaN++)
    {
        const float theta = PI * float(thetaN) / 180;
        float betaMax = 0;

        for (float radius = 5; radius < rangeDist; radius = radius + 5)
        {
            const int sx = ix + int(radius * cos(theta));
            const int sy = iy - int(radius * sin(theta));

            // Stop the ray as soon as it leaves the raster; reading past the
            // edge would be an out-of-bounds access.
            if (sx < 0 || sx >= imageW || sy < 0 || sy >= imageH) break;

            // Height of the sampled pixel relative to the current pixel.
            const float buildH = lattice[sx + sy * imageW] - clr00;

            // scale rescales the height difference before it is divided by
            // the horizontal distance, which is measured in pixels.
            const float beta = atan(scale * buildH / radius);
            if (betaMax < beta)
            {
                betaMax = beta;
            }
        }
        SVF_res += pow(cos(betaMax), 2);
    }

    // Mean of cos^2(max elevation angle) over the 360 directions.
    lattice_out[index] = SVF_res / 360.0;
}
"""


# Compile the kernel source into a module for the current CUDA context.
mod = SourceModule(kernel)
print(mod)


<pycuda.compiler.SourceModule object at 0x000001B409C301D0>


In [12]:
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # This initializes the CUDA driver
from pycuda import gpuarray
from pycuda.compiler import SourceModule
import time

# CUDA C source of the sky-view-factor kernel, kept as a Python string so that
# SourceModule can compile it at runtime.
# Each thread handles one pixel (ix, iy) of a width x height raster stored
# row-major in `lattice`. For each of 360 one-degree directions it samples the
# raster every 5 pixels out to a radius below 200 pixels, stopping early once a
# sample falls outside the raster. Per direction it keeps the largest angle
# atan(scale * height_difference / radius), never below 0, and it writes the
# mean of cos^2(angle) over all directions to `lattice_out`.
kernel_code = """
#include <math.h>

#define PI 3.1415926

__global__ void svfcalculator(float * lattice_out, float * lattice, float scale, int width, int height)
{
    int ix = threadIdx.x + blockIdx.x * blockDim.x;
    int iy = threadIdx.y + blockIdx.y * blockDim.y;

    if (ix >= width || iy >= height) return;

    int index = ix + iy * width;

    float SVF_res = 0;
    float clr00 = lattice[index];

    for (int thetaN = 0; thetaN < 360; thetaN++) 
    {
        float theta = PI * float(thetaN) / 180;
        float betaMax = 0;

        for (float radius = 5; radius < 200; radius += 5)
        {   
            int x = ix + int(radius * cos(theta));
            int y = iy - int(radius * sin(theta));

            if (x < 0 || x >= width || y < 0 || y >= height) break;

            int index2 = x + y * width;
            float clrIJ = lattice[index2];
            
            float buildH = clrIJ - clr00;
            float beta = atan(scale * buildH / radius); 
            betaMax = fmax(betaMax, beta);
        }
        SVF_res += pow(cos(betaMax), 2);
    }
    
    lattice_out[index] = SVF_res / 360.0;
}
"""

# Compile the kernel source and look up its entry point by name.
mod = SourceModule(kernel_code)
svfcalculator = mod.get_function("svfcalculator")

def svfCalculator_RayTracingOnGPU(dsm, scale):
    """Compute the sky view factor of a single-band raster on the GPU.

    Parameters
    ----------
    dsm : array-like, 2-D
        Height values, one per pixel, indexed as ``dsm[row, col]``.
    scale : float
        Factor the kernel multiplies height differences by before dividing by
        the horizontal distance in pixels (callers pass ``1 / pixel width``).

    Returns
    -------
    numpy.ndarray
        float32 array with the same shape as ``dsm`` holding the value the
        kernel wrote for every pixel.

    Raises
    ------
    ValueError
        If ``dsm`` is not two-dimensional.
    """
    # The kernel addresses pixels as ix + iy * width, i.e. row-major order.
    # ascontiguousarray guarantees that layout (astype alone may keep a
    # Fortran-ordered input as is) and converts to float32 in one step.
    px = np.ascontiguousarray(dsm, dtype=np.float32)
    if px.ndim != 2:
        raise ValueError(
            "dsm must be a 2-D (single band) array, got shape %s" % (px.shape,)
        )
    height, width = px.shape

    # A zero-sized raster has nothing to compute, and a zero-sized grid is not
    # a valid kernel launch.
    if px.size == 0:
        return np.empty((height, width), dtype=np.float32)

    # Upload the input once and allocate an uninitialised output buffer of the
    # same shape, without uploading the input a second time to size it.
    d_px = gpuarray.to_gpu(px)
    d_out = gpuarray.empty(px.shape, np.float32)

    # 8 x 8 threads per block; round the block count up so the grid covers the
    # whole raster (the kernel itself ignores threads outside it).
    nb_ThreadsX = 8
    nb_ThreadsY = 8
    nb_blocksX = (width + nb_ThreadsX - 1) // nb_ThreadsX
    nb_blocksY = (height + nb_ThreadsY - 1) // nb_ThreadsY

    # Scalars are wrapped in fixed-width numpy types so they match the
    # kernel's float / int parameters.
    svfcalculator(
        d_out, d_px, np.float32(scale),
        np.int32(width), np.int32(height),
        block=(nb_ThreadsX, nb_ThreadsY, 1),
        grid=(nb_blocksX, nb_blocksY)
    )

    # Copy the result back to the host.
    return d_out.get()


In [13]:
def saverasternd(gdal_data, filename, raster):
    """Write ``raster`` to a single-band Float64 GeoTIFF.

    The output takes its size, geotransform and projection from ``gdal_data``.

    Parameters
    ----------
    gdal_data : GDAL dataset
        Open dataset used as the template for size and georeferencing.
    filename : str
        Path of the GeoTIFF to create.
    raster : numpy.ndarray
        2-D array written to band 1 starting at offset (0, 0).

    Raises
    ------
    RuntimeError
        If the GTiff driver cannot create ``filename``.
    """
    rows = gdal_data.RasterYSize
    cols = gdal_data.RasterXSize

    # Use gdal.GDT_Float64 rather than the bare GDT_Float64 name, which only
    # exists after `from osgeo.gdalconst import *` has been executed.
    outDs = gdal.GetDriverByName("GTiff").Create(filename, cols, rows, 1, gdal.GDT_Float64)
    if outDs is None:
        raise RuntimeError("GDAL could not create output raster: %s" % filename)

    # Georeference the image and set the projection.
    outDs.SetGeoTransform(gdal_data.GetGeoTransform())
    outDs.SetProjection(gdal_data.GetProjection())

    # Declare the NoData value, write the data, and flush the band to disk.
    outBand = outDs.GetRasterBand(1)
    outBand.SetNoDataValue(-9999)
    outBand.WriteArray(raster, 0, 0)
    outBand.FlushCache()

    # Release the band before the dataset so the file is finalised and closed
    # here rather than whenever the objects happen to be garbage collected.
    outBand = None
    outDs.FlushCache()
    outDs = None
    

In [14]:
import os, os.path
from osgeo import gdal
from osgeo.gdalconst import *
import time
import numpy as np

root = r'C:/Users/Max/Documents/Werk/test_gpu/input'
outroot = r'C:/Users/Max/Documents/Werk/test_gpu/output'
# makedirs also creates missing parent directories and is a no-op when the
# directory already exists.
os.makedirs(outroot, exist_ok=True)


for file in os.listdir(root):
    filename = os.path.join(root, file)
    # Sub-directories are not rasters; skip them.
    if not os.path.isfile(filename):
        continue
    t0 = time.time()

    # Open the raster with GDAL. Depending on whether GDAL exceptions are
    # enabled, an unreadable file either raises RuntimeError or returns None;
    # treat both as "skip this entry" so one stray file does not abort the run.
    try:
        opened = gdal.Open(filename)
    except RuntimeError:
        opened = None
    if opened is None:
        print('Skipping (GDAL cannot open it as a raster):', filename)
        continue
    gdal_dsm = opened

    # Band 1 as float64, plus the geotransform for the pixel size.
    dsm_img = gdal_dsm.GetRasterBand(1).ReadAsArray().astype(np.float64)
    geotransform = gdal_dsm.GetGeoTransform()
    if geotransform[1] == 0:
        print('Skipping (pixel width in geotransform is 0):', filename)
        continue
    # geotransform[1] is the pixel width; its reciprocal is the kernel's scale.
    scale = 1 / geotransform[1]

    # Calculate the sky view factor on the GPU.
    svfres = svfCalculator_RayTracingOnGPU(dsm_img, scale)
    print('The time and file are:', time.time() - t0, filename)

    # Save the result next to the other outputs, georeferenced like the input.
    outsvfname = os.path.join(outroot, 'svf_' + file)
    saverasternd(gdal_dsm, outsvfname, np.asarray(svfres))

The time and file are: 8.440712690353394 C:/Users/Max/Documents/Werk/test_gpu/input\CDSM_groundlvl_37HN1_09.tif


In [15]:
# Pop the current CUDA context off the context stack.
# NOTE(review): a single pop removes a single context; confirm that the number
# of pops matches the number of make_context() calls made earlier.
cuda.Context.pop()