In [3]:
from pycuda.compiler import SourceModule
import pycuda
from pycuda import gpuarray
from pycuda import compiler
import pycuda.driver as cuda
import pycuda.autoinit             # PyCuda autoinit
import pycuda.driver as cuda       # PyCuda In, Out helpers
import matplotlib.pyplot as plt   # Library to plot
import numpy as np                # numeric python lib
import scipy.ndimage as ndi       # to determine shape centrality
from osgeo import gdal
import time

# Print available CUDA devices
print("%d device(s) found." % cuda.Device.count())           
for ordinal in range(cuda.Device.count()):
    dev = cuda.Device(ordinal)
    print("Device #%d: %s" % (ordinal, dev.name()))
print(cuda)

# Kernel text
kernel = """
#include <math.h>

#define _X  ( threadIdx.x + blockIdx.x * blockDim.x )
#define _Y  ( threadIdx.y + blockIdx.y * blockDim.y )
#define _WIDTH  ( blockDim.x * gridDim.x )
#define _HEIGHT ( blockDim.y * gridDim.y  )
#define _XM(x)  ( (x + _WIDTH) % _WIDTH )
#define _YM(y)  ( (y + _HEIGHT) % _HEIGHT )
#define _INDEX(x,y)  ( _XM(x)  + _YM(y) * _WIDTH )
#define PI 3.1415926

__global__ void shadowCast(float * lattice_out, float * lattice, float azimuth, float altitude) //int w, int h
{
    #define NLM_BLOCK_RADIUS    3
    
    int imageW = 1320;
    int imageH = 1320;
    
    const long int   ix = blockDim.x * blockIdx.x + threadIdx.x;
    const long int   iy = blockDim.y * blockIdx.y + threadIdx.y;
    const float  x = (float)ix  + 1.0f;
    const float  y = (float)iy  + 1.0f;
    const float limxmin = NLM_BLOCK_RADIUS + 2;
    const float limxmax = imageW - NLM_BLOCK_RADIUS - 2;
    const float limymin = NLM_BLOCK_RADIUS + 2;
    const float limymax = imageH - NLM_BLOCK_RADIUS - 2;
    
    long int index4;    
    
    if(ix>limxmin && ix<limxmax && iy>limymin && iy<limymax)
    {
        
        //Result accumulator
        float clr00 = 0.0;
        float clrIJ = 0.0;
        
        //Center of the KNN window
        index4 = x + (y * imageW);
        
        // the current pixel
        clr00 = lattice[index4];
        
        int rangeDist = 200;
        float betaMax = 0;
        float altitude_degree = PI*altitude/180.0;
        float theta;
        
        // convert the sun azimuth (clockwise zero at North) to theta (anticlockwise, zero at east)
        if (azimuth < 90 & azimuth > 0) {
            theta = PI*(90 - azimuth)/180.0;
        } else { //azimuth > 180 & azimuth<360
            theta = PI*(450 - azimuth)/180.0;
        }
        
        int flag = 0;
        for( float radius = 5; radius < rangeDist; radius = radius + 5)
        {   
            if (x + int(radius*cos(theta)) > limxmax | x + int(radius*cos(theta)) < limxmin | y - int(radius*sin(theta)) > limymax | y - int(radius*sin(theta)) < 0) 
            {
                break;
            }
            
            long int index2 = x + int(radius*cos(theta)) + (y - int(radius*sin(theta))) * imageW;
            clrIJ = lattice[index2];

            // building height information
            float buildH = clrIJ - clr00;
            
            float beta = atan(0.5*buildH/radius); //because the pixel resolution is 2ft, height is in ft
            
            if (betaMax > altitude_degree) 
            {
                //lattice_out[index4] = 1; //building is too high, shadow
                flag = 1;
                break;
            }
            
            if (betaMax < beta)
            {
                betaMax = beta;
            }
        }
        
        if (flag == 1) {
            lattice_out[index4] = 0; //building is too high, shadow
        } else {
            if (betaMax > altitude_degree) 
            {
                lattice_out[index4] = 0; //building is too high, shadow
            } else {
                lattice_out[index4] = 1; // not shadow
            }
        }
    }
}

__device__ void annulus_weight(float altitude, float aziinterval, float *weight) {
    float n = 90.0;
    float steprad = (360./aziinterval) * PI/180.0;
    float annulus = 91.0 - altitude;
    float w = 1.0/(2.0*PI) * sin(PI / (2.0*n)) * sin((PI * (2.0 * annulus - 1.0)) / (2.0 * n));
    *weight = steprad * w;
}

__global__ void svf_shadowcasting_2018a_gpu(float * svf_Latt, float * dsm_Latt, 
                                            float *iazimuth_Latt, float scale, 
                                            int imageW, int imageH)
{
    #define NLM_BLOCK_RADIUS    3
    
    const long int   ix = blockDim.x * blockIdx.x + threadIdx.x;
    const long int   iy = blockDim.y * blockIdx.y + threadIdx.y;
    const float  x = (float)ix  + 1.0f;
    const float  y = (float)iy  + 1.0f;
    
    const float limxmin = -1;
    const float limxmax = imageW;
    const float limymin = -1;
    const float limymax = imageH;
    
    long int index4;
    
    if(ix>limxmin && ix<limxmax && iy>limymin && iy<limymax)
    {
        index4 = x + (y * imageW);
        
        float iangle[] = {6, 18, 30, 42, 54, 66, 78, 90};
        float aziinterval[] = {30, 30, 24, 24, 18, 12, 6, 1};
        float annulino[] = {0, 12, 24, 36, 48, 60, 72, 84, 90};
        
        float svf = 0;
        int idx = 0;
        for (int i = 0; i<8; i++) {
            for (int j=0; j < aziinterval[i]; j++) {
                float altitude = iangle[i];
                float azimuth;
                
                if (idx - 1 < 0) {
                    azimuth = iazimuth_Latt[144];
                } else {
                    azimuth = iazimuth_Latt[idx - 1];
                }
                
                float altitude_degree = PI*altitude/180.0;
                float theta;
                
                if (azimuth < 90 & azimuth > 0) {
                    theta = PI*(90 - azimuth)/180.0;
                } else { //azimuth > 180 & azimuth<360
                    theta = PI*(450 - azimuth)/180.0;
                }
                
                int rangeDist = 400;
                
                float sh = 0;
                float f = dsm_Latt[index4];
                
                float temp = 0;
                
                for( float radius = 0; radius < rangeDist; radius = radius + 1)
                {   
                    if (x + int(radius*cos(theta)) > limxmax | x + int(radius*cos(theta)) < limxmin | y - int(radius*sin(theta)) > limymax | y - int(radius*sin(theta)) < 0) 
                    {
                        break;
                    }
                    
                    temp = 0;
                    long int index2 = x + int(radius*cos(theta)) + (y - int(radius*sin(theta))) * imageW;
                    
                    temp = dsm_Latt[index2] - radius*tan(altitude_degree)/scale;
                    
                    if (f < temp) {
                        f = temp;
                    }
                    
                }
                
                if (f == dsm_Latt[index4]) {
                    sh = 1;
                } else {
                    sh = 0;
                }
                
                float weight;
                for (int k = annulino[i] + 1; k < annulino[i + 1] + 1; k++){
                    annulus_weight(k, aziinterval[i], &weight);
                    weight *= sh;
                    svf += weight;
                }
                
                idx++;
            }
        }
        
        svf_Latt[index4] = svf;
    }
}
"""

# Compile and get kernel function
mod = SourceModule(kernel)
print(mod)

def svf_shadowcasting_2018a_gpu(mod, dsmimg, scale):    
    '''This is the shadow casting algorithm to calculate the sky view factor
    Input:
    mod : SourceModule
    dsmimg : the DSM data to be calculated
    scale : scale of DSM pixels
    
    Output:
    The Sky View Factor (SVF) image
    '''
    
    # Compile kernel function
    svf_Lattice_gpu = mod.get_function("svf_shadowcasting_2018a_gpu")

    # Retrieve dimensions
    height, width = dsmimg.shape
    npixels = width * height
    
    # Allocate memory on the device
    dsm_px = cuda.mem_alloc(dsmimg.nbytes)
    iazimuth_px = cuda.mem_alloc(npixels * np.float32().nbytes)
    svf_px = cuda.mem_alloc(npixels * np.float32().nbytes)
    
    # Copy data to the device
    cuda.memcpy_htod(dsm_px, dsmimg)
    
    # Define kernel block and grid sizes
    nb_ThreadsX = 32
    nb_ThreadsY = 32
    nb_blocksX = int(width/nb_ThreadsX)
    nb_blocksY = int(height/nb_ThreadsY)
    
    # Execute the kernel function
    svf_Lattice_gpu(svf_px, dsm_px, iazimuth_px, np.float32(scale), np.int32(width), np.int32(height), 
                    block=(nb_ThreadsX, nb_ThreadsY, 1), grid=(nb_blocksX, nb_blocksY))
    
    # Retrieve results
    svf_res = np.empty_like(dsmimg)
    cuda.memcpy_dtoh(svf_res, svf_px)
    
    return svf_res

# Input file
dsmfile = 'input/DSM_buildings_ground_37HN1_09.tif'
gdal_dsm = gdal.Open(dsmfile)
dsm = gdal_dsm.ReadAsArray().astype(np.float)
geotransform = gdal_dsm.GetGeoTransform()
scale = 1 / geotransform[1]

# SVF Calculation
gdal.DontUseExceptions()
t1 = time.time()
svfres = svf_shadowcasting_2018a_gpu(mod, dsm, scale)
print('The time consumption is:', time.time() - t1)
plt.imshow(svfres)


1 device(s) found.
Device #0: NVIDIA GeForce RTX 3050 Laptop GPU
<module 'pycuda.driver' from 'C:\\Users\\Max\\.conda\\envs\\svfscalc\\Lib\\site-packages\\pycuda\\driver.py'>
<pycuda.compiler.SourceModule object at 0x0000019190EA87A0>


AttributeError: 'NoneType' object has no attribute 'ReadAsArray'