<a href="https://colab.research.google.com/github/0x1beef/uap/blob/main/src/opencv_cuda.ipynb">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
<a href="https://kaggle.com/kernels/welcome?src=https://github.com/0x1beef/uap/blob/main/src/opencv_cuda.ipynb">
    <img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"   />
</a>

In [None]:
import os, urllib.request

# the helper modules are stored in the same repository directory as this notebook
url = 'https://raw.githubusercontent.com/0x1beef/uap/main/src'
# download only the helpers that are not in the working directory yet
for py_file in ('utils.py', 'opencv_cuda_installer.py'):
    if os.path.exists(py_file):
        continue
    urllib.request.urlretrieve(f'{url}/{py_file}', py_file)
import utils

In [None]:
# show information about the environment (helper defined in utils.py, presumably prints it)
utils.show_env_info()
# show the nvidia-smi summary of the available GPUs
!nvidia-smi
# show the compute capability of the GPUs, this is the value that
# get_cuda_arch_bin_ptx() reads when the platform is unknown
!nvidia-smi --query-gpu=compute_cap --format=csv

In [None]:
import opencv_cuda_installer as cv_cuda
# raising here stops a "Run All" before anything is cloned or built
already_has_cuda = cv_cuda.test_opencv_cuda('current')
if already_has_cuda:
    raise Exception('the current OpenCV already has CUDA support')

## **Clone OpenCV**

In [None]:
%%bash
# do shallow clones of just the commit that we want, it's a bit faster
# usage: shallow_clone <directory> <repository url> <tag or commit>
shallow_clone () {
  # run in a subshell: a failure can't leave the rest of the cell in the wrong
  # directory and the git commands never run outside of the clone directory
  (
    mkdir -p "$1" && cd "$1" || exit 1
    git init
    # the remote already exists when the cell is run again, just update its url then
    git remote add origin "$2" 2> /dev/null || git remote set-url origin "$2"
    git fetch --depth 1 origin "$3" || exit 1
    git -c advice.detachedHead=false checkout FETCH_HEAD
  )
}

shallow_clone opencv https://github.com/opencv/opencv.git 4.10.0
shallow_clone opencv_contrib https://github.com/opencv/opencv_contrib.git 4.10.0

## **Install build requirements**

In [None]:
if utils.get_platform() != "unknown":
    if utils.get_platform() == "kaggle":
        # fix cmake errors: /opt/conda/lib/libcurl.so.4: no version information available
        !apt-get -y remove cmake
        # from https://apt.kitware.com/
        !test -f /usr/share/doc/kitware-archive-keyring/copyright || wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
        !echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ focal main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null
        !apt-get update
        !apt-get install -y kitware-archive-keyring
        !apt-get install -y cmake
        !cmake --version

    if utils.get_platform() == 'sagemaker':
        !pip install huggingface-hub
        # can't use apt-get on sagemaker
        !conda install -y cmake lld cuda-toolkit=12.2
    else:
        # the original linker is slow and crashes while trying to allocate too much memory, using lld instead
        !apt-get -y install lld
    !lld --version

In [None]:
%%bash
# note: some cmake config errors can result from older cmake versions
# fall back to the pip package when no cmake can be run
cmake --version || pip install cmake

# numpy provides the include directory passed as PYTHON3_NUMPY_INCLUDE_DIRS
pip install numpy

In [None]:
# remove the previous opencv version
!pip uninstall opencv_python_headless opencv_python opencv_contrib_python -y

## **Configure OpenCV**

In [None]:
# the arch_bin and arch_ptx should be set according to
# which GPUs the binary is expected to be used with.
# see https://docs.opencv.org/4.10.0/d2/dbc/cuda_intro.html
def get_cuda_arch_bin_ptx():
    # specific platforms will have a particular range of GPUs
    platform = utils.get_platform()
    if platform == 'kaggle':
        return ('60,75','') # T4x2 = 75, P100 = 60 # PTX not needed
    if platform == 'colab':
        return ('75','75') # T4 = 75 # PTX for other GPUs
    if platform == 'sagemaker':
        return ('75','75') # T4 = 75
    # otherwise, build a binary just for the current GPU
    cap_csv = !nvidia-smi --query-gpu=compute_cap --format=csv
    cap = cap_csv[1].replace('.','')
    return (cap, '')

def is_windows():
    """Tell whether the notebook is running on Windows."""
    from sys import platform as platform_name
    return platform_name[:3] == 'win'

# the OpenCV scripts don't handle backslashes well
def fix_path(path):
    """Return `path` with every backslash turned into a forward slash."""
    return '/'.join(path.split('\\'))

# find the path to the python library the OpenCV bindings will link to
def get_python_library():
    """Return the path of the python library to pass as PYTHON3_LIBRARY.

    Raises:
        Exception: if the library can't be located.
    """
    from os.path import dirname, join, exists
    if not is_windows():
        # distutils was removed in Python 3.12, the standard sysconfig module
        # exposes the same build configuration variables
        from sysconfig import get_config_var
        pkgconfig_dir = get_config_var('LIBPC')
        library_name = get_config_var('LDLIBRARY')
        if not pkgconfig_dir or not library_name:
            raise Exception('python library not found: LIBPC/LDLIBRARY are not set')
        # the library is expected in the parent of the pkgconfig directory
        return join(dirname(pkgconfig_dir), library_name)
    else:
        # on windows LIBPC/LDLIBRARY are not set.
        # assume paths that are typical for a conda environment
        import sys
        lib_file = f'python{sys.version_info[0]}{sys.version_info[1]}.lib'
        env_root = dirname(sys.executable)
        lib_file_path = f'{env_root}\\libs\\{lib_file}'
        if not exists(lib_file_path):
            raise Exception(f'python library not found at {lib_file_path}')
        return lib_file_path
    
def get_python_args():
    """Build the cmake options that point OpenCV at the python of this kernel.

    Returns:
        a string of space separated `-D NAME="value"` options for the
        interpreter, its include directories, its library, the numpy include
        directory and the directory that the bindings get installed into.
    """
    import sys
    from os.path import dirname
    try:
        from distutils.sysconfig import get_python_inc, get_config_h_filename, get_python_lib
    except ImportError:
        # distutils was removed in Python 3.12 and is only there when setuptools
        # provides it, fall back to the sysconfig equivalents
        import sysconfig
        get_config_h_filename = sysconfig.get_config_h_filename
        get_python_inc = lambda: sysconfig.get_path('include')
        get_python_lib = lambda: sysconfig.get_path('purelib')
    # use the interpreter that runs this kernel: all the other paths below belong
    # to it, and a `python` command doesn't have to exist on the PATH
    args = f'-D PYTHON3_EXECUTABLE="{fix_path(sys.executable)}"'
    args += f' -D PYTHON3_INCLUDE_DIR="{fix_path(get_python_inc())}"'
    args += f' -D PYTHON3_INCLUDE_DIR2="{fix_path(dirname(get_config_h_filename()))}"'
    args += f' -D PYTHON3_LIBRARY="{fix_path(get_python_library())}"'
    import numpy
    args += f' -D PYTHON3_NUMPY_INCLUDE_DIRS="{fix_path(numpy.get_include())}"'
    args += f' -D PYTHON3_PACKAGES_PATH="{fix_path(get_python_lib())}"'
    return args

def get_cmake_args():
    """Return the CUDA architecture options followed by the python options."""
    bin_archs, ptx_archs = get_cuda_arch_bin_ptx()
    cuda_args = f"-D CUDA_ARCH_BIN={bin_archs} -D CUDA_ARCH_PTX={ptx_archs} "
    return cuda_args + get_python_args()

# show the options so that they can be checked before running the configure step
print(get_cmake_args())

In [None]:
%%bash -s {utils.get_platform()} {get_cmake_args()}
# the first argument is the platform name,
# the remaining ones are the cmake options that were computed in python
PLATFORM=$1
# $@ is unquoted here, so the arguments are split again on whitespace
ARGS=( ${@} )
# join everything after the platform name into one space separated string
CMAKE_ARGS="${ARGS[*]:1}"

# build reference: https://docs.opencv.org/4.x/db/d05/tutorial_config_reference.html
# available modules: https://docs.opencv.org/4.10.0/modules.html
# the opencv modules we're likely to need:
BUILD_LIST=imgcodecs,imgproc,text,tracking,features2d,optflow,python3,xfeatures2d
BUILD_LIST=$BUILD_LIST,cudafeatures2d,cudaimgproc,cudaoptflow,cudawarping,cudev
BUILD_LIST=$BUILD_LIST,highgui # not used but building 'world' fails without this

# replace the CMAKE_ARGS string with an array: the fixed options come first,
# the options that were passed in from python are appended at the end.
# the paths are relative to the opencv/build directory that cmake is run from.
CMAKE_ARGS=(
    -B . -S ..
    -D CMAKE_BUILD_TYPE=Release
    -D CMAKE_INSTALL_PREFIX=`pwd`/opencv/install
    -D OPENCV_EXTRA_MODULES_PATH=`pwd`/opencv_contrib/modules
    -D BUILD_LIST=$BUILD_LIST
    # building everything into one library (world) makes the build faster and is otherwise more convenient
    -D BUILD_opencv_world=ON
    # the dnn module needs protobuf, but we don't need dnn yet so disable both
    -D BUILD_opencv_dnn=OFF
    -D WITH_PROTOBUF=OFF
    -D WITH_DNN=OFF
    -D OPENCV_DNN_CUDA=OFF
    -D WITH_CUDNN=OFF
    # enable building e.g SURF, which is patented / not free
    -D OPENCV_ENABLE_NONFREE=ON
    # dont use the old CUDA cmake functions, as per https://cmake.org/cmake/help/latest/policy/CMP0146.html
    -D ENABLE_CUDA_FIRST_CLASS_LANGUAGE=ON
    -D WITH_CUDA=ON
    -D WITH_CUBLAS=ON
    -D WITH_TBB=ON # todo: is this used ?
    -D BUILD_EXAMPLES=OFF
    -D BUILD_TESTS=OFF
    -D BUILD_PERF_TESTS=OFF
    -D BUILD_opencv_apps=OFF
    # we dont need GTK since the gui doesnt work in a notebook
    -D WITH_GTK=OFF
    -D WITH_OPENEXR=OFF
    -D WITH_WEBP=OFF
    -D WITH_PYTHON=ON
    -D BUILD_opencv_python3=ON
    # the string is expanded before the array is assigned and, being unquoted,
    # it is split on whitespace into separate cmake arguments
    $CMAKE_ARGS
)

if [[ "$PLATFORM" == "kaggle" ]]; then
    # on kaggle there's another nvcc at /opt/conda/bin which doesn't work
    # fixes: CUDA::nppial - target not found
    # awk keeps only the first match when several cuda-* directories exist
    NVCC_PATH=$(echo /usr/local/cuda-*/bin/nvcc | awk '{print $1}')
    CMAKE_ARGS+=( -D CMAKE_CUDA_COMPILER=$NVCC_PATH )
fi

# start from an empty build and install directory.
# stop if the clone is missing, instead of deleting ./build and ./install elsewhere
cd opencv || exit 1
rm -rf build install
mkdir -p build install
cd build || exit 1

# use the lld linker if available
# note: running plain `lld` always fails because it is only a generic driver,
# so look for `ld.lld`, the linker that -fuse-ld=lld makes the compiler use
if command -v ld.lld &> /dev/null; then
    export LDFLAGS="-fuse-ld=lld" 
fi
time cmake "${CMAKE_ARGS[@]}"

## **Build, install and test OpenCV**

In [None]:
%%bash
# run only some jobs in parallel to avoid running out of memory
time cmake --build opencv/build --config Release --parallel 4

In [None]:
%%bash
# copy the build results into the install prefix chosen in the configure step
time cmake --install opencv/build --config Release

On Windows, we need to tell the Python loader where to find the CUDA binaries; otherwise, importing the library fails with the following error:
```bash
ImportError: DLL load failed: The specified module could not be found.
```

In [None]:
def patch_dll_path():
    import os
    nvcc_path = !where nvcc
    cuda_bin = os.path.dirname(nvcc_path[0]).replace('\\','/')
    from distutils.sysconfig import get_python_lib
    config_file = get_python_lib() + '/cv2/config.py'
    with open(config_file, 'a') as file:
        file.write(f"\nBINARIES_PATHS += ['{cuda_bin}']\n")

if is_windows():
    patch_dll_path()

Windows conda environments will include a version of the Visual C++ Redistributable. This can be incompatible with the Visual C++ version used to build the library, resulting in the following error:
```bash
ImportError: DLL load failed while importing cv2: A dynamic link library (DLL) initialization routine failed.
```
[This guide](https://www.jamesbowley.co.uk/qmd/opencv_cuda_python_windows.html#troubleshooting-python-bindings-installation-issues) recommends upgrading the redistributable in the conda environment. If that still doesn't resolve the issue, the following workaround moves conda's redistributable DLLs to backup locations, so that the redistributable that is installed system-wide (e.g. in C:\Windows\System32) is found instead.

In [None]:
import os
def move_msvcrt_to_backup(dir, version):
    """Move the DLLs in `dir` whose name contains `version` to `dir`/backup.

    Args:
        dir: the directory to look for the DLLs in.
        version: the text that the name of a DLL has to contain, e.g. '140'.
    """
    # done in python rather than with `!mv`: it doesn't need unix tools to be
    # installed on Windows and it works when the path contains spaces
    import glob, shutil
    backup_dir = f'{dir}/backup'
    os.makedirs(backup_dir, exist_ok = True)
    for dll_path in glob.glob(f'{glob.escape(dir)}/*{version}*.dll'):
        # give the full target path so that a DLL left by an earlier run is replaced
        shutil.move(dll_path, os.path.join(backup_dir, os.path.basename(dll_path)))

def msvcrt_workaround():
    """Move the Visual C++ redistributable DLLs of the python environment to backup directories.

    Both the directory of the interpreter and its Library/bin subdirectory are handled.
    """
    import sys
    version = '140'
    # use the interpreter that runs this kernel, like get_python_library() does,
    # the first `python` on the PATH can belong to another environment
    env_dir = fix_path(os.path.dirname(sys.executable))
    print(env_dir)
    move_msvcrt_to_backup(env_dir, version)
    move_msvcrt_to_backup(f'{env_dir}/Library/bin', version)

#if is_windows(): msvcrt_workaround()

In [None]:
import cv2
# the build information lists the CUDA settings the library was built with
print(cv2.__version__)
print(cv2.getBuildInformation())
# on Colab nvcc is available to build even when no GPU is present
if utils.get_platform() != 'colab':
    import numpy as np
    # creating a GpuMat from a small host image fails when CUDA can't be used
    host_image = np.full((480,480), 60, np.uint8)
    gpu_image = cv2.cuda.GpuMat(host_image)

## **Create a build archive and upload it to Hugging Face**

In [None]:
%%bash -s {utils.get_platform()}
# the archive is only uploaded for known platforms, so skip creating it otherwise
[[ "$1" == "unknown" ]] && exit

# store the checksums of the shared libraries that the build is linked against
OPENCV_LIB="opencv/install/lib/libopencv_world.so"
DEPS=$(ldd $OPENCV_LIB | grep "=>" | sed -e "s/.*=> //" -e "s/ (.*//")
md5sum -b $DEPS > opencv_cuda.md5
cp opencv_cuda.md5 opencv/install/lib

# the directory that the cv2 python package was installed into
PACKAGES_PATH=$(python3 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")

# pack the cv2 python package together with the install directory
tar -czf opencv_cuda.tar.gz -C $PACKAGES_PATH cv2 -C $(pwd)/opencv install
du -hs *.tar.gz

In [None]:
# every known platform gets its own directory in the repository
target_platform = utils.get_platform()
if target_platform != 'unknown':
    archive_files = ['opencv_cuda.tar.gz', 'opencv_cuda.md5']
    utils.upload_to_huggingface(archive_files, f'logicbear/cache/opencv/{target_platform}')