<a href="https://colab.research.google.com/github/0x1beef/uap/blob/main/src/opencv_cuda.ipynb">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
<a href="https://kaggle.com/kernels/welcome?src=https://github.com/0x1beef/uap/blob/main/src/opencv_cuda.ipynb">
    <img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"   />
</a>


In [None]:
import urllib.request

# Base location of the helper modules that accompany this notebook.
url = 'https://raw.githubusercontent.com/0x1beef/uap/main/src'

# Download each helper into the working directory so that it can be imported.
for py_file in ('utils.py', 'opencv_cuda_installer.py'):
    urllib.request.urlretrieve('/'.join([url, py_file]), py_file)

import utils

In [None]:
# Report the runtime environment (what exactly is printed is up to utils.show_env_info).
utils.show_env_info()
# Show the NVIDIA GPUs that are visible to this runtime.
!nvidia-smi
# Show the compute capability of each GPU, for comparison with the
# CUDA_ARCH_BIN / CUDA_ARCH_PTX values chosen in the configure step.
!nvidia-smi --query-gpu=compute_cap --format=csv

In [None]:
import opencv_cuda_installer as cv_cuda

# Raising here stops a "Run All" before the long build starts when there is
# nothing to do.
# NOTE(review): 'current' presumably selects the OpenCV that is installed right
# now - confirm against opencv_cuda_installer.test_opencv_cuda.
already_has_cuda = cv_cuda.test_opencv_cuda('current')
if already_has_cuda:
    raise Exception('the current OpenCV already has CUDA support')

## **Clone OpenCV**

In [None]:
%%bash
# abort the cell (and therefore a "Run All") as soon as a clone step fails
set -e

# Do a shallow clone of just the commit that we want, it's a bit faster.
#   $1 - directory to clone into (created when missing)
#   $2 - URL of the repository
#   $3 - ref (tag, branch or commit) to fetch and check out
# Returns non-zero when any step fails.
shallow_clone () {
  # the subshell keeps the caller's working directory untouched, even on failure
  (
    mkdir -p "$1"
    cd "$1"
    git init
    # the remote already exists when the cell is run a second time
    git remote add origin "$2" 2>/dev/null || git remote set-url origin "$2"
    git fetch --depth 1 origin "$3"
    git -c advice.detachedHead=false checkout FETCH_HEAD
  )
}

shallow_clone opencv https://github.com/opencv/opencv.git 4.10.0
shallow_clone opencv_contrib https://github.com/opencv/opencv_contrib.git 4.10.0

## **Install build requirements**

In [None]:
if utils.get_platform() == "kaggle":
    # fix cmake errors: /opt/conda/lib/libcurl.so.4: no version information available
    !apt-get -y remove cmake
    # from https://apt.kitware.com/
    !test -f /usr/share/doc/kitware-archive-keyring/copyright || wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
    !echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ focal main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null
    !apt-get update
    !apt-get install -y kitware-archive-keyring
    !apt-get install -y cmake
    !cmake --version

if utils.get_platform() == 'sagemaker':
    !pip install huggingface-hub
    # can't use apt-get on sagemaker
    !conda install -y cmake lld cuda-toolkit=12.2
else:
    # the original linker is slow and crashes while trying to allocate too much memory, using lld instead
    !apt-get -y install lld
!lld --version

In [None]:
# remove the previous opencv version
!pip uninstall opencv_python_headless opencv_python opencv_contrib_python -y

## **Configure OpenCV**

In [None]:
def get_cuda_arch_bin_ptx():
    """Pick the CUDA compute capabilities to build for on the current platform.

    Returns:
        A ``(cuda_arch_bin, cuda_arch_ptx)`` tuple of strings: the
        comma-separated capabilities to compile binary code for and the
        capability to emit PTX for. Either one may be an empty string.
    """
    arch_by_platform = {
        'kaggle': ('60,75', ''),     # T4x2 = 75, P100 = 60 # PTX not needed
        'colab': ('75', '75'),       # T4 = 75 # PTX for other GPUs
        'sagemaker': ('75', '75'),   # T4 = 75
    }
    # any other platform: no binary code, PTX for compute capability 5.0 only
    return arch_by_platform.get(utils.get_platform(), ('', '50'))

# both values are handed to the configure cell as script arguments
cuda_arch_bin, cuda_arch_ptx = get_cuda_arch_bin_ptx()

In [None]:
%%bash -s "{utils.get_platform()}" "{cuda_arch_bin}" "{cuda_arch_ptx}"
# The arguments above are quoted so that an empty value is still passed as an
# argument of its own. Unquoted, an empty cuda_arch_bin would vanish and the
# PTX value would end up in $2, i.e. in CUDA_ARCH_BIN.
PLATFORM=$1

# compute capabilities:
CUDA_ARCH_BIN=$2
CUDA_ARCH_PTX=$3

# full paths of the host compiler and of nvcc, both are handed to cmake
GCC_PATH=$(command -v g++)
NVCC_PATH=$(command -v nvcc)

if [ "$PLATFORM" == "kaggle" ]; then
    # on kaggle there's another nvcc at /opt/conda/bin which doesn't work
    NVCC_PATH=`echo /usr/local/cuda-*/bin/nvcc | awk '{print $1}'`
fi

# the CUDA directory is the parent of the directory that contains nvcc
CUDA_DIR=$(realpath "$(dirname "$NVCC_PATH")/..")

# Assemble the cmake command line for the OpenCV build.
# build reference: https://docs.opencv.org/4.x/db/d05/tutorial_config_reference.html
# available modules: https://docs.opencv.org/4.9.0/modules.html
# the opencv modules we're likely to need:
BUILD_LIST=imgcodecs,imgproc,text,tracking,features2d,optflow,python3,xfeatures2d
BUILD_LIST=$BUILD_LIST,cudafeatures2d,cudaimgproc,cudaoptflow,cudawarping,cudev
BUILD_LIST=$BUILD_LIST,highgui # not used but building 'world' fails without this

# The `pwd` substitutions below are evaluated right here, i.e. in the directory
# the cell starts in, while -B / -S are relative to the directory cmake runs in.
CMAKE_ARGS=(
-B . -S ..
-D CMAKE_BUILD_TYPE=RELEASE
-D CMAKE_INSTALL_PREFIX=`pwd`/opencv/install
-D OPENCV_EXTRA_MODULES_PATH=`pwd`/opencv_contrib/modules
-D BUILD_LIST=$BUILD_LIST
# building everything into one library (world) makes the build faster and is otherwise more convenient
-D BUILD_opencv_world=ON
# the dnn module needs protobuf, but we don't need dnn yet so disable both
-D BUILD_opencv_dnn=OFF
-D WITH_PROTOBUF=OFF
-D WITH_DNN=OFF
-D OPENCV_DNN_CUDA=OFF
-D WITH_CUDNN=OFF
# enable building e.g. SURF, which is patented / not free
-D OPENCV_ENABLE_NONFREE=ON
# don't use the old CUDA cmake functions, as per https://cmake.org/cmake/help/latest/policy/CMP0146.html
-D ENABLE_CUDA_FIRST_CLASS_LANGUAGE=ON
# fix for: CUDA: Not detected! If you are not using the default host compiler (g++)
# then you need to specify both CMAKE_CUDA_HOST_COMPILER and CMAKE_CUDA_COMPILER.
-D CMAKE_CUDA_HOST_COMPILER=$GCC_PATH
-D CMAKE_CUDA_COMPILER=$NVCC_PATH
-D WITH_CUDA=ON
# compute capabilities to build binary code / PTX for (either one may be empty)
-D CUDA_ARCH_BIN=$CUDA_ARCH_BIN
-D CUDA_ARCH_PTX=$CUDA_ARCH_PTX
-D WITH_CUBLAS=ON
-D WITH_TBB=ON # todo: is this used ?
# skip everything that is not needed for using the library from python
-D BUILD_EXAMPLES=OFF
-D BUILD_TESTS=OFF
-D BUILD_PERF_TESTS=OFF
-D BUILD_opencv_apps=OFF
# we don't need GTK since the gui doesn't work in a notebook
-D WITH_GTK=OFF
-D WITH_OPENEXR=OFF
-D WITH_WEBP=OFF
-D WITH_PYTHON=ON
-D BUILD_opencv_python3=ON
# python variables from https://stackoverflow.com/a/54176727
# NOTE(review): distutils is no longer part of the standard library as of Python 3.12;
# there these calls presumably rely on the shim provided by setuptools - confirm.
-D PYTHON3_EXECUTABLE=$(which python3)
-D PYTHON_INCLUDE_DIR=$(python3 -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())")
-D PYTHON_INCLUDE_DIR2=$(python3 -c "from os.path import dirname; from distutils.sysconfig import get_config_h_filename; print(dirname(get_config_h_filename()))")
-D PYTHON_LIBRARY=$(python3 -c "from distutils.sysconfig import get_config_var;from os.path import dirname,join ; print(join(dirname(get_config_var('LIBPC')),get_config_var('LDLIBRARY')))")
-D PYTHON3_NUMPY_INCLUDE_DIRS=$(python3 -c "import numpy; print(numpy.get_include())")
-D PYTHON3_PACKAGES_PATH=$(python3 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")
)

# Start from a clean build tree. Stop when the checkout is missing, so that the
# rm below can never run in the wrong directory.
cd opencv || exit 1
rm -rf build install
mkdir -p build install
# cmake is run from the build directory (-B . -S ..)
cd build || exit 1
# for some reason on kaggle this fails to find cuda_runtime.h and some libraries so add the paths manually
export INCLUDES="-I$CUDA_DIR/include"
export LIBRARIES="-L$CUDA_DIR/lib64"
export LDFLAGS="-fuse-ld=lld" # use the lld linker
# last command of the cell: its exit status decides whether the cell fails
time cmake "${CMAKE_ARGS[@]}"

## **Build, install and test OpenCV**

In [None]:
%%bash
# Compile OpenCV in the configured build tree and report how long it took.
# run only some jobs in parallel to avoid running out of memory
time cmake --build opencv/build --parallel 4

In [None]:
%%bash
# Run the install step of the build tree and report how long it took.
time cmake --install opencv/build

In [None]:
import cv2

# show which OpenCV got imported and how it was configured
print(cv2.__version__, cv2.getBuildInformation(), sep='\n')

# on Colab nvcc is available to build even when no GPU is present,
# so the GPU check below is skipped there
on_colab = utils.get_platform() == 'colab'
if not on_colab:
    import numpy as np
    # create a GpuMat from a small constant image as a minimal CUDA check
    a = np.full((480, 480), 60, np.uint8)
    ga = cv2.cuda.GpuMat(a)

## **Create a build archive and upload it to Hugging Face**

In [None]:
%%bash
# Write the md5 checksums of the libraries that libopencv_world.so resolves to
# and keep a copy of the list inside the install directory.
OPENCV_LIB="opencv/install/lib/libopencv_world.so"
# keep only the path from each "name => path (address)" line printed by ldd
DEPS=$(ldd $OPENCV_LIB | grep "=>" | sed -e "s/.*=> //" -e "s/ (.*//")
md5sum -b $DEPS > opencv_cuda.md5
cp opencv_cuda.md5 opencv/install/lib

PACKAGES_PATH=$(python3 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")

# the archive holds the cv2 python package and the opencv install directory
tar -czf opencv_cuda.tar.gz -C $PACKAGES_PATH cv2 -C $(pwd)/opencv install
du -hs *.tar.gz

In [None]:
# upload the build archive together with the checksum list, into a
# platform-specific location
build_artifacts = ['opencv_cuda.tar.gz', 'opencv_cuda.md5']
upload_target = f'logicbear/cache/opencv/{utils.get_platform()}'
utils.upload_to_huggingface(build_artifacts, upload_target)