In [1]:
## Benchmark between C, C++, and Numpy
## Task: computing sum of squares for Int and Double types

In [2]:
# C code ("mysum.c")

# long long mysum_int(int n, long long* array) {
#     long long res = 0;
#     for (int i=0; i<n; i++)
#         res += array[i]*array[i];
#     return res;
# }

# double mysum_double(int n, double* array) {
#     double res = 0;
#     for (int i=0; i<n; i++)
#         res += array[i]*array[i];
#     return res;
# }

In [3]:
# C++ code ("mysum.cpp")
# just add [extern "C"] at the top of each function

# extern "C"

# long long mysum_int(int n, long long* array) {
#     long long res = 0;
#     for (int i=0; i<n; i++)
#         res += array[i]*array[i];
#     return res;
# }

# extern "C"

# double mysum_double(int n, double* array) {
#     double res = 0;
#     for (int i=0; i<n; i++)
#         res += array[i]*array[i];
#     return res;
# }


In [4]:
# Save the following code snippet as setup.py at the working directory and run 'python setup.py build'
# to create the shared C/C++ modules

# from setuptools import setup, Extension

# setup(
#     ext_modules=[Extension('mysum_c', ['mysum.c']),
#                  Extension('mysum_cpp', ['mysum.cpp']),
#                  ],
#     install_requires=['numpy']
# )

In [5]:
# ctypes is the library that we use for C/C++ functions

import time
import ctypes
import numpy as np
from numpy.random import normal

In [6]:
# import C/C++ modules and set arg/res types

libfile_c = 'mysum_c.cpython-37m-darwin.so'
mylib_c = ctypes.CDLL(libfile_c)

mylib_c.mysum_int.restype = ctypes.c_longlong
mylib_c.mysum_int.argtypes = [ctypes.c_int,
                              np.ctypeslib.ndpointer(
                                dtype=np.int
                              )]

mylib_c.mysum_double.restype = ctypes.c_double
mylib_c.mysum_double.argtypes = [ctypes.c_int,
                                 np.ctypeslib.ndpointer(
                                    dtype=np.float64
                                 )]


libfile_cpp = 'mysum_cpp.cpython-37m-darwin.so'
mylib_cpp = ctypes.CDLL(libfile_cpp)

mylib_cpp.mysum_int.restype = ctypes.c_longlong
mylib_cpp.mysum_int.argtypes = [ctypes.c_int,
                                np.ctypeslib.ndpointer(
                                 dtype=np.int
                                )]

mylib_cpp.mysum_double.restype = ctypes.c_double
mylib_cpp.mysum_double.argtypes = [ctypes.c_int,
                                   np.ctypeslib.ndpointer(
                                      dtype=np.float64
                                   )]


In [7]:
# extract functions
cfunc_int = mylib_c.mysum_int
cfunc_double = mylib_c.mysum_double
cppfunc_int = mylib_cpp.mysum_int
cppfunc_double = mylib_cpp.mysum_double

# Simulation parameters
n_sim = 1000
n_data = 10000000

In [None]:
# generate some data
array_int = np.arange(n_data).astype(np.int)

# for right justification
rjust_n = 50

# Int functions
# C solution
start = time.time()
for i in range(n_sim):
    cfunc_int(n_data, array_int)
t_c = time.time()-start
res_str = f"{cfunc_int(n_data, array_int):.0f} from C."
print("Results: \n")
print(res_str)

# C++ solution
start = time.time()
for i in range(n_sim):
    cppfunc_int(n_data, array_int)
t_cpp = time.time()-start
res_str = f"{cppfunc_int(n_data, array_int):.0f} from C++."
print(res_str)

# Numpy solution
start = time.time()
for i in range(n_sim):
    
    (array_int**2).sum()
    # List comp is super slow
    # res = 0
    # for item in array_int:
    #     res += item
    
t_p1 = time.time()-start
res_str = f"{(array_int**2).sum():.0f} from Numpy using native methods."
print(res_str)

# List comprehension was super slow, so I didn't try it

# Print results
print("\nComparing C/C++ to Numpy in case of Int type arithmetics.\n")
print(f'{t_c/n_sim:.4f} second(s) per iteration for C.')
print(f'{t_cpp/n_sim:.4f} second(s) per iteration for C++.')
print(f'{t_p1/n_sim:.4f} second(s) per iteration for Numpy native methods.\n')

# How fast are C/C++ compared to Numpy?
print(f"In case of Long Long Int, C   is about {round(t_p1/t_c, 2)} times faster than Numpy native methods.")
print(f"In case of Long Long Int, C++ is about {round(t_p1/t_cpp, 2)} times faster than Numpy native methods.")

Results: 

1291890006563070976 from C.
1291890006563070976 from C++.


In [None]:
# Generate some big data
array_double = normal(0, 1, n_data).astype(np.float64)

# Double functions
# C solution
start = time.time()
for i in range(n_sim):
    cfunc_double(n_data, array_double)
t_c = time.time()-start
res_str = f"{cfunc_double(n_data, array_double):.10f} from C."
print("Results: \n")
print(res_str)

# C++ solution
start = time.time()
for i in range(n_sim):
    cppfunc_double(n_data, array_double)
t_cpp = time.time()-start
res_str = f"{cppfunc_double(n_data, array_double):.10f} from C++."
print(res_str)

# Numpy solution
start = time.time()
for i in range(n_sim):
    (array_double**2).sum()
t_p1 = time.time()-start
res_str = f"{(array_double**2).sum():.10f} from Numpy methods."
print(res_str)

# List comprehension was super slow, so I didn't try it

# Print results
print("\nComparing C/C++ to Numpy in case of Double type arithmetics.\n")
print(f'{t_c/n_sim:.4f} second(s) per iteration in case of C.')
print(f'{t_cpp/n_sim:.4f} second(s) per iteration in case of C++.')
print(f'{t_p1/n_sim:.4f} second(s) per iteration in case of Numpy native methods.\n')

# How fast are C/C++ compared to Numpy?
print(f"In case of Double, C   is about {round(t_p1/t_c, 2)} times faster than Numpy native methods.")
print(f"In case of Double, C++ is about {round(t_p1/t_cpp, 2)} times faster than Numpy native methods.")