# 1) Loading Cython

In [1]:
%load_ext Cython
import cython

# 2) Cython Annotations

In [2]:
%%cython --annotate

a: cython.int = 0
for i in range(10):
    a += i
print(a)

45


In [3]:
%%cython --annotate

cdef int a = 0
for i in range(10):
    a += i
print(a)

45


# 3) Pure Python vs Pythonic Cython vs Proper Cython 

In [4]:
%timeit sum(range(10**8))

2.35 s ± 471 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%%cython
def cy_sum():
    cdef int i, total = 0
    for i in range(10**8):
        total += i
    return total

In [6]:
%timeit cy_sum()

23 ms ± 1.66 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [57]:
# Pure Python function
def f1(x):
    return x ** 2 - x
def integrate_f1(a, b, N):
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f1(a + i * dx)
    return s * dx

In [58]:
# Pythonic Cython function
def f2(x: cython.double):
    return x ** 2 - x
def integrate_f2(a: cython.double, b: cython.double, N: cython.int):
    i: cython.int
    s: cython.double
    dx: cython.double
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f2(a + i * dx)
    return s * dx

In [59]:
%%cython
# Proper Cython function
def f3(double x):
    return x ** 2 - x
def integrate_f3(double a, double b, int N):
    cdef int i
    cdef double s
    cdef double dx
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f3(a + i * dx)
    return s * dx

Content of stdout:
_cython_magic_92e1b275ab308f4d841c95d5bc81ae3b065eca6f769f25510cb098c287ede905.c
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_92e1b275ab308f4d841c95d5bc81ae3b065eca6f769f25510cb098c287ede905.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_92e1b275ab308f4d841c95d5bc81ae3b065eca6f769f25510cb098c287ede905.cp312-win_amd64.exp
Generating code
Finished generating code

In [10]:
%%timeit 
integrate_f1(0, 1, 10000000)

2.53 s ± 407 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%timeit 
integrate_f2(0, 1, 10000000)

2.46 s ± 109 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
%%timeit 
integrate_f3(0, 1, 10000000)

872 ms ± 130 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# 4) Build from Setup.py Import

In [13]:
from hellopython import say_hello_to
say_hello_to("Cython")

Hello Cython!


# 5) C++ and C compilation in Cython

In [14]:
@cython.cfunc
@cython.exceptval(-2, check=True)
def fexcept2_1(x: cython.double) -> cython.double:
    return x ** 2 - x

In [15]:
%%cython
cpdef double fexcept2_2(double x) except? -2:
    return x ** 2 - x

In [16]:
%%timeit 
fexcept2_1(100.0)

136 ns ± 5.97 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [17]:
%%timeit
fexcept2_2(100.0)

77.8 ns ± 10.5 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


## a) Pythonic Cython + C

In [None]:
%%cython 
import cython
def primes1(nb_primes: cython.int):
    i: cython.int
    p: cython.int[1000]

    if nb_primes > 1000:
        nb_primes = 1000

    if not cython.compiled:  # Only if regular Python is running
        p = [0] * 1000       # Make p work almost like a C array

    len_p: cython.int = 0  # The current number of elements in p.
    n: cython.int = 2
    while len_p < nb_primes:
        # Is n prime?
        for i in p[:len_p]:
            if n % i == 0:
                break

        # If no break occurred in the loop, we have a prime.
        else:
            p[len_p] = n
            len_p += 1
        n += 1

    # Let's copy the result into a Python list:
    result_as_list = [prime for prime in p[:len_p]]
    return result_as_list

Content of stdout:
_cython_magic_a1d75c630841507c73ad46b16381d3463930ccc9693eb8cce475b5234e3ec604.c
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_a1d75c630841507c73ad46b16381d3463930ccc9693eb8cce475b5234e3ec604.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_a1d75c630841507c73ad46b16381d3463930ccc9693eb8cce475b5234e3ec604.cp312-win_amd64.exp
Generating code
Finished generating code

## b) Proper Cython + C

In [24]:
%%cython
import cython
def primes2(int nb_primes):
    cdef int n, i, len_p
    cdef int[1000] p

    if nb_primes > 1000:
        nb_primes = 1000

    len_p = 0  # The current number of elements in p.
    n = 2
    while len_p < nb_primes:
        # Is n prime?
        for i in p[:len_p]:
            if n % i == 0:
                break

        # If no break occurred in the loop, we have a prime.
        else:
            p[len_p] = n
            len_p += 1
        n += 1

    # Let's copy the result into a Python list:
    result_as_list = [prime for prime in p[:len_p]]
    return result_as_list

Content of stdout:
_cython_magic_0df776963101b44ad5e6787ab6ef20a189c5e9f8f840d1c298210ae1e1c817f7.c
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_0df776963101b44ad5e6787ab6ef20a189c5e9f8f840d1c298210ae1e1c817f7.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_0df776963101b44ad5e6787ab6ef20a189c5e9f8f840d1c298210ae1e1c817f7.cp312-win_amd64.exp
Generating code
Finished generating code

## c) Pythonic Cython + C++

In [25]:
%%cython -+
# distutils: language=c++
import cython
from cython.cimports.libcpp.vector import vector

def primes3(nb_primes: cython.uint):
    i: cython.int
    p: vector[cython.int]
    p.reserve(nb_primes)  # allocate memory for 'nb_primes' elements.

    n: cython.int = 2
    while p.size() < nb_primes:  # size() for vectors is similar to len()
        for i in p:
            if n % i == 0:
                break
        else:
            p.push_back(n)  # push_back is similar to append()
        n += 1

    # If possible, C values and C++ objects are automatically
    # converted to Python objects at need.
    return p

Content of stdout:
_cython_magic_5ac400f30ea5b21921c99514d80d49af52ef4fb609137f1a6f56ce4f05e2ea37.cpp
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_5ac400f30ea5b21921c99514d80d49af52ef4fb609137f1a6f56ce4f05e2ea37.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_5ac400f30ea5b21921c99514d80d49af52ef4fb609137f1a6f56ce4f05e2ea37.cp312-win_amd64.exp
Generating code
Finished generating code

## d) Proper Cython + C++

In [31]:
%%cython -+
# distutils: language=c++
from libcpp.vector cimport vector

def primes4(unsigned int nb_primes):
    cdef int n, i
    cdef vector[int] p
    p.reserve(nb_primes)  # allocate memory for 'nb_primes' elements.

    n = 2
    while p.size() < nb_primes:  # size() for vectors is similar to len()
        for i in p:
            if n % i == 0:
                break
        else:
            p.push_back(n)  # push_back is similar to append()
        n += 1

    # If possible, C values and C++ objects are automatically
    # converted to Python objects at need.
    return p  # so here, the vector will be copied into a Python list.

Content of stdout:
_cython_magic_29547540f131e4beccdd820a8be63e2e5f2e5dd6c622d15fc8750787a6377417.cpp
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_29547540f131e4beccdd820a8be63e2e5f2e5dd6c622d15fc8750787a6377417.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_29547540f131e4beccdd820a8be63e2e5f2e5dd6c622d15fc8750787a6377417.cp312-win_amd64.exp
Generating code
Finished generating code

## e) TimeIT for a,b,c,d

In [27]:
%%timeit 
primes1(900)

944 μs ± 38.2 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [28]:
%%timeit
primes2(900)

910 μs ± 7.6 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [29]:
%%timeit
primes3(900)

951 μs ± 76.4 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [32]:
%%timeit
primes4(900)

942 μs ± 63.9 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


# 6) Calling C-Lib.Functions defined in Cython

In [55]:
%%cython
import cython
from cython.cimports.libc.stdlib import atoi
@cython.cfunc
def parse_charptr_to_py_int(s: cython.p_char):
    assert s is not cython.NULL, "Input is null"
    return atoi(s)

In [54]:
%%cython
from libc.stdlib cimport atoi
cdef parse_charptr_to_py_int(char* s):
    assert s is not NULL, "byte string value is NULL"
    return atoi(s)  # note: atoi() has no error detection!

In [52]:
%%cython
from cython.cimports.cpython.version import PY_VERSION_HEX
print(PY_VERSION_HEX >= 0x030200F0)

In [49]:
%%cython
from cpython.version cimport PY_VERSION_HEX
print(PY_VERSION_HEX >= 0x030200F0)

In [45]:
from cython.cimports.libc.math import sin
@cython.cfunc
def f(x: cython.double) -> cython.double:
    return sin(x * x)

In [56]:
%%cython
from libc.math cimport sin
cdef double f(double x):
    return sin(x * x)

Content of stdout:
_cython_magic_c639b4326e580a343506f67526722569d09c7c1c59b6cfbd14b520c30fa98036.c
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_c639b4326e580a343506f67526722569d09c7c1c59b6cfbd14b520c30fa98036.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_c639b4326e580a343506f67526722569d09c7c1c59b6cfbd14b520c30fa98036.cp312-win_amd64.exp
Generating code
Finished generating code

# 7) External C-lib.Function Declarations

In [77]:
%%cython
cdef extern from "string.h":
    char* strstr(const char *haystack, const char *needle)

In [87]:
%%cython
# This way is not working in Jupyter Notebook
import cython

def main():
    cdef const char* data = b"hfvcakdfagbcffvschvxcdfgccbcfhvgcsnfxjh"
    cdef const char* pos = strstr(data, b"akd")
    print(pos is not cython.NULL)



Error compiling Cython file:
------------------------------------------------------------
...
# This way is not working in Jupyter Notebook
import cython

def main():
    cdef const char* data = b"hfvcakdfagbcffvschvxcdfgccbcfhvgcsnfxjh"
    cdef const char* pos = strstr(data, b"akd")
                           ^
------------------------------------------------------------

C:\Users\user\.ipython\cython\_cython_magic_ba3d5615b27aca2282d248837e203cb854e82582fa85141116334723ca304161.pyx:6:27: undeclared name not builtin: strstr

Error compiling Cython file:
------------------------------------------------------------
...
# This way is not working in Jupyter Notebook
import cython

def main():
    cdef const char* data = b"hfvcakdfagbcffvschvxcdfgccbcfhvgcsnfxjh"
    cdef const char* pos = strstr(data, b"akd")
    ^
------------------------------------------------------------

C:\Users\user\.ipython\cython\_cython_magic_ba3d5615b27aca2282d248837e203cb854e82582fa85141116334723ca304161.pyx

In [79]:
%%cython
def doit():
    cdef extern from "string.h":
        char* strstr(const char *haystack, const char *needle)

    cdef char* data = "hfvcakdfagbcffvschvxcdfgccbcfhvgcsnfxjh"

    cdef char* pos = strstr(needle='akd', haystack=data)
    print(pos is not NULL)

Content of stdout:
_cython_magic_cc00cc5959da61b46843637aff3a6dcd0e1e3b2479bc9015d15b9e9c0547aabe.c
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_cc00cc5959da61b46843637aff3a6dcd0e1e3b2479bc9015d15b9e9c0547aabe.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_cc00cc5959da61b46843637aff3a6dcd0e1e3b2479bc9015d15b9e9c0547aabe.cp312-win_amd64.exp
Generating code
Finished generating code

In [88]:
#print(main())
doit()

True


# 8) CQueue and Link Implementation with Callback  

In [168]:
%%cython 
# distutils: include_dirs = C:/Users/user/Downloads/CPP-Learning/Cython
# distutils: sources      = C:/Users/user/Downloads/CPP-Learning/Cython/queue.c
cimport cqueue
from cqueue cimport (
    Queue, QueueValue, predicate_func,
    queue_new, queue_free,
    queue_push_tail, queue_pop_head,
    queue_peek_head, queue_is_empty,
    queue_pop_head_until
)
cdef int evaluate_predicate(void* context, QueueValue data) except -1:
    cdef object pred = <object>context
    try:
        return 1 if pred(<int>data) else 0
    except Exception:
        return -1
cdef class CQueue:
    """A queue class for C integer values.

    >>> q = Queue()
    >>> q.append(5)
    >>> q.peek()
    5
    >>> q.pop()
    5
    """
    cdef cqueue.Queue* _c_queue
    def __cinit__(self):
        self._c_queue = cqueue.queue_new()
        if self._c_queue is NULL:
            raise MemoryError()

    def __dealloc__(self):
        if self._c_queue is not NULL:
            cqueue.queue_free(self._c_queue)
    
    cpdef pop_until(self, object py_predicate):
        cdef predicate_func cb = <predicate_func>evaluate_predicate
        cdef void* ctx        = <void*>py_predicate
        cdef int res          = queue_pop_head_until(self._c_queue, cb, ctx)
        if res == -1:
            raise RuntimeError("predicate raised")

    cpdef append(self, int value):
        if not cqueue.queue_push_tail(self._c_queue,
                                      <void*> <Py_ssize_t> value):
            raise MemoryError()

    # The `cpdef` feature is obviously not available for the original "extend()"
    # method, as the method signature is incompatible with Python argument
    # types (Python does not have pointers).  However, we can rename
    # the C-ish "extend()" method to e.g. "extend_ints()", and write
    # a new "extend()" method that provides a suitable Python interface by
    # accepting an arbitrary Python iterable.

    cpdef extend(self, values):
        for value in values:
            self.append(value)


    cdef extend_ints(self, int* values, size_t count):
        cdef int value
        for value in values[:count]:  # Slicing pointer to limit the iteration boundaries.
            self.append(value)



    cpdef int peek(self) except? -1:
        cdef int value = <Py_ssize_t> cqueue.queue_peek_head(self._c_queue)

        if value == 0:
            # this may mean that the queue is empty,
            # or that it happens to contain a 0 value
            if cqueue.queue_is_empty(self._c_queue):
                raise IndexError("Queue is empty")
        return value



    cpdef int pop(self) except? -1:
        if cqueue.queue_is_empty(self._c_queue):
            raise IndexError("Queue is empty")
        return <Py_ssize_t> cqueue.queue_pop_head(self._c_queue)

    def __bool__(self):
        return not cqueue.queue_is_empty(self._c_queue)
__all__ = ['CQueue']

Content of stdout:
queue.c
_cython_magic_6514f09a13da4d41e6fde3326ceb294de77d45f5f619db0ccff03fd6490f3396.c
   Creating library C:\Users\user\.ipython\cython\Users\user\Downloads\CPP-Learning\Cython\_cython_magic_6514f09a13da4d41e6fde3326ceb294de77d45f5f619db0ccff03fd6490f3396.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\Downloads\CPP-Learning\Cython\_cython_magic_6514f09a13da4d41e6fde3326ceb294de77d45f5f619db0ccff03fd6490f3396.cp312-win_amd64.exp
Generating code
Finished generating code

In [169]:
import time
Q = CQueue()

Q.append(10)
Q.append(20)
print(Q.peek())
print(Q.pop())
print(Q.pop())
try:
    print(Q.pop())
except IndexError as e:
    print("Error message:", e)  # Prints "Queue is empty"

i = 10000

values = range(i)

start_time = time.time()

Q.extend(values)

end_time = time.time() - start_time

print("Adding {} items took {:1.3f} msecs.".format(i, 1000 * end_time))

for i in range(41):
    Q.pop()

Q.pop()
print("The answer is:")
print(Q.pop())

10
10
20
Error message: Queue is empty
Adding 10000 items took 1.002 msecs.
The answer is:
42


# 9) Nonecheck (Pythonic Cython and Proper Cython)

In [170]:
# cython: nonecheck=True
#        ^^^ Turns on nonecheck globally

import cython

@cython.cclass
class MyClass:
    pass

# Turn off nonecheck locally for the function
@cython.nonecheck(False)
def func():
    obj: MyClass = None
    try:
        # Turn nonecheck on again for a block
        with cython.nonecheck(True):
            print(obj.myfunc())  # Raises exception
    except AttributeError:
        pass
    print(obj.myfunc())  # Hope for a crash!

In [178]:
%%cython
# cython: nonecheck=True

import cython

cdef class MyClass:
    pass

@cython.nonecheck(False)
def func():
    # don’t use a C-level initializer here—just declare then assign:
    cdef MyClass obj
    obj = None      # with nonecheck off, this compiles

    # If you really need a compile-time NULL:
    # cdef MyClass obj = NULL

    # You can re-enable checking in a block:
    with cython.nonecheck(True):
        try:
            print(obj.__class__)     # raises AttributeError
        except AttributeError:
            print("caught AttributeError")

def test():
    print("calling func()…")
    func()

test()


Content of stdout:
_cython_magic_3ba41258ccd43a96366030cc0186522c5524321e52094784a177db286ab9b0aa.c
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_3ba41258ccd43a96366030cc0186522c5524321e52094784a177db286ab9b0aa.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_3ba41258ccd43a96366030cc0186522c5524321e52094784a177db286ab9b0aa.cp312-win_amd64.exp
Generating code
Finished generating codecalling func()…
<class 'NoneType'>


# 10) Integration and Use of ".pxd" files

In [None]:
### Read Only
# # Simplifying the Integration Example

# This example shows how to organize your Cython code into **interface** (`.pxd`) and **implementation** (`.pyx`) files so multiple modules can call each other **at C speed** without Python’s usual import overhead.

# ---

# ## Why split into `.pxd` and `.pyx`?

# - `.pxd` files are like C “header” files: they declare  
#   - C functions and types from external libraries (for example, `sin` from `<math.h>`),  
#   - Cython extension‐type signatures (classes with `cdef` or `cpdef` methods).  
# - `.pyx` files contain the actual **bodies** of those functions and methods.  
# - Sharing `.pxd` files lets modules **cimport** each other’s declarations and generate direct C‐level calls.

# ---

# ## Step-by-step Breakdown

# 1. **Declare C math in `cmath.pxd`**  
#    ```cython
#    # cmath.pxd
#    cdef extern from "math.h":
#        cpdef double sin(double x)
#    ```  
#    Now any Cython module can do `from CyIntegration.cmath cimport sin` and call `sin()` without Python overhead.

# 2. **Describe the integration API in `integrate.pxd`**  
#    ```cython
#    # integrate.pxd
#    cdef class Function:
#        cpdef double evaluate(self, double x)

#    cpdef double integrate(Function f, double a, double b, int N)
#    ```  
#    This tells other modules:  
#    - There’s a `Function` type with a C‐method `evaluate(x)`.  
#    - There’s a C‐function `integrate(f, a, b, N)` they can call directly.

# 3. **Implement in `integrate.pyx`**  
#    ```cython
#    # integrate.pyx
#    from CyIntegration.cmath cimport sin
#    from CyIntegration.integrate cimport Function

#    cdef class MyFunction(Function):
#        cpdef double evaluate(self, double x):
#            return sin(x * x)

#    cpdef double integrate(Function f, double a, double b, int N):
#        cdef double total = 0
#        cdef double dx = (b - a) / N
#        cdef int i
#        for i in range(N):
#            total += f.evaluate(a + i * dx)
#        return total * dx
#    ```  
#    Here you write the actual code that uses `sin()` and `Function.evaluate()`.  

# 4. **Re-export declarations in `__init__.pxd`**  
#    ```cython
#    # __init__.pxd
#    from CyIntegration.integrate cimport Function, integrate
#    ```  
#    This makes `Function` and `integrate` available at the package top level.  

# ---

# ## New Directory Structure

# ```
# CyIntegration/
# ├── __init__.pyx      # optional Python imports or empty
# ├── __init__.pxd      # re-export Function and integrate
# ├── cmath.pxd         # C math declarations
# ├── integrate.pxd     # interface for integration API
# └── integrate.pyx     # implementation of integration
# ```

# ---

# ## Using the Package

# In any Cython module, you can now write:

# ```cython
# cimport CyIntegration

# # Create an instance of your function
# cdef CyIntegration.Function f = CyIntegration.MyFunction()

# # Call integrate at C speed
# cdef double result = CyIntegration.integrate(f, 0.0, 1.0, 1000)
# ```

# No Python‐level `import` is needed—`cimport` pulls in declarations from `.pxd` files and generates pure C calls under the hood.  

# ---

# ## How It Shares Data Without Python ‘import’

# - **cimport** reads declarations from `.pxd` files at compile time.  
# - Cython generates functions that call each other directly with C signatures.  
# - There’s no runtime lookup of Python objects—everything happens through plain C functions and methods.

# By following this pattern, you build a fast, modular Cython library with clear separation between headers (`.pxd`) and source (`.pyx`), all sharing data at full C speed.

# ---

# References  
# pxd files — Cython 3.2.0a0 documentation (https://cython.readthedocs.io/en/latest/src/tutorial/pxd_files.html)

# 11) Key Caveats when mixing C and Python in Cython

In [None]:
# # Key Caveats When Mixing C and Python in Cython

# When you combine C‐level types and Python semantics in Cython, a few unexpected behaviors can pop up. Here are the most important ones to watch for:

# 1. Modulo (`%`) Follows Python’s Sign Rule  
#    - In Python, `a % b` always has the same sign as *b* (the divisor).  
#    - In C, `a % b` has the same sign as *a* (the dividend).  
#    - **Cython defaults to Python’s rule**.  
#    - If you need the C behavior (and want faster code), enable:  
#      ```cython
#      # cython: cdivision=True
#      ```

# 2. Unsigned Types Wrap Around  
#    ```cython
#    cdef unsigned int n = 10
#    print(range(-n, n))   # → []
#    ```  
#    - Here `-n` is computed in pure C as an unsigned, which becomes a huge positive number.  
#    - That large positive bound makes `range()` see an empty sequence.  
#    - **Take care** whenever you mix signed literals or Python operations with `unsigned` C types.

# 3. Floating‐Point and Integer “Widths”  
#    - A Python `float` is really a C `double`.  
#    - In Python 2, the built-in `int` maps to C’s `long`.  
#    - If you need more precision or a different C type, you must explicitly declare it, e.g.:  
#      ```cython
#      cdef double x   # 64-bit IEEE double
#      cdef float  y   # 32-bit IEEE float
#      cdef long   z   # C long (size varies by platform)
#      ```

# ---

# Keeping these in mind will help you avoid confusing bugs when writing performance-critical Cython code that blends C types and Python‐style operations.

# 12) Profiling in both pythonic python and proper cython

In [186]:
# cython: profile=True
import pstats, cProfile
import pyximport
pyximport.install()

@cython.profile(False)
@cython.cfunc
@cython.inline
@cython.exceptval(-1.0)
def recip_square(i):
    return 1. / i ** 2

def approx_pi(n=10000000):
    val = 0.
    for k in range(1, n + 1):
        val += recip_square(k)
    return (6 * val) ** .5

cProfile.runctx("approx_pi()", globals(), locals(), "Profile.prof")

s = pstats.Stats("Profile.prof")
s.strip_dirs().sort_stats("time").print_stats()

Sun Jul 27 11:30:22 2025    Profile.prof

         10000074 function calls in 4.535 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    2.482    2.482    4.532    4.532 949566149.py:13(approx_pi)
 10000000    2.051    0.000    2.051    0.000 949566149.py:6(recip_square)
        1    0.001    0.001    0.001    0.001 traitlets.py:1527(_notify_observers)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        2    0.000    0.000    0.001    0.001 traitlets.py:3631(set)
        2    0.000    0.000    0.001    0.001 traitlets.py:689(set)
        2    0.000    0.000    0.000    0.000 traitlets.py:718(_validate)
        5    0.000    0.000    0.000    0.000 {built-in method builtins.hasattr}
        1    0.000    0.000    0.000    0.000 {built-in method _thread.allocate_lock}
        1    0.000    0.000    0.000    0.000 history.py:1016(_writeout_output_cache)
        2    0.

<pstats.Stats at 0x1ce904899a0>

In [189]:
%%cython -+
# cython: profile=True
import pstats, cProfile
import pyximport
pyximport.install()
cimport cython
@cython.profile(False)
cdef inline double recip_square(long long i) except -1.0:
    return 1. / (i * i)

def approx_pi(int n=10000000):
    cdef double val = 0.
    cdef int k
    for k in range(1, n + 1):
        val += recip_square(k)
    return (6 * val) ** .5
cProfile.runctx("approx_pi()", globals(), locals(), "Profile.prof")

s = pstats.Stats("Profile.prof")
s.strip_dirs().sort_stats("time").print_stats()

Content of stdout:
_cython_magic_707ca3bcb0055229432865df5b75ad0aa65e27f66c3079c21ac96077c79bba6c.cpp
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_707ca3bcb0055229432865df5b75ad0aa65e27f66c3079c21ac96077c79bba6c.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_707ca3bcb0055229432865df5b75ad0aa65e27f66c3079c21ac96077c79bba6c.cp312-win_amd64.exp
Generating code
Finished generating codeSun Jul 27 11:32:30 2025    Profile.prof

         3 function calls in 0.019 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.019    0.019    0.019    0.019 <string>:1(<module>)
        1    0.000    0.000    0.019    0.019 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




# 13) Unicode and Passing Strings in Cython

In [193]:
%%cython

# // Python string types in Cython code 
# cython: language_level=3
cdef bytes  b = b"hello"
cdef str    u = "world"
cdef unicode u2 = u"안녕"
cdef basestring any_text = u2

# // String Literals
cdef bytes  b1 = b"bytes literal"
cdef unicode u1 = u"unicode literal"
cdef unicode f1 = f"formatted {42}"
cdef str    s1 = "plain text"

# // getting length in O(N) vs Python’s O(1)
from libc.string cimport strlen
cdef char *cs = b"hello"
print(strlen(cs))       # linear scan
py = b"hello"
print(len(py))          # constant-time

# // Passing byte strings (C->Python)
from libc.stdlib cimport malloc, free
from libc.string  cimport strcpy
cdef extern from "some.h":
    char *get_c_string()
def wrap():
    cdef char *cs = get_c_string()
    try:
        py: bytes = cs[:]      # slice copies exact length
        return py
    finally:
        free(cs)
def wrap2():
    cdef char *cs = get_c_string()
    return <bytes>cs           # Cython calls strlen() internally

# // Accepting Strings from python code
def process_bytes(data: cython.uchar[:]):
    cdef Py_ssize_t n = data.shape[0]
    cdef unsigned char first = data[0]
    return bytes(data[1 : n-1])

# // Dealing with "const"
cdef extern from "some.h":
    int   process_string(const char *s)
    const unsigned char *lookup_key(const unsigned char *key)

# // Decoding byte to text 
from libc.stdlib cimport free
cdef extern from "some.h":
    char *get_utf8()
def to_py():
    cdef char *cs = get_utf8()
    try:
        return cs.decode("utf-8")    # works if no interior NULs
    finally:
        free(cs)
def to_py2():
    cdef char *cs; 
    cdef Py_ssize_t length
    get_c_string_with_length(&cs, &length)
    u: str = cs[:length].decode("utf-8")
    free(cs)
    return u

# // Encoding text to bytes
def to_c(s: str):
    py_bytes = s.encode("utf-8")
    cdef char *cs = py_bytes   # pointer into Python buffer
    # keep `py_bytes` alive while using `cs`
    return cs

# // C++ strings
# distutils: language = c++
from libcpp.string cimport string
def cpp_roundtrip(s: str):
    cdef string cpp = s.encode("utf-8")
    return cpp.decode("utf-8")

# // Autoencoding and Decoding
# cython: c_string_type=unicode, c_string_encoding=utf8
cdef extern from "some.h":
    char *get_utf8()
def auto_py():
    # implicit decoding into Python str
    cdef object s = get_utf8()
    return s.upper()              # now a Python str

# // Source code encoding
# -*- coding: ISO-8859-15 -*-
cdef bytes b = b"caf\xe9"     # single byte for 'é'

# // Single Bytes and Chars
cdef char  c = b'A'[0]
assert c == 65         # prints 65
cdef Py_UCS4 u = ord('용')
assert u == 0xC6A9     # coerces to a Unicode character on print

# // Narrow Unicode Builds
cdef Py_UCS4 high = 0x1F600   # 😀
print(high)                   # works on narrow and wide builds

# // Iteration
def find_A(c_string: bytes):
    for c in c_string:        # c is a C char, int 0–255
        if c == ord('A'):
            print("Found A")
def find_Ω(u_str: str):
    for uc in u_str:          # uc is Py_UCS4
        if uc == ord('Ω'):
            print("Found Omega")

# // Windows and wide char APIs
cdef extern from "Windows.h":
    ctypedef wchar_t WCHAR
    ctypedef const WCHAR *LPCWSTR
    int MessageBoxW(void *hWnd, LPCWSTR text, LPCWSTR title, int type)
def show_box():
    title = "Hello Windows"
    # Cython will convert Python str to LPCWSTR under the hood
    MessageBoxW(NULL, "Hi \u263A", title, 0)


Error compiling Cython file:
------------------------------------------------------------
...
    cdef object s = get_utf8()
    return s.upper()              # now a Python str

# // Source code encoding
# -*- coding: ISO-8859-15 -*-
cdef bytes b = b"caf\xe9"     # single byte for 'é'
           ^
------------------------------------------------------------

C:\Users\user\.ipython\cython\_cython_magic_41a9d2fcfa8d1f4266cdff27aeae56d32557fcbff83162279167ccdfb4293333.pyx:92:11: 'b' redeclared 

Error compiling Cython file:
------------------------------------------------------------
...

# // Python string types in Cython code 
# cython: language_level=3
cdef bytes  b = b"hello"
            ^
------------------------------------------------------------

C:\Users\user\.ipython\cython\_cython_magic_41a9d2fcfa8d1f4266cdff27aeae56d32557fcbff83162279167ccdfb4293333.pyx:4:12: Previous declaration is here

Error compiling Cython file:
----------------------------------------------------------

# 14) Memory Allocation 

In [195]:
%%cython
import cython
import random
from cython.cimports.cpython.mem import PyMem_Malloc, PyMem_Realloc, PyMem_Free
from cython.cimports.libc.stdlib import malloc, free
def random_noise(number: cython.int = 1):
    i: cython.int
    # allocate number * sizeof(double) bytes of memory
    my_array: cython.p_double = cython.cast(cython.p_double, malloc(
        number * cython.sizeof(cython.double)))
    if not my_array:
        raise MemoryError()
    try:
        ran = random.normalvariate
        for i in range(number):
            my_array[i] = ran(0, 1)
        # ... let's just assume we do some more heavy C calculations here to make up
        # for the work that it takes to pack the C double values into Python float
        # objects below, right after throwing away the existing objects above.
        return [x for x in my_array[:number]]
    finally:
        # return the previously allocated memory to the system
        free(my_array)

@cython.cclass
class SomeMemory:
    data: cython.p_double

    def __cinit__(self, number: cython.size_t):
        # allocate some memory (uninitialised, may contain arbitrary data)
        self.data = cython.cast(cython.p_double, PyMem_Malloc(
            number * cython.sizeof(cython.double)))
        if not self.data:
            raise MemoryError()

    def resize(self, new_number: cython.size_t):
        # Allocates new_number * sizeof(double) bytes,
        # preserving the current content and making a best-effort to
        # reuse the original data location.
        mem = cython.cast(cython.p_double, PyMem_Realloc(
            self.data, new_number * cython.sizeof(cython.double)))
        if not mem:
            raise MemoryError()
        # Only overwrite the pointer if the memory was really reallocated.
        # On error (mem is NULL), the originally memory has not been freed.
        self.data = mem

    def __dealloc__(self):
        PyMem_Free(self.data)  # no-op if self.data is NULL

Content of stdout:
_cython_magic_34b29ca43dad91d3c03488a98c9b9d3c530192f5850e9edae00293fee37f24b1.c
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_34b29ca43dad91d3c03488a98c9b9d3c530192f5850e9edae00293fee37f24b1.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_34b29ca43dad91d3c03488a98c9b9d3c530192f5850e9edae00293fee37f24b1.cp312-win_amd64.exp
Generating code
Finished generating code

# 15) Embedding Cython Modules in C/C++ Applications

In [196]:
# # Embedding Cython Modules in C/C++ Applications

# This guide walks you through embedding a Cython-compiled extension into a native C/C++ program. It covers module registration, interpreter setup, example code, and common pitfalls.

# ---

# ## Quick Links

# - CPython docs: https://docs.python.org/3/c-api/index.html  
# - Cython Wiki: https://github.com/cython/cython/wiki  
# - `--embed` option: automatically generate a `main()` for your module  
# - `cython_freeze`: merge multiple Cython extensions into one library  

# ---

# ## 1. Initializing Your Main Module

# Before calling any Cython-exported functions, you must register the extension and start Python properly.

# ### 1.1 Register with Inittab

# Place this **before** `Py_Initialize()`:

# ```c
# if (PyImport_AppendInittab("embedded", PyInit_embedded) == -1) {
#     fprintf(stderr,
#         "Error: could not extend in-built modules table\n");
#     exit(1);
# }
# ```

# - Registers your statically or dynamically linked module  
# - Ensures `import embedded` works inside the embedded interpreter  

# ### 1.2 Set Program Name

# Pass the executable’s name to Python:

# ```c
# wchar_t *program = Py_DecodeLocale(argv[0], NULL);
# Py_SetProgramName(program);
# ```

# - Helps Python locate its standard library relative to your app  
# - Must decode `argv[0]` to wide characters  

# ### 1.3 Initialize the Interpreter

# ```c
# Py_Initialize();
# ```

# - Mandatory: Cython code relies on the Python runtime  
# - Call exactly **once** per process  

# ### 1.4 Configure Module Search Path

# Embedded Python does not auto-populate `sys.path`. After initialization:

# ```c
# PySys_SetPath(L"/path/to/your/python/libs");
# ```

# Or append to existing list:

# ```c
# PyObject *path = PySys_GetObject("path");
# PyList_Append(path, PyUnicode_FromString("/my/modules"));
# ```

# ---

# ## 2. Embedding Example Code

# ### 2.1 Cython Module (`embedded.pyx`)

# ```cython
# # embedded.pyx
# # tag: py3only
# # tag: no-cpp

# TEXT_TO_SAY = 'Hello from Python!'

# cdef public int say_hello_from_python() except -1:
#     print(TEXT_TO_SAY)
#     return 0
# ```

# - `cdef public` exports `say_hello_from_python` as a C symbol in `embedded.h`  

# ### 2.2 Host Application (`embedded_main.c`)

# ```c
# #include <Python.h>
# #include "embedded.h"   /* generated by Cython */

# int main(int argc, char *argv[]) {
#     PyObject *pmodule;
#     wchar_t *program = Py_DecodeLocale(argv[0], NULL);
#     if (!program) { fprintf(stderr, "Decode failure\n"); exit(1); }

#     /* 1. Register module */
#     if (PyImport_AppendInittab("embedded", PyInit_embedded) == -1) {
#         fprintf(stderr, "Inittab registration failed\n");
#         exit(1);
#     }

#     /* 2. Pass program name and init */
#     Py_SetProgramName(program);
#     Py_Initialize();

#     /* 3. Optionally set sys.path */
#     /* PySys_SetPath(...); */

#     /* 4. Import and use the module */
#     pmodule = PyImport_ImportModule("embedded");
#     if (!pmodule) {
#         PyErr_Print();
#         fprintf(stderr, "Import failed\n");
#         goto error;
#     }

#     if (say_hello_from_python() < 0) {
#         PyErr_Print();
#         fprintf(stderr, "Cython call failed\n");
#         goto error;
#     }

#     /* 5. Clean up */
#     PyMem_RawFree(program);
#     Py_Finalize();
#     return 0;

# error:
#     PyMem_RawFree(program);
#     Py_Finalize();
#     return 1;
# }
# ```

# ---

# ## 3. Troubleshooting

# - **Interpreter not initialized**  
#   Crashes or undefined behavior if you skip `Py_Initialize()`.  

# - **Repeated initialization/finalization**  
#   Don’t call `Py_Initialize()`/`Py_Finalize()` per calculation—most modules dislike re-initialization.

# - **Python path not set**  
#   Import errors for both your module and standard library.  

# - **Skipping the `import` step**  
#   Module-level setup (string tables, built-ins) happens at import time.

# - **Multiprocessing & pickle**  
#   Spawned child interpreters can’t find your in-tabled module.  
#   Workaround:  
#   - Set `multiprocessing.set_executable()` to your binary  
#   - Handle the `--multiprocessing-fork` argument  
#   - Call `multiprocessing.freeze_support()`  

# ---

# ## 4. Advanced Topics

# | Topic                        | Description                                                          |
# |------------------------------|----------------------------------------------------------------------|
# | Multi-phase init (PEP 489)   | Default in Python 3.5+. Two-phase loading of extension modules.      |
# | Forcing single-phase         | Compile with `-DCYTHON_PEP489_MULTI_PHASE_INIT=0` to disable two-phase. |
# | Auto-generated `main()`      | Use `cython --embed` to let Cython produce the host `main()` for you.|
# | Bundling dependencies        | Use PyInstaller or cx_Freeze for full portability.                   |

# ---

# By following this structure—registering via inittab, initializing once, setting paths, and performing an import—you’ll reliably embed Cython modules in native applications.

# 16) Pythonic Cython Mode or {Pure Python Mode (in Cython) according to CythonDocs} different from Pure Python Mode(Regular)

In [197]:
# # Pure Python Mode in Cython

# This guide explains how to accelerate Python code with Cython without sacrificing its ability to run under the standard Python interpreter. It covers three complementary approaches for adding static typing and cythonic features to pure `.py` files: augmenting with `.pxd` files, using magic attributes via the `cython` module, and leveraging PEP-484/526 type annotations.

# ---

# ## 1. Why Pure Python Mode?

# - Allows a single `.py` file to run both interpreted and compiled.  
# - Yields moderate speedups (20–50%) without rewriting code in `.pyx`.  
# - Eases testing, debugging, and collaboration with pure-Python developers.  
# - Limits you to Python-expressible constructs; more advanced Cython syntax still requires `.pyx`.

# ---

# ## 2. Augmenting with `.pxd` Files

# - Create an augmenting `.pxd` alongside your `.py`.  
# - Declare `cdef`/`cpdef` functions, classes, and attributes in `.pxd`.  
# - Cython merges declarations into the `.py` at compile time, generating wrappers automatically.  
# - Pros: No changes to your original `.py`.  
# - Cons: Must maintain two synchronized files.

# ---

# ## 3. Magic Attributes (Decorators & Functions)

# Importing the special `cython` module lets you embed static typing directly:

# - `cython.compiled`: Flag that’s True when compiled, False at runtime.  
# - `@cython.locals(...)`: Declare types for locals and arguments.  
# - `@cython.returns(type)`: Specify function return type.  
# - `@cython.exceptval(...)`: Control exception return values and checking.  
# - `cython.declare(...)`: Define typed variables and attributes inline.

# All decorators are no-ops at runtime (they’re ignored by plain Python).

# ---

# ## 4. PEP-484/526 Type Annotations

# - Use Python’s standard `foo: type` and `def f(...) -> type` syntax with `cython.` types.  
# - Cython interprets `x: cython.int` and compiles it to `cdef int x`.  
# - Global annotations are ignored; use `cython.declare()` for top-level variables.  
# - Combine with `@cython.exceptval()` for non-Python return semantics.

# ---

# ## 5. Managing the GIL

# - `with cython.nogil:` or `@cython.nogil` to release the Global Interpreter Lock.  
# - `with cython.gil:` to reacquire the GIL.  
# - Conditional GIL handling (compile-time constant) enables optimizations in fused or generic code.

# ---

# ## 6. cimports and Calling C Functions

# - `from cython.cimports.libc import math`: Declare C libraries in pure-Python files.  
# - At compile time, calls map to C functions; at runtime, you must fallback to Python equivalents.  
# - Use `cpdef` in `.pxd` to expose C functions under your module’s namespace.

# ---

# ## 7. Additional Facilities

# - **C types**: All standard C scalar, pointer, array, struct/union, `fused_type`, and casting support.  
# - **Extension types**: `@cython.cclass`, `@cython.cfunc`, `@cython.ccall`, and `@cython.inline` decorate classes and functions.  
# - **Utility functions**: `cython.address()`, `cython.sizeof()`, `cython.typeof()` for low-level operations.

# ---

# ## 8. Tips and Tricks

# - Provide a minimal `cython.py` “shadow” to avoid runtime dependency on Cython.  
# - Leverage C arrays (`cython.int[10]`) for fixed-size buffers.  
# - Use conditional imports to select between C and Python implementations based on `cython.compiled`.  
# - Disable annotation typing with `@cython.annotation_typing(False)` when conflicting with other uses of annotations.

# ---

# Pure Python Mode strikes a balance between maintainability and performance, enabling incremental adoption of Cython’s static-typing features while retaining interpreter compatibility.

# 17) Numpy Library + Cython

In [200]:
%%cython
# // Naive_Convolve
import cython
# tag: numpy
# You can ignore the previous line.
# It's for internal testing of the cython documentation.
import numpy as np
# "cimport" is used to import special compile-time information
# about the numpy module (this is stored in a file numpy.pxd which is
# distributed with Numpy).
# Here we've used the name "cnp" to make it easier to understand what
# comes from the cimported module and what comes from the imported module,
# however you can use the same name for both if you wish.
cimport numpy as cnp
# It's necessary to call "import_array" if you use any part of the
# numpy PyArray_* API. From Cython 3, accessing attributes like
# ".shape" on a typed Numpy array use this API. Therefore we recommend
# always calling "import_array" whenever you "cimport numpy"
cnp.import_array()
# We now need to fix a datatype for our arrays. I've used the variable
# DTYPE for this, which is assigned to the usual NumPy runtime
# type info object.
DTYPE = np.int64
# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For
# every type in the numpy module there's a corresponding compile-time
# type with a _t-suffix.
ctypedef cnp.int64_t DTYPE_t
# "def" can type its arguments but not have a return type. The type of the
# arguments for a "def" function is checked at run-time when entering the
# function.
#
# The arrays f, g and h is typed as "np.ndarray" instances. The only effect
# this has is to a) insert checks that the function arguments really are
# NumPy arrays, and b) make some attribute access like f.shape[0] much
# more efficient. (In this example this doesn't matter though.)
@cython.boundscheck(False) # turn off bounds-checking for entire function
@cython.wraparound(False)  # turn off negative index wrapping for entire function
def naive_convolve(cnp.ndarray[DTYPE_t, ndim=2] f, cnp.ndarray[DTYPE_t, ndim=2] g): # We can use object instead of cnp.ndarray
    if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1:
        raise ValueError("Only odd dimensions on filter supported")
    assert f.dtype == DTYPE and g.dtype == DTYPE

    # The "cdef" keyword is also used within functions to type variables. It
    # can only be used at the top indentation level (there are non-trivial
    # problems with allowing them in other places, though we'd love to see
    # good and thought out proposals for it).
    #
    # For the indices, the "int" type is used. This corresponds to a C int,
    # other C types (like "unsigned int") could have been used instead.
    # Purists could use "Py_ssize_t" which is the proper Python type for
    # array indices.
    cdef int vmax = f.shape[0]
    cdef int wmax = f.shape[1]
    cdef int smax = g.shape[0]
    cdef int tmax = g.shape[1]
    cdef int smid = smax // 2
    cdef int tmid = tmax // 2
    cdef int xmax = vmax + 2 * smid
    cdef int ymax = wmax + 2 * tmid
    cdef cnp.ndarray h = np.zeros([xmax, ymax], dtype=DTYPE)
    cdef int x, y, s, t, v, w

    # It is very important to type ALL your variables. You do not get any
    # warnings if not, only much slower code (they are implicitly typed as
    # Python objects).
    cdef int s_from, s_to, t_from, t_to

    # For the value variable, we want to use the same data type as is
    # stored in the array, so we use "DTYPE_t" as defined above.
    # NB! An important side-effect of this is that if "value" overflows its
    # datatype size, it will simply wrap around like in C, rather than raise
    # an error like in Python.
    cdef DTYPE_t value
    for x in range(xmax):
        for y in range(ymax):
            s_from = max(smid - x, -smid)
            s_to = min((xmax - x) - smid, smid + 1)
            t_from = max(tmid - y, -tmid)
            t_to = min((ymax - y) - tmid, tmid + 1)
            value = 0
            for s in range(s_from, s_to):
                for t in range(t_from, t_to):
                    v = x - smid + s
                    w = y - tmid + t
                    value += g[smid - s, tmid - t] * f[v, w]
            h[x, y] = value
    return h

# Buffer options
    # The following options are accepted when creating buffer types:

        # ndim - an integer number of dimensions.

        # mode - a string from:

            # "c" - C contiguous array,

            # "fortran" - Fortran contiguous array,

            # "strided" - non-contiguous lookup into a single block of memory,

            # "full" - any valid buffer, including indirect arrays.

        # negative_indices - boolean value specifying whether negative indexing is allowed, essentially a per-variable version of the compiler directive cython.wraparound.

        # cast - boolean value specifying whether to allow the user to view the array as a different type. The sizes of the source and destination type must be the same. In C++ this would be equivalent to reinterpret_cast.

    # In all cases these parameters must be compile-time constants.

    # As an example of how to specify the parameters:
        # code=>cdef cnp.ndarray[double, ndim=2, mode="c", cast=True] some_array

    # cast can be used to get a low-level view of an array with non-native endianness:
        # code=>cdef cnp.ndarray[cnp.uint32, cast=True] values = np.

Content of stdout:
_cython_magic_6dbd09a62a38e1622d477b42d2e79ce224b2ec41a3db4c593480de7451771e61.c
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_6dbd09a62a38e1622d477b42d2e79ce224b2ec41a3db4c593480de7451771e61.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_6dbd09a62a38e1622d477b42d2e79ce224b2ec41a3db4c593480de7451771e61.cp312-win_amd64.exp
Generating code
Finished generating code

# 18) Python Array + Cython

In [210]:
%%cython
# // Safe usage with memory views
from cython.cimports.cpython import array
import cython
import array
a = cython.declare(array.array, array.array('i', [1, 2, 3]))
ca = cython.declare(cython.int[:], a)
@cython.cfunc
def overhead(a: cython.object) -> cython.int:
    ca: cython.int[:] = a
    return ca[0]
@cython.cfunc
def no_overhead(ca: cython.int[:]) -> cython.int:
    return ca[0]
print(overhead(a))  # new memory view will be constructed, overhead
print(no_overhead(ca))  # ca is already a memory view, so no overhead

# // Zero-Overhead, unsafe access to raw c-pointer
# access underlying pointer:
print(a.data.as_ints[0])
from cython.cimports.libc.string import memset
memset(a.data.as_voidptr, 0, len(a) * cython.sizeof(cython.int))

# // Cloning, extending arrays
int_array_template = cython.declare(array.array, array.array('i', []))
cython.declare(newarray=array.array)
# create an array with 3 elements with same type as template
newarray = array.clone(int_array_template, 3, zero=False)
b = cython.declare(array.array, array.array('i', [4, 5, 6]))
# extend a with b, resize as needed
array.extend(a, b)
# resize a, leaving just original three elements
array.resize(a, len(a) - len(b))
print(b)

# // API Reference

## Data fields
# data.as_voidptr
# data.as_chars
# data.as_schars
# data.as_uchars
# data.as_shorts
# data.as_ushorts
# data.as_ints
# data.as_uints
# data.as_longs
# data.as_ulongs
# data.as_longlongs  # requires Python >=3
# data.as_ulonglongs  # requires Python >=3
# data.as_floats
# data.as_doubles
# data.as_pyunicodes

## Functions
# The following functions are available to Cython from the array module

# # Pure Python(Cython)
# @cython.cfunc
# @cython.exceptval(-1)
# def resize(self: array.array, n: cython.Py_ssize_t) -> cython.int
# # Fast resize / realloc. Not suitable for repeated, small increments; resizes underlying array to exactly the requested amount.

# # Pure Python(Cython)
# @cython.cfunc
# @cython.exceptval(-1)
# def resize_smart(self: array.array, n: cython.Py_ssize_t) -> cython.int
# # Efficient for small increments; uses growth pattern that delivers amortized linear-time appends.

# # Pure Python(Cython)
# @cython.cfunc
# @cython.inline
# def clone(template: array.array, length: cython.Py_ssize_t, zero: cython.bint) -> array.array
# # Fast creation of a new array, given a template array. Type will be same as template. If zero is True, new array will be initialized with zeroes.

# # Pure Python(Cython)
# @cython.cfunc
# @cython.inline
# def copy(self: array.array) -> array.array
# # Make a copy of an array.

# # Pure Python(Cython)
# @cython.cfunc
# @cython.inline
# @cython.exceptval(-1)
# def extend_buffer(self: array.array, stuff: cython.p_char, n: cython.Py_ssize_t) -> cython.int
# # Efficient appending of new data of same type (e.g. of same array type) n: number of elements (not number of bytes!)

# # Pure Python(Cython)
# @cython.cfunc
# @cython.inline
# @cython.exceptval(-1)
# def extend(self: array.array, other: array.array) -> cython.int
# # Extend array with data from another array; types must match.

# # Pure Python(Cython)
# @cython.cfunc
# @cython.inline
# def zero(self: array.array) -> cython.void
# # Set all elements of array to zero.

Content of stdout:
_cython_magic_233031128710c0e052731ceec50857d6cadd0677f248a45fb103b9fe1dea008f.c
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_233031128710c0e052731ceec50857d6cadd0677f248a45fb103b9fe1dea008f.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_233031128710c0e052731ceec50857d6cadd0677f248a45fb103b9fe1dea008f.cp312-win_amd64.exp
Generating code
Finished generating code1
1
1
array('i', [4, 5, 6])


In [209]:
%%cython
# // Safe usage with Memory Views
from cpython cimport array
import cython
import array
cdef array.array a = array.array('i', [1, 2, 3])
cdef int[:] ca = a
cdef int overhead(object a):
    cdef int[:] ca = a
    return ca[0]
cdef int no_overhead(int[:] ca):
    return ca[0]
print(overhead(a))  # new memory view will be constructed, overhead
print(no_overhead(ca))  # ca is already a memory view, so no overhead

# // Zero-Overhead, unsafe access to raw c-pointer
# access underlying pointer:
print(a.data.as_ints[0])
from libc.string cimport memset
memset(a.data.as_voidptr, 0, len(a) * sizeof(int))

# // Cloning, extending arrays
cdef array.array int_array_template = array.array('i', [])
cdef array.array newarray
# create an array with 3 elements with same type as template
newarray = array.clone(int_array_template, 3, zero=False)
cdef array.array b = array.array('i', [7, 95, 16])
# extend a with b, resize as needed
array.extend(a, b)
# resize a, leaving just original three elements
array.resize(a, len(a) - len(b))
print(len(a))

# // API Reference

## Data fields
# data.as_voidptr
# data.as_chars
# data.as_schars
# data.as_uchars
# data.as_shorts
# data.as_ushorts
# data.as_ints
# data.as_uints
# data.as_longs
# data.as_ulongs
# data.as_longlongs  # requires Python >=3
# data.as_ulonglongs  # requires Python >=3
# data.as_floats
# data.as_doubles
# data.as_pyunicodes

## Functions
# The following functions are available to Cython from the array module

# # Proper Cython
# cdef int resize(array.array self, Py_ssize_t n) except -1
# # Fast resize / realloc. Not suitable for repeated, small increments; resizes underlying array to exactly the requested amount.

# # Proper Cython
# cdef int resize_smart(array.array self, Py_ssize_t n) except -1
# # Efficient for small increments; uses growth pattern that delivers amortized linear-time appends.

# # Proper Cython
# cdef inline array.array clone(array.array template, Py_ssize_t length, bint zero)
# # Fast creation of a new array, given a template array. Type will be same as template. If zero is True, new array will be initialized with zeroes.

# # Proper Cython
# cdef inline array.array copy(array.array self)
# # Make a copy of an array.

# # Proper Cython
# cdef inline int extend_buffer(array.array self, char* stuff, Py_ssize_t n) except -1
# # Efficient appending of new data of same type (e.g. of same array type) n: number of elements (not number of bytes!)

# # Proper Cython
# cdef inline int extend(array.array self, array.array other) except -1
# # Extend array with data from another array; types must match.

# # Proper Cython
# cdef inline void zero(array.array self)
# # Set all elements of array to zero.

Content of stdout:
_cython_magic_2482774f9a26e8620e47ad7f7f7849d6c575cb20b91c2e812be76d05893ea840.c
   Creating library C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_2482774f9a26e8620e47ad7f7f7849d6c575cb20b91c2e812be76d05893ea840.cp312-win_amd64.lib and object C:\Users\user\.ipython\cython\Users\user\.ipython\cython\_cython_magic_2482774f9a26e8620e47ad7f7f7849d6c575cb20b91c2e812be76d05893ea840.cp312-win_amd64.exp
Generating code
Finished generating code1
1
1
3


# 19) Writing parallel code with Cython

In [None]:
# This tutorial shows how to use Cython’s OpenMP-based parallel features—available in two flavors—to speed up loops and computations across multiple CPU cores:

# - **Pure-Python syntax** (PEP-484/526 annotations + `cython.parallel` imports) in `.py` files  
# - **Proper Cython syntax** in `.pyx` modules  

# Both generate C code with `#pragma omp` and require `nogil` blocks.  

# ---

# ## Compilation Requirements  
# - Enable OpenMP flags:  
#   - GCC: `-fopenmp`  
#   - MSVC: `/openmp`  
# - Typical `setup.py` snippet:
#   ```python
#   from setuptools import Extension, setup
#   from Cython.Build import cythonize
#   import sys

#   openmp_arg = '/openmp' if sys.platform.startswith("win") else '-fopenmp'
#   ext_modules = [
#       Extension("*", ["*.pyx"],
#                 extra_compile_args=[openmp_arg],
#                 extra_link_args=[openmp_arg]),
#   ]
#   setup(name="parallel-tutorial",
#         ext_modules=cythonize(ext_modules))
#   ```


# ---

# ## 1. Element-wise Parallel Loops

# **Pure Python Cython syntax**:
# ```python
# # do_sine.py
# from cython.parallel import prange
# import cython
# from libc.math cimport sin
# import numpy as np

# def do_sine(input: cython.double[:,:]) -> np.ndarray:
#     output: cython.double[:,:] = np.empty_like(input)
#     i: cython.Py_ssize_t
#     j: cython.Py_ssize_t
#     for i in prange(input.shape[0], nogil=True):
#         for j in range(input.shape[1]):
#             output[i, j] = sin(input[i, j])
#     return np.asarray(output)
# ```

# **Proper Cython syntax**:
# ```cython
# # do_sine.pyx
# from cython.parallel cimport prange
# cimport cython
# from libc.math cimport sin
# import numpy as np

# @cython.boundscheck(False)
# @cython.wraparound(False)
# def do_sine(double[:, :] input):
#     cdef double[:, :] output = np.empty_like(input)
#     cdef Py_ssize_t i, j
#     for i in prange(input.shape[0], nogil=True):
#         for j in range(input.shape[1]):
#             output[i, j] = sin(input[i, j])
#     return np.asarray(output)
# ```
# Both versions parallelize the outer loop, working without the GIL.

# ---

# ## 2. Private Variables

# - Loop indices (`i`, `j`) and C scalars become `firstprivate`/`lastprivate` by default.  
# - Each thread has its own copy; reductions combine results after the parallel region.  
# - Typed memoryviews and Python objects remain shared (access controlled by the GIL).

# ---

# ## 3. Reductions

# Automatically infer common reductions (`+=`, `*=`, etc.) on C scalars:

# **Pure-Python**:
# ```python
# from cython.parallel import prange
# import cython
# from libc.math cimport sqrt

# def l2norm(x: cython.double[:]) -> float:
#     total: cython.double = 0
#     i: cython.Py_ssize_t
#     for i in prange(x.shape[0], nogil=True):
#         total += x[i] * x[i]
#     return sqrt(total)
# ```

# **Proper-Cython**:
# ```cython
# from cython.parallel cimport prange
# cimport cython
# from libc.math cimport sqrt

# @cython.boundscheck(False)
# @cython.wraparound(False)
# def l2norm(double[:] x):
#     cdef double total = 0
#     cdef Py_ssize_t i
#     for i in prange(x.shape[0], nogil=True):
#         total += x[i] * x[i]
#     return sqrt(total)
# ```
# Reductions occur in a single OpenMP `reduction(+:total)` clause.

# ---

# ## 4. Parallel Blocks

# Group multiple loops into one `parallel` region to reduce fork/join overhead:

# ```cython
# from cython.parallel cimport parallel, prange
# cimport cython
# from libc.math cimport sqrt

# @cython.boundscheck(False)
# @cython.wraparound(False)
# def normalize(double[:] x):
#     cdef Py_ssize_t i
#     cdef double total = 0, norm
#     with cython.nogil, parallel():
#         for i in prange(x.shape[0]):
#             total += x[i] * x[i]
#         norm = sqrt(total)
#         for i in prange(x.shape[0]):
#             x[i] /= norm
# ```
# Variables assigned in `parallel` become private and are not accessible afterwards.

# ---

# ## 5. Scratch Space per Thread

# Allocate per-thread buffers to avoid data races:

# **Proper-Cython**:
# ```cython
# from cython.parallel cimport parallel, prange
# from libc.stdlib cimport malloc, free
# cimport cython
# import numpy as np

# def median_axis0(double[:, :] x):
#     cdef Py_ssize_t rows = x.shape[0], cols = x.shape[1]
#     cdef double[:] out = np.empty(cols, dtype=np.double)
#     cdef Py_ssize_t i, j
#     cdef double *scratch

#     with cython.nogil, parallel():
#         scratch = <double*>malloc(rows * sizeof(double))
#         try:
#             for i in prange(cols):
#                 for j in range(rows):
#                     scratch[j] = x[j, i]
#                 # compute median from scratch[]
#                 out[i] = scratch[rows // 2]
#         finally:
#             free(scratch)
#     return np.asarray(out)
# ```
# Each thread allocates and frees its own scratch buffer.

# ---

# ## 6. Custom Thread Tasks

# Run different functions on each thread using OpenMP thread IDs:

# ```cython
# from cython.parallel cimport parallel
# from cython.cimports.openmp import omp_get_thread_num
# cimport cython

# @cython.cfunc
# @cython.nogil
# def task1(): ...

# @cython.cfunc
# @cython.nogil
# def task2(): ...

# def do_two_tasks():
#     cdef int thread_num
#     with cython.nogil, parallel(num_threads=2):
#         thread_num = omp_get_thread_num()
#         if thread_num == 0:
#             task1()
#         else:
#             task2()
# ```
# Useful for bootstrapping long-running operations per thread, though data sharing is limited.

# ---

# By choosing the appropriate syntax—pure-Python annotations or Proper Cython style—and using `prange`/`parallel`, you can harness multi-core speedups where data can be processed independently. Always measure to ensure parallel overhead is justified.  
