In [None]:
%load_ext blackcellmagic

# Why Rust extensions?

* At some point you might need the speed of native extensions
* Can be a successful combination
  * numpy
  * pillow
* Rust
  * safety vs speed, choose both
  * also a lot of niceties of modern higher level languages

# Load some data

In [1]:
import os
import re


def read_data(path: str) -> str:
    with open(path) as f:
        text = f.read()
    return text


def normalize(text: str) -> str:
    lowercase_and_ws = re.compile(r"[^a-z\s]")
    return re.sub(lowercase_and_ws, "", text.lower())


assert normalize("Julius,,,, Caesar") == "julius caesar"

text_corpus = normalize(read_data("data/gallic_wars.txt")) * 1500
# Make dataset 5 times bigger
# text_corpus = normalize(read_data("data/gallic_wars.txt")) * 7500

In [5]:
len(text_corpus)

725827500

# Count a word

In [3]:
def count_word(text_corpus: str, searched_word: str) -> int:
    return len([word for word in text_corpus.split() if word == searched_word])

In [6]:
def count_word(text_corpus: str, searched_word: str) -> int:
    return text_corpus.count(searched_word)

In [8]:
from collections import Counter

def count_word(text_corpus: str, searched_word: str) -> int:
    return Counter(text_corpus.split())[searched_word]

In [7]:
%time count_word(text_corpus, "marcus")

CPU times: user 509 ms, sys: 544 µs, total: 509 ms
Wall time: 506 ms


22500

# Pandas

In [4]:
import pandas as pd

def pandas_count_word(text_corpus: str, searched_word: str) -> int:
    series = pd.Series(text_corpus.split())
    return sum(series.isin([searched_word]))

In [5]:
%time pandas_count_word(text_corpus, "marcus")

CPU times: user 28.1 s, sys: 2.99 s, total: 31.1 s
Wall time: 31 s


22500

In [7]:
series = pd.Series(text_corpus.split())
searched_word = "marcus"
%time sum(series == searched_word)

CPU times: user 15.2 s, sys: 30.8 ms, total: 15.2 s
Wall time: 15.2 s


22500

# Rust

In [4]:
%%bash
cd word_count_rust/
cargo build --release

   Compiling word_count_rust v0.1.0 (/home/a/pyg/rust_extensions/word_count_rust)
    Finished release [optimized] target(s) in 0.77s


In [5]:
del rust_lib

In [2]:
import ctypes

rust_lib = ctypes.cdll.LoadLibrary("word_count_rust/target/release/libword_count_rust.so")

In [3]:
%time rust_lib.word_count(text_corpus.encode(), "marcus".encode())

CPU times: user 4.56 s, sys: 208 ms, total: 4.77 s
Wall time: 1.05 s


22500

### Make it parallel

In [4]:
%%bash
cd word_count_rust/
cargo build --release

   Compiling word_count_rust v0.1.0 (/home/a/pyg/rust_extensions/word_count_rust)
    Finished release [optimized] target(s) in 0.76s


In [None]:
from IPython.core.display import HTML
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

In [None]:
import ctypes

rust_lib = ctypes.cdll.LoadLibrary("word_count_rust/target/release/libword_count_rust.so")

In [None]:
%time rust_lib.word_count(text_corpus.encode(), "marcus".encode())

# More complex use cases - PyO3

* Who frees the data?
  * http://jakegoulding.com/rust-ffi-omnibus/string_return/
* Use the Python C-API?
  * https://docs.python.org/3/c-api/unicode.html#c.PyUnicode_FromString
* Thin Ice
  * https://docs.python.org/3/extending/extending.html#thin-ice
* Python classes
  * https://github.com/PyO3/pyo3/blob/master/examples/word-count/src/lib.rs
  * https://github.com/PyO3/pyo3/blob/master/examples/word-count/tests/test_word_count.py

In [2]:
!cp word_count_rust/target/release/libword_count_rust.so lib_word_count_pyo3.so
import lib_word_count_pyo3 as rust_lib_pyo3

In [3]:
%time rust_lib_pyo3.word_count_pyo3(text_corpus, "marcus")

CPU times: user 4.29 s, sys: 0 ns, total: 4.29 s
Wall time: 686 ms


22500

# Package it

https://github.com/PyO3/maturin
Can be used with cffi, PyO3 and rust-cpython

In [4]:
%%bash
pip install maturin
pip install cffi
cd word_count_rust/
maturin build -b cffi --release

🐍 Using CPython 3.7m at python to generate the cffi bindings
📦 Built wheel to /home/a/pyg/rust_extensions/word_count_rust/target/wheels/word_count_rust-0.1.0-py2.py3-none-manylinux1_x86_64.whl


   Compiling pyo3 v0.8.3
   Compiling word_count_rust v0.1.0 (/home/a/pyg/rust_extensions/word_count_rust)
    Finished release [optimized] target(s) in 4.62s


In [5]:
!pip install -I word_count_rust/target/wheels/word_count_rust-0.1.0-py2.py3-none-manylinux1_x86_64.whl

Processing ./word_count_rust/target/wheels/word_count_rust-0.1.0-py2.py3-none-manylinux1_x86_64.whl
Installing collected packages: word-count-rust
Successfully installed word-count-rust-0.1.0


In [6]:
from word_count_rust import lib as rust_lib

In [7]:
%time rust_lib.word_count(text_corpus.encode(), "marcus".encode())

CPU times: user 4.53 s, sys: 232 ms, total: 4.76 s
Wall time: 1.06 s


22500

... and build wheels for all platforms
* https://github.com/kngwyu/rogue-gym/blob/master/azure-pipelines.yml

# Universal runtime with web assembly?

* https://hacks.mozilla.org/2019/08/webassembly-interface-types/


* https://github.com/CraneStation/wasmtime-demos/tree/master/python
* https://github.com/bytecodealliance/wasmtime/tree/master/crates/misc/py
* https://github.com/wasmerio/python-ext-wasm

# Notes

## Not so nice
  * fighting with the borrow checker
     * https://doc.rust-lang.org/book/ch10-03-lifetime-syntax.html#lifetime-annotations-in-function-signatures
      * noisy syntax with lifetimes
  * strings
    * https://doc.rust-lang.org/book/ch08-02-strings.html

## Nice
  * Iterators
  * Easy parallelize with rayon
  * Error handling, Option, Result
    * https://doc.rust-lang.org/rust-by-example/error/result/enter_question_mark.html
  * No overhead (no gc), you can go as low as C, low memory footprint
    * interesting for wasm, embedded
  * guaranteed memory safety
  * zero cost abstractions
  * "fearless concurrency"
    * many parallel/concurrency problems are compile time errors
    * no data races
  * Tooling
    * helpful compiler errors
    * cargo
    * rustfmt
    * clippy
    * cargo doc
    * tests run parallel by default