# Notebook 06 - Inference checks and RAM usage
This notebook runs one inference per saved model in Mes_notebooks and reports peak RSS.
For each model we run the inference together with the preprocessing steps it requires.


Peak_RSS:

Peak RSS (resident set size) is the largest amount of physical memory (RAM) that the process held at any point during its execution.

In [1]:
from pathlib import Path
import json
import os
import threading
import time

import psutil

# Locate the directory that holds the saved model packages.
# An explicit NOTEBOOKS_DIR environment variable takes precedence over any
# automatic lookup.
NOTEBOOKS_DIR_OVERRIDE = os.environ.get("NOTEBOOKS_DIR")
if NOTEBOOKS_DIR_OVERRIDE:
    NOTEBOOKS_DIR = Path(NOTEBOOKS_DIR_OVERRIDE).expanduser().resolve()
else:
    # Automatic lookup relative to the current working directory, in order:
    #   1. a "Mes_notebooks" child of the cwd,
    #   2. the cwd itself when it is named "Mes_notebooks",
    #   3. a "Mes_notebooks" directory next to the cwd or any of its ancestors.
    HERE = Path.cwd().resolve()
    if (HERE / "Mes_notebooks").is_dir():
        NOTEBOOKS_DIR = HERE / "Mes_notebooks"
    elif HERE.name == "Mes_notebooks":
        NOTEBOOKS_DIR = HERE
    else:
        NOTEBOOKS_DIR = None
        for parent in [HERE] + list(HERE.parents):
            candidate = parent / "Mes_notebooks"
            if candidate.is_dir():
                NOTEBOOKS_DIR = candidate
                break
        # Fail loudly instead of continuing with an undefined model location.
        if NOTEBOOKS_DIR is None:
            raise FileNotFoundError("Mes_notebooks directory not found; set NOTEBOOKS_DIR manually.")

# Default text sent through every model in this notebook.
SAMPLE_TEXT = "I love how friendly this app is!"
# Passed as ``max_length`` to the Hugging Face tokenizer calls further down.
MAX_LENGTH = 128

# setdefault only writes the variable when the environment does not define it
# already, so values chosen by the user are kept.
# NOTE(review): presumably these must be set before transformers /
# TensorFlow / torch are first imported to have any effect - confirm.
os.environ.setdefault("TRANSFORMERS_OFFLINE", "1")
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")


def human_bytes(num):
    """Render a byte count with binary (1024-based) unit suffixes.

    The value is divided by 1024 until its magnitude falls below 1024 and is
    then printed with one decimal place. Values that are still too large
    after the TiB step are reported in PiB.
    """
    size = num
    for suffix in ("B", "KiB", "MiB", "GiB", "TiB"):
        if abs(size) < 1024.0:
            return f"{size:.1f} {suffix}"
        size = size / 1024.0
    return f"{size:.1f} PiB"


def run_with_peak_rss(func, interval=0.1):
    """Run ``func()`` while a helper thread samples this process's RSS.

    Args:
        func: Zero-argument callable, executed in the calling thread.
        interval: Seconds between two consecutive RSS samples.

    Returns:
        A ``(result, peak, delta)`` tuple: ``result`` is what ``func()``
        returned, ``peak`` is the highest RSS sample in bytes, and ``delta``
        is ``peak`` minus the RSS read just before ``func`` was started.

    Raises:
        Whatever ``func`` raises; the sampling thread is stopped first.

    Note:
        RSS is sampled, not traced. An allocation that is made and released
        again between two samples is not observed.
    """
    proc = psutil.Process(os.getpid())
    baseline = proc.memory_info().rss
    peak = baseline
    stop = threading.Event()

    def monitor():
        nonlocal peak
        while True:
            try:
                rss = proc.memory_info().rss
            except psutil.Error:
                break
            peak = max(peak, rss)
            # Event.wait is an interruptible sleep: it returns True as soon
            # as the main thread asks us to stop, so join() never has to sit
            # out a full sampling interval.
            if stop.wait(interval):
                break

    t = threading.Thread(target=monitor, daemon=True)
    t.start()
    try:
        result = func()
    finally:
        stop.set()
        t.join()

    # One last sample after the call: without it, memory that func allocated
    # after the sampler's last wake-up (or during a call shorter than
    # ``interval``) would be missing from the reported peak.
    try:
        peak = max(peak, proc.memory_info().rss)
    except psutil.Error:
        # Best effort: keep the peak seen so far if the process cannot be read.
        pass
    return result, peak, peak - baseline


def load_json(path):
    """Parse the JSON document stored at ``path``.

    Args:
        path: ``pathlib.Path`` of the file to read.

    Returns:
        The decoded JSON value, or an empty dict when ``path`` does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if not path.exists():
        return {}
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which is not UTF-8 everywhere (e.g. Windows code pages).
    return json.loads(path.read_text(encoding="utf-8"))

def load_tf_input_example(example_path, tf_module):
    """Turn the entries of an example JSON file into tensors.

    Every top-level key of the JSON document is passed through
    ``tf_module.convert_to_tensor``. Entries that cannot be converted are
    left out of the returned dict.
    """
    converted = {}
    for name, raw_value in load_json(example_path).items():
        try:
            tensor = tf_module.convert_to_tensor(raw_value)
        except Exception:
            # Deliberate best effort: skip entries that are not tensor-like.
            continue
        converted[name] = tensor
    return converted


def record_result(results, name, peak_bytes, delta_bytes, details=None, error=None, extra=None):
    """Append one measurement row to ``results``.

    Args:
        results: List that collects the rows; it is modified in place.
        name: Label stored under ``"model"``.
        peak_bytes: Peak RSS in bytes, or ``None`` when nothing was measured.
        delta_bytes: RSS increase in bytes, or ``None``.
        details: Optional payload stored as-is under ``"details"``.
        error: Optional error; its ``str()`` is stored when it is truthy.
        extra: Optional mapping merged into the row after the fixed keys.
    """
    peak_label = None if peak_bytes is None else human_bytes(peak_bytes)
    delta_label = None if delta_bytes is None else human_bytes(delta_bytes)
    error_text = str(error) if error else None

    entry = dict(
        model=name,
        peak_rss=peak_bytes,
        peak_rss_human=peak_label,
        delta_rss=delta_bytes,
        delta_rss_human=delta_label,
        details=details,
        error=error_text,
    )
    if extra:
        entry.update(extra)
    results.append(entry)


results = []


# Model_1_simple: Logistic Regression

In [None]:
import joblib
import json
import re
import string

import numpy as np

try:
    import pandas as pd
except Exception:
    pd = None

try:
    import nltk
    from nltk.corpus import stopwords
    from nltk.stem import PorterStemmer
except Exception:
    nltk = None
    stopwords = None
    PorterStemmer = None

try:
    import emoji as _emoji
except Exception:
    _emoji = None

# Directory that holds the logistic-regression artefacts.
LOGREG_DIR = NOTEBOOKS_DIR / "Model_1_simple"
# The glob pattern contains no wildcard, so this yields at most the single
# package directory with exactly this name (an empty list when it is absent).
logreg_packages = sorted(LOGREG_DIR.glob("logreg_model_package_minmaxscaler"))


def _get_tokenizer():
    """Return a callable that splits a string into tokens.

    ``nltk.word_tokenize`` is used when nltk was imported and its
    ``tokenizers/punkt`` resource can be found. Otherwise a regex fallback
    that extracts runs of ASCII letters is returned.
    """
    def _letters_only(text):
        return re.findall(r"[A-Za-z]+", text)

    if nltk is not None:
        try:
            nltk.data.find("tokenizers/punkt")
        except LookupError:
            # Resource not installed: fall through to the regex tokenizer.
            pass
        else:
            return nltk.word_tokenize
    return _letters_only


def _get_stop_words():
    """Return the English stop-word set, or an empty set when unavailable.

    "Unavailable" covers both a failed nltk import (``stopwords`` is
    ``None``) and a missing corpus (``LookupError`` on access).
    """
    if stopwords is not None:
        try:
            english = stopwords.words("english")
        except LookupError:
            return set()
        return set(english)
    return set()

# Resolved once at cell execution so preprocess_tweet does not repeat the
# nltk availability checks on every call.
_TOKENIZE = _get_tokenizer()
_STOP_WORDS = _get_stop_words()
_STEMMER = PorterStemmer() if PorterStemmer is not None else None

# Emoticon -> word replacements. Keys are lower-case because
# preprocess_tweet lower-cases the text before looking them up.
EMOTICON_DICT = {
    ':)': 'smile', ':-)': 'smile', ':(': 'sad', ':-(': 'sad',
    ':d': 'laugh', ':-d': 'laugh', ';)': 'wink', ';-)': 'wink',
    ':/': 'annoyed', ':p': 'playful', ':-p': 'playful',
    ":'(": 'cry', 'xd': 'laugh', 't_t': 'cry'
}

# Lower-case contraction -> expanded form. Only the ASCII apostrophe (')
# appears in the keys.
CONTRACTIONS = {
    "ain't": "am not", "aren't": "are not", "can't": "cannot",
    "couldn't": "could not", "didn't": "did not", "doesn't": "does not",
    "don't": "do not", "hadn't": "had not", "hasn't": "has not",
    "haven't": "have not", "he'd": "he would", "he'll": "he will",
    "he's": "he is", "i'd": "i would", "i'll": "i will", "i'm": "i am",
    "i've": "i have", "isn't": "is not", "it's": "it is",
    "let's": "let us", "shouldn't": "should not", "that's": "that is",
    "there's": "there is", "they'd": "they would", "they'll": "they will",
    "they're": "they are", "they've": "they have", "wasn't": "was not",
    "we'd": "we would", "we're": "we are", "we've": "we have",
    "weren't": "were not", "what's": "what is", "where's": "where is",
    "who's": "who is", "won't": "will not", "wouldn't": "would not",
    "you'd": "you would", "you'll": "you will", "you're": "you are",
    "you've": "you have", "gonna": "going to", "wanna": "want to"
}

# One alternation over all (escaped) contraction keys, anchored on word
# boundaries so only whole words are replaced.
_CONTRACTION_RE = re.compile(r"\b(" + "|".join(map(re.escape, CONTRACTIONS.keys())) + r")\b")


def _expand_contractions(text):
    """Replace every contraction listed in ``CONTRACTIONS`` by its long form.

    Matching is case-sensitive against the lower-case keys, so the caller is
    expected to pass lower-cased text.
    """
    def _long_form(match):
        return CONTRACTIONS[match.group(0)]

    return _CONTRACTION_RE.sub(_long_form, text)


def _convert_emojis(text):
    """Replace emojis by their names, each surrounded by single spaces.

    The text is returned unchanged when the optional ``emoji`` package could
    not be imported.
    """
    if _emoji is not None:
        return _emoji.demojize(text, delimiters=(" ", " "))
    return text


# Regex fragments that each count as one negation marker. The trailing
# generic "n't" fragment also covers contractions that are not listed
# explicitly.
NEGATIVE_FORMS = [
    r"\bnot\b", r"\bno\b", r"\bnever\b", r"\bnone\b", r"\bnothing\b", r"\bnowhere\b",
    r"\bneither\b", r"\bnor\b", r"\bcannot\b", r"\bwithout\b",
    r"\bdon't\b", r"\bdoesn't\b", r"\bdidn't\b", r"\bcan't\b", r"\bcouldn't\b",
    r"\bwon't\b", r"\bwouldn't\b", r"\bshouldn't\b", r"\bisn't\b", r"\baren't\b",
    r"\bwasn't\b", r"\bweren't\b", r"\bhasn't\b", r"\bhaven't\b", r"\bhadn't\b",
    r"\bain't\b", r"n't\b"
]

# Patterns used by extract_numeric_features; each one is applied to the raw
# (not lower-cased, not cleaned) text and its matches are counted.
_NEG_FORM_PATTERN = re.compile("|".join(NEGATIVE_FORMS), flags=re.IGNORECASE)
_HASHTAG_PATTERN = re.compile(r"#\w+")
_URL_PATTERN = re.compile(r"http[s]?://\S+|www\.\S+")
_MENTION_PATTERN = re.compile(r"@\w+")
_QUESTION_PATTERN = re.compile(r"\?")
_EXCLAMATION_PATTERN = re.compile(r"!")
# Counts non-overlapping "..." triples, so "......" yields two matches.
_ELLIPSIS_PATTERN = re.compile(r"\.\.\.")
# One match per run of consecutive all-caps words (each at least 2 letters).
_UPPERCASE_PATTERN = re.compile(r"\b(?:[A-Z]{2,}(?:\s+[A-Z]{2,})*)\b")


def preprocess_tweet(text):
    """Clean a tweet and return its tokens joined by single spaces.

    Steps, in order: lower-case, expand contractions, spell out emoticons
    and emojis, drop URLs / mentions / hashtags, strip punctuation, tokenize,
    remove stop words and one-character tokens, and stem (when a stemmer is
    available).

    NOTE(review): the step order presumably mirrors the preprocessing used
    when the model was trained - confirm before changing anything here.
    """
    text = str(text).lower()
    text = _expand_contractions(text)

    # NOTE(review): this runs before URL removal and EMOTICON_DICT contains
    # ':/', so the "://" of a URL is rewritten to " annoyed /" first. The URL
    # regex below then no longer matches such a URL.
    for emoticon, meaning in EMOTICON_DICT.items():
        text = text.replace(emoticon, f" {meaning} ")

    text = _convert_emojis(text)

    text = re.sub(r'https?://\S+', '', text)
    # Removes a bare "http" / "https" word (e.g. one left behind by the
    # emoticon replacement above).
    text = re.sub(r'\bhttps?\b', '', text)
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'#\w+', '', text)
    # '?', '!' and '.' are deleted without leaving a space; every other
    # punctuation character is replaced by a space on the next line.
    text = re.sub(r'[\?\!\.]', '', text)
    text = re.sub(f"[{re.escape(string.punctuation)}]", " ", text)
    text = re.sub(r'\s+', ' ', text).strip()

    tokens = _TOKENIZE(text)
    tokens = [word for word in tokens if word not in _STOP_WORDS and len(word) > 1]
    if _STEMMER is not None:
        tokens = [_STEMMER.stem(word) for word in tokens]
    return " ".join(tokens)


def extract_numeric_features(text):
    """Compute the hand-crafted count features of a raw tweet.

    Returns a dict with the character length, the whitespace-separated word
    count and one match count per module-level feature pattern. All values
    are computed on ``str(text)`` without any cleaning.
    """
    raw = str(text)
    features = {
        "text_length": len(raw),
        "word_count": len(raw.split()),
    }
    counted_patterns = (
        ("neg_form_count", _NEG_FORM_PATTERN),
        ("hashtags_count", _HASHTAG_PATTERN),
        ("urls_count", _URL_PATTERN),
        ("mentions_count", _MENTION_PATTERN),
        ("question_count", _QUESTION_PATTERN),
        ("exclamation_count", _EXCLAMATION_PATTERN),
        ("ellipsis_count", _ELLIPSIS_PATTERN),
        ("uppercase_count", _UPPERCASE_PATTERN),
    )
    for feature_name, pattern in counted_patterns:
        features[feature_name] = len(pattern.findall(raw))
    return features


def build_feature_vector(tfidf, feature_columns, sample_text, scaler=None):
    """Assemble the single-row model input for ``sample_text``.

    Args:
        tfidf: Fitted vectorizer exposing ``transform`` and
            ``get_feature_names_out``.
        feature_columns: Ordered column names of the model input.
        sample_text: Raw text to featurize.
        scaler: Optional fitted scaler applied to the assembled row.

    Returns:
        ``(X, info)`` where ``X`` has one row and one column per entry of
        ``feature_columns`` and ``info`` holds the preprocessed text and the
        numeric features. Features whose name is not in ``feature_columns``
        are ignored; columns without a feature stay at zero.
    """
    raw_text = str(sample_text)
    numeric = extract_numeric_features(raw_text)
    processed = preprocess_tweet(raw_text)

    weights = tfidf.transform([processed]).toarray().ravel()
    tfidf_values = {
        f"tfidf_{term}": weight
        for term, weight in zip(tfidf.get_feature_names_out(), weights)
    }

    positions = {column: position for position, column in enumerate(feature_columns)}
    row = np.zeros((1, len(feature_columns)), dtype=float)

    # Numeric features first, then the TF-IDF weights, each written into the
    # column that carries its name.
    for source in (numeric, tfidf_values):
        for column, value in source.items():
            position = positions.get(column)
            if position is None:
                continue
            row[0, position] = value

    if scaler is not None:
        # Hand over a DataFrame when the scaler was fitted with column names.
        wants_frame = pd is not None and hasattr(scaler, "feature_names_in_")
        scaler_input = pd.DataFrame(row, columns=feature_columns) if wants_frame else row
        row = scaler.transform(scaler_input)

    info = {
        "preprocessed_text": processed,
        "numeric_features": numeric,
    }
    return row, info


def infer_logreg(package_dir, sample_text=SAMPLE_TEXT):
    """Run one logistic-regression prediction and measure both stages.

    The vectorizer, model and optional scaler are loaded from
    ``package_dir`` first. Feature building and prediction are then run
    separately through ``run_with_peak_rss``.

    Returns:
        ``(summary, pre_peak, pre_delta, inf_peak, inf_delta)`` where
        ``summary`` holds the prediction, the optional probabilities, the
        preprocessed text, the numeric features and the feature count.
    """
    vectorizer = joblib.load(package_dir / "tfidf.joblib")
    classifier = joblib.load(package_dir / "model.joblib")

    columns_file = package_dir / "feature_columns.json"
    if columns_file.exists():
        feature_columns = json.loads(columns_file.read_text())
    else:
        feature_columns = [f"tfidf_{w}" for w in vectorizer.get_feature_names_out()]

    scaler_file = package_dir / "scaler.joblib"
    scaler = None
    if scaler_file.exists():
        scaler = joblib.load(scaler_file)
    # Column names stored on the fitted scaler override the ones found above.
    if scaler is not None and hasattr(scaler, "feature_names_in_"):
        feature_columns = list(scaler.feature_names_in_)

    (features, prep_info), pre_peak, pre_delta = run_with_peak_rss(
        lambda: build_feature_vector(vectorizer, feature_columns, sample_text, scaler=scaler)
    )

    def _predict():
        model_input = features
        if pd is not None and hasattr(classifier, "feature_names_in_"):
            model_input = pd.DataFrame(features, columns=feature_columns)
        labels = classifier.predict(model_input)
        if hasattr(classifier, "predict_proba"):
            return labels, classifier.predict_proba(model_input)
        return labels, None

    (pred, proba), inf_peak, inf_delta = run_with_peak_rss(_predict)

    summary = {
        "pred": pred.tolist(),
        "proba": proba.tolist() if proba is not None else None,
        "preprocessed_text": prep_info["preprocessed_text"],
        "numeric_features": prep_info["numeric_features"],
        "feature_count": len(feature_columns),
    }
    return summary, pre_peak, pre_delta, inf_peak, inf_delta


# Run every logistic-regression package once and record its memory figures.
if not logreg_packages:
    print("No logreg model packages found.")

for package_dir in logreg_packages:
    try:
        output, pre_peak, pre_delta, inf_peak, inf_delta = infer_logreg(package_dir)
        # Peak and delta get their own keys, and every "*_human" value is
        # formatted from the number stored next to it. This is the same
        # eight-key layout the BiLSTM and USE cells record.
        extra = {
            "preprocess_peak_rss": pre_peak,
            "preprocess_peak_rss_human": human_bytes(pre_peak),
            "preprocess_delta_rss": pre_delta,
            "preprocess_delta_rss_human": human_bytes(pre_delta),
            "inference_peak_rss": inf_peak,
            "inference_peak_rss_human": human_bytes(inf_peak),
            "inference_delta_rss": inf_delta,
            "inference_delta_rss_human": human_bytes(inf_delta),
        }
        record_result(
            results,
            f"logreg:{package_dir.name}",
            inf_peak,
            inf_delta,
            details=output,
            extra=extra,
        )
        print(
            f"{package_dir.name}: pred={output['pred']} preprocess_peak={human_bytes(pre_peak)} "
            f"infer_peak={human_bytes(inf_peak)}"
        )
    except Exception as exc:
        # Keep the notebook running: store the failure as a row instead.
        record_result(results, f"logreg:{package_dir.name}", None, None, error=exc)
        print(f"{package_dir.name}: error={exc}")


logreg_model_package_minmaxscaler: pred=[1] preprocess_peak=224.0 MiB infer_peak=225.5 MiB


# Model_2_advanced

In [2]:
import re
from typing import List, Tuple

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json

try:
    from emoji import demojize
    _HAS_EMOJI = True
except Exception:
    _HAS_EMOJI = False

# Location of the two BiLSTM packages (word2vec and GloVe embeddings).
BILSTM_DIR = NOTEBOOKS_DIR / "Model_2_advanced"
bilstm_packages = [
    BILSTM_DIR / "bilstm_w2v_model_package",
    BILSTM_DIR / "bilstm_glove_model_package",
]

# ---------- light, meaning-preserving normalization (from notebook_02) ----------
# Regex alternation -> replacement word. The patterns are applied one after
# the other in the order written here.
# NOTE(review): normalize_tweet lower-cases the text before these patterns
# run, so the upper-case alternatives (":D", ":P", "XD", "xD") cannot match
# there. ":(" is also replaced by "sad" before the ">:(" pattern is tried.
# Presumably this matches the training-time behaviour - confirm before fixing.
_EMOTICONS = {
    r":-\)|:\)|=\)|:\]": "smile",
    r":-D|:D|=D": "laugh",
    r":-\(|:\(|=\(|:\[": "sad",
    r":'\(|:'-\(": "cry",
    r";-\)|;\)": "wink",
    r":-P|:P": "playful",
    r":/|:-/": "skeptical",
    r":\*": "kiss",
    r">:\(|>:-\(": "angry",
    r"XD|xD": "laugh",
}

# Compiled once so the patterns are not re-parsed for every tweet.
_EMOTICON_REGEXES = [(re.compile(p), w) for p, w in _EMOTICONS.items()]

_URL_RE = re.compile(r"(https?://\S+|www\.\S+)")
_USER_RE = re.compile(r"@\w+")
# Standalone runs of digits only (word boundary on both sides).
_NUM_RE = re.compile(r"\b\d+\b")
# Group 1 is the hashtag text without the leading '#'.
_HASHTAG_RE = re.compile(r"#(\w+)")
# Any character repeated four or more times in a row.
_REPEAT_RE = re.compile(r"(.)\1{3,}")


def _emoticons_to_words(text: str) -> str:
    """Replace each known emoticon by its word, padded with spaces.

    The compiled patterns in ``_EMOTICON_REGEXES`` are applied one after the
    other, each on the result of the previous substitution.
    """
    converted = text
    for pattern, label in _EMOTICON_REGEXES:
        converted = pattern.sub(f" {label} ", converted)
    return converted


def _emojis_to_words(text: str) -> str:
    """Spell out emojis as plain words.

    The text is passed through ``demojize`` and every ``:some_name:`` token
    in the result is rewritten as `` some name `` (underscores become
    spaces). Without the optional ``emoji`` package the text is returned
    unchanged.
    """
    if _HAS_EMOJI:
        def _spell_out(match):
            return " " + match.group(1).replace("_", " ") + " "

        return re.sub(r":([a-zA-Z0-9_]+):", _spell_out, demojize(text, language="en"))
    return text


def normalize_tweet(t: str) -> str:
    """Lightly normalize a tweet before tokenization.

    Steps, in order: strip and lower-case; replace URLs, @mentions and
    standalone digit runs by ``<URL>``, ``<USER>`` and ``<NUM>``; spell out
    emoticons and emojis; keep every hashtag and append its bare word; cap
    runs of four or more identical characters at three; collapse whitespace.

    NOTE(review): other cells of this notebook define a function with the
    same name, so the version in effect depends on which cell ran last.
    """
    t = t.strip().lower()
    # Placeholders are inserted after lower-casing, so they stay upper-case.
    t = _URL_RE.sub(" <URL> ", t)
    t = _USER_RE.sub(" <USER> ", t)
    t = _NUM_RE.sub(" <NUM> ", t)
    t = _emoticons_to_words(t)
    t = _emojis_to_words(t)
    # "#tag" becomes "#tag tag": the hashtag itself plus its plain word.
    t = _HASHTAG_RE.sub(lambda m: f" #{m.group(1)} {m.group(1)} ", t)
    t = _REPEAT_RE.sub(r"\1\1\1", t)
    t = re.sub(r"\s+", " ", t).strip()
    return t


def preprocess_test(test_texts: List[str], tokenizer, max_len: int) -> Tuple[List[List[int]], List[List[int]]]:
    """Normalize, tokenize and pad a batch of texts.

    Returns:
        ``(padded, masks)``: ``padded`` is the output of ``pad_sequences``
        (post-padded and post-truncated to ``max_len``). ``masks`` is a
        nested list with 1 where ``padded`` is non-zero and 0 elsewhere.
    """
    cleaned = list(map(normalize_tweet, test_texts))
    padded = pad_sequences(
        tokenizer.texts_to_sequences(cleaned),
        maxlen=max_len,
        padding="post",
        truncating="post",
    )
    attention = (padded != 0).astype("int32")
    return padded, attention.tolist()


def load_tokenizer(tokenizer_path):
    """Rebuild the Keras tokenizer stored as JSON at ``tokenizer_path``.

    Raises:
        FileNotFoundError: If ``tokenizer_path`` does not exist.
    """
    if tokenizer_path.exists():
        return tokenizer_from_json(tokenizer_path.read_text())
    raise FileNotFoundError(f"Missing tokenizer.json in {tokenizer_path.parent}")


def get_max_len(example_path, default=50):
    """Infer the padded sequence length from a package's input example.

    The length of the first row of ``"token_ids"`` is returned when that
    entry is a non-empty list of lists. In every other case (including a
    missing file) ``default`` is returned.
    """
    token_ids = load_json(example_path).get("token_ids")
    is_batch = (
        isinstance(token_ids, list)
        and bool(token_ids)
        and isinstance(token_ids[0], list)
    )
    return len(token_ids[0]) if is_batch else default


def prepare_bilstm_inputs(package_dir, texts):
    """Load a package's tokenizer and turn ``texts`` into model inputs.

    Returns:
        ``(tokens, masks, info)`` where ``tokens`` and ``masks`` come from
        ``preprocess_test`` and ``info`` records the sequence length used.
    """
    tokenizer = load_tokenizer(package_dir / "tokenizer.json")
    max_len = get_max_len(package_dir / "input_example.json")
    tokens, masks = preprocess_test(texts, tokenizer, max_len)
    info = {"max_len": max_len}
    return tokens, masks, info


def infer_bilstm(package_dir, sample_text=SAMPLE_TEXT, prefer_keras=True):
    """Run one BiLSTM inference and measure preprocessing and inference.

    Args:
        package_dir: Package directory containing the tokenizer, the input
            example and a ``keras_model`` and/or ``saved_model`` export.
        sample_text: Text to classify.
        prefer_keras: Use the ``.keras`` file when it exists; otherwise (or
            when it is missing) fall back to the SavedModel export.

    Returns:
        ``(output, pre_peak, pre_delta, inf_peak, inf_delta)`` where
        ``output`` describes the backend used and the output shape or keys.

    Raises:
        RuntimeError: If neither export exists, or the SavedModel has no
            ``serving_default`` signature.
    """
    saved_model_dir = package_dir / "saved_model"
    keras_path = package_dir / "keras_model" / "model.keras"

    def run_preprocess():
        return prepare_bilstm_inputs(package_dir, [sample_text])

    (tokens, masks, prep_info), pre_peak, pre_delta = run_with_peak_rss(run_preprocess)

    def run_infer():
        # TensorFlow is imported and the model is loaded inside the measured
        # function, so both are part of the monitored window.
        import tensorflow as tf

        try:
            tf.config.set_visible_devices([], "GPU")
        except Exception:
            # Best effort: ignore failures to change the device visibility.
            pass

        if prefer_keras and keras_path.exists():
            model = tf.keras.models.load_model(keras_path)
            x = tf.convert_to_tensor(tokens, dtype=tf.int32)
            out = model(x)
            # Normalize the output shape to a plain list when possible.
            shape = getattr(out, "shape", None)
            if hasattr(shape, "as_list"):
                shape = shape.as_list()
            elif shape is not None:
                shape = list(shape)
            return {
                "backend": "keras",
                "output_shape": shape,
                "max_len": prep_info["max_len"],
            }

        if saved_model_dir.exists():
            model = tf.saved_model.load(saved_model_dir)
            infer = model.signatures.get("serving_default")
            if infer is None:
                raise RuntimeError("No serving_default signature.")

            # Keyword inputs of the signature, mapped to their tensor specs.
            expected = infer.structured_input_signature[1]
            feed = {}
            # First attempt: match inputs by well-known names.
            for name, spec in expected.items():
                if name in {"token_ids", "input_ids", "input", "input_1", "keras_tensor_8"}:
                    feed[name] = tf.convert_to_tensor(tokens, dtype=spec.dtype or tf.int32)
                elif "mask" in name and masks is not None:
                    feed[name] = tf.convert_to_tensor(masks, dtype=spec.dtype or tf.int32)

            # Second attempt: a single unnamed-as-expected input gets the tokens.
            if not feed and len(expected) == 1:
                name, spec = next(iter(expected.items()))
                feed[name] = tf.convert_to_tensor(tokens, dtype=spec.dtype or tf.int32)

            # Last resort: feed zeros of the declared shape (unknown
            # dimensions become 1) so the call can still be measured.
            if not feed:
                for name, spec in expected.items():
                    shape = [1 if dim is None else dim for dim in spec.shape]
                    feed[name] = tf.zeros(shape, dtype=spec.dtype)

            out = infer(**feed)
            return {
                "backend": "saved_model",
                "output_keys": list(out.keys()),
                "max_len": prep_info["max_len"],
            }

        raise RuntimeError("No keras or saved_model found.")

    output, inf_peak, inf_delta = run_with_peak_rss(run_infer)
    return output, pre_peak, pre_delta, inf_peak, inf_delta


# Measure each BiLSTM package once; missing packages are reported and skipped.
for package_dir in bilstm_packages:
    if not package_dir.exists():
        print(f"Missing package: {package_dir}")
        continue
    row_name = f"bilstm:{package_dir.name}"
    try:
        output, pre_peak, pre_delta, inf_peak, inf_delta = infer_bilstm(package_dir, prefer_keras=True)
        pre_peak_text = human_bytes(pre_peak)
        inf_peak_text = human_bytes(inf_peak)
        extra = dict(
            preprocess_peak_rss=pre_peak,
            preprocess_peak_rss_human=pre_peak_text,
            preprocess_delta_rss=pre_delta,
            preprocess_delta_rss_human=human_bytes(pre_delta),
            inference_peak_rss=inf_peak,
            inference_peak_rss_human=inf_peak_text,
            inference_delta_rss=inf_delta,
            inference_delta_rss_human=human_bytes(inf_delta),
        )
        record_result(results, row_name, inf_peak, inf_delta, details=output, extra=extra)
        print(
            f"{package_dir.name}: backend={output['backend']} "
            f"preprocess_peak={pre_peak_text} infer_peak={inf_peak_text}"
        )
    except Exception as exc:
        # Store the failure as a row so the remaining packages still run.
        record_result(results, row_name, None, None, error=exc)
        print(f"{package_dir.name}: error={exc}")


2026-01-05 10:24:30.682437: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-01-05 10:24:30.713813: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-01-05 10:24:33.001030: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2026-01-05 10:24:33.344905: E tensorflow/core/util/util.cc:131] oneDNN supports DT_BOOL only on platforms with AVX-512. Falling back to the default Eigen-based imple

bilstm_w2v_model_package: backend=keras preprocess_peak=731.2 MiB infer_peak=998.2 MiB
bilstm_glove_model_package: backend=keras preprocess_peak=929.5 MiB infer_peak=930.0 MiB


# Model_3_USE

In [2]:
import re

try:
    from emoji import demojize
    _HAS_EMOJI = True
except Exception:
    _HAS_EMOJI = False

# Location of the Universal Sentence Encoder package.
USE_DIR = NOTEBOOKS_DIR / "Model_3_USE" / "use_model_package"

# ---------- cleaning text (from notebook_03) ----------
# Regex alternation -> replacement word. The patterns are applied one after
# the other in the order written here.
# NOTE(review): normalize_tweet lower-cases the text before these patterns
# run, so the upper-case alternatives (":D", ":P", "XD", "xD") cannot match
# there. ":(" is also replaced by "sad" before the ">:(" pattern is tried.
# Presumably this matches the training-time behaviour - confirm before fixing.
_EMOTICONS = {
    r":-\)|:\)|=\)|:\]": "smile",
    r":-D|:D|=D": "laugh",
    r":-\(|:\(|=\(|:\[": "sad",
    r":'\(|:'-\(": "cry",
    r";-\)|;\)": "wink",
    r":-P|:P": "playful",
    r":/|:-/": "skeptical",
    r":\*": "kiss",
    r">:\(|>:-\(": "angry",
    r"XD|xD": "laugh",
}

# Compiled once so the patterns are not re-parsed for every tweet.
_EMOTICON_REGEXES = [(re.compile(p), w) for p, w in _EMOTICONS.items()]

_URL_RE = re.compile(r"(https?://\S+|www\.\S+)")
_USER_RE = re.compile(r"@\w+")
# Standalone runs of digits only (word boundary on both sides).
_NUM_RE = re.compile(r"\b\d+\b")
# Group 1 is the hashtag text without the leading '#'.
_HASHTAG_RE = re.compile(r"#(\w+)")
# Any character repeated four or more times in a row.
_REPEAT_RE = re.compile(r"(.)\1{3,}")


def _emoticons_to_words(text: str) -> str:
    """Substitute every known emoticon with its space-padded word.

    Each compiled pattern of ``_EMOTICON_REGEXES`` is applied in turn to the
    output of the previous one.
    """
    result = text
    for matcher, replacement_word in _EMOTICON_REGEXES:
        result = matcher.sub(f" {replacement_word} ", result)
    return result


def _emojis_to_words(text: str) -> str:
    """Convert emojis into plain words.

    ``demojize`` is applied first, then each ``:some_name:`` token is
    replaced by `` some name ``. When the optional ``emoji`` package is not
    installed the input is returned as-is.
    """
    if not _HAS_EMOJI:
        return text

    def _as_words(match):
        name = match.group(1)
        return " " + name.replace("_", " ") + " "

    demojized = demojize(text, language="en")
    return re.sub(r":([a-zA-Z0-9_]+):", _as_words, demojized)


def normalize_tweet(t: str) -> str:
    """Clean a tweet before it is fed to the sentence encoder.

    Steps, in order: strip and lower-case; replace URLs, @mentions and
    standalone digit runs by ``<URL>``, ``<USER>`` and ``<NUM>``; spell out
    emoticons and emojis; keep every hashtag and append its bare word; cap
    runs of four or more identical characters at three; collapse whitespace.

    NOTE(review): other cells of this notebook define a function with the
    same name, so the version in effect depends on which cell ran last.
    """
    t = t.strip().lower()
    # Placeholders are inserted after lower-casing, so they stay upper-case.
    t = _URL_RE.sub(" <URL> ", t)
    t = _USER_RE.sub(" <USER> ", t)
    t = _NUM_RE.sub(" <NUM> ", t)
    t = _emoticons_to_words(t)
    t = _emojis_to_words(t)
    # "#tag" becomes "#tag tag": the hashtag itself plus its plain word.
    t = _HASHTAG_RE.sub(lambda m: f" #{m.group(1)} {m.group(1)} ", t)
    t = _REPEAT_RE.sub(r"\1\1\1", t)
    t = re.sub(r"\s+", " ", t).strip()
    return t


def load_tf_input_example(example_path, tf_module):
    """Load an input-example JSON file as a dict of tensors.

    Each top-level entry is converted with ``tf_module.convert_to_tensor``.
    Entries for which the conversion fails are omitted from the result.
    """
    tensors = {}
    for field, payload in load_json(example_path).items():
        try:
            converted = tf_module.convert_to_tensor(payload)
        except Exception:
            # Deliberate best effort: skip values that are not tensor-like.
            continue
        tensors[field] = converted
    return tensors


def infer_use(package_dir):
    """Run one inference with the USE SavedModel and measure both stages.

    Args:
        package_dir: Package directory containing ``saved_model`` and,
            optionally, ``input_example.json``.

    Returns:
        ``(output, pre_peak, pre_delta, inf_peak, inf_delta)`` where
        ``output`` lists the keys of the signature's result.

    Raises:
        RuntimeError: If the SavedModel has no ``serving_default`` signature.
    """
    saved_model_dir = package_dir / "saved_model"
    example_path = package_dir / "input_example.json"

    def run_preprocess():
        raw = SAMPLE_TEXT
        normalized = normalize_tweet(raw)
        return normalized

    normalized_text, pre_peak, pre_delta = run_with_peak_rss(run_preprocess)

    def run_infer():
        # TensorFlow is imported and the model is loaded inside the measured
        # function, so both are part of the monitored window.
        import tensorflow as tf

        try:
            tf.config.set_visible_devices([], "GPU")
        except Exception:
            # Best effort: ignore failures to change the device visibility.
            pass

        model = tf.saved_model.load(saved_model_dir)
        infer = model.signatures.get("serving_default")
        if infer is None:
            raise RuntimeError("No serving_default signature.")

        example_inputs = load_tf_input_example(example_path, tf)
        # Keyword inputs of the signature, mapped to their tensor specs.
        expected = infer.structured_input_signature[1]
        feed = {}
        # NOTE(review): a value from input_example.json takes precedence over
        # the normalized sample text, so the sample text is only fed when the
        # example file does not provide that input.
        for name, spec in expected.items():
            if name in example_inputs:
                feed[name] = tf.convert_to_tensor(example_inputs[name], dtype=spec.dtype)
            elif name == "input_1":
                feed[name] = tf.convert_to_tensor([normalized_text], dtype=spec.dtype or tf.string)

        # Fallback when nothing matched: string inputs get the sample text,
        # all others get zeros of the declared shape (unknown dims become 1).
        if not feed:
            for name, spec in expected.items():
                shape = [1 if dim is None else dim for dim in spec.shape]
                if spec.dtype == tf.string:
                    feed[name] = tf.convert_to_tensor([normalized_text], dtype=tf.string)
                else:
                    feed[name] = tf.zeros(shape, dtype=spec.dtype)

        out = infer(**feed)
        return {
            "output_keys": list(out.keys()),
        }

    output, inf_peak, inf_delta = run_with_peak_rss(run_infer)
    return output, pre_peak, pre_delta, inf_peak, inf_delta


# Measure the USE package once, or report that it is missing.
if USE_DIR.exists():
    try:
        output, pre_peak, pre_delta, inf_peak, inf_delta = infer_use(USE_DIR)
        pre_peak_text = human_bytes(pre_peak)
        inf_peak_text = human_bytes(inf_peak)
        extra = dict(
            preprocess_peak_rss=pre_peak,
            preprocess_peak_rss_human=pre_peak_text,
            preprocess_delta_rss=pre_delta,
            preprocess_delta_rss_human=human_bytes(pre_delta),
            inference_peak_rss=inf_peak,
            inference_peak_rss_human=inf_peak_text,
            inference_delta_rss=inf_delta,
            inference_delta_rss_human=human_bytes(inf_delta),
        )
        record_result(results, "use:use_model_package", inf_peak, inf_delta, details=output, extra=extra)
        print(f"use_model_package: preprocess_peak={pre_peak_text} infer_peak={inf_peak_text}")
    except Exception as exc:
        # Store the failure as a row instead of aborting the notebook.
        record_result(results, "use:use_model_package", None, None, error=exc)
        print(f"use_model_package: error={exc}")
else:
    print(f"Missing package: {USE_DIR}")


2026-01-05 10:22:25.064971: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-01-05 10:22:25.610568: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


use_model_package: preprocess_peak=64.9 MiB infer_peak=742.9 MiB


2026-01-05 10:22:29.707779: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


# Model_4_DISTILBERT

In [4]:
import re

# Layout of the DistilBERT package: tokenizer files and model weights live in
# separate sub-directories.
DISTIL_DIR = NOTEBOOKS_DIR / "Model_4_DISTILBERT" / "distilbert_model_package"
TOKENIZER_DIR = DISTIL_DIR / "tokenizer"
HF_MODEL_DIR = DISTIL_DIR / "hf_model"

def normalize_tweet(t: str) -> str:
    """Trim ``t`` and collapse every whitespace run into a single space."""
    trimmed = t.strip()
    return re.sub(r"\s+", " ", trimmed)

def infer_distilbert(sample_text=SAMPLE_TEXT):
    """Run one DistilBERT forward pass and measure its peak RSS.

    Importing torch/transformers, loading the tokenizer and model,
    tokenizing and the forward pass all happen inside the measured function.

    Args:
        sample_text: Text to classify.

    Returns:
        The ``(output, peak, delta)`` tuple of ``run_with_peak_rss``, where
        ``output`` holds the logits (nested list) and the normalized text.
    """
    def run():
        os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
        import torch
        from transformers import AutoModelForSequenceClassification, AutoTokenizer

        tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, use_fast=True)
        model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL_DIR)
        model.eval()
        clean_text = normalize_tweet(sample_text)
        inputs = tokenizer(
            clean_text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=MAX_LENGTH,
        )
        # Disable autograd for the forward pass only. A bare
        # torch.set_grad_enabled(False) would stay in effect for every later
        # cell that runs in this kernel.
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits.detach().cpu().numpy()
        return {
            "logits": logits.tolist(),
            "normalized_text": clean_text,
        }

    return run_with_peak_rss(run)

# Measure the DistilBERT model once, or report that its package is missing.
if HF_MODEL_DIR.exists():
    try:
        output, peak, delta = infer_distilbert()
        record_result(results, "distilbert:hf_model", peak, delta, details=output)
        print(f"distilbert hf_model: peak_rss={human_bytes(peak)}")
    except Exception as exc:
        # Store the failure as a row instead of aborting the notebook.
        record_result(results, "distilbert:hf_model", None, None, error=exc)
        print(f"distilbert hf_model: error={exc}")
else:
    print(f"Missing package: {DISTIL_DIR}")

2026-01-05 10:16:22.744611: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-01-05 10:16:22.771271: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


distilbert hf_model: peak_rss=1.2 GiB


# Model_4_DISTILBERT_onnx_int8

In [2]:
import re
import numpy as np

# The quantized ONNX model has its own directory; the tokenizer is the one
# shipped with the DistilBERT package.
ONNX_DIR = NOTEBOOKS_DIR / "Model_4_DISTILBERT_quant"
ONNX_PATH = ONNX_DIR / "model-int8-static.onnx"
TOKENIZER_DIR = NOTEBOOKS_DIR / "Model_4_DISTILBERT" / "distilbert_model_package" / "tokenizer"

def normalize_tweet(t: str) -> str:
    """Strip the ends of ``t`` and squeeze whitespace runs to one space."""
    whitespace_run = re.compile(r"\s+")
    return whitespace_run.sub(" ", t.strip())

def infer_onnx(sample_text=SAMPLE_TEXT):
    """Run the static-int8 ONNX model once and measure its peak RSS.

    Imports, tokenizer loading, session creation and the forward pass all
    happen inside the measured function.

    Returns:
        The ``(output, peak, delta)`` tuple of ``run_with_peak_rss``, where
        ``output`` holds the output shapes and the normalized text.
    """
    def run():
        import onnxruntime as ort
        from transformers import AutoTokenizer

        tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, use_fast=True)
        clean_text = normalize_tweet(sample_text)
        encoded = tokenizer(
            clean_text,
            return_tensors="np",
            padding="max_length",
            truncation=True,
            max_length=MAX_LENGTH,
        )
        session = ort.InferenceSession(str(ONNX_PATH), providers=["CPUExecutionProvider"])

        # Feed every graph input the tokenizer produced; a token_type_ids
        # input that the tokenizer did not emit is filled with zeros.
        feed = {}
        for name in (meta.name for meta in session.get_inputs()):
            if name in encoded:
                feed[name] = encoded[name]
                continue
            if name == "token_type_ids":
                feed[name] = np.zeros_like(encoded["input_ids"])

        outputs = session.run(None, feed)
        return {
            "output_shapes": [getattr(item, "shape", None) for item in outputs],
            "normalized_text": clean_text,
        }

    return run_with_peak_rss(run)

# Measure the static-int8 ONNX model once, or report that the file is missing.
if ONNX_PATH.exists():
    try:
        output, peak, delta = infer_onnx()
        record_result(results, "onnx:model-int8-static", peak, delta, details=output)
        print(f"onnx model-int8-static: peak_rss={human_bytes(peak)}")
    except Exception as exc:
        # Store the failure as a row instead of aborting the notebook.
        record_result(results, "onnx:model-int8-static", None, None, error=exc)
        print(f"onnx model-int8-static: error={exc}")
else:
    print(f"Missing package: {ONNX_PATH}")

  from .autonotebook import tqdm as notebook_tqdm


onnx model-int8-static: peak_rss=665.2 MiB


#  Model_4_DISTILBERT_model-int8-dynamic

In [3]:
import re
import numpy as np

# Each guard below reuses a name that an earlier cell already defined and
# only creates it when this cell is run on its own (NameError).
try:
    ONNX_DIR
except NameError:
    ONNX_DIR = NOTEBOOKS_DIR / "Model_4_DISTILBERT_quant"

try:
    TOKENIZER_DIR
except NameError:
    TOKENIZER_DIR = NOTEBOOKS_DIR / "Model_4_DISTILBERT" / "distilbert_model_package" / "tokenizer"

# NOTE(review): several cells define normalize_tweet with different
# behaviour; this guard keeps whichever one ran last in the kernel. Confirm
# that the whitespace-only version is the one in effect here.
try:
    normalize_tweet
except NameError:
    def normalize_tweet(t: str) -> str:
        return re.sub(r"\s+", " ", t.strip())

ONNX_DYNAMIC_PATH = ONNX_DIR / "model-int8-dynamic.onnx"


def infer_onnx_dynamic(sample_text=SAMPLE_TEXT):
    """Run the dynamic-int8 ONNX model once and measure its peak RSS.

    Imports, tokenizer loading, session creation and the forward pass all
    happen inside the measured function.

    Returns:
        The ``(output, peak, delta)`` tuple of ``run_with_peak_rss``, where
        ``output`` holds the output shapes and the normalized text.
    """
    def run():
        import onnxruntime as ort
        from transformers import AutoTokenizer

        tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, use_fast=True)
        clean_text = normalize_tweet(sample_text)
        encoded = tokenizer(
            clean_text,
            return_tensors="np",
            padding="max_length",
            truncation=True,
            max_length=MAX_LENGTH,
        )
        session = ort.InferenceSession(str(ONNX_DYNAMIC_PATH), providers=["CPUExecutionProvider"])

        # Feed every graph input the tokenizer produced; a token_type_ids
        # input that the tokenizer did not emit is filled with zeros.
        feed = {}
        for graph_input in session.get_inputs():
            input_name = graph_input.name
            if input_name in encoded:
                feed[input_name] = encoded[input_name]
                continue
            if input_name == "token_type_ids":
                feed[input_name] = np.zeros_like(encoded["input_ids"])

        raw_outputs = session.run(None, feed)
        summary = {
            "output_shapes": [getattr(item, "shape", None) for item in raw_outputs],
            "normalized_text": clean_text,
        }
        return summary

    return run_with_peak_rss(run)

# Measure the dynamic-int8 ONNX model once, or report that the file is missing.
if ONNX_DYNAMIC_PATH.exists():
    try:
        output, peak, delta = infer_onnx_dynamic()
        record_result(results, "onnx:model-int8-dynamic", peak, delta, details=output)
        print(f"onnx model-int8-dynamic: peak_rss={human_bytes(peak)}")
    except Exception as exc:
        # Store the failure as a row instead of aborting the notebook.
        record_result(results, "onnx:model-int8-dynamic", None, None, error=exc)
        print(f"onnx model-int8-dynamic: error={exc}")
else:
    print(f"Missing package: {ONNX_DYNAMIC_PATH}")


onnx model-int8-dynamic: peak_rss=660.8 MiB
