In [1]:
from typing import TYPE_CHECKING


# TYPE_CHECKING is False at runtime, so this block only informs static type checkers / the IDE.
if TYPE_CHECKING:
    from math_rag.application.containers import ApplicationContainer
    from math_rag.infrastructure.containers import InfrastructureContainer

    # Neither container is assigned anywhere in this notebook; presumably the
    # `hooks.notebook_hook` extension loaded in the next cell injects them — TODO confirm.
    application_container: ApplicationContainer
    infrastructure_container: InfrastructureContainer

In [2]:
# NOTE(review): presumably read by hooks.notebook_hook when the extension loads — confirm what it resets
RESET = False
%load_ext hooks.notebook_hook

2025-07-13 16:25:59,660 - INFO - datasets - config.py:54 - PyTorch version 2.6.0 available.


## Setup

In [3]:
math_expression_description_writer_assistant = (
    application_container.math_expression_description_writer_assistant()
)
math_expression_description_optimizer_assistant = (
    application_container.math_expression_description_optimizer_assistant()
)
math_expression_comparator_assistant = application_container.math_expression_comparator_assistant()
math_expression_relationship_description_writer_assistant = (
    application_container.math_expression_relationship_description_writer_assistant()
)
math_expression_relationship_detector_assistant = (
    application_container.math_expression_relationship_detector_assistant()
)

default_embedder = application_container.default_embedder()
math_expression_description_opt_embedding_repository = (
    infrastructure_container.math_expression_description_opt_embedding_repository()
)
math_expression_description_opt_repository = (
    infrastructure_container.math_expression_description_opt_repository()
)
math_expression_description_repository = (
    infrastructure_container.math_expression_description_repository()
)
math_expression_group_repository = infrastructure_container.math_expression_group_repository()
math_expression_group_graph_repository = (
    await infrastructure_container.math_expression_group_graph_repository()
)
math_expression_repository = infrastructure_container.math_expression_repository()
grouper_service = application_container.grouper_service()

math_expression_graph_repository = await infrastructure_container.math_expression_graph_repository()
math_expression_relationship_repository = (
    infrastructure_container.math_expression_relationship_repository()
)
math_expression_relationship_description_repository = (
    infrastructure_container.math_expression_relationship_description_repository()
)
katex_corrector_service = application_container.katex_corrector_service()
math_expression_context_repository = infrastructure_container.math_expression_context_repository()
math_article_chunk_repository = infrastructure_container.math_article_chunk_repository()
math_expression_index_repository = infrastructure_container.math_expression_index_repository()

In [4]:
from pathlib import Path

from math_rag.core.models import MathArticle


# Path handed to google_drive_repository.get_file_id to locate the article source.
MATH_ARTICLE_PATH = Path('ml/lectures/L07-LogisticRegression2/2024_08_10_2174b40686820b4cb591g.tex')

google_drive_repository = infrastructure_container.google_drive_repository()
math_article_parser_service = infrastructure_container.math_article_parser_service()

file_id = google_drive_repository.get_file_id(MATH_ARTICLE_PATH)

if not file_id:
    # name the path so a failed lookup is diagnosable from the traceback alone
    raise ValueError(f'google_drive_repository.get_file_id returned no id for {MATH_ARTICLE_PATH}')

file_content = google_drive_repository.get_file_by_id(file_id)

# The article is not tied to a dataset or an index at this point (both ids are None).
math_article = MathArticle(
    math_expression_dataset_id=None,
    math_expression_index_id=None,
    name='article',
    bytes=file_content.getvalue(),
)

2025-07-13 15:43:40,565 - INFO - googleapiclient.discovery_cache - __init__.py:49 - file_cache is only supported with oauth2client<4.0.0


In [5]:
from uuid import UUID

from math_rag.application.utils import InputCreatorUtil
from math_rag.core.models import MathExpressionIndex
from math_rag.infrastructure.utils import (
    TemplateChunkerUtil,
    TemplateContextChunkerUtil,
    TemplateFormatterUtil,
    TemplateIndexFinderUtil,
)


# New index created without an explicit id; the cells below pass `index.id` as
# `math_expression_index_id` on the records they create.
index = MathExpressionIndex()
index.id

UUID('91ad04fa-05f3-4250-8041-e04efb492515')

In [6]:
# index = MathExpressionIndex(id=UUID('d7672957-dc0d-4f9d-8fbb-da2d91cb0dc2'))
# index.id

In [53]:
index_id_to_remove = index.id
common_filter = dict(math_expression_index_id=index_id_to_remove)

await math_expression_index_repository.delete_one(filter=dict(id=index_id_to_remove))

await math_expression_repository.delete_many(filter=common_filter.copy())
await math_expression_context_repository.delete_many(filter=common_filter.copy())
await math_expression_description_repository.delete_many(filter=common_filter.copy())
await math_expression_description_opt_repository.delete_many(filter=common_filter.copy())
await math_expression_group_repository.delete_many(filter=common_filter.copy())
await math_article_chunk_repository.delete_many(filter=common_filter.copy())
await math_expression_relationship_repository.delete_many(filter=common_filter.copy())
await math_expression_relationship_description_repository.delete_many(filter=common_filter.copy())
await math_expression_description_opt_embedding_repository.clear()
await math_expression_graph_repository.clear()
await math_expression_group_graph_repository.clear()



## Nodes

### 1. MathExpression, requires: MathArticle

In [6]:
from math_rag.core.models import MathExpression


math_nodes, _, template = math_article_parser_service.parse_for_index(math_article)
math_nodes.sort(key=lambda x: x.position)

katexes = [math_node.latex.strip('$') for math_node in math_nodes]
valid_katexes = await katex_corrector_service.correct(katexes, max_num_retries=3)
math_expressions = [
    MathExpression(
        math_article_id=math_article.id,
        math_expression_dataset_id=None,
        math_expression_group_id=None,
        math_expression_index_id=index.id,
        latex=node.latex,
        katex=katex.strip(),
        index=i,
        position=node.position,
        is_inline=node.is_inline,
    )
    for i, (node, katex) in enumerate(zip(math_nodes, valid_katexes))
]
await math_expression_repository.insert_many(math_expressions)

In [7]:
# Math expressions appear in the template as `[math_placeholder | <index>]` markers.
print(template)


Jan Šnajder, lectures, v2.0

Last time we introduced the logistic regression algorithm. We defined the model and derived the cross-entropy error function as the negative probability of the labels in the training set. We established that minimizing that error had no solution in closed form, so we turned to iterative procedures. We have considered the simplest such procedure, the gradient descent algorithm, and we applied it to logistic regression, in standard (batch) and stochastic variant. In the end, we talked about regularization, specifically [math_placeholder | 0] regularization, which we incorporated quite straightforwardly into the optimization process.

Today we'll talk a bit more about logistic regression. First, we'll consider some more efficient (read: faster) alternatives to gradient descent. Second, we'll consider the extension of binary logistic regression to multiclass logistic regression. Third, we'll look at all the models discussed thus far and see what they have in c

### 2. MathExpressionContext, requires: MathExpression

In [8]:
context_templates = TemplateContextChunkerUtil.chunk(template, max_context_size=1000)

# The next cell zips math_expressions with context_templates, so the two lists must
# line up one-to-one; report both sizes if they do not.
assert len(context_templates) == len(math_expressions), (
    f'expected {len(math_expressions)} context templates, got {len(context_templates)}'
)

print(context_templates[1])

above consideration, we can conclude that the batch gradient descent could be improved if we take into account not only the slope (gradient) but also the curvature (the change in gradient, i.e., the second derivative) of the error function. Such optimization methods are referred to as second-order optimization, as opposed to first-order optimization methods, such as gradient descent. The basic second-order optimization method is the Newton's method.


Consider minimization of function [math_placeholder | 1]. We know that the parameter update in gradient descent is as follows:

[math_placeholder | 2]

If we introduce an index for the iterations, then we can write this as an equation:

[math_placeholder | 3]

The idea with Newton's method is to take the point [math_placeholder | 4] (the current minimum) and compute at it the quadratic approximation of the function [math_placeholder | 5], and then move to the minimizer of this quadratic approximation (which is known analytically). If [mat

In [9]:
from math_rag.core.models import MathExpressionContext


index_to_katex = {
    math_expression.index: math_expression.katex for math_expression in math_expressions
}
math_expression_contexts: list[MathExpressionContext] = []

for math_expression, context_template in zip(math_expressions, context_templates):
    formatted_context, _ = TemplateFormatterUtil.format(
        context_template, index_to_katex, omit_wrapper=False
    )
    math_expression_context = MathExpressionContext(
        math_article_id=math_article.id,
        math_expression_id=math_expression.id,
        math_expression_index_id=index.id,
        text=formatted_context,
    )
    math_expression_contexts.append(math_expression_context)

await math_expression_context_repository.insert_many(math_expression_contexts)

### 3. MathExpressionDescription, requires: MathExpression, MathExpressionContext

In [10]:
from math_rag.application.models.assistants.inputs import (
    MathExpressionDescriptionWriter as AssistantInput,
)
from math_rag.core.models import MathExpressionDescription
from math_rag.infrastructure.constants.services import MATH_TEMPLATE


inputs: list[AssistantInput] = []
input_id_to_math_expression_id: dict[UUID, UUID] = {}

for math_expression, math_expression_context in zip(math_expressions, math_expression_contexts):
    input = AssistantInput(
        katex=MATH_TEMPLATE.format(katex=math_expression.katex, index=math_expression.index),
        context=math_expression_context.text,
    )
    inputs.append(input)
    input_id_to_math_expression_id[input.id] = math_expression.id

outputs = await math_expression_description_writer_assistant.concurrent_assist(inputs)
math_expression_descriptions = [
    MathExpressionDescription(
        math_expression_index_id=index.id,
        math_expression_id=input_id_to_math_expression_id[output.input_id],
        text=output.description,
    )
    for output in outputs
]
await math_expression_description_repository.insert_many(math_expression_descriptions)

In [11]:
# id, text, a separator and a blank line for every description
for description in math_expression_descriptions:
    print(description.id, description.text, '-----', '', sep='\n')

ca3e0217-7435-428f-b730-548872b6fcb9
the point about which the Taylor series expansion of a differentiable function is performed
-----

1cca5ec7-5406-480f-b0c8-8d20d5601030
the value of a general differentiable function evaluated at the variable x
-----

4fa1f4b5-66c0-4c72-a0f9-46f876035c4f
The number of examples in the training set.
-----

ae67cc28-59b1-4ed6-a289-85933dbf9137
the black curve representing the function of a single variable that is being minimized in the context of Newton's method
-----

40de932a-3537-479b-a354-9321d97b13e6
The error function that depends on the vector w and is conditioned on the dataset D.
-----

082446e9-48b4-4cb1-bf39-2638a9aa62b1
The number of dimensions of the feature space.
-----

77abde18-8d84-4cb8-bbf0-c67858d23cd4
the initial point from which the minimization process starts in the context of minimizing a function of one variable using Newton's method
-----

f6a6a73b-f275-43ad-8a0d-5956cca28a04
the point at which the quadratic approximation of th

### 4. MathExpressionDescriptionOpt, requires: MathExpressionDescription

In [12]:
from math_rag.application.models.assistants.inputs import (
    MathExpressionDescriptionOptimizer as AssistantInput,
)
from math_rag.core.models import MathExpressionDescriptionOpt


inputs, input_id_to_math_expression_description = InputCreatorUtil.create(
    math_expression_descriptions, lambda x: AssistantInput(description=x.text)
)
outputs = await math_expression_description_optimizer_assistant.concurrent_assist(inputs)
math_expression_descriptions_opt = [
    MathExpressionDescriptionOpt(
        math_expression_id=input_id_to_math_expression_description[
            output.input_id
        ].math_expression_id,
        math_expression_description_id=input_id_to_math_expression_description[output.input_id].id,
        math_expression_index_id=index.id,
        text=output.description,
    )
    for output in outputs
]

In [13]:
# source description id, optimized text, a separator and a blank line per item
for description_opt in math_expression_descriptions_opt:
    print(
        description_opt.math_expression_description_id,
        description_opt.text,
        '-----',
        '',
        sep='\n',
    )

4fa1f4b5-66c0-4c72-a0f9-46f876035c4f
Number of examples in the training set.
-----

082446e9-48b4-4cb1-bf39-2638a9aa62b1
Number of dimensions in the feature space.
-----

bf9900fe-ea6c-4ce1-9fbd-e34c754376ca
Value of the parameter vector at iteration t in an iterative optimization process.
-----

b2f94925-5508-4400-89fb-a2f8b15f840d
total number of classes in multinomial logistic regression model
-----

34a96b49-6c94-43b4-a80f-762fedc7fbbf
Point in n-dimensional space where function f is evaluated and Hessian matrix is computed.
-----

68e6418c-1209-4b4d-bd30-14dc913dc533
Hessian matrix in Newton's optimization method determines the curvature of the function being minimized.
-----

40de932a-3537-479b-a354-9321d97b13e6
Error function dependent on vector w, conditioned on dataset D.
-----

fee0f857-90da-428f-987d-811d3ad1f34e
Function mapping an n-dimensional real vector to a real-valued scalar output.
-----

1cca5ec7-5406-480f-b0c8-8d20d5601030
value of a differentiable function at vari

In [14]:
from more_itertools import unzip

from math_rag.application.models.embedders import EmbedderInput


inputs, input_id_to_item = InputCreatorUtil.create(
    math_expression_descriptions_opt, lambda x: EmbedderInput(text=x.text)
)
outputs = await default_embedder.concurrent_embed(inputs)
descriptions, embeddings = unzip(
    (input_id_to_item[output.input_id], output.embedding) for output in outputs
)
descriptions, embeddings = list(descriptions), list(embeddings)
await math_expression_description_opt_repository.insert_many(descriptions)
await math_expression_description_opt_embedding_repository.upsert_many(descriptions, embeddings)

### 5. MathExpressionGroup, requires: MathExpressionDescription, MathExpressionContext

In [15]:
# Group the stored description embeddings, passing grouper_service.group as the grouping
# callable. The cells below iterate the result as groups of descriptions that expose
# `.id` and `.math_expression_id`.
grouped_descriptions = await math_expression_description_opt_embedding_repository.group(
    grouper_service.group
)

In [16]:
from qdrant_client.http.models import Record


grouped_records: list[list[Record]] = []

for descriptions in grouped_descriptions:
    ids = [x.id for x in descriptions]
    records = await math_expression_description_opt_embedding_repository.client.retrieve(
        collection_name=math_expression_description_opt_embedding_repository.collection_name,
        ids=[str(id) for id in ids],
        with_payload=True,
        with_vectors=True,
    )

    for record in records:
        # remove some data for a clener diagram
        record.payload['text'] = record.payload['text'][:50]
        record.payload.pop('math_expression_description_id')
        record.payload.pop('math_expression_index_id')
        record.payload.pop('timestamp')

    grouped_records.append(records)

In [17]:
import os

import pandas as pd
import plotly.express as px

from sklearn.datasets import make_blobs


# Set to an empty string before umap is imported (avoids a kernel crash on ARM).
os.environ['NUMBA_CPU_FEATURES'] = ''
import umap

#### Example data

In [None]:
# synthetic data: 500 samples with 10 features around 5 centers
X, y = make_blobs(n_samples=500, centers=5, n_features=10, cluster_std=1.0, random_state=42)

# reduce to two components for plotting
reducer = umap.UMAP(n_components=2, metric='euclidean', random_state=None)
X_umap = reducer.fit_transform(X)

In [None]:
# two UMAP columns plus the blob label of every sample
df = pd.DataFrame(X_umap, columns=['UMAP1', 'UMAP2']).assign(cluster=y)

fig = px.scatter(df, x='UMAP1', y='UMAP2', color='cluster', hover_data=['cluster'])
fig.show()

#### Real data

In [18]:
# Flatten the groups; the position of a group in grouped_records is its cluster label.
records = []
cluster_labels = []

for cluster_label, group in enumerate(grouped_records):
    records.extend(group)
    cluster_labels.extend([cluster_label] * len(group))

vectors = [record.vector for record in records]

# figure out which payload keys exist across all records
payload_keys = set()

for record in records:
    payload_keys.update(record.payload.keys())

reducer = umap.UMAP(n_components=2, metric='cosine', random_state=None)
X_umap = reducer.fit_transform(vectors)

In [19]:
# One row per record: its 2D coordinates, cluster label, id and all payload fields.
rows = [
    {
        'UMAP_1': umap_1,
        'UMAP_2': umap_2,
        'cluster': cluster_label,
        'id': record.id,
        **(record.payload or {}),  # add all payload fields
    }
    for umap_1, umap_2, cluster_label, record in zip(
        X_umap[:, 0], X_umap[:, 1], cluster_labels, records
    )
]

df = pd.DataFrame(rows)

fig = px.scatter(
    df,
    x='UMAP_1',
    y='UMAP_2',
    color='cluster',
    hover_data=[*payload_keys, 'id', 'cluster'],
)
fig.show()

#### Continue: create groups from the clusters

In [27]:
from math_rag.core.models import MathExpressionGroup


grouped_math_expression_ids = [
    [description.math_expression_id for description in descriptions]
    for descriptions in grouped_descriptions
]

for math_expression_ids in grouped_math_expression_ids:
    # group requires at least two elements
    if len(math_expression_ids) < 2:
        continue

    math_expression_group = MathExpressionGroup(math_expression_index_id=index.id)
    await math_expression_group_repository.insert_one(math_expression_group)
    await math_expression_group_graph_repository.insert_one_node(math_expression_group)

    # add all candidates to a group, remove some of them in the next step
    await math_expression_repository.update_group_id(math_expression_ids, math_expression_group.id)

### 6. MathExpressionGroupRelationship

In [None]:
from itertools import combinations

from math_rag.application.models.assistants.inputs import MathExpressionComparator as AssistantInput
from math_rag.application.utils import GroupPrunerUtil
from math_rag.core.models import MathExpressionGroupRelationship


math_expression_groups = await math_expression_group_repository.find_many(
    filter=dict(math_expression_index_id=index.id)
)

for math_expression_group in math_expression_groups:
    math_expressions = await math_expression_repository.find_many(
        filter=dict(math_expression_group_id=math_expression_group.id)
    )
    math_expression_ids = [math_expression.id for math_expression in math_expressions]
    math_expression_contexts = await math_expression_context_repository.find_many(
        filter=dict(math_expression_id=math_expression_ids)
    )
    pairs = list(combinations(zip(math_expressions, math_expression_contexts), 2))

    print(len(math_expression_ids))
    print(len(pairs))
    print('----')

    if not pairs:
        continue

    inputs: list[AssistantInput] = []
    input_id_to_candidate_pair: dict[UUID, tuple[UUID, UUID]] = {}

    for pair, other_pair in pairs:
        math_expression, math_expression_context = pair
        other_math_expression, other_math_expression_context = other_pair
        input = AssistantInput(
            katex=math_expression.katex,
            context=math_expression_context.text,
            other_katex=other_math_expression.katex,
            other_context=other_math_expression_context.text,
        )
        inputs.append(input)
        input_id_to_candidate_pair[input.id] = (math_expression.id, other_math_expression.id)

    outputs = await math_expression_comparator_assistant.concurrent_assist(inputs)

    candidates = math_expression_ids
    candidate_pair_to_is_connected = {
        input_id_to_candidate_pair[output.input_id]: output.is_identical for output in outputs
    }

    math_expression_ids = [math_expression.id for math_expression in math_expressions]
    math_expression_ids_to_group = GroupPrunerUtil.prune(candidates, candidate_pair_to_is_connected)
    math_expression_ids_to_ungroup = list(
        set(math_expression_ids) - set(math_expression_ids_to_group)
    )

    if not math_expression_ids_to_group:
        continue

    math_expression_group_relationships = [
        MathExpressionGroupRelationship(
            math_expression_index_id=index.id,
            math_expression_id=math_expression_id,
            math_expression_group_id=math_expression_group.id,
        )
        for math_expression_id in math_expression_ids_to_group
    ]
    await math_expression_repository.update_group_id(math_expression_ids_to_ungroup, None)

    # insert updated math expressions to the graph database
    math_expressions_updated = await math_expression_repository.find_many(
        filter=dict(id=math_expression_ids)
    )
    await math_expression_graph_repository.insert_many_nodes(math_expressions_updated)

    math_expression_group_relationships = [
        MathExpressionGroupRelationship(
            math_expression_index_id=index.id,
            math_expression_id=math_expression_id,
            math_expression_group_id=math_expression_group.id,
        )
        for math_expression_id in math_expression_ids_to_group
    ]

    # print(len(math_expression_ids))
    # print(len(math_expression_ids_to_group))
    # print(len(math_expression_ids_to_ungroup))
    # print(len(math_expression_group_relationships))
    # print('----')

    await math_expression_group_graph_repository.insert_many_rels(
        math_expression_group_relationships, rel_to_cls=MathExpression
    )

## Relationships

### 1. MathArticleChunk, requires: MathExpression

In [29]:
# Reload the math expressions of this index from the repository: the grouping cells
# change group ids through `update_group_id` on the repository, not on in-memory objects.
math_expressions = await math_expression_repository.find_many(
    filter=dict(math_expression_index_id=index.id)
)

In [30]:
from math_rag.core.models import MathArticleChunk


index_to_katex = {
    math_expression.index: math_expression.katex for math_expression in math_expressions
}
chunk_templates = TemplateChunkerUtil.chunk(template, max_window_size=2048, max_padding=256)
math_article_chunks: list[MathArticleChunk] = []

for i, chunk_template in enumerate(chunk_templates):
    indexes = TemplateIndexFinderUtil.find(chunk_template)
    formatted_chunk, _ = TemplateFormatterUtil.format(
        chunk_template, index_to_katex, omit_wrapper=False
    )
    # print(_)
    math_article_chunk = MathArticleChunk(
        math_article_id=math_article.id,
        math_expression_index_id=index.id,
        index=i,
        indexes=indexes,
        text=formatted_chunk,
    )
    math_article_chunks.append(math_article_chunk)

await math_article_chunk_repository.insert_many(math_article_chunks)

### 2. MathExpressionRelationship, requires: MathExpression, MathArticleChunk

In [46]:
# Fetch what is stored for this index, so the cells of this section work on the
# persisted math expressions and article chunks.
math_expressions = await math_expression_repository.find_many(
    filter=dict(math_expression_index_id=index.id)
)
math_article_chunks = await math_article_chunk_repository.find_many(
    filter=dict(math_expression_index_id=index.id)
)

In [47]:
# Every chunk contributes len(indexes) - 1 (source, last index) candidate pairs in the
# next cell, so despite its name this is the number of relationship candidates.
num_expected_chunks = sum(map(lambda chunk: len(chunk.indexes) - 1, math_article_chunks))
num_expected_chunks

2735

In [52]:
from math_rag.application.models.assistants.inputs import (
    MathExpressionRelationshipDetector as AssistantInput,
)
from math_rag.core.models import MathExpressionRelationship


for math_article_chunk in math_article_chunks:
    if len(math_article_chunk.indexes) < 2:
        continue

    start_indexes = math_article_chunk.indexes[:-1]
    last_index = math_article_chunk.indexes[-1]
    index_pairs = [(index, last_index) for index in start_indexes]

    inputs: list[AssistantInput] = []
    input_id_to_math_expression_id_pair: dict[UUID, tuple[UUID, UUID]] = {}
    input_id_to_math_expression_index_pair: dict[UUID, tuple[int, int]] = {}

    for source_index, target_index in index_pairs:
        input = AssistantInput(
            chunk=math_article_chunk.text, source=source_index, target=target_index
        )
        inputs.append(input)

        source_math_expression = next(
            (x for x in math_expressions if x.index == source_index), None
        )
        target_math_expression = next(
            (x for x in math_expressions if x.index == target_index), None
        )

        if source_math_expression is None or target_math_expression is None:
            raise ValueError()

        input_id_to_math_expression_id_pair[input.id] = (
            source_math_expression.id,
            target_math_expression.id,
        )
        input_id_to_math_expression_index_pair[input.id] = source_index, target_index

    outputs = await math_expression_relationship_detector_assistant.concurrent_assist(inputs)
    math_expression_relationships = [
        MathExpressionRelationship(
            math_article_chunk_id=math_article_chunk.id,
            math_expression_index_id=index.id,
            math_expression_source_id=input_id_to_math_expression_id_pair[output.input_id][0],
            math_expression_target_id=input_id_to_math_expression_id_pair[output.input_id][1],
            math_expression_source_index=input_id_to_math_expression_index_pair[output.input_id][0],
            math_expression_target_index=input_id_to_math_expression_index_pair[output.input_id][1],
        )
        for output in outputs
        if output.relationship_exists
    ]

    print(len(outputs))
    print(len(math_expression_relationships))

    await math_expression_relationship_repository.insert_many(math_expression_relationships)
    await math_expression_graph_repository.insert_many_rels(
        math_expression_relationships, rel_to_cls=None
    )

16
16


MathExpressionNodeDoesNotExist: (MathExpressionNodeDoesNotExist(...), "{'uid': '57072083-3dd6-4d35-8bd1-0ddbcbcd5627'}")

### 3. MathExpressionRelationshipDescription, requires: MathArticleChunk, MathExpressionRelationship

In [35]:
# Load the relationships stored for this index and show how many there are.
math_expression_relationships = await math_expression_relationship_repository.find_many(
    filter=dict(math_expression_index_id=index.id)
)
len(math_expression_relationships)
# NOTE(review): the counts below were noted for specific earlier runs; re-check them
# against the output of the current run.
# 2389 < 2735 because the LLM decided that some of the candidate pairs are not connected
# gpt 4o: 2389
# gpt 4o nano: 2692 (bad)

144

In [37]:
from math_rag.application.models.assistants.inputs import (
    MathExpressionRelationshipDescriptionWriter as AssistantInput,
)
from math_rag.core.models import MathExpressionRelationshipDescription


math_article_chunk_ids = [
    math_expression_relationship.math_article_chunk_id
    for math_expression_relationship in math_expression_relationships
]
math_article_chunks = await math_article_chunk_repository.find_many(
    filter=dict(id=math_article_chunk_ids)
)

inputs: list[AssistantInput] = []
input_id_to_math_expression_relationship_id: dict[UUID, UUID] = {}

for math_article_chunk, math_expression_relationship in zip(
    math_article_chunks, math_expression_relationships
):
    input = AssistantInput(
        chunk=math_article_chunk.text,
        source=math_expression_relationship.math_expression_source_index,
        target=math_expression_relationship.math_expression_target_index,
    )
    inputs.append(input)
    input_id_to_math_expression_relationship_id[input.id] = math_expression_relationship.id

outputs = await math_expression_relationship_description_writer_assistant.concurrent_assist(inputs)
descriptions = [
    MathExpressionRelationshipDescription(
        math_expression_index_id=index.id,
        math_expression_relationship_id=input_id_to_math_expression_relationship_id[
            output.input_id
        ],
        text=output.description,
    )
    for output in outputs
]
await math_expression_relationship_description_repository.insert_many(descriptions)