From 34415633f1b263056a1018131957e417277615c7 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Thu, 10 Jul 2025 12:09:52 +0200 Subject: [PATCH 1/7] Add "public" dataset function docstring --- khiops/sklearn/dataset.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/khiops/sklearn/dataset.py b/khiops/sklearn/dataset.py index 198e0bf2..08ccb8f1 100644 --- a/khiops/sklearn/dataset.py +++ b/khiops/sklearn/dataset.py @@ -177,6 +177,18 @@ def _check_multitable_spec(ds_spec): def table_name_of_path(table_path): + """Returns the table name as the last fragment of the table data path + + Parameters + ---------- + table_path: str + Data path of the table, in the format "path/to/table". + + Returns + ------- + str + The name of the table. + """ return table_path.split("/")[-1] From f16e086bff7a0b18cbf5874f08069e66d6110b39 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 9 Jul 2025 19:14:03 +0200 Subject: [PATCH 2/7] Update multi-table primer to the new Sklearn multi-table schema specification --- doc/multi_table_primer.rst | 65 ++++++++++++++------------------------ 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/doc/multi_table_primer.rst b/doc/multi_table_primer.rst index ec12d9a3..6d39bae7 100644 --- a/doc/multi_table_primer.rst +++ b/doc/multi_table_primer.rst @@ -76,40 +76,31 @@ feature object ``X``. Specifically, instead of a `pandas.DataFrame`, ``X`` must specifies the dataset schema in the following way:: X = { - "main_table": , - "tables" : { - : (, ), - : (, ), - : (, ), + "main_table": (, ), + "additional_data_tables" : { + : ( + , [], + ), + : ( + , [], + ), ... } - "relations" : [ - (, , ), - (, , ), - ... - ], } The three fields of this dictionary are: -- ``main_table``: The name of the main table. -- ``tables``: A dictionary indexed by the tables' names. 
Each table is associated to a 2-tuple - containing the following fields: +- ``main_table``: A 2-tuple containing the following fields: + - The `pandas.DataFrame` object of the main table. + - The key columns' names: A list of strings. + +- ``additional_data_tables``: A dictionary indexed by the data paths to the secondary + tables. Each data path is associated to a 2-tuple (or a 3-tuple, when the optional flag is given) containing the following fields: - - The `pandas.DataFrame` object of the table. - - The key columns' names : Either a list of strings or a single string. - -- ``relations``: An optional field containing a list of tuples describing the relations between - tables. The first two values (Strings) of each tuple correspond to names of both the parent and the child table - involved in the relation. A third value (Boolean) can be optionally added to the tuple to indicate if the relation is - either ``1:n`` or ``1:1`` (entity). For example, If the tuple ``(table1, table2, True)`` is contained in this - field, it means that: - - - ``table1`` and ``table2`` are in a ``1:1`` relationship - - The key of ``table1`` is contained in that of ``table2`` (ie. keys are hierarchical) - - If the ``relations`` field is not present then Khiops Python assumes that the tables are in a *star* - schema. + - The `pandas.DataFrame` object of the secondary table. + - The key columns' names: A list of strings. + - Optionally, a flag which indicates if the secondary table is in + a ``1:1`` relationship to its parent table. .. 
note:: @@ -138,9 +129,8 @@ We build the input ``X`` as follows:: accidents_df = pd.read_csv(f"{kh.get_samples_dir()}/AccidentsSummary/Accidents.txt", sep="\t") vehicles_df = pd.read_csv(f"{kh.get_samples_dir()}/AccidentsSummary/Vehicles.txt", sep="\t") X = { - "main_table" : "Accident", - "tables": { - "Accident": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table" : (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicle": (vehicles_df, ["AccidentId", "VehicleId"]) } } @@ -170,19 +160,12 @@ We build the input ``X`` as follows:: places_df = pd.read_csv(f"{kh.get_samples_dir()}/Accidents/Places.txt", sep="\t") X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), - "Users": (users_df, ["AccidentId", "VehicleId"]), - "Places": (places_df, "AccidentId"), - + "Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]), + "Places": (places_df, ["AccidentId"], True), }, - "relations": [ - ("Accidents", "Vehicles"), - ("Vehicles", "Users"), - ("Accidents", "Places", True), - ], } Both datasets can be found in the Khiops samples directory. 
From 61861857ee80fd38c0474626db5d742ec238fce3 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 9 Jul 2025 19:21:26 +0200 Subject: [PATCH 3/7] Update upstream khiops-core version to 11.0.0-b.0 in the dev Docker images --- .github/workflows/dev-docker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dev-docker.yml b/.github/workflows/dev-docker.yml index 3d57d6c6..0a7b2641 100644 --- a/.github/workflows/dev-docker.yml +++ b/.github/workflows/dev-docker.yml @@ -1,7 +1,7 @@ --- name: Dev Docker env: - DEFAULT_KHIOPS_REVISION: 11.0.0-a.0 + DEFAULT_KHIOPS_REVISION: 11.0.0-b.0 DEFAULT_IMAGE_INCREMENT: 0 DEFAULT_SERVER_REVISION: main DEFAULT_PYTHON_VERSIONS: 3.8 3.9 3.10 3.11 3.12 3.13 @@ -14,7 +14,7 @@ on: inputs: khiops-revision: type: string - default: 11.0.0-a.0 + default: 11.0.0-b.0 description: Khiops Revision image-increment: type: number From 30f201790951116abfbb7f6b44aa7335a15a3e41 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 9 Jul 2025 19:35:08 +0200 Subject: [PATCH 4/7] Update Docker image tag and Khiops versions to use the Khiops core 11.0.0-b.0 --- .github/workflows/api-docs.yml | 14 +++++++++----- .github/workflows/pip.yml | 4 ++-- .github/workflows/tests.yml | 10 +++++----- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/.github/workflows/api-docs.yml b/.github/workflows/api-docs.yml index 06e68877..6d650452 100644 --- a/.github/workflows/api-docs.yml +++ b/.github/workflows/api-docs.yml @@ -1,15 +1,19 @@ --- name: API Docs env: - DEFAULT_KHIOPS_PYTHON_TUTORIAL_REVISION: main + DEFAULT_KHIOPS_PYTHON_TUTORIAL_REVISION: 11.0.0.0-b.0 + DEFAULT_KHIOPS_SAMPLES_REVISION: 11.0.0 on: workflow_dispatch: inputs: khiops-python-tutorial-revision: - default: main + default: 11.0.0.0-b.0 description: khiops-python-tutorial repo revision + khiops-samples-revision: + default: 11.0.0 + description: khiops-samples 
repo revision image-tag: - default: 11.0.0-a.0.0 + default: 11.0.0-b.0.0 description: Development Docker Image Tag pull_request: paths: @@ -41,7 +45,7 @@ jobs: # because the `env` context is only accessible at the step level; # hence, it is hard-coded image: |- - ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:${{ inputs.image-tag || '11.0.0-a.0.0' }} + ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:${{ inputs.image-tag || '11.0.0-b.0.0' }} # Use the 'runner' user (1001) from github so checkout actions work properly # https://github.com/actions/runner/issues/2033#issuecomment-1598547465 options: --user 1001 @@ -56,7 +60,7 @@ jobs: run: | # Install package itself to install the samples datasets pip3 install . - kh-download-datasets --force-overwrite + kh-download-datasets --force-overwrite --version ${{ inputs.khiops-samples-revision || env.DEFAULT_KHIOPS_SAMPLES_REVISION }} kh-status # Install the doc python requirements diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index e79a1ee3..5fdf17fd 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -9,7 +9,7 @@ on: default: 11.0.0 description: khiops-samples repo revision image-tag: - default: 11.0.0-a.0.0 + default: 11.0.0-b.0.0 description: Development Docker Image Tag pull_request: paths: @@ -64,7 +64,7 @@ jobs: # because the `env` context is only accessible at the step level; # hence, it is hard-coded image: |- - ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.container }}:${{ inputs.image-tag || '11.0.0-a.0.0' }} + ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.container }}:${{ inputs.image-tag || '11.0.0-b.0.0' }} steps: - name: Set parameters as env run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c1d11fd1..282b554e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,7 +2,7 @@ name: Tests env: DEFAULT_SAMPLES_REVISION: 11.0.0 - DEFAULT_KHIOPS_DESKTOP_REVISION: 11.0.0-a.0 + 
DEFAULT_KHIOPS_DESKTOP_REVISION: 11.0.0-b.0 on: workflow_dispatch: inputs: @@ -10,10 +10,10 @@ on: default: 11.0.0 description: Git Tag/Branch/Commit for the khiops-samples Repo image-tag: - default: 11.0.0-a.0.0 + default: 11.0.0-b.0.0 description: Development Docker Image Tag khiops-desktop-revision: - default: 11.0.0-a.0 + default: 11.0.0-b.0 description: Khiops Windows Desktop Application Version run-expensive-tests: type: boolean @@ -43,7 +43,7 @@ jobs: # because the `env` context is only accessible at the step level; # hence, it is hard-coded image: |- - ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:${{ inputs.image-tag || '11.0.0-a.0.0' }} + ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:${{ inputs.image-tag || '11.0.0-b.0.0' }} credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -315,7 +315,7 @@ jobs: # because the `env` context is only accessible at the step level; # hence, it is hard-coded image: |- - ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.container }}:${{ inputs.image-tag || '11.0.0-a.0.0' }} + ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.container }}:${{ inputs.image-tag || '11.0.0-b.0.0' }} credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} From d3135ce7545d7b56e86828407dba579173e8d7e2 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 9 Jul 2025 19:37:00 +0200 Subject: [PATCH 5/7] Update Conda khiops-core version to 11.0.0b.0 --- .github/workflows/conda.yml | 4 ++-- packaging/conda/meta.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index f2cc5c97..b3ed3e4d 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -3,13 +3,13 @@ name: Conda Package env: # Note: The default Khiops version must never be an alpha release as they are # ephemeral. To test alpha versions run the workflow manually. 
- DEFAULT_KHIOPS_CORE_VERSION: 11.0.0a.0 + DEFAULT_KHIOPS_CORE_VERSION: 11.0.0b.0 DEFAULT_SAMPLES_VERSION: 11.0.0 on: workflow_dispatch: inputs: khiops-core-version: - default: 11.0.0a.0 + default: 11.0.0b.0 description: khiops-core version for testing khiops-samples-version: default: 11.0.0 diff --git a/packaging/conda/meta.yaml b/packaging/conda/meta.yaml index 733496ed..d52ab601 100644 --- a/packaging/conda/meta.yaml +++ b/packaging/conda/meta.yaml @@ -26,7 +26,7 @@ requirements: - python run: - python - - conda-forge/label/rc::khiops-core =11.0.0a.0 + - khiops-core =11.0.0b.0 - pandas >=0.25.3 - scikit-learn >=0.22.2 run_constrained: From 39306b76f1d0ab284a45072432eb6b59ea8925d9 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Thu, 10 Jul 2025 11:13:57 +0200 Subject: [PATCH 6/7] Update CHANGELOG with missing v10 entries --- CHANGELOG.md | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 736e31f5..b9727622 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,11 +6,41 @@ - Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1. - Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists. -## Unreleased +## 10.3.2.0 - 2025-07-03 + +### Fixed +- (`sklearn`) Documentation display for the `train_test_split_dataset` sklearn +helper function. + +## 10.3.1.0 - 2025-04-16 ### Added - (`sklearn`) Support for boolean and float targets in `KhiopsClassifier`. +### Fixed +- (`sklearn`) Crash when there were no informative trees in predictors. + +### Deprecated +- (`core`) The `build_multi_table_dictionary_domain` helper function. + +## 10.3.0.0 - 2025-02-10 + +### Fixed +- (`core`) Dictionary file `.json` extension check in the `khiops.dictionary.read_dictionary_file` +function. + +### Changed +- (`sklearn`) The `train_test_split_dataset` helper has been moved from `khiops.utils` to +`khiops.sklearn`. 
+- (`sklearn`) The `transform_pairs` parameter of the `KhiopsEncoder` sklearn estimator has been +renamed to `transform_type_pairs`. + +### Removed +- (`sklearn`) The `is_fitted_` estimator attribute. The Scikit-learn `check_is_fitted` function +can be used to test the fitted state of the estimators. +- (`sklearn`) The `n_pairs` parameter of the `KhiopsRegressor` sklearn estimator. It was never +supported. + ## 10.2.4.0 - 2024-12-19 ### Added From 2948fbf157397b417313727e6e88ba4f5327e26f Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Thu, 10 Jul 2025 11:14:16 +0200 Subject: [PATCH 7/7] Update CHANGELOG with the beta v11-specific entry --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9727622..20438857 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,38 @@ - Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1. - Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists. +## 11.0.0.0-b.0 - 2025-07-10 + +### Added +- (`core`) API support for predictor interpretation and reinforcement. +- (`core`) API support for instance-variable coclustering model training. +- (`core`) Support for text types in prediction and coclustering models. +- (`core`) Analysis and coclustering report JSON serialization support. +- (`sklearn`) Automatic removal of newline characters in strings on Pandas + dataframe columns. This is to ensure the proper working of the Khiops engine. + +### Changed +- (`core`) Syntax for additional data tables specification, which uses the data + paths. +- (`core`) API specification of the results path: full paths to report files are + now used instead of result directories. +- (`sklearn`) Specification of the hierarchical multi-table schemata, which now + uses data paths as in the Core API. +- (`general`) Various other changes and updates for Khiops 11.0.0-b.0 + compatibility. 
+ +### Deprecated +- (`core`) The results directory parameter of the Core API functions. The full + path to the reports must now be specified instead. +- (`core`) The "``"-based secondary table path specification. The "/"-based data + paths must now be used instead. +- (`sklearn`) The specification syntax for hierarchical multi-table datasets. + The "/"-based data paths must now be used instead, as in the Core API. + +### Removed +- (`general`) All functions, attributes and features that had been deprecated in + the 10.3.2.0 version. + ## 10.3.2.0 - 2025-07-03 ### Fixed