Skip to content

Commit

Permalink
Merge branch 'develop' into INF-638/QA-for-observatory-datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
alexmassen-hane authored Aug 8, 2023
2 parents 2da7dc0 + c59e770 commit e3414d3
Show file tree
Hide file tree
Showing 72 changed files with 445 additions and 2,732 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ jobs:
cd ..
git clone https://github.com/The-Academic-Observatory/observatory-platform.git
cd observatory-platform
pip install -e observatory-api
pip install -e observatory-platform
pip install -e observatory-api --constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-no-providers-${{ matrix.python-version }}.txt
pip install -e observatory-platform --constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-no-providers-${{ matrix.python-version }}.txt
cd ..
cd academic-observatory-workflows
pip install -e .[tests]
pip install -e .[tests] --constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-no-providers-${{ matrix.python-version }}.txt
- name: Check licenses
run: |
Expand Down
52 changes: 0 additions & 52 deletions academic_observatory_workflows/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,9 @@

# Author: James Diprose

import json
import os
from typing import List

from observatory.platform.config import module_file_path
from observatory.platform.elastic.elastic import KeepInfo, KeepOrder
from observatory.platform.elastic.kibana import TimeField
from observatory.platform.utils.jinja2_utils import render_template
from observatory.platform.workflows.elastic_import_workflow import load_elastic_mappings_simple, ElasticImportConfig


class Tag:
Expand Down Expand Up @@ -57,49 +51,3 @@ def sql_folder() -> str:
"""

return module_file_path("academic_observatory_workflows.database.sql")


def elastic_mappings_folder() -> str:
    """Return the path to the Elasticsearch mappings folder.

    :return: the path that module_file_path resolves for the
    academic_observatory_workflows.database.mappings module.
    """

    mappings_module = "academic_observatory_workflows.database.mappings"
    return module_file_path(mappings_module)


def load_elastic_mappings_ao(path: str, table_prefix: str, simple_prefixes: List = None) -> dict:
    """For the Observatory project, load the Elastic mappings for a given table_prefix.

    Table prefixes that start with one of ``simple_prefixes`` are delegated to
    load_elastic_mappings_simple. Every other prefix must have the form
    ``ao_<aggregate>_<facet>`` and is rendered from a Jinja2 mappings template
    found in ``path``.

    :param path: the path to the mappings files.
    :param table_prefix: the table_id prefix (without shard date).
    :param simple_prefixes: the prefixes of mappings to load with the load_elastic_mappings_simple function.
    Defaults to ["ao_doi"] when None.
    :return: the rendered mapping as a Dict.
    :raises ValueError: if table_prefix does not begin with 'ao', or if it is not a simple prefix and
    does not consist of three underscore separated parts.
    """

    # Set default simple_prefixes
    if simple_prefixes is None:
        simple_prefixes = ["ao_doi"]

    if not table_prefix.startswith("ao"):
        raise ValueError("Table must begin with 'ao'")

    # str.startswith accepts a tuple of candidates, so no intermediate list is needed.
    if table_prefix.startswith(tuple(simple_prefixes)):
        return load_elastic_mappings_simple(path, table_prefix)

    # Split into at most three parts so that facets containing underscores
    # (e.g. "access_types") stay intact.
    parts = table_prefix.split("_", 2)
    if len(parts) != 3:
        # Fail with a descriptive message instead of an opaque tuple-unpacking error.
        raise ValueError(
            f"Table prefix '{table_prefix}' must have the form 'ao_<aggregate>_<facet>' "
            "or begin with one of the simple prefixes"
        )
    _, aggregate, facet = parts

    # Facets with a dedicated template; every other facet uses the relations template.
    fixed_facets = ("unique_list", "access_types", "disciplines", "output_types", "events", "metrics")
    if facet in fixed_facets:
        mappings_file_name = f"ao-{facet.replace('_', '-')}-mappings.json.jinja2"
    else:
        mappings_file_name = "ao-relations-mappings.json.jinja2"

    mappings_path = os.path.join(path, mappings_file_name)
    return json.loads(render_template(mappings_path, aggregate=aggregate, facet=facet))


# Module-level Elasticsearch import configuration for academic_observatory_workflows.
# It is built when this module is imported, so elastic_mappings_folder() is called at import time.
ELASTIC_IMPORT_CONFIG = ElasticImportConfig(
# Folder containing the mapping templates, and the function used to load a mapping from it.
elastic_mappings_path=elastic_mappings_folder(),
elastic_mappings_func=load_elastic_mappings_ao,
# The regex "^.*$" matches any string, paired with the "published_year" field.
# NOTE(review): presumably this makes published_year the Kibana time field for every index pattern -- confirm against TimeField.
kibana_time_fields=[TimeField("^.*$", "published_year")],
# NOTE(review): the keys look like index-name prefixes ("" presumably being a catch-all) and each KeepInfo
# presumably retains the 2 newest indices -- confirm against KeepInfo / KeepOrder.
index_keep_info={
"": KeepInfo(ordering=KeepOrder.newest, num=2),
"ao": KeepInfo(ordering=KeepOrder.newest, num=2),
},
)

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit e3414d3

Please sign in to comment.