Skip to content

Commit

Permalink
Workflow config and workflow refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
jdddog committed May 17, 2023
1 parent 2a46d69 commit 2e4e2f4
Show file tree
Hide file tree
Showing 237 changed files with 9,351 additions and 13,997 deletions.
11 changes: 7 additions & 4 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
name: Unit Tests

on: [push]
on: [ push ]

jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
max-parallel: 4
matrix:
python-version: [3.8]
os: [ubuntu-20.04]
python-version: [ 3.8 ]
os: [ ubuntu-20.04 ]

steps:
- name: Checkout ${{ matrix.python-version }}
Expand All @@ -27,7 +27,7 @@ jobs:
python -m pip install --upgrade pip
cd ..
git clone https://github.com/The-Academic-Observatory/observatory-platform.git
git clone -b feature/workflow-config-file https://github.com/The-Academic-Observatory/observatory-platform.git
cd observatory-platform
pip install -e observatory-api
pip install -e observatory-platform
Expand Down Expand Up @@ -55,6 +55,9 @@ jobs:
TEST_GCP_PROJECT_ID: ${{ secrets.TEST_GCP_PROJECT_ID }}
TEST_GCP_SERVICE_KEY: ${{ secrets.TEST_GCP_SERVICE_KEY }}
GOOGLE_APPLICATION_CREDENTIALS: /tmp/google_application_credentials.json
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
AIRFLOW__CORE__LOAD_EXAMPLES: false
run: |
echo "${TEST_GCP_SERVICE_KEY}" | base64 --decode > /tmp/google_application_credentials.json
Expand Down
73 changes: 0 additions & 73 deletions academic_observatory_workflows/api_type_ids.py

This file was deleted.

1 change: 1 addition & 0 deletions academic_observatory_workflows/clearbit.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

# Author: James Diprose

import logging
import shutil

Expand Down
51 changes: 50 additions & 1 deletion academic_observatory_workflows/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,21 @@

# Author: James Diprose

import json
import os
from typing import List

from observatory.platform.utils.config_utils import module_file_path
from observatory.platform.config import module_file_path
from observatory.platform.elastic.elastic import KeepInfo, KeepOrder
from observatory.platform.elastic.kibana import TimeField
from observatory.platform.utils.jinja2_utils import render_template
from observatory.platform.workflows.elastic_import_workflow import load_elastic_mappings_simple, ElasticImportConfig


class Tag:
    """Namespace for the tag strings attached to this project's DAGs."""

    # Tag value shared by the Academic Observatory workflows.
    academic_observatory = "academic-observatory"


def test_fixtures_folder(*subdirs) -> str:
Expand Down Expand Up @@ -54,3 +66,40 @@ def elastic_mappings_folder() -> str:
"""

return module_file_path("academic_observatory_workflows.database.mappings")


def load_elastic_mappings_ao(path: str, table_prefix: str, simple_prefixes: List = None):
    """For the Observatory project, load the Elastic mappings for a given table_prefix.

    Table prefixes that start with one of ``simple_prefixes`` are delegated to
    ``load_elastic_mappings_simple``. Every other prefix must have the form
    ``ao_<aggregate>_<facet>``; the mapping is rendered from a Jinja2 template
    selected by the facet.

    :param path: the path to the mappings files.
    :param table_prefix: the table_id prefix (without shard date).
    :param simple_prefixes: the prefixes of mappings to load with the load_elastic_mappings_simple function.
        Defaults to ``["ao_doi"]`` when None.
    :return: the rendered mapping as a Dict.
    :raises ValueError: if table_prefix does not begin with 'ao', or if it is not a simple prefix and
        does not contain an aggregate and a facet.
    """

    # Set default simple_prefixes
    if simple_prefixes is None:
        simple_prefixes = ["ao_doi"]

    if not table_prefix.startswith("ao"):
        raise ValueError("Table must begin with 'ao'")

    if any(table_prefix.startswith(simple_prefix) for simple_prefix in simple_prefixes):
        return load_elastic_mappings_simple(path, table_prefix)

    # maxsplit=2 keeps any further underscores inside the facet, e.g.
    # "ao_country_access_types" -> ["ao", "country", "access_types"].
    parts = table_prefix.split("_", 2)
    if len(parts) != 3:
        # Fail with an explicit message rather than a bare tuple-unpacking error.
        raise ValueError(
            f"Table prefix '{table_prefix}' must have the form 'ao_<aggregate>_<facet>'"
        )
    _, aggregate, facet = parts

    # Facets listed here each have a dedicated template; all others share the relations template.
    fixed_facets = ("unique_list", "access_types", "disciplines", "output_types", "events", "metrics")
    if facet in fixed_facets:
        mappings_file_name = f"ao-{facet.replace('_', '-')}-mappings.json.jinja2"
    else:
        mappings_file_name = "ao-relations-mappings.json.jinja2"

    mappings_path = os.path.join(path, mappings_file_name)
    return json.loads(render_template(mappings_path, aggregate=aggregate, facet=facet))


# Module-level Elastic import configuration for this project, built once at import time.
# It wires together this module's mappings folder and its mappings loader function.
ELASTIC_IMPORT_CONFIG = ElasticImportConfig(
elastic_mappings_path=elastic_mappings_folder(),
elastic_mappings_func=load_elastic_mappings_ao,
# The pattern "^.*$" matches any string; presumably this assigns "published_year" as the
# Kibana time field for every index -- TODO confirm against TimeField.
kibana_time_fields=[TimeField("^.*$", "published_year")],
# Both entries keep the 2 newest items. NOTE(review): the keys look like index-name
# prefixes, with "" acting as a catch-all -- confirm against ElasticImportConfig.
index_keep_info={
"": KeepInfo(ordering=KeepOrder.newest, num=2),
"ao": KeepInfo(ordering=KeepOrder.newest, num=2),
},
)
4 changes: 0 additions & 4 deletions academic_observatory_workflows/dag_tag.py

This file was deleted.

29 changes: 0 additions & 29 deletions academic_observatory_workflows/dags/crossref_events_telescope.py

This file was deleted.

29 changes: 0 additions & 29 deletions academic_observatory_workflows/dags/crossref_fundref_telescope.py

This file was deleted.

29 changes: 0 additions & 29 deletions academic_observatory_workflows/dags/crossref_metadata_telescope.py

This file was deleted.

70 changes: 0 additions & 70 deletions academic_observatory_workflows/dags/doi_workflow.py

This file was deleted.

Loading

0 comments on commit 2e4e2f4

Please sign in to comment.