Skip to content

Add country field to run and edla schemas #79

@oto-macenauer-absa

Description

@oto-macenauer-absa

Background

There was a new proposal to add a country field to the run and EDLA schemas.

Feature

Change the schemas according to the following definitions:

Runs

{
  "type": "object",
  "properties": {
    "event_id": {
      "type": "string",
      "description": "Unique identifier for the event (GUID), generated for each unique event, for de-duplication purposes"
    },
    "job_ref": {
      "type": "string",
      "description": "Identifier of the job in it’s respective system (e.g. Spark Application Id, Glue Job Id, EMR Step Id, etc)."
    },
    "tenant_id ": {
      "type": "string",
      "description": "Application ID (4 letter code) or ServiceNow identifier related to the pipeline/domain/process owner (tenant of the tool)"
    },
    "source_app": {
      "type": "string",
      "description": "Standardized source application name (aqueduct, unify, lum, etc)"
    },
    "source_app_version": {
      "type": "string",
      "description": "Source application version (SemVer preferred)"
    },
    "environment": {
      "type": "string",
      "description": "Environment (dev, uat, pre-prod, prod, test or others)"
    },
    "timestamp_start": { 
      "type": "number",
      "description": "Start timestamp of the run in epoch milliseconds"
    },
    "timestamp_end": {
      "type": "number",
      "description": "End timestamp of the run in epoch milliseconds"
    },
    "jobs": { 
      "type": "array",
      "description": "List of individual jobs withing the run"
      "element_type": "object"
      "object_schema":  {
         "country": {
         "type": "string", /* since this is a new field, it is optional initially, with “” or empty string as the default value */
         "description": "The country the data is related to (see the appendix B)."
       },
         "catalog_id": {
         "type": "string",
         "description": "Identifier for the data definition (Glue/Hive) database and table name for  example"
       },
       "status": {
        "type": "string",
        "enum": ["succeeded", "failed", "killed", "skipped"],
        "description": "Status of the job."
      },
      "timestamp_start": {
        "type": "number",
        "description": "Start timestamp of a job that is a part of a run in epoch milliseconds"
      },
      "timestamp_end": {
        "type": "number",
        "description": "End timestamp of a job that is a part of a run in epoch milliseconds"
      },
      "message": {
        "type": "string",
        "description": "Job status/error message."
       },
       "additional_info": {
         "type": "object/map",
         "description": "Optional additional fields structured as an inner JSON"
       }
      }
    }
    
  },
  "required": ["event_id", "job_ref", "tenant_id", "source_app", "source_app_version", "environment", "timestamp_start", "timestamp_end", "jobs", "jobs.catalog_id", "jobs.status", " jobs.timestamp_start", "jobs.timestamp_end"]
}

EDLA change

{
  "type": "object",
  "properties": {
    "event_id": {
      "type": "string",
      "description": "Unique identifier for the event (GUID)"
    },
    "tenant_id": {
      "type": "string",
      "description": "Application ID or ServiceNow identifier"
    },
    "source_app": {
      "type": "string",
      "description": " Standardized source application name (aqueduct, unify, lum, etc)"
    },
    "source_app_version": {
      "type": "string",
      "description": "Source application version (SemVer preferred)"
    },
    "environment": {
      "type": "string",
      "description": "Environment (dev, uat, pre-prod, prod, test or others)"
    },
    "timestamp_event": {
      "type": "number",
      "description": "Timestamp of the event in epoch milliseconds"
    },
      "country": {
      "type": "string", /* since this is a new field, it is optional initially, with “” or empty string as the default value */
      "description": "The country the data is related to (see the appendix B)." 
    },
    "catalog_id": {
      "type": "string",
      "description": "Identifier for the data definition (Glue/Hive) database and table name for example "
    },
    "operation": {
      "type": "string",
      "enum": ["overwrite", "append", "archive", "delete", ],
      "description": "Operation performed"
    },
    "location": {
      "type": "string",
      "description": "Location of the data"
    },
    "format": {
      "type": "string",
      "description": "Format of the data (parquet, delta, crunch, etc)."
    },
    "format_options": {
      "type": "object/map",
      "description": "When possible, add additional options related to the format"
    },
    "additional_info": {
      "type": "object/map",
      "description": "Optional additional fields structured as an inner JSON"
    }

  },
  "required": ["event_id", "tenant_id", "source_app", "source_app_version", "envoronment", "timestamp_event", "catalog_id", "operation", "format"]
}

Metadata

Metadata

Assignees

Labels

enhancement: New feature or request

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions