From 68274b5c67a9a3dc253edbec6fab1176d414cba5 Mon Sep 17 00:00:00 2001 From: Alex Danilin <18176076+alex-danilin@users.noreply.github.com> Date: Tue, 22 Aug 2023 16:02:23 +0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20Serpstat=20(#281?= =?UTF-8?q?47)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Marcos Marx Co-authored-by: marcosmarxm --- .../connectors/source-serpstat/.dockerignore | 6 + .../connectors/source-serpstat/Dockerfile | 38 ++ .../connectors/source-serpstat/README.md | 82 +++ .../connectors/source-serpstat/__init__.py | 3 + .../acceptance-test-config.yml | 27 + .../source-serpstat/acceptance-test-docker.sh | 3 + .../connectors/source-serpstat/build.gradle | 9 + .../connectors/source-serpstat/icon.svg | 12 + .../integration_tests/__init__.py | 3 + .../integration_tests/abnormal_state.json | 5 + .../integration_tests/acceptance.py | 16 + .../integration_tests/configured_catalog.json | 113 ++++ .../integration_tests/invalid_config.json | 7 + .../integration_tests/sample_config.json | 3 + .../integration_tests/sample_state.json | 5 + .../connectors/source-serpstat/main.py | 13 + .../connectors/source-serpstat/metadata.yaml | 22 + .../source-serpstat/requirements.txt | 2 + .../connectors/source-serpstat/setup.py | 29 + .../source_serpstat/__init__.py | 8 + .../source_serpstat/manifest.yaml | 551 ++++++++++++++++++ .../source-serpstat/source_serpstat/source.py | 18 + .../source-serpstat/source_serpstat/spec.yaml | 87 +++ docs/integrations/sources/serpstat.md | 52 ++ 24 files changed, 1114 insertions(+) create mode 100644 airbyte-integrations/connectors/source-serpstat/.dockerignore create mode 100644 airbyte-integrations/connectors/source-serpstat/Dockerfile create mode 100644 airbyte-integrations/connectors/source-serpstat/README.md create mode 100644 airbyte-integrations/connectors/source-serpstat/__init__.py create mode 100644 
airbyte-integrations/connectors/source-serpstat/acceptance-test-config.yml create mode 100755 airbyte-integrations/connectors/source-serpstat/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-serpstat/build.gradle create mode 100644 airbyte-integrations/connectors/source-serpstat/icon.svg create mode 100644 airbyte-integrations/connectors/source-serpstat/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-serpstat/integration_tests/abnormal_state.json create mode 100644 airbyte-integrations/connectors/source-serpstat/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-serpstat/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-serpstat/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-serpstat/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-serpstat/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-serpstat/main.py create mode 100644 airbyte-integrations/connectors/source-serpstat/metadata.yaml create mode 100644 airbyte-integrations/connectors/source-serpstat/requirements.txt create mode 100644 airbyte-integrations/connectors/source-serpstat/setup.py create mode 100644 airbyte-integrations/connectors/source-serpstat/source_serpstat/__init__.py create mode 100644 airbyte-integrations/connectors/source-serpstat/source_serpstat/manifest.yaml create mode 100644 airbyte-integrations/connectors/source-serpstat/source_serpstat/source.py create mode 100644 airbyte-integrations/connectors/source-serpstat/source_serpstat/spec.yaml create mode 100644 docs/integrations/sources/serpstat.md diff --git a/airbyte-integrations/connectors/source-serpstat/.dockerignore b/airbyte-integrations/connectors/source-serpstat/.dockerignore new file mode 100644 index 0000000000000..dbd3fc7ddd07c --- 
/dev/null +++ b/airbyte-integrations/connectors/source-serpstat/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_serpstat +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-serpstat/Dockerfile b/airbyte-integrations/connectors/source-serpstat/Dockerfile new file mode 100644 index 0000000000000..6e113cf522d00 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_serpstat ./source_serpstat + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-serpstat diff --git a/airbyte-integrations/connectors/source-serpstat/README.md b/airbyte-integrations/connectors/source-serpstat/README.md new file mode 100644 index 0000000000000..c4698cf993b86 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/README.md @@ -0,0 +1,82 @@ +# Serpstat Source + +This is the repository for the Serpstat configuration based source connector. 
+For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/sources/serpstat). + +## Local development + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-serpstat:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/sources/serpstat) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_serpstat/spec.yaml` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source serpstat test creds` +and place them into `secrets/config.json`. + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-serpstat:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-serpstat:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. 
+ +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-serpstat:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-serpstat:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-serpstat:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-serpstat:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing + +#### Acceptance Tests +Customize the `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector needs to create or destroy resources for use during acceptance tests, create fixtures for them and place them inside integration_tests/acceptance.py. + +To run your integration tests with Docker, run: +``` +./acceptance-test-docker.sh +``` + +### Using Gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-serpstat:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-serpstat:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies into two groups. Dependencies that are: +* required for your connector to work need to go in the `MAIN_REQUIREMENTS` list. +* required for testing need to go in the `TEST_REQUIREMENTS` list. + +### Publishing a new version of the connector +You've checked out the repo, implemented a million-dollar feature, and you're ready to share your changes with the world. 
Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-serpstat/__init__.py b/airbyte-integrations/connectors/source-serpstat/__init__.py new file mode 100644 index 0000000000000..c941b30457953 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-serpstat/acceptance-test-config.yml b/airbyte-integrations/connectors/source-serpstat/acceptance-test-config.yml new file mode 100644 index 0000000000000..da4d7e8dc7ec4 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/acceptance-test-config.yml @@ -0,0 +1,27 @@ +# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-serpstat:dev +acceptance_tests: + spec: + tests: + - spec_path: "source_serpstat/spec.yaml" + connection: + tests: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + tests: + - config_path: "secrets/config.json" + basic_read: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + incremental: + bypass_reason: "This connector does not implement incremental sync" + full_refresh: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: 
"integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-serpstat/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-serpstat/acceptance-test-docker.sh new file mode 100755 index 0000000000000..b6d65deeccb43 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/acceptance-test-docker.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env sh + +source "$(git rev-parse --show-toplevel)/airbyte-integrations/bases/connector-acceptance-test/acceptance-test-docker.sh" diff --git a/airbyte-integrations/connectors/source-serpstat/build.gradle b/airbyte-integrations/connectors/source-serpstat/build.gradle new file mode 100644 index 0000000000000..446bea8580d3d --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-connector-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_serpstat' +} diff --git a/airbyte-integrations/connectors/source-serpstat/icon.svg b/airbyte-integrations/connectors/source-serpstat/icon.svg new file mode 100644 index 0000000000000..a0adc252270f2 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/icon.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/airbyte-integrations/connectors/source-serpstat/integration_tests/__init__.py b/airbyte-integrations/connectors/source-serpstat/integration_tests/__init__.py new file mode 100644 index 0000000000000..c941b30457953 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# diff --git a/airbyte-integrations/connectors/source-serpstat/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-serpstat/integration_tests/abnormal_state.json new file mode 100644 index 0000000000000..1b36aac11a45f --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "Domain history": { + "date": "1200-12-31" + } +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-serpstat/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-serpstat/integration_tests/acceptance.py new file mode 100644 index 0000000000000..9e6409236281f --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("connector_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance tests might require.""" + # TODO: set up test dependencies if needed. 
otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-serpstat/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-serpstat/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..14b4b26c1b7cd --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/integration_tests/configured_catalog.json @@ -0,0 +1,113 @@ +{ + "streams": [ + { + "stream": { + "name": "Domain history", + "json_schema": { + "$schema": "http://json-schema.org/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh" + ], + "source_defined_primary_key": [ + [ + "date" + ] + ] +}, +"sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream":{ + "name": "Domains summary", + "json_schema": { + "$schema": "http://json-schema.org/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh" + ], + "source_defined_primary_key": [ + [ + "domain" + ] + ] + }, +"sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream":{ + "name": "Domain keywords", + "json_schema": { + "$schema": "http://json-schema.org/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh" + ], + "source_defined_primary_key": [ + [ + "keyword" + ] + ] + }, +"sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream":{ + "name": "Domain keywords by region", + "json_schema": { + "$schema": "http://json-schema.org/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh" + ], + "source_defined_primary_key": [ + [ + "db_name" + ] + ] + }, +"sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream":{ + "name": "Domain competitors", + "json_schema": { + "$schema": "http://json-schema.org/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh" + ], + "source_defined_primary_key": [ + 
[ + "domain" + ] + ] + }, +"sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream":{ + "name": "Domain top pages", + "json_schema": { + "$schema": "http://json-schema.org/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh" + ] + }, +"sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-serpstat/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-serpstat/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..178151460bc77 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/integration_tests/invalid_config.json @@ -0,0 +1,7 @@ +{ + "api_key": "api_key", + "domain": "serpstat.com", + "region_id": "g_us", + "page_size": 10, + "pages_to_fetch": 1 +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-serpstat/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-serpstat/integration_tests/sample_config.json new file mode 100644 index 0000000000000..ecc4913b84c74 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/integration_tests/sample_config.json @@ -0,0 +1,3 @@ +{ + "fix-me": "TODO" +} diff --git a/airbyte-integrations/connectors/source-serpstat/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-serpstat/integration_tests/sample_state.json new file mode 100644 index 0000000000000..3587e579822d0 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "value" + } +} diff --git a/airbyte-integrations/connectors/source-serpstat/main.py b/airbyte-integrations/connectors/source-serpstat/main.py new file mode 100644 index 0000000000000..92fb7edc04743 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/main.py @@ -0,0 +1,13 @@ 
+# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_serpstat import SourceSerpstat + +if __name__ == "__main__": + source = SourceSerpstat() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-serpstat/metadata.yaml b/airbyte-integrations/connectors/source-serpstat/metadata.yaml new file mode 100644 index 0000000000000..e764c9c80059a --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/metadata.yaml @@ -0,0 +1,22 @@ +data: + allowedHosts: + hosts: + - api.serpstat.com + registries: + oss: + enabled: true + connectorSubtype: api + connectorType: source + definitionId: 3b2e8fb2-9137-41ff-a1e1-83ecb39e26c8 + dockerImageTag: 0.1.0 + dockerRepository: airbyte/source-serpstat + githubIssueLabel: source-serpstat + icon: serpstat.svg + license: MIT + name: Serpstat + releaseDate: 2023-08-21 + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/sources/serpstat + tags: + - language:lowcode +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-serpstat/requirements.txt b/airbyte-integrations/connectors/source-serpstat/requirements.txt new file mode 100644 index 0000000000000..cc57334ef619a --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/connector-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-serpstat/setup.py b/airbyte-integrations/connectors/source-serpstat/setup.py new file mode 100644 index 0000000000000..42ab32a171b05 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.2", + "pytest-mock~=3.6.1", + "connector-acceptance-test", +] + +setup( + name="source_serpstat", + description="Source implementation for Serpstat.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-serpstat/source_serpstat/__init__.py b/airbyte-integrations/connectors/source-serpstat/source_serpstat/__init__.py new file mode 100644 index 0000000000000..8a15ca81d45fe --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/source_serpstat/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from .source import SourceSerpstat + +__all__ = ["SourceSerpstat"] diff --git a/airbyte-integrations/connectors/source-serpstat/source_serpstat/manifest.yaml b/airbyte-integrations/connectors/source-serpstat/source_serpstat/manifest.yaml new file mode 100644 index 0000000000000..2bf61ead520f3 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/source_serpstat/manifest.yaml @@ -0,0 +1,551 @@ +version: 0.43.0 +type: DeclarativeSource +check: + type: CheckStream + stream_names: + - Domains summary + +streams: + - type: DeclarativeStream + name: Domain history + primary_key: + - date + schema_loader: + type: InlineSchemaLoader + additionalProperties: true + schema: + $schema: http://json-schema.org/schema# + properties: + ad_keywords: + type: number + ads: + type: number + date: + type: string + domain: + type: string + down_keywords: + type: number + keywords: + type: number + new_keywords: + type: number + out_keywords: + type: number + rised_keywords: + type: number + traff: + type: number + visible: + type: 
number + visible_static: + type: number + type: object + retriever: + type: SimpleRetriever + requester: + type: HttpRequester + url_base: https://api.serpstat.com/v4/ + path: / + http_method: POST + request_parameters: {} + request_headers: + X-request-sender: Airbyte + authenticator: + type: ApiKeyAuthenticator + api_token: "{{ config['api_key'] }}" + inject_into: + type: RequestOption + field_name: token + inject_into: header + error_handler: + type: CompositeErrorHandler + error_handlers: + - type: DefaultErrorHandler + response_filters: + - type: HttpResponseFilter + action: RETRY + predicate: "{{response.error.code == 32000}}" + error_message: >- + You are sending more requests per second then available for + your Serpstat plan + backoff_strategies: + - type: ExponentialBackoffStrategy + factor: 2 + request_body_json: + id: "{{ now_utc() }}" + method: SerpstatDomainProcedure.getDomainsHistory + params: + se: "{{config['region_id']}}" + page: "{{(next_page_token['next_page_token'] or 0) + 1}}" + size: "{{config['page_size']}}" + sort: + date: desc + domain: "{{config['domain']}}" + during_all_time: true + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - result + - data + paginator: + type: DefaultPaginator + page_token_option: + type: RequestOption + inject_into: header + field_name: X-page + pagination_strategy: + type: CursorPagination + cursor_value: "{{response.result.summary_info.page}}" + stop_condition: "{{response.result.summary_info.page > config['pages_to_fetch'] - 1}}" + - type: DeclarativeStream + name: Domains summary + primary_key: + - domain + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/schema# + properties: + ad_keywords: + type: number + ads: + type: number + ads_dynamic: + type: number + domain: + type: string + down_keywords: + type: number + keywords: + type: number + keywords_dynamic: + type: number + new_keywords: + type: number + out_keywords: + type: 
number + prev_date: + type: string + rised_keywords: + type: number + traff: + type: number + traff_dynamic: + type: number + visible: + type: number + visible_dynamic: + type: number + type: object + retriever: + type: SimpleRetriever + requester: + type: HttpRequester + url_base: https://api.serpstat.com/v4/ + path: / + http_method: POST + request_parameters: {} + request_headers: + X-request-sender: Airbyte + authenticator: + type: ApiKeyAuthenticator + api_token: "{{ config['api_key'] }}" + inject_into: + type: RequestOption + field_name: token + inject_into: header + error_handler: + type: CompositeErrorHandler + error_handlers: + - response_filters: + - type: HttpResponseFilter + action: FAIL + predicate: "{{ 'Invalid token' in response.error.message }}" + error_message: Invalid Token + - type: HttpResponseFilter + action: RETRY + predicate: "{{ 'Too Many Requests' in response.error.message }}" + error_message: >- + You are sending more requests per second then available for + your Serpstat plan + backoff_strategies: + - type: ExponentialBackoffStrategy + factor: 2 + request_body_json: + id: "{{ now_utc() }}" + method: SerpstatDomainProcedure.getDomainsInfo + params: + se: "{{config['region_id']}}" + domains: "{{config['domains']}}" + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - result + - data + paginator: + type: NoPagination + - type: DeclarativeStream + name: Domain keywords + primary_key: + - keyword + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/schema# + properties: + concurrency: + type: number + cost: + type: number + difficulty: + type: number + domain: + type: string + dynamic: + type: + - "null" + - number + found_results: + type: number + geo_names: + type: array + keyword: + type: string + keyword_length: + type: number + position: + type: number + region_queries_count: + type: number + region_queries_count_wide: + type: number + subdomain: + type: + - 
"null" + - string + traff: + type: number + types: + items: + type: string + type: array + url: + type: string + type: object + retriever: + type: SimpleRetriever + requester: + type: HttpRequester + url_base: https://api.serpstat.com/v4/ + path: / + http_method: POST + request_parameters: {} + request_headers: + X-request-sender: Airbyte + authenticator: + type: ApiKeyAuthenticator + api_token: "{{ config['api_key'] }}" + inject_into: + type: RequestOption + field_name: token + inject_into: header + error_handler: + type: CompositeErrorHandler + error_handlers: + - type: DefaultErrorHandler + response_filters: + - type: HttpResponseFilter + action: RETRY + predicate: "{{response.error.code == 32000}}" + error_message: >- + You are sending more requests per second then available for + your Serpstat plan + backoff_strategies: + - type: ExponentialBackoffStrategy + factor: 2 + request_body_json: + id: "{{ now_utc() }}" + method: SerpstatDomainProcedure.getDomainKeywords + params: + se: "{{config['region_id']}}" + page: "{{(next_page_token['next_page_token'] or 0) + 1}}" + size: "{{config['page_size']}}" + sort: + "{{config['sort_by']}}": "{{config['sort_value']}}" + domain: "{{config['domain']}}" + filters: + "{{config['filter_by']}}": "{{config['filter_value']}}" + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - result + - data + paginator: + type: DefaultPaginator + page_token_option: + type: RequestOption + inject_into: header + field_name: X-page + pagination_strategy: + type: CursorPagination + cursor_value: "{{response.result.summary_info.page}}" + stop_condition: "{{response.result.summary_info.page > config['pages_to_fetch'] - 1}}" + - type: DeclarativeStream + name: Domain keywords by region + primary_key: + - db_name + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/schema# + properties: + country_name_en: + type: string + db_name: + type: string + domain: + type: string + 
keywords_count: + type: number + type: object + retriever: + type: SimpleRetriever + requester: + type: HttpRequester + url_base: https://api.serpstat.com/v4/ + path: / + http_method: POST + request_parameters: {} + request_headers: + X-request-sender: Airbyte + authenticator: + type: ApiKeyAuthenticator + api_token: "{{ config['api_key'] }}" + inject_into: + type: RequestOption + field_name: token + inject_into: header + error_handler: + type: CompositeErrorHandler + error_handlers: + - type: DefaultErrorHandler + response_filters: + - type: HttpResponseFilter + action: RETRY + predicate: "{{response.error.code == 32000}}" + error_message: >- + You are sending more requests per second then available for + your Serpstat plan + backoff_strategies: + - type: ExponentialBackoffStrategy + factor: 2 + request_body_json: + id: "{{ now_utc() }}" + method: SerpstatDomainProcedure.getRegionsCount + params: + sort: "{{config['sort_by']}}" + order: "{{config['sort_value']}}" + domain: "{{config['domain']}}" + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - result + - data + paginator: + type: NoPagination + - type: DeclarativeStream + name: Domain competitors + primary_key: + - domain + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/schema# + properties: + ad_keywords: + type: number + ads: + type: number + ads_dynamic: + type: number + common: + type: number + domain: + type: string + down_keywords: + type: number + intersected: + type: number + keywords: + type: number + keywords_dynamic: + type: number + missing: + type: number + new_keywords: + type: number + new_relevance: + type: number + not_intersected: + type: number + our_relevance: + type: number + out_keywords: + type: number + relevance: + type: number + rised_keywords: + type: number + traff: + type: number + traff_dynamic: + type: number + visible: + type: number + visible_dynamic: + type: number + type: object + retriever: + 
type: SimpleRetriever + requester: + type: HttpRequester + url_base: https://api.serpstat.com/v4/ + path: / + http_method: POST + request_parameters: {} + request_headers: + X-request-sender: Airbyte + authenticator: + type: ApiKeyAuthenticator + api_token: "{{ config['api_key'] }}" + inject_into: + type: RequestOption + field_name: token + inject_into: header + error_handler: + type: CompositeErrorHandler + error_handlers: + - type: DefaultErrorHandler + response_filters: + - type: HttpResponseFilter + action: RETRY + predicate: "{{response.error.code == 32000}}" + error_message: >- + You are sending more requests per second then available for + your Serpstat plan + backoff_strategies: + - type: ExponentialBackoffStrategy + factor: 2 + request_body_json: + id: "{{ now_utc() }}" + method: SerpstatDomainProcedure.getCompetitors + params: + se: "{{config['region_id']}}" + size: "{{config['page_size']}}" + sort: + "{{config['sort_by']}}": "{{config['sort_value']}}" + domain: "{{config['domain']}}" + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - result + - data + paginator: + type: NoPagination + - type: DeclarativeStream + name: Domain top pages + primary_key: [] + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/schema# + properties: + facebook_shares: + type: number + organic_keywords: + type: number + potencial_traff: + type: number + url: + type: string + type: object + retriever: + type: SimpleRetriever + requester: + type: HttpRequester + url_base: https://api.serpstat.com/v4/ + path: / + http_method: POST + request_parameters: {} + request_headers: + X-request-sender: Airbyte + authenticator: + type: ApiKeyAuthenticator + api_token: "{{ config['api_key'] }}" + inject_into: + type: RequestOption + field_name: token + inject_into: header + error_handler: + type: CompositeErrorHandler + error_handlers: + - type: DefaultErrorHandler + response_filters: + - type: HttpResponseFilter 
+ action: RETRY + predicate: "{{response.error.code == 32000}}" + error_message: >- + You are sending more requests per second than available for + your Serpstat plan + backoff_strategies: + - type: ExponentialBackoffStrategy + factor: 2 + request_body_json: + id: "{{ now_utc() }}" + method: SerpstatDomainProcedure.getTopUrls + params: + se: "{{config['region_id']}}" + page: "{{(next_page_token['next_page_token'] or 0) + 1}}" + size: "{{config['page_size']}}" + sort: + "{{config['sort_by']}}": "{{config['sort_value']}}" + domain: "{{config['domain']}}" + filters: + "{{config['filter_by']}}": "{{config['filter_value']}}" + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - result + - data + paginator: + type: DefaultPaginator + page_token_option: + type: RequestOption + inject_into: header + field_name: X-page + pagination_strategy: + type: CursorPagination + cursor_value: "{{response.result.summary_info.page}}" + stop_condition: "{{response.result.summary_info.page > config['pages_to_fetch'] - 1}}" + +metadata: + autoImportSchema: + Domain history: true + Domains summary: true + Domain keywords: true + Domain keywords by region: true + Domain competitors: true + Domain top pages: true diff --git a/airbyte-integrations/connectors/source-serpstat/source_serpstat/source.py b/airbyte-integrations/connectors/source-serpstat/source_serpstat/source.py new file mode 100644 index 0000000000000..4acdab3e2cec5 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/source_serpstat/source.py @@ -0,0 +1,18 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource + +""" +This file provides the necessary constructs to interpret a provided declarative YAML configuration file into +source connector. + +WARNING: Do not modify this file.
+""" + + +# Declarative source: the connector is built from the YAML configuration in manifest.yaml +class SourceSerpstat(YamlDeclarativeSource): + def __init__(self): + super().__init__(**{"path_to_yaml": "manifest.yaml"}) diff --git a/airbyte-integrations/connectors/source-serpstat/source_serpstat/spec.yaml b/airbyte-integrations/connectors/source-serpstat/source_serpstat/spec.yaml new file mode 100644 index 0000000000000..a41bbfe5ce514 --- /dev/null +++ b/airbyte-integrations/connectors/source-serpstat/source_serpstat/spec.yaml @@ -0,0 +1,87 @@ +connectionSpecification: + $schema: http://json-schema.org/draft-07/schema# + type: object + required: + - api_key + properties: + api_key: + type: string + title: API Key + airbyte_secret: true + order: 0 + description: >- + Serpstat API key can be found here: + https://serpstat.com/users/profile/ + domain: + type: string + order: 1 + title: Domain + default: serpstat.com + description: The domain name to get data for (ex. serpstat.com) + page_size: + type: integer + order: 2 + title: Page size + default: 10 + description: >- + The number of data rows per page to be returned. Each data row can + contain multiple data points. The max value is 1000. Reducing the size + of the page will result in fewer API credits spent. + domains: + type: array + order: 3 + title: Domains + description: >- + The list of domains that will be used in streams that support batch + operations + filter_by: + type: string + order: 4 + title: Filter by + description: >- + The field name by which the results should be filtered. Filtering the + results will result in fewer API credits spent. Each stream has + different filtering options. See https://serpstat.com/api/ for more + details. + filter_value: + type: string + order: 5 + title: Filter value + description: >- + The value of the field to filter by. Each stream has different + filtering options. See https://serpstat.com/api/ for more details.
+ sort_by: + type: string + order: 6 + title: Sort by + description: >- + The field name by which the results should be sorted. Each stream has + different sorting options. See https://serpstat.com/api/ for more + details. + sort_value: + type: string + order: 7 + title: Sort value + description: >- + The value of the field to sort by. Each stream has different sorting + options. See https://serpstat.com/api/ for more details. + pages_to_fetch: + type: integer + order: 8 + title: Pages to fetch + default: 1 + description: >- + The number of pages that should be fetched. All results will be + obtained if left blank. Reducing the number of pages will result in + fewer API credits spent. + region_id: + type: string + order: 9 + title: Region ID + default: g_us + description: >- + The ID of a region to get data from in the form of a two-letter + country code prepended with the g_ prefix. See the list of supported + region IDs here: https://serpstat.com/api/664-request-parameters-v4/. + additionalProperties: true +documentationUrl: https://docs.airbyte.com/integrations/sources/serpstat \ No newline at end of file diff --git a/docs/integrations/sources/serpstat.md b/docs/integrations/sources/serpstat.md new file mode 100644 index 0000000000000..55ea29d539db3 --- /dev/null +++ b/docs/integrations/sources/serpstat.md @@ -0,0 +1,52 @@ +# Serpstat + +This page contains the setup guide and reference information for the Serpstat source connector. + +## Setup guide +### Step 1: Get Serpstat API key + +#### For new Serpstat users + +1. Create a new [Serpstat account](https://serpstat.com/signup/?utm_source=). +2. Go to [My account](https://serpstat.com/users/profile/) page and click **Get API key**. +3. Follow the instructions to get the API key. +4. Click **Copy** to copy the API key. + +#### For existing Serpstat users + +Go to [My account](https://serpstat.com/users/profile/) page and click **Copy** to copy the API key. 
+ +### Step 2: Set up the Serpstat connector in Airbyte + +1. [Log in to your Airbyte Cloud](https://cloud.airbyte.io/workspaces) or Airbyte Open Source account. +2. Click **Sources** and then click **+ New source**. +3. On the **Set up the source** page, select **Serpstat** from the **Source type** dropdown. +4. Enter a name for your connector. +5. Enter the API key. +6. Expand **Optional fields** and fill them in. Each API response consumes API credits available to your Serpstat subscription plan. To limit the number of API credits consumed, decrease the **Page size** and **Pages to fetch** options. +7. Click **Set up source**. + +## Supported sync modes + +The Serpstat source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): + +* Full refresh + +## Supported Streams + +* [Domains summary](https://serpstat.com/api/412-summarnij-otchet-po-domenu-v4-serpstatdomainproceduregetdomainsinfo/) +* [Domain history](https://serpstat.com/api/420-istoriya-po-domenu-v4-serpstatdomainproceduregetdomainshistory/) +* [Domain keywords](https://serpstat.com/api/584-top-search-engine-keywords-by-v4-domain-serpstatdomainproceduregetdomainkeywords/) +* [Domain keywords by region](https://serpstat.com/api/sorting-the-domain-by-keywords/) +* [Domain competitors](https://serpstat.com/api/590-domain-competitors-in-v4-search-result-serpstatdomainproceduregetcompetitors/) +* [Domain top pages](https://serpstat.com/api/588-domain-top-urls-v4-serpstatdomainproceduregettopurls/) + +## Performance considerations + +The maximum sync speed is limited by the number of requests per second per API key. You can check this limit in your [Serpstat account](https://serpstat.com/users/profile/).
+ +## Changelog + +| Version | Date | Pull Request | Subject | +|:--------| :--------- | :------------------------------------------------------- | :-------------------------------------------------------------------------------------------- | +| 0.1.0 | 2023-08-21 | [28147](https://github.com/airbytehq/airbyte/pull/28147) | Release Serpstat Connector |