31 changes: 24 additions & 7 deletions domaintools/api.py
@@ -3,6 +3,7 @@
from hmac import new as hmac
import re

from domaintools.constants import Endpoint, ENDPOINT_TO_SOURCE_MAP, OutputFormat
from domaintools._version import current as version
from domaintools.results import (
GroupedIterable,
@@ -18,6 +19,8 @@
filter_by_field,
DTResultFilter,
)
from domaintools.utils import validate_feeds_parameters


AVAILABLE_KEY_SIGN_HASHES = ["sha1", "sha256", "md5"]

@@ -1088,15 +1091,29 @@ def nad(self, **kwargs):

def domainrdap(self, **kwargs):
"""Returns changes to global domain registration information, populated by the Registration Data Access Protocol (RDAP)"""
sessionID = kwargs.get("sessionID")
after = kwargs.get("after")
before = kwargs.get("before")
if not (sessionID or after or before):
raise ValueError("sessionID or after or before must be defined")
validate_feeds_parameters(kwargs)
endpoint = kwargs.pop("endpoint", Endpoint.FEED.value)
source = ENDPOINT_TO_SOURCE_MAP.get(endpoint)

return self._results(
f"domain-registration-data-access-protocol-feed-({source.value})",
f"v1/{endpoint}/domainrdap/",
response_path=(),
**kwargs,
)

def domaindiscovery(self, **kwargs):
"""Returns new domains as they are either discovered in domain registration information, observed by our global sensor network, or reported by trusted third parties"""
validate_feeds_parameters(kwargs)
endpoint = kwargs.pop("endpoint", Endpoint.FEED.value)
source = ENDPOINT_TO_SOURCE_MAP.get(endpoint)
if endpoint == Endpoint.DOWNLOAD.value or kwargs.get("output_format", OutputFormat.JSONL.value) != OutputFormat.CSV.value:
# The headers param is only allowed with the Feed API when using CSV format
kwargs.pop("headers", None)

return self._results(
"domain-registration-data-access-protocol-feed-(api)",
"v1/feed/domainrdap/",
f"real-time-domain-discovery-feed-({source.value})",
f"v1/{endpoint}/domaindiscovery/",
response_path=(),
**kwargs,
)
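
Both feed wrappers now follow the same pattern: validate the window parameters, resolve the chosen endpoint to its source, and build the product name from it. A minimal usage sketch, assuming an API instance constructed with valid credentials; the parameter values below are illustrative only:

from domaintools.api import API

api = API("username", "api_key")

# Newly discovered domains from the last hour via the Feed API (the default endpoint).
feed_results = api.domaindiscovery(after="-3600")

# The same window served from the download (S3-backed) endpoint.
download_results = api.domaindiscovery(after="-3600", endpoint="download")

# RDAP changes tied to a previously issued session ID.
rdap_results = api.domainrdap(sessionID="my-session-id")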
43 changes: 42 additions & 1 deletion domaintools/base_results.py
@@ -4,8 +4,12 @@
import re
import time
import logging

from copy import deepcopy
from datetime import datetime
from httpx import Client

from domaintools.constants import OutputFormat, HEADER_ACCEPT_KEY_CSV_FORMAT
from domaintools.exceptions import (
BadRequestException,
InternalServerErrorException,
@@ -18,7 +22,6 @@
)
from domaintools.utils import get_feeds_products_list

from httpx import Client

try: # pragma: no cover
from collections.abc import MutableMapping, MutableSequence
@@ -90,6 +93,18 @@ def _make_request(self):
patch_data = self.kwargs.copy()
patch_data.update(self.api.extra_request_params)
return session.patch(url=self.url, json=patch_data)
elif self.product in get_feeds_products_list():
parameters = deepcopy(self.kwargs)
parameters.pop("output_format", None)
parameters.pop(
"format", None
)  # For some unknown reason, "format" gets populated for feeds endpoints even when it is not passed in the CLI params, so we remove it here as well. This only happens when using the CLI.
headers = {}
if self.kwargs.get("output_format", OutputFormat.JSONL.value) == OutputFormat.CSV.value:
parameters["headers"] = int(bool(self.kwargs.get("headers", False)))
headers["accept"] = HEADER_ACCEPT_KEY_CSV_FORMAT

return session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
else:
return session.get(url=self.url, params=self.kwargs, **self.api.extra_request_params)

@@ -259,6 +274,32 @@ def json(self):
**self.kwargs,
)

@property
def jsonl(self):
self.kwargs.pop("format", None)
return self.__class__(
format="jsonl",
product=self.product,
url=self.url,
items_path=self.items_path,
response_path=self.response_path,
api=self.api,
**self.kwargs,
)

@property
def csv(self):
self.kwargs.pop("format", None)
return self.__class__(
format="csv",
product=self.product,
url=self.url,
items_path=self.items_path,
response_path=self.response_path,
api=self.api,
**self.kwargs,
)

@property
def xml(self):
self.kwargs.pop("format", None)
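The new jsonl and csv properties mirror the existing json/xml accessors: each builds a fresh results object for the same query with only the output format swapped. A rough sketch of intended use, continuing the illustrative example above:

results = api.domaindiscovery(after="-3600")
as_csv = results.csv      # same query, rebuilt with format="csv"
as_jsonl = results.jsonl  # same query, rebuilt with format="jsonl"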
23 changes: 22 additions & 1 deletion domaintools/cli/api.py
@@ -8,6 +8,7 @@
from typing import Optional, Dict, Tuple
from rich.progress import Progress, SpinnerColumn, TextColumn

from domaintools.constants import Endpoint, OutputFormat
from domaintools.api import API
from domaintools.exceptions import ServiceException
from domaintools.cli.utils import get_file_extension
@@ -32,6 +33,20 @@ def validate_format_input(value: str):
raise typer.BadParameter(f"{value} is not in available formats: {VALID_FORMATS}")
return value

@staticmethod
def validate_feeds_format_input(value: str):
VALID_FEEDS_FORMATS = ("jsonl", "csv")
if value not in VALID_FEEDS_FORMATS:
raise typer.BadParameter(f"{value} is not in available formats: {VALID_FEEDS_FORMATS}")
return value

@staticmethod
def validate_endpoint_input(value: str):
VALID_ENDPOINTS = (Endpoint.FEED.value, Endpoint.DOWNLOAD.value)
if value not in VALID_ENDPOINTS:
raise typer.BadParameter(f"{value} is not in available endpoints: {VALID_ENDPOINTS}")
return value

@staticmethod
def validate_after_or_before_input(value: str):
if value is None or value.replace("-", "").isdigit():
@@ -152,7 +167,13 @@ def run(cls, name: str, params: Optional[Dict] = {}, **kwargs):
"""
try:
rate_limit = params.pop("rate_limit", False)
response_format = params.pop("format", "json")
response_format = (
params.pop("format", "json")
if params.get("format", None)
else params.get(
"output_format", OutputFormat.JSONL.value
)  # RTUF feeds endpoints use output_format rather than format to keep them separate from the other endpoints; the value is needed later in the pipeline.
)
out_file = params.pop("out_file", sys.stdout)
verify_ssl = params.pop("no_verify_ssl", False)
always_sign_api_key = params.pop("no_sign_api_key", False)
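The new validators are plain static methods, so their behavior can be checked directly; a small sketch with hypothetical values:

import typer

from domaintools.cli.api import DTCLICommand

DTCLICommand.validate_feeds_format_input("csv")    # returns "csv"
DTCLICommand.validate_endpoint_input("download")   # returns "download"

try:
    DTCLICommand.validate_endpoint_input("bogus")
except typer.BadParameter as err:
    print(err)  # bogus is not in available endpoints: ('feed', 'download')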
83 changes: 83 additions & 0 deletions domaintools/cli/commands/feeds.py
@@ -6,6 +6,7 @@
from domaintools.cli.api import DTCLICommand
from domaintools.cli.utils import get_cli_helptext_by_name
from domaintools.cli import constants as c
from domaintools.constants import Endpoint, OutputFormat


@dt_cli.command(
@@ -158,6 +159,13 @@ def feeds_domainrdap(
"--no-sign-api-key",
help="Skip signing of api key",
),
endpoint: str = typer.Option(
Endpoint.FEED.value,
"-e",
"--endpoint",
help=f"Valid endpoints: [{Endpoint.FEED.value}, {Endpoint.DOWNLOAD.value}]",
callback=DTCLICommand.validate_endpoint_input,
),
sessionID: str = typer.Option(
None,
"--session-id",
@@ -188,3 +196,78 @@
),
):
DTCLICommand.run(name=c.FEEDS_DOMAINRDAP, params=ctx.params)


@dt_cli.command(
name=c.FEEDS_DOMAINDISCOVERY,
help=get_cli_helptext_by_name(command_name=c.FEEDS_DOMAINDISCOVERY),
)
def feeds_domaindiscovery(
ctx: typer.Context,
user: str = typer.Option(None, "-u", "--user", help="Domaintools API Username."),
key: str = typer.Option(None, "-k", "--key", help="DomainTools API key"),
creds_file: str = typer.Option(
"~/.dtapi",
"-c",
"--credfile",
help="Optional file with API username and API key, one per line.",
),
no_verify_ssl: bool = typer.Option(
False,
"--no-verify-ssl",
help="Skip verification of SSL certificate when making HTTPs API calls",
),
no_sign_api_key: bool = typer.Option(
False,
"--no-sign-api-key",
help="Skip signing of api key",
),
output_format: str = typer.Option(
"jsonl",
"-f",
"--format",
help=f"Output format in [{OutputFormat.JSONL.value}, {OutputFormat.CSV.value}]",
callback=DTCLICommand.validate_feeds_format_input,
),
endpoint: str = typer.Option(
Endpoint.FEED.value,
"-e",
"--endpoint",
help=f"Valid endpoints: [{Endpoint.FEED.value}, {Endpoint.DOWNLOAD.value}]",
callback=DTCLICommand.validate_endpoint_input,
),
sessionID: str = typer.Option(
None,
"--session-id",
help="Unique identifier for the session",
),
after: str = typer.Option(
None,
"--after",
help="Start of the time window, relative to the current time in seconds, for which data will be provided",
callback=DTCLICommand.validate_after_or_before_input,
),
before: str = typer.Option(
None,
"--before",
help="The end of the query window in seconds, relative to the current time, inclusive",
callback=DTCLICommand.validate_after_or_before_input,
),
domain: str = typer.Option(
None,
"-d",
"--domain",
help="A string value used to filter feed results",
),
headers: bool = typer.Option(
False,
"--headers",
help="Adds a header to the first line of response when text/csv is set in header parameters",
),
top: str = typer.Option(
None,
"--top",
help="Number of results to return in the response payload. This is ignored in download endpoint",
),
):
DTCLICommand.run(name=c.FEEDS_DOMAINDISCOVERY, params=ctx.params)
1 change: 1 addition & 0 deletions domaintools/cli/constants.py
@@ -47,3 +47,4 @@
FEEDS_NAD = "nad"
FEEDS_NOD = "nod"
FEEDS_DOMAINRDAP = "domainrdap"
FEEDS_DOMAINDISCOVERY = "domaindiscovery"
24 changes: 24 additions & 0 deletions domaintools/constants.py
@@ -0,0 +1,24 @@
from enum import Enum


class Endpoint(Enum):
FEED = "feed"
DOWNLOAD = "download"


class Source(Enum):
API = "api"
S3 = "s3"


class OutputFormat(Enum):
JSONL = "jsonl"
CSV = "csv"


HEADER_ACCEPT_KEY_CSV_FORMAT = "text/csv"

ENDPOINT_TO_SOURCE_MAP = {
Endpoint.FEED.value: Source.API,
Endpoint.DOWNLOAD.value: Source.S3,
}
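
These constants tie the two halves together: the endpoint chosen by the caller selects the source label that api.py folds into the product name. Mirroring the pattern used in api.py above:

from domaintools.constants import Endpoint, ENDPOINT_TO_SOURCE_MAP

endpoint = Endpoint.DOWNLOAD.value             # "download"
source = ENDPOINT_TO_SOURCE_MAP.get(endpoint)  # Source.S3
product = f"real-time-domain-discovery-feed-({source.value})"
# -> "real-time-domain-discovery-feed-(s3)"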
18 changes: 17 additions & 1 deletion domaintools/utils.py
@@ -1,7 +1,8 @@
from datetime import datetime

from typing import Optional

from domaintools.constants import Endpoint, OutputFormat

import re


@@ -176,4 +177,19 @@ def get_feeds_products_list():
"newly-active-domains-feed-(api)",
"newly-observed-domains-feed-(api)",
"domain-registration-data-access-protocol-feed-(api)",
"domain-registration-data-access-protocol-feed-(s3)",
"real-time-domain-discovery-feed-(api)",
"real-time-domain-discovery-feed-(s3)",
]


def validate_feeds_parameters(params):
sessionID = params.get("sessionID")
after = params.get("after")
before = params.get("before")
if not (sessionID or after or before):
raise ValueError("sessionID or after or before must be defined")

format = params.get("output_format")
if params.get("endpoint") == Endpoint.DOWNLOAD.value and format == OutputFormat.CSV.value:
raise ValueError(f"{format} format is not available in {Endpoint.DOWNLOAD.value} API.")
18 changes: 16 additions & 2 deletions domaintools_async/__init__.py
@@ -1,11 +1,14 @@
"""Adds async capabilities to the base product object"""

import asyncio

from copy import deepcopy
from httpx import AsyncClient

from domaintools.base_results import Results

from domaintools.exceptions import ServiceUnavailableException, ServiceException
from domaintools.constants import OutputFormat, HEADER_ACCEPT_KEY_CSV_FORMAT
from domaintools.exceptions import ServiceUnavailableException
from domaintools.utils import get_feeds_products_list


class _AIter(object):
@@ -49,6 +52,17 @@ async def _make_async_request(self, session):
patch_data = self.kwargs.copy()
patch_data.update(self.api.extra_request_params)
results = await session.patch(url=self.url, json=patch_data)
elif self.product in get_feeds_products_list():
parameters = deepcopy(self.kwargs)
parameters.pop("output_format", None)
parameters.pop(
"format", None
)  # For some unknown reason, "format" gets populated for feeds endpoints even when it is not passed in the CLI params, so we remove it here as well. This only happens when using the CLI.
headers = {}
if self.kwargs.get("output_format", OutputFormat.JSONL.value) == OutputFormat.CSV.value:
parameters["headers"] = int(bool(self.kwargs.get("headers", False)))
headers["accept"] = HEADER_ACCEPT_KEY_CSV_FORMAT
results = await session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
else:
results = await session.get(url=self.url, params=self.kwargs, **self.api.extra_request_params)
if results: