31 changes: 24 additions & 7 deletions domaintools/api.py
@@ -3,6 +3,7 @@
from hmac import new as hmac
import re

from domaintools.constants import Endpoint, ENDPOINT_TO_SOURCE_MAP, OutputFormat
from domaintools._version import current as version
from domaintools.results import (
GroupedIterable,
@@ -18,6 +19,8 @@
filter_by_field,
DTResultFilter,
)
from domaintools.utils import validate_feeds_parameters


AVAILABLE_KEY_SIGN_HASHES = ["sha1", "sha256", "md5"]

@@ -1088,15 +1091,29 @@ def nad(self, **kwargs):

def domainrdap(self, **kwargs):
"""Returns changes to global domain registration information, populated by the Registration Data Access Protocol (RDAP)"""
sessionID = kwargs.get("sessionID")
after = kwargs.get("after")
before = kwargs.get("before")
if not (sessionID or after or before):
raise ValueError("sessionID or after or before must be defined")
validate_feeds_parameters(kwargs)
endpoint = kwargs.pop("endpoint", Endpoint.FEED.value)
source = ENDPOINT_TO_SOURCE_MAP.get(endpoint)

return self._results(
f"domain-registration-data-access-protocol-feed-({source.value})",
f"v1/{endpoint}/domainrdap/",
response_path=(),
**kwargs,
)

def domaindiscovery(self, **kwargs):
"""Returns new domains as they are either discovered in domain registration information, observed by our global sensor network, or reported by trusted third parties"""
validate_feeds_parameters(kwargs)
endpoint = kwargs.pop("endpoint", Endpoint.FEED.value)
source = ENDPOINT_TO_SOURCE_MAP.get(endpoint)
if endpoint == Endpoint.DOWNLOAD.value or kwargs.get("output_format", OutputFormat.JSONL.value) != OutputFormat.CSV.value:
# The headers param is only allowed with the Feed API when using CSV format
kwargs.pop("headers", None)

return self._results(
"domain-registration-data-access-protocol-feed-(api)",
"v1/feed/domainrdap/",
f"real-time-domain-discovery-feed-({source.value})",
f"v1/{endpoint}/domaindiscovery/",
response_path=(),
**kwargs,
)
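
Both feed wrappers now follow the same pattern: validate the window parameters, resolve the chosen endpoint to its source, and build the product name from it. A minimal usage sketch, assuming an API instance constructed with valid credentials; the parameter values below are illustrative only:

from domaintools.api import API

api = API("username", "api_key")

# Newly discovered domains from the last hour via the Feed API (the default endpoint).
feed_results = api.domaindiscovery(after="-3600")

# The same window served from the download (S3-backed) endpoint.
download_results = api.domaindiscovery(after="-3600", endpoint="download")

# RDAP changes tied to a previously issued session ID.
rdap_results = api.domainrdap(sessionID="my-session-id")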
43 changes: 42 additions & 1 deletion domaintools/base_results.py
@@ -4,8 +4,12 @@
import re
import time
import logging

from copy import deepcopy
from datetime import datetime
from httpx import Client

from domaintools.constants import OutputFormat, HEADER_ACCEPT_KEY_CSV_FORMAT
from domaintools.exceptions import (
BadRequestException,
InternalServerErrorException,
@@ -18,7 +22,6 @@
)
from domaintools.utils import get_feeds_products_list

from httpx import Client

try: # pragma: no cover
from collections.abc import MutableMapping, MutableSequence
@@ -90,6 +93,18 @@ def _make_request(self):
patch_data = self.kwargs.copy()
patch_data.update(self.api.extra_request_params)
return session.patch(url=self.url, json=patch_data)
elif self.product in get_feeds_products_list():
parameters = deepcopy(self.kwargs)
parameters.pop("output_format", None)
parameters.pop(
"format", None
)  # For some unknown reason, "format" gets populated for feeds endpoints even when it is not passed in the CLI params, so we remove it here as well. This only happens when using the CLI.
headers = {}
if self.kwargs.get("output_format", OutputFormat.JSONL.value) == OutputFormat.CSV.value:
parameters["headers"] = int(bool(self.kwargs.get("headers", False)))
headers["accept"] = HEADER_ACCEPT_KEY_CSV_FORMAT

return session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
else:
return session.get(url=self.url, params=self.kwargs, **self.api.extra_request_params)

@@ -259,6 +274,32 @@ def json(self):
**self.kwargs,
)

@property
def jsonl(self):
self.kwargs.pop("format", None)
return self.__class__(
format="jsonl",
product=self.product,
url=self.url,
items_path=self.items_path,
response_path=self.response_path,
api=self.api,
**self.kwargs,
)

@property
def csv(self):
self.kwargs.pop("format", None)
return self.__class__(
format="csv",
product=self.product,
url=self.url,
items_path=self.items_path,
response_path=self.response_path,
api=self.api,
**self.kwargs,
)

@property
def xml(self):
self.kwargs.pop("format", None)
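The new jsonl and csv properties mirror the existing json/xml accessors: each builds a fresh results object for the same query with only the output format swapped. A rough sketch of intended use, continuing the illustrative example above:

results = api.domaindiscovery(after="-3600")
as_csv = results.csv      # same query, rebuilt with format="csv"
as_jsonl = results.jsonl  # same query, rebuilt with format="jsonl"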
23 changes: 22 additions & 1 deletion domaintools/cli/api.py
@@ -8,6 +8,7 @@
from typing import Optional, Dict, Tuple
from rich.progress import Progress, SpinnerColumn, TextColumn

from domaintools.constants import Endpoint, OutputFormat
from domaintools.api import API
from domaintools.exceptions import ServiceException
from domaintools.cli.utils import get_file_extension
@@ -32,6 +33,20 @@ def validate_format_input(value: str):
raise typer.BadParameter(f"{value} is not in available formats: {VALID_FORMATS}")
return value

@staticmethod
def validate_feeds_format_input(value: str):
VALID_FEEDS_FORMATS = ("jsonl", "csv")
if value not in VALID_FEEDS_FORMATS:
raise typer.BadParameter(f"{value} is not in available formats: {VALID_FEEDS_FORMATS}")
return value

@staticmethod
def validate_endpoint_input(value: str):
VALID_ENDPOINTS = (Endpoint.FEED.value, Endpoint.DOWNLOAD.value)
if value not in VALID_ENDPOINTS:
raise typer.BadParameter(f"{value} is not in available endpoints: {VALID_ENDPOINTS}")
return value

@staticmethod
def validate_after_or_before_input(value: str):
if value is None or value.replace("-", "").isdigit():
@@ -152,7 +167,13 @@ def run(cls, name: str, params: Optional[Dict] = {}, **kwargs):
"""
try:
rate_limit = params.pop("rate_limit", False)
response_format = params.pop("format", "json")
response_format = (
params.pop("format", "json")
if params.get("format", None)
else params.get(
"output_format", OutputFormat.JSONL.value
)  # RTUF feeds endpoints use output_format rather than format to keep them separate from the other endpoints; the value is needed later in the pipeline.
)
out_file = params.pop("out_file", sys.stdout)
verify_ssl = params.pop("no_verify_ssl", False)
always_sign_api_key = params.pop("no_sign_api_key", False)
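The new validators are plain static methods, so their behavior can be checked directly; a small sketch with hypothetical values:

import typer

from domaintools.cli.api import DTCLICommand

DTCLICommand.validate_feeds_format_input("csv")    # returns "csv"
DTCLICommand.validate_endpoint_input("download")   # returns "download"

try:
    DTCLICommand.validate_endpoint_input("bogus")
except typer.BadParameter as err:
    print(err)  # bogus is not in available endpoints: ('feed', 'download')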
83 changes: 83 additions & 0 deletions domaintools/cli/commands/feeds.py
@@ -6,6 +6,7 @@
from domaintools.cli.api import DTCLICommand
from domaintools.cli.utils import get_cli_helptext_by_name
from domaintools.cli import constants as c
from domaintools.constants import Endpoint, OutputFormat


@dt_cli.command(
@@ -158,6 +159,13 @@ def feeds_domainrdap(
"--no-sign-api-key",
help="Skip signing of api key",
),
endpoint: str = typer.Option(
Endpoint.FEED.value,
"-e",
"--endpoint",
help=f"Valid endpoints: [{Endpoint.FEED.value}, {Endpoint.DOWNLOAD.value}]",
callback=DTCLICommand.validate_endpoint_input,
),
sessionID: str = typer.Option(
None,
"--session-id",
@@ -188,3 +196,78 @@
),
):
DTCLICommand.run(name=c.FEEDS_DOMAINRDAP, params=ctx.params)


@dt_cli.command(
name=c.FEEDS_DOMAINDISCOVERY,
help=get_cli_helptext_by_name(command_name=c.FEEDS_DOMAINDISCOVERY),
)
def feeds_domaindiscovery(
ctx: typer.Context,
user: str = typer.Option(None, "-u", "--user", help="Domaintools API Username."),
key: str = typer.Option(None, "-k", "--key", help="DomainTools API key"),
creds_file: str = typer.Option(
"~/.dtapi",
"-c",
"--credfile",
help="Optional file with API username and API key, one per line.",
),
no_verify_ssl: bool = typer.Option(
False,
"--no-verify-ssl",
help="Skip verification of SSL certificate when making HTTPs API calls",
),
no_sign_api_key: bool = typer.Option(
False,
"--no-sign-api-key",
help="Skip signing of api key",
),
output_format: str = typer.Option(
"jsonl",
"-f",
"--format",
help=f"Output format in [{OutputFormat.JSONL.value}, {OutputFormat.CSV.value}]",
callback=DTCLICommand.validate_feeds_format_input,
),
endpoint: str = typer.Option(
Endpoint.FEED.value,
"-e",
"--endpoint",
help=f"Valid endpoints: [{Endpoint.FEED.value}, {Endpoint.DOWNLOAD.value}]",
callback=DTCLICommand.validate_endpoint_input,
),
sessionID: str = typer.Option(
None,
"--session-id",
help="Unique identifier for the session",
),
after: str = typer.Option(
None,
"--after",
help="Start of the time window, relative to the current time in seconds, for which data will be provided",
callback=DTCLICommand.validate_after_or_before_input,
),
before: str = typer.Option(
None,
"--before",
help="The end of the query window in seconds, relative to the current time, inclusive",
callback=DTCLICommand.validate_after_or_before_input,
),
domain: str = typer.Option(
None,
"-d",
"--domain",
help="A string value used to filter feed results",
),
headers: bool = typer.Option(
False,
"--headers",
help="Adds a header to the first line of response when text/csv is set in header parameters",
),
top: str = typer.Option(
None,
"--top",
help="Number of results to return in the response payload. This is ignored in download endpoint",
),
):
DTCLICommand.run(name=c.FEEDS_DOMAINDISCOVERY, params=ctx.params)
1 change: 1 addition & 0 deletions domaintools/cli/constants.py
@@ -47,3 +47,4 @@
FEEDS_NAD = "nad"
FEEDS_NOD = "nod"
FEEDS_DOMAINRDAP = "domainrdap"
FEEDS_DOMAINDISCOVERY = "domaindiscovery"
24 changes: 24 additions & 0 deletions domaintools/constants.py
@@ -0,0 +1,24 @@
from enum import Enum


class Endpoint(Enum):
FEED = "feed"
DOWNLOAD = "download"


class Source(Enum):
API = "api"
S3 = "s3"


class OutputFormat(Enum):
JSONL = "jsonl"
CSV = "csv"


HEADER_ACCEPT_KEY_CSV_FORMAT = "text/csv"

ENDPOINT_TO_SOURCE_MAP = {
Endpoint.FEED.value: Source.API,
Endpoint.DOWNLOAD.value: Source.S3,
}
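
These constants tie the two halves together: the endpoint chosen by the caller selects the source label that api.py folds into the product name. Mirroring the pattern used in api.py above:

from domaintools.constants import Endpoint, ENDPOINT_TO_SOURCE_MAP

endpoint = Endpoint.DOWNLOAD.value             # "download"
source = ENDPOINT_TO_SOURCE_MAP.get(endpoint)  # Source.S3
product = f"real-time-domain-discovery-feed-({source.value})"
# -> "real-time-domain-discovery-feed-(s3)"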
18 changes: 17 additions & 1 deletion domaintools/utils.py
@@ -1,7 +1,8 @@
from datetime import datetime

from typing import Optional

from domaintools.constants import Endpoint, OutputFormat

import re


@@ -176,4 +177,19 @@ def get_feeds_products_list():
"newly-active-domains-feed-(api)",
"newly-observed-domains-feed-(api)",
"domain-registration-data-access-protocol-feed-(api)",
"domain-registration-data-access-protocol-feed-(s3)",
"real-time-domain-discovery-feed-(api)",
"real-time-domain-discovery-feed-(s3)",
]


def validate_feeds_parameters(params):
sessionID = params.get("sessionID")
after = params.get("after")
before = params.get("before")
if not (sessionID or after or before):
raise ValueError("sessionID or after or before must be defined")

format = params.get("output_format")
if params.get("endpoint") == Endpoint.DOWNLOAD.value and format == OutputFormat.CSV.value:
raise ValueError(f"{format} format is not available in {Endpoint.DOWNLOAD.value} API.")
18 changes: 16 additions & 2 deletions domaintools_async/__init__.py
@@ -1,11 +1,14 @@
"""Adds async capabilities to the base product object"""

import asyncio

from copy import deepcopy
from httpx import AsyncClient

from domaintools.base_results import Results

from domaintools.exceptions import ServiceUnavailableException, ServiceException
from domaintools.constants import OutputFormat, HEADER_ACCEPT_KEY_CSV_FORMAT
from domaintools.exceptions import ServiceUnavailableException
from domaintools.utils import get_feeds_products_list


class _AIter(object):
@@ -49,6 +52,17 @@ async def _make_async_request(self, session):
patch_data = self.kwargs.copy()
patch_data.update(self.api.extra_request_params)
results = await session.patch(url=self.url, json=patch_data)
elif self.product in get_feeds_products_list():
parameters = deepcopy(self.kwargs)
parameters.pop("output_format", None)
parameters.pop(
"format", None
)  # For some unknown reason, "format" gets populated for feeds endpoints even when it is not passed in the CLI params, so we remove it here as well. This only happens when using the CLI.
headers = {}
if self.kwargs.get("output_format", OutputFormat.JSONL.value) == OutputFormat.CSV.value:
parameters["headers"] = int(bool(self.kwargs.get("headers", False)))
headers["accept"] = HEADER_ACCEPT_KEY_CSV_FORMAT
results = await session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
else:
results = await session.get(url=self.url, params=self.kwargs, **self.api.extra_request_params)
if results: