Skip to content

Commit 11b1357

Browse files
authored
Merge pull request #147 from DomainTools/IDEV-2013-update-help-text-and-readme
IDEV-2013: Update help text and readme
2 parents 50b8b84 + 20e3820 commit 11b1357

File tree

7 files changed

+136
-59
lines changed

7 files changed

+136
-59
lines changed

README.md

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -223,16 +223,51 @@ Real-Time Threat Intelligence Feeds provide data on the different stages of the
223223
Custom parameters aside from the common `GET` Request parameters:
224224
- `endpoint` (choose either `download` or `feed` API endpoint - default is `feed`)
225225
```python
226-
api = API(USERNAME, KEY)
226+
api = API(USERNAME, KEY, always_sign_api_key=False)
227227
api.nod(endpoint="feed", **kwargs)
228228
```
229229
- `header_authentication`: by default, we're using API Header Authentication. Set this to `False` if you want to use API Key and Secret Authentication. Note that API Header Authentication cannot be used for `download` endpoints, so you need to set this to `False` when calling `download` API endpoints.
230230
```python
231-
api = API(USERNAME, KEY)
231+
api = API(USERNAME, KEY, always_sign_api_key=False)
232232
api.nod(header_authentication=False, **kwargs)
233233
```
234234
- `output_format`: (choose either `csv` or `jsonl` - default is `jsonl`). Cannot be used in `domainrdap` feeds. Additionally, `csv` is not available for `download` endpoints.
235235
```python
236-
api = API(USERNAME, KEY)
236+
api = API(USERNAME, KEY, always_sign_api_key=False)
237237
api.nod(output_format="csv", **kwargs)
238238
```
239+
240+
The Feed API standard access pattern is to periodically request the most recent feed data, as often as every 60 seconds. Specify the range of data you receive in one of two ways:
241+
242+
1. With `sessionID`: Make a call and provide a new `sessionID` parameter of your choosing. The API will return the last hour of data by default.
243+
- Each subsequent call to the API using your `sessionID` will return all data since the last.
244+
- Any single request returns a maximum of 10M results. Requests that exceed 10M results will return an HTTP 206 response code; repeat the same request (with the same `sessionID`) to receive the next tranche of data until receiving an HTTP 200 response code.
245+
2. Or, specify the time range in one of two ways:
246+
- Either an `after=-60` query parameter, where (in this example) -60 indicates the previous 60 seconds.
247+
- Or `after` and `before` query parameters for a time range, with each parameter accepting an ISO-8601 UTC formatted timestamp (a UTC date and time of the format YYYY-MM-DDThh:mm:ssZ)
248+
249+
## Handling iterative response from RTUF endpoints:
250+
251+
Since we may be dealing with large feeds datasets, the python wrapper uses a `generator` for efficient memory handling. Therefore, we need to iterate through the `generator` if we're accessing the partial results of the feeds data.
252+
253+
### Single request because the requested data is within the maximum result:
254+
```python
255+
from domaintools import API
256+
257+
api = API(USERNAME, KEY, always_sign_api_key=False)
258+
results = api.nod(sessionID="my-session-id", after=-60)
259+
260+
for result in results.response():  # generator that holds NOD feeds data for the past 60 seconds and is expected to request only once
261+
# do things to result
262+
```
263+
264+
### Multiple requests because the requested data is more than the maximum result per request:
265+
```python
266+
from domaintools import API
267+
268+
api = API(USERNAME, KEY, always_sign_api_key=False)
269+
results = api.nod(sessionID="my-session-id", after=-7200)
270+
271+
for partial_result in results.response():  # generator that holds NOD feeds data for the past 2 hours and is expected to request multiple times
272+
# do things to partial_result
273+
```

domaintools/base_results.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,9 @@ def setStatus(self, code, response=None):
182182
if callable(reason):
183183
reason = reason()
184184

185-
if code == 400:
185+
if code in (400, 422):
186186
raise BadRequestException(code, reason)
187-
elif code == 403:
187+
elif code in (401, 403):
188188
raise NotAuthorizedException(code, reason)
189189
elif code == 404:
190190
raise NotFoundException(code, reason)

domaintools/cli/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def _phisheye_termlist():
8585
c.FEEDS_NAD: "Returns back newly active domains feed.",
8686
c.FEEDS_NOD: "Returns back newly observed domains feed.",
8787
c.FEEDS_DOMAINRDAP: "Returns changes to global domain registration information, populated by the Registration Data Access Protocol (RDAP).",
88+
c.FEEDS_DOMAINDISCOVERY: "Returns new domains as they are either discovered in domain registration information, observed by our global sensor network, or reported by trusted third parties.",
8889
}
8990

9091

domaintools/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,12 @@ def validate_feeds_parameters(params):
177177
after = params.get("after")
178178
before = params.get("before")
179179
if not (sessionID or after or before):
180-
raise ValueError("sessionID or after or before must be defined")
180+
raise ValueError("sessionID or after or before must be provided")
181181

182182
format = params.get("output_format")
183-
if params.get("endpoint") == Endpoint.DOWNLOAD.value and format == OutputFormat.CSV.value:
183+
endpoint = params.get("endpoint")
184+
if endpoint == Endpoint.DOWNLOAD.value and format == OutputFormat.CSV.value:
184185
raise ValueError(f"{format} format is not available in {Endpoint.DOWNLOAD.value} API.")
186+
187+
if endpoint == Endpoint.DOWNLOAD.value and params.get("header_authentication", True):
188+
raise ValueError(f"{Endpoint.DOWNLOAD.value} API does not support header authentication. Provide api_key in the parameter")

tests/conftest.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,14 @@
11
"""Configuration for test environment"""
2+
3+
import pytest
4+
5+
6+
@pytest.fixture
7+
def test_feeds_params():
8+
return {
9+
"sessionID": "test-session-id",
10+
"after": -60,
11+
"before": -120,
12+
"output_format": "csv",
13+
"endpoint": "download",
14+
}

tests/test_api.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,3 +643,27 @@ def test_verify_response_is_a_generator():
643643
results = feeds_api.domaindiscovery(after="-60", header_authenticationn=False)
644644

645645
assert isgenerator(results.response())
646+
647+
648+
@vcr.use_cassette
649+
def test_feeds_endpoint_should_raise_error_if_download_api_using_header_auth():
650+
with pytest.raises(ValueError) as excinfo:
651+
feeds_api.domaindiscovery(after="-60", endpoint="download")
652+
653+
assert str(excinfo.value) == "download API does not support header authentication. Provide api_key in the parameter"
654+
655+
656+
@vcr.use_cassette
657+
def test_feeds_endpoint_should_raise_error_if_no_required_params():
658+
with pytest.raises(ValueError) as excinfo:
659+
feeds_api.domaindiscovery()
660+
661+
assert str(excinfo.value) == "sessionID or after or before must be provided"
662+
663+
664+
@vcr.use_cassette
665+
def test_feeds_endpoint_should_raise_error_if_asked_csv_format_for_download_api():
666+
with pytest.raises(ValueError) as excinfo:
667+
feeds_api.domaindiscovery(after="-60", output_format="csv", endpoint="download")
668+
669+
assert str(excinfo.value) == "csv format is not available in download API."

tests/test_utils.py

Lines changed: 52 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import json
2+
import pytest
3+
24
from datetime import datetime, timedelta
35

46
from tests.responses import iris_investigate_data
@@ -15,24 +17,18 @@ def test_get_domain_age():
1517
def test_get_threat_component():
1618
threat_components = [
1719
{"name": "threat_profile_malware", "risk_score": 31},
18-
{"name": "threat_profile_spam", "risk_score": 73, "threats": ["spam"]}
20+
{"name": "threat_profile_spam", "risk_score": 73, "threats": ["spam"]},
1921
]
2022
result = utils.get_threat_component(threat_components, "threat_profile_malware")
2123
assert result.get("risk_score") == 31
2224

2325

2426
def test_investigate_average_risk_score():
25-
domains = [
26-
{"domain_risk": {"risk_score": 25}},
27-
{"domain_risk": {"risk_score": 27}}
28-
]
27+
domains = [{"domain_risk": {"risk_score": 25}}, {"domain_risk": {"risk_score": 27}}]
2928
result = utils.get_average_risk_score(domains)
3029
assert result == 26
3130

32-
domains = [
33-
{"domain_risk": {"risk_score": 25}},
34-
{}
35-
]
31+
domains = [{"domain_risk": {"risk_score": 25}}, {}]
3632
result = utils.get_average_risk_score(domains)
3733
assert result == 25
3834

@@ -42,17 +38,11 @@ def test_investigate_average_risk_score():
4238

4339

4440
def test_detect_average_risk_score():
45-
domains = [
46-
{"risk_score": 25},
47-
{"risk_score": 27}
48-
]
41+
domains = [{"risk_score": 25}, {"risk_score": 27}]
4942
result = utils.get_average_risk_score(domains)
5043
assert result == 26
5144

52-
domains = [
53-
{"risk_score": 25},
54-
{"risk_score": None}
55-
]
45+
domains = [{"risk_score": 25}, {"risk_score": None}]
5646
result = utils.get_average_risk_score(domains)
5747
assert result == 25
5848

@@ -65,24 +55,15 @@ def test_investigate_average_age():
6555
two_days_ago = (datetime.now() - timedelta(days=2)).strftime("%Y-%m-%d")
6656
five_days_ago = (datetime.now() - timedelta(days=5)).strftime("%Y-%m-%d")
6757

68-
domains = [
69-
{"create_date": {"value": two_days_ago}},
70-
{"create_date": {"value": five_days_ago}}
71-
]
58+
domains = [{"create_date": {"value": two_days_ago}}, {"create_date": {"value": five_days_ago}}]
7259
result = utils.get_average_age(domains)
7360
assert result == 3
7461

75-
domains = [
76-
{"create_date": {"value": two_days_ago}},
77-
{}
78-
]
62+
domains = [{"create_date": {"value": two_days_ago}}, {}]
7963
result = utils.get_average_age(domains)
8064
assert result == 2
8165

82-
domains = [
83-
{"create_date": {"value": two_days_ago}},
84-
{"create_date": {"value": ""}}
85-
]
66+
domains = [{"create_date": {"value": two_days_ago}}, {"create_date": {"value": ""}}]
8667
result = utils.get_average_age(domains)
8768
assert result == 2
8869

@@ -94,17 +75,11 @@ def test_investigate_average_age():
9475
def test_detect_average_age():
9576
two_days_ago = int((datetime.now() - timedelta(days=2)).strftime("%Y%m%d"))
9677
five_days_ago = int((datetime.now() - timedelta(days=5)).strftime("%Y%m%d"))
97-
domains = [
98-
{"create_date": two_days_ago},
99-
{"create_date": five_days_ago}
100-
]
78+
domains = [{"create_date": two_days_ago}, {"create_date": five_days_ago}]
10179
result = utils.get_average_age(domains)
10280
assert result == 3
10381

104-
domains = [
105-
{"create_date": two_days_ago},
106-
{"create_date": None}
107-
]
82+
domains = [{"create_date": two_days_ago}, {"create_date": None}]
10883
result = utils.get_average_age(domains)
10984
assert result == 2
11085

@@ -121,26 +96,51 @@ def test_data_prune():
12196

12297
def test_find_emails():
12398
emails = utils.find_emails(json.dumps(iris_investigate_data.domaintools()))
124-
assert emails == {'abuse@enom.com', 'hostmaster@nsone.net'}
99+
assert emails == {"abuse@enom.com", "hostmaster@nsone.net"}
125100

126101

127102
def test_find_ips():
128103
ips = utils.find_ips(json.dumps(iris_investigate_data.domaintools()))
129-
assert ips == {'142.250.115.26',
130-
'142.250.141.27',
131-
'198.51.44.4',
132-
'198.51.44.68',
133-
'198.51.45.4',
134-
'198.51.45.68',
135-
'199.30.228.112',
136-
'64.233.171.26',
137-
'74.125.142.26'}
104+
assert ips == {
105+
"142.250.115.26",
106+
"142.250.141.27",
107+
"198.51.44.4",
108+
"198.51.44.68",
109+
"198.51.45.4",
110+
"198.51.45.68",
111+
"199.30.228.112",
112+
"64.233.171.26",
113+
"74.125.142.26",
114+
}
115+
138116

139117
def test_get_pivots():
140118
pivots = utils.get_pivots(iris_investigate_data.domaintools().get("results"), "")
141-
assert pivots == [
142-
['IP ADDRESS', ('199.30.228.112', 4)],
143-
['IP ASN', (17318, 111)],
144-
['IP ISP', ('DomainTools LLC', 222)]
145-
]
119+
assert pivots == [["IP ADDRESS", ("199.30.228.112", 4)], ["IP ASN", (17318, 111)], ["IP ISP", ("DomainTools LLC", 222)]]
120+
121+
122+
def test_validate_feeds_parameters_should_raise_error_if_download_api_using_header_auth(test_feeds_params):
123+
test_feeds_params["output_format"] = "jsonl"
124+
125+
with pytest.raises(ValueError) as excinfo:
126+
utils.validate_feeds_parameters(test_feeds_params)
127+
128+
assert str(excinfo.value) == "download API does not support header authentication. Provide api_key in the parameter"
129+
130+
131+
def test_validate_feeds_parameters_should_raise_error_if_no_required_params(test_feeds_params):
132+
test_feeds_params.pop("sessionID", None)
133+
test_feeds_params.pop("after", None)
134+
test_feeds_params.pop("before", None)
135+
136+
with pytest.raises(ValueError) as excinfo:
137+
utils.validate_feeds_parameters(test_feeds_params)
138+
139+
assert str(excinfo.value) == "sessionID or after or before must be provided"
140+
141+
142+
def test_validate_feeds_parameters_should_raise_error_if_asked_csv_format_for_download_api(test_feeds_params):
143+
with pytest.raises(ValueError) as excinfo:
144+
utils.validate_feeds_parameters(test_feeds_params)
146145

146+
assert str(excinfo.value) == "csv format is not available in download API."

0 commit comments

Comments
 (0)