Skip to content

Commit

Permalink
TI Providers - Az Sentinel BYOTI (#23)
Browse files Browse the repository at this point in the history
* Initial code for BYOTI provider. Plus tidying module references.

* kql_base.py provider for BYOTI

* BYOTI provider with unit tests.

Fixes for mypy warnings

* Black formatting

* Re-wrote test_tiproviders to use mocked http requests.  Simplified tests to make it easy to add new types.

Now also tests lookup_iocs for http providers.

* Bug fixes and TIProviders.ipynb Usage notebook

* Documentation updates

* Renaming AzureSentinelByoti to AzSTI.

Fixing a couple of bugs found in network queries and geoip.py

* Few more bugs and typos found in testing
  • Loading branch information
ianhelle committed Aug 23, 2019
1 parent e114eb7 commit 95fc399
Show file tree
Hide file tree
Showing 42 changed files with 14,805 additions and 416 deletions.
12,113 changes: 12,113 additions & 0 deletions docs/notebooks/TIProviders.ipynb

Large diffs are not rendered by default.

1,182 changes: 1,182 additions & 0 deletions docs/source/TIProviders.rst

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions docs/source/otherdocs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ Other Documents

CollectingLinuxAuditLogs.rst

TIProviders.rst

19 changes: 16 additions & 3 deletions docs/source/overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@ DataFrame (for dataframe input).

`IoCExtract Usage Notebook <https://github.com/Microsoft/msticpy/docs/notebooks/IoCExtract.ipynb>`__

tiproviders
~~~~~~~~~~~

:py:mod:`msticpy.sectools.tilookup`

The TILookup class can look up IoCs across multiple TI providers. Built-in
providers include AlienVault OTX, IBM XForce, VirusTotal and Azure Sentinel.

The input can be a single IoC observable or a pandas DataFrame containing
multiple observables. Depending on the provider, you may require an account
and an API key. Some providers also enforce throttling (especially for free
tiers), which might affect performing bulk lookups.

For more details see `TIProviders.rst <TIProviders.rst>`__ and
`TILookup Usage Notebook <https://github.com/Microsoft/msticpy/docs/notebooks/TIProviders.ipynb>`__

vtlookup
~~~~~~~~

Expand Down Expand Up @@ -191,9 +207,6 @@ will be refactored to separate them into their own sub-package.
To-Do Items
-----------

- Refactor data modules into separate package.
- Replace custom data schema with
`Intake <https://intake.readthedocs.io/en/latest/>`__.
- Add additional notebooks to document use of the tools.

Supported Platforms and Packages
Expand Down
2 changes: 1 addition & 1 deletion msticpy/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Version file."""
VERSION = "0.2.4"
VERSION = "0.2.5"
84 changes: 68 additions & 16 deletions msticpy/data/data_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"""Data provider loader."""
from functools import partial
from pathlib import Path
from typing import Union, Any, List, Dict
from typing import Union, Any, List, Dict, Optional
import warnings

import pandas as pd

Expand Down Expand Up @@ -34,7 +35,7 @@ class AttribHolder:
"""Empty class used to create hierarchical attributes."""

def __len__(self):
"""Retrun number of items in the attribute collection."""
"""Return number of items in the attribute collection."""
return len(self.__dict__)

def __iter__(self):
Expand All @@ -52,7 +53,7 @@ class QueryProvider:
"""

def __init__(
def __init__( # noqa: MC001
self, data_environment: Union[str, DataEnvironment], driver: DriverBase = None
):
"""
Expand Down Expand Up @@ -96,19 +97,26 @@ def __init__(

self._query_provider = driver

settings: Dict[str, str] = config.settings.get( # type: ignore
settings: Dict[str, Any] = config.settings.get( # type: ignore
"QueryDefinitions"
) # type: ignore
query_paths = []
for default_path in settings.get("Default"):
query_paths.append(Path(__file__).resolve().parent.joinpath(default_path))
for default_path in settings.get("Default"): # type: ignore
qry_path = self._resolve_path(default_path)
if qry_path:
query_paths.append(qry_path)

if settings.get("Custom") is not None:
for custom_path in settings.get("Custom"):
query_paths.append(
Path(__file__).resolve().parent.joinpath(custom_path)
)

for custom_path in settings.get("Custom"): # type: ignore
qry_path = self._resolve_path(custom_path)
if qry_path:
query_paths.append(qry_path)

if not query_paths:
raise RuntimeError(
"No valid query definition files found. "
+ "Please check your msticpyconfig.yaml settings."
)
data_environments = QueryStore.import_files(
source_path=query_paths, recursive=True
)
Expand All @@ -117,6 +125,15 @@ def __init__(
self.all_queries = AttribHolder()
self._add_query_functions()

def __getattr__(self, name):
"""
Return the value of the dotted attribute path 'name'.

Python only calls this hook when normal attribute lookup fails.
A name of the form "parent.child" is resolved by looking up
"parent" on this object and then "child" on the result.

Raises
------
AttributeError
If 'name' contains no "." or the parent attribute is
missing or falsy.

"""
if "." in name:
# Split on the first dot only; the remainder is handed to getattr as-is.
parent_name, child_name = name.split(".", maxsplit=1)
# The None default stops a missing parent from raising here.
parent = getattr(self, parent_name, None)
# NOTE(review): truthiness test - a falsy parent is treated as missing.
if parent:
return getattr(parent, child_name)
raise AttributeError(f"{name} is not a valid attribute.")

def connect(self, connection_str: str, **kwargs):
"""
Connect to data source.
Expand Down Expand Up @@ -155,6 +172,32 @@ def connection_string(self) -> str:
"""
return self._query_provider.current_connection

@property
def schema(self) -> Dict[str, Dict]:
    """
    Return the data schema of the current connection.

    Returns
    -------
    Dict[str, Dict]
        Schema exposed by the underlying query driver.

    """
    driver = self._query_provider
    return driver.schema

@property
def schema_tables(self) -> List[str]:
    """
    Return the table names in the schema of the current connection.

    Returns
    -------
    List[str]
        Names of the tables in the schema of the current connection.

    """
    driver_schema = self._query_provider.schema
    return [*driver_schema.keys()]

def import_query_file(self, query_file: str):
"""
Import a yaml data source definition.
Expand All @@ -179,9 +222,9 @@ def list_data_environments(cls) -> List[str]:
List of current data environments
"""
return list(DataEnvironment.__members__)
return [env for env in DataEnvironment.__members__ if env != "Unknown"]

def list_queries(self):
def list_queries(self) -> List[str]:
"""
Return list of family.query in the store.
Expand All @@ -191,7 +234,7 @@ def list_queries(self):
List of queries
"""
return self._query_store.query_names
return list(self._query_store.query_names)

def query_help(self, query_name):
"""Print help for query."""
Expand Down Expand Up @@ -239,7 +282,7 @@ def _execute_query(self, *args, **kwargs) -> Union[pd.DataFrame, Any]:
raise ValueError(f"No values found for these parameters: {missing}")

query_str = query_source.create_query(**params)
if "print" in args:
if "print" in args or "query" in args:
return query_str
return self._query_provider.query(query_str)

Expand All @@ -257,8 +300,17 @@ def _add_query_functions(self):
self._execute_query, data_family=family, query_name=query_name
)
query_func.__doc__ = self._query_store.get_query(
family, query_name
data_family=family, query_name=query_name
).create_doc_string()

setattr(query_family, query_name, query_func)
setattr(self.all_queries, query_name, query_func)

@classmethod
def _resolve_path(cls, config_path: str) -> Optional[str]:
if not Path(config_path).is_absolute():
config_path = str(Path(__file__).resolve().parent.joinpath(config_path))
if not Path(config_path).is_dir():
warnings.warn(f"Custom query definitions path {config_path} not found")
return None
return config_path
15 changes: 14 additions & 1 deletion msticpy/data/drivers/driver_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""Data driver base class."""
import abc
from abc import ABC
from typing import Tuple, Any, Union
from typing import Tuple, Any, Union, Dict

import pandas as pd

Expand Down Expand Up @@ -63,6 +63,19 @@ def connected(self) -> bool:
"""
return self._loaded

@property
def schema(self) -> Dict[str, Dict]:
    """
    Return the data schema of the current connection.

    This implementation always returns an empty dictionary.

    Returns
    -------
    Dict[str, Dict]
        Data schema of the current connection.

    """
    no_schema: Dict[str, Dict] = {}
    return no_schema

@abc.abstractmethod
def connect(self, connection_str: str, **kwargs):
"""
Expand Down
33 changes: 30 additions & 3 deletions msticpy/data/drivers/kql_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# license information.
# --------------------------------------------------------------------------
"""KQL Driver class."""
from typing import Tuple, Union, Any
from typing import Tuple, Union, Any, Dict

import pandas as pd
from IPython import get_ipython
Expand Down Expand Up @@ -44,6 +44,8 @@ def __init__(self, connection_str: str = None, **kwargs):
self.current_connection = connection_str
self.connect(connection_str)

self._schema: Dict[str, Any] = {}

def connect(self, connection_str: str, **kwargs):
"""
Connect to data source.
Expand All @@ -57,8 +59,22 @@ def connect(self, connection_str: str, **kwargs):
self.current_connection = connection_str
result = self._ip.run_cell_magic("kql", line="", cell=connection_str)
self._connected = True
self._schema = self._get_schema()
return result

@property
def schema(self) -> Dict[str, Dict]:
    """
    Return the data schema of the current connection.

    Returns
    -------
    Dict[str, Dict]
        The schema currently stored on this driver.

    """
    stored_schema = self._schema
    return stored_schema

def query(self, query: str) -> Union[pd.DataFrame, Any]:
"""
Execute query string and return DataFrame of results.
Expand All @@ -78,6 +94,7 @@ def query(self, query: str) -> Union[pd.DataFrame, Any]:
data, result = self.query_with_results(query)
return data if data is not None else result

# pylint: disable=too-many-branches
def query_with_results(self, query: str) -> Tuple[pd.DataFrame, Any]:
"""
Execute query string and return DataFrame of results.
Expand Down Expand Up @@ -119,14 +136,21 @@ def query_with_results(self, query: str) -> Tuple[pd.DataFrame, Any]:
if result is not None:
if isinstance(result, pd.DataFrame):
return result, None
if result and result.completion_query_info["StatusCode"] == 0:
if (
hasattr(result, "completion_query_info")
and result.completion_query_info["StatusCode"] == 0
):
data_frame = result.to_dataframe()
if result.is_partial_table:
print("Warning - query returned partial results.")
return data_frame, result

print("Warning - query did not complete successfully.")
print("Kql ResultSet returned - check 'completion_query_info' property.")
if hasattr(result, "completion_query_info"):
print(
result.completion_query_info["StatusCode"],
"(code: {}".format(result.completion_query_info["StatusCode"]),
)
return None, result

def _load_kql_magic(self):
Expand All @@ -142,3 +166,6 @@ def _is_kqlmagic_loaded(self) -> bool:
if self._ip is not None:
return self._ip.find_magic("kql") is not None
return False

def _get_schema(self) -> Dict[str, Dict]:
    """Return the result of running the ``kql`` line magic with ``--schema``."""
    kql_schema = self._ip.run_line_magic("kql", line="--schema")
    return kql_schema

0 comments on commit 95fc399

Please sign in to comment.