# DomainCAT: Domain Connectivity Analysis Tool

### Analyzing the domain to domain connectivity of an Iris API Search

In [None]:
# Run This First: imports all the helper functions and sets stuff up.
# `%run` is an IPython magic, so this cell only works inside a notebook/IPython
# session. Later cells use names they never import themselves (e.g. `widgets`,
# `json`, `sys`, `Config`), so they presumably come from this module -- run
# this cell before any other.
%run domain_cat_module.py

print("DomainCAT is ready to go")

## Iris REST API Credentials

In [None]:
# Input widgets for the Iris API credentials. Both widget objects (not their
# string values) are passed to query_iris_rest_api() in the data-loading cell.
api_username_ui = widgets.Text(placeholder='Iris API Username', description='Username:', layout={'width': '500px'}, value="")
api_pw_ui = widgets.Password(placeholder='Iris API Password', description='Password:', layout={'width': '500px'}, value="")
# Last expression of the cell, so the notebook displays the two inputs in a vertical box
widgets.VBox([api_username_ui, api_pw_ui])

## Query Domain Data From Iris Investigate API

Enter either a return-delimited list of domains (one domain per line) into the Domains text box, _OR_ an Iris search hash into the Hash text box.

Note: if both a list of domains _AND_ a search hash are entered, the list of domains will be queried and the search hash will be ignored.

In [None]:
# Input widgets for the Iris query: a multi-line box for a list of domains and
# a single-line box for an Iris search hash. Both are handed to
# show_iris_query_ui() here and to query_iris_rest_api() in the data-loading cell.
domain_list_ui = widgets.Textarea(placeholder='Enter list of domains', description='Domains:', layout={'height': '300px', 'width': '700px'})
# The placeholder must describe the hash input, not the domain list
search_hash_ui = widgets.Text(placeholder='Enter Iris search hash', description='Hash:', layout={'width': '700px'})
show_iris_query_ui(domain_list_ui, search_hash_ui)

In [None]:
# Data Loading Config
#   query_api           - True: fetch results from the Iris API;
#                         False: read a previously saved search from json_file_path
#   save_search_to_disk - when querying the API, also write the results to json_file_path
#   json_file_path      - JSON file used for both saving and loading
query_api = True
save_search_to_disk = False
json_file_path = "data/dash_gov_dot_us.json"

if not query_api:
    # Offline path: reuse search results that were saved earlier
    with open(json_file_path) as json_data:
        iris_results = json.load(json_data)

    print(f'Loaded {len(iris_results)} domains from {json_file_path}')
else:
    # Online path: query with the credential and query widgets defined in earlier cells
    iris_results = query_iris_rest_api(api_username_ui, api_pw_ui, domain_list_ui, search_hash_ui)
    print(f'Iris API returned {len(iris_results)} domains')

    # Optionally keep a copy of the results on disk to be used later
    if save_search_to_disk:
        with open(json_file_path, 'w') as f:
            json.dump(iris_results, f)

## DomainCAT Configuration

Please refer to the DomainCAT documentation for details about these configuration options

In [None]:
# Settings object that is handed to build_domain_pivot_graph() in a later cell
config = Config()

# only analyze domains that are active (currently registered)
config.active_domains_only = True

# config for pivoting on matching substrings. Only matching substrings this long or longer will be used to create a pivot
config.longest_common_substring = 6

# List of substrings to ignore when creating pivots by matching substrings
config.ignore_substrings = []

# use the pivot count to scale how important the pivot is during graph layout. Smaller pivot counts have more influence, and vice versa
config.scale_edge_strength_by_pivot_count = True

# Global pivot count threshold. Any pivot with a count greater than this value is discarded. sys.maxsize effectively keeps all pivots
config.global_count_threshold = sys.maxsize

# The smallest pivot count size to use. Default of 2 means no pivots are filtered out because their count is too low
config.min_pivot_size = 2

# theoretical max pivot size for calculating edge strengths
config.max_domains = 100000000

# If True DomainCAT will print out some debug info while building the connected graph of domains
config.print_debug_output = False

## Choose Which Pivots To Use & Build Domain Graph


In [None]:
# Names of the pivot categories to use; comment a line out to leave that pivot
# out. This is a set literal, so the order of the entries does not matter.
pivot_category_config = {
    "adsense",
    "google_analytics",
    "create_date",
    "redirect_domain",
    "registrar",
    "ip_address",
    "ip_country_code",
    "ip_isp",
    "ip_asn",
    "ssl_hash",
    "ssl_subject",
    "ssl_org",
    "ssl_email",
    
#     # Note: ns_host is commented out because it double counts ns connectedness when used with ns_domain.
#     # NOTE(review): this note used to list ns_ip as commented out as well, yet ns_ip is enabled below -- confirm which is intended.
    "ns_domain",
#     "ns_host",  
    "ns_ip",  
    
#     # Note: mx_host is commented out because it double counts mx connectedness when used with mx_domain.
#     # NOTE(review): this note used to list mx_ip as commented out as well, yet mx_ip is enabled below -- confirm which is intended.
    "mx_domain",
#     "mx_host",
    "mx_ip", 
    
    "tld",
    "longest_common_substring",
}

# Build the domain pivot graph structure
# The three results are used by later cells: `graph` by the layout/stat/remove cells,
# `pivot_categories` by calc_pivot_stats(), `trimmed_domains` by the trimmed-domains printout.
config.pivot_category_config = pivot_category_config
graph, pivot_categories, trimmed_domains = build_domain_pivot_graph(iris_results, config)

## Trimmed Domains

In [None]:
# Toggle: set to False to skip listing the domains reported in trimmed_domains
print_trimmed_domains = True
if print_trimmed_domains:
    # (key in trimmed_domains, header line), in the order the sections are printed
    trimmed_sections = (
        ("unconnected", "trimmed unconnected domains:"),
        ("create_date", "\ntrimmed domains with only create date pivot:"),
    )
    for section_key, header in trimmed_sections:
        # Empty sections are skipped entirely, header included
        if len(trimmed_domains[section_key]) > 0:
            print(header)
            for domain in trimmed_domains[section_key]:
                print(f"  {domain}")

## Draw the Domain Graph in an Interactive 3D Layout

In [None]:
# Draw the interactive 3D layout for the graph returned by build_domain_pivot_graph()
build_3d_graph_layout(graph)

In [None]:
# Same call as the previous cell.
# NOTE(review): looks like a spare copy for re-drawing the 3D graph -- confirm it is intentional
build_3d_graph_layout(graph)

In [None]:
# Same call as the two cells before it.
# NOTE(review): looks like a spare copy for re-drawing the 3D graph -- confirm it is intentional
build_3d_graph_layout(graph)

## Calculate & Show Pivot Statistics

In [None]:
# Calculate a bunch of pivot statistics to see how well connected all the domains in the search result are.
# Both arguments come from the build_domain_pivot_graph() call in the graph-building cell.
calc_pivot_stats(graph, pivot_categories)

## Draw the Domain Graph in an Interactive 2D Layout

In [None]:
# calculate the pivots shared in common across all selected domains
# Module-level holder: starts empty and is replaced by get_2d_shared_pivots(),
# so the heatmap cell can check whether a selection has been made.
shared_pivots = {}
def get_2d_shared_pivots(graph, selected_domains):
    """Store the pivots shared by ``selected_domains`` in the global ``shared_pivots``.

    Handed to build_2d_graph_layout() as its second argument; presumably it is
    invoked when domains are selected in the 2D graph (confirm in the module).
    The result of get_shared_pivots() rebinds the module-level ``shared_pivots``
    rather than being returned.
    """
    global shared_pivots
    shared_pivots = get_shared_pivots(graph, selected_domains)
    
build_2d_graph_layout(graph, get_2d_shared_pivots)

## Heatmap of which pivots connect the most domains together: by pivot category

In [None]:
# shared_pivots starts out empty and is only filled in by the 2D graph cell,
# so draw the heatmaps when there is something to show and prompt otherwise.
if len(shared_pivots) > 0:
    create_pivot_heatmaps(shared_pivots)
else:
    print("Select a set of domains in the 2D graph")

## Removing domains from the graph

Sometimes you find disconnected domains in the 3D graph visualization that make pivoting the viz really annoying. To remove domains from the graph, enter the domain(s) you want removed in the text box below and run the second cell. This will remove the domains from the graph structure without having to requery the data.

After you do this, re-run the 3D viz and the domains should be gone.

In [None]:
# Multi-line text box for the domains to drop from the graph; the widget object
# is read by remove_domains_from_graph() in the next cell.
remove_domains_ui = widgets.Textarea(placeholder='Enter domains to remove from graph', description='Domains:', layout={'height': '100px', 'width': '700px'}) 
# Last expression of the cell, so the notebook displays the text box
remove_domains_ui

In [None]:
# Run this to remove the domains in the above text box from the graph.
# `graph` is rebound to the returned value, so re-run the visualization cells afterwards.
graph = remove_domains_from_graph(graph, remove_domains_ui)