In [5]:
import pickle
import pandas as pd
import datetime
import pytz
import cabby
from lxml import etree

class AVLNode:
    def __init__(self, key):
        self.key = key
        self.height = 1
        self.left = None
        self.right = None

class AVLTree:
    def __init__(self):
        self.root = None

    def height(self, node):
        if node is None:
            return 0
        return node.height

    def balance_factor(self, node):
        if node is None:
            return 0
        return self.height(node.left) - self.height(node.right)

    def rotate_right(self, y):
        x = y.left
        T2 = x.right

        x.right = y
        y.left = T2

        y.height = 1 + max(self.height(y.left), self.height(y.right))
        x.height = 1 + max(self.height(x.left), self.height(x.right))

        return x

    def rotate_left(self, x):
        y = x.right
        T2 = y.left

        y.left = x
        x.right = T2

        x.height = 1 + max(self.height(x.left), self.height(x.right))
        y.height = 1 + max(self.height(y.left), self.height(y.right))

        return y

    def insert(self, root, key):
        if root is None:
            return AVLNode(key)

        if key < root.key:
            root.left = self.insert(root.left, key)
        elif key > root.key:
            root.right = self.insert(root.right, key)
        else:
            return root

        root.height = 1 + max(self.height(root.left), self.height(root.right))

        balance = self.balance_factor(root)

        # Left Heavy
        if balance > 1:
            if key < root.left.key:
                return self.rotate_right(root)
            else:
                root.left = self.rotate_left(root.left)
                return self.rotate_right(root)

        # Right Heavy
        if balance < -1:
            if key > root.right.key:
                return self.rotate_left(root)
            else:
                root.right = self.rotate_right(root.right)
                return self.rotate_left(root)

        return root

    def insert_domain(self, key):
        self.root = self.insert(self.root, key)



    def search(self, root, key):
        if root is None or root.key == key:
            return root

        if key < root.key:
            return self.search(root.left, key)
        return self.search(root.right, key)

    def search_domain(self, key):
        return self.search(self.root, key)

    def inorder_traversal(self, root, result):
        if root:
            self.inorder_traversal(root.left, result)
            result.append(root.key)
            self.inorder_traversal(root.right, result)


    def save_to_file(self, filename):
        with open(filename, 'wb') as file:
            pickle.dump(self.root, file)
        print("Database Updated")

    def load_from_file(self, filename):
        try:
            with open(filename, 'rb') as file:
                self.root = pickle.load(file)
        except FileNotFoundError:
            pass  # File doesn't exist, start with an empty tree

    def search_result(self, key):
        search_result = self.search_domain(key)
        if search_result:
            print(f"{search_result.key} found!")
        else:
            print("Domain not found.")

    def build_and_save_new_tree(self):
        new_avl = AVLTree()
        new_avl.load_from_file('new_avl_tree.pkl')
        taxii_server = "https://otx.alienvault.com/taxii/poll"
        collection_name = "user_AlienVault"
        username = "0725b45940f32b2423097f41154bd111c94e526d10c3fac501356be02ceb436c"
        begin_date = datetime.datetime(2023, 12, 10, 0, 0, 0, tzinfo=pytz.utc)

        # Create a TAXII client
        client = cabby.create_client(discovery_url=taxii_server, version="1.1", headers={"username": username, "password": "abcd"})

        # Get the collection for polling
        collections = client.get_collections()
        collection = next((c for c in collections if c.name == collection_name), None)

        if collection:
            # Perform the poll request
            poll_result = client.poll(collection.name, begin_date=begin_date)

            # Process the poll result
            for content_block in poll_result:
                print("Received Content Block:")


                # Parse the content block as XML
                xml_content = etree.fromstring(content_block.content)

                # Extract the domain value
                namespace = {'DomainNameObj': 'http://cybox.mitre.org/objects#DomainNameObject-1'}
                domain_element = xml_content.find('.//DomainNameObj:Value', namespaces=namespace)
                if domain_element is not None:

                  domain_value = domain_element.text

                  if new_avl.search_domain(domain_value):
                    print("Already here not inserted")
                    continue
                  else:
                    new_avl.insert_domain(domain_value)
                    print(f"{domain_value} inserted in avl")
                else:
                  print("nothing here")


            new_avl.save_to_file('new_avl_tree.pkl')
            self.load_from_file('new_avl_tree.pkl')





In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
avl_tree_blacklist = AVLTree()
avl_tree_blacklist.load_from_file("/content/drive/MyDrive/lists/avl_tree_blacklist.pkl")
avl_tree_whitelist = AVLTree()
avl_tree_whitelist.load_from_file("/content/drive/MyDrive/lists/avl_tree_whitelist.pkl")

In [12]:
import time
start_time = time.time()
domain_to_classify = "meet-with-wyattdowling.ca"
if avl_tree_blacklist.search_result(domain_to_classify):
  result = {'class': 'BLC', 'ipv4' : None}
  print(result)
elif avl_tree_whitelist.search_result(domain_to_classify):
  result = {'class': 'LEG', 'ipv4' : 'x'}
  print(result)


end_time = time.time()
execution_time = end_time - start_time
print(f"Search took {execution_time:.9f} seconds")

meet-with-wyattdowling.ca found!
Domain not found.
Search took 0.002812862 seconds


In [4]:
!pip install cabby
!pip install taxii2-client

Collecting cabby
  Downloading cabby-0.1.23-py2.py3-none-any.whl (32 kB)
Collecting libtaxii>=1.1.111 (from cabby)
  Downloading libtaxii-1.1.119-py2.py3-none-any.whl (130 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.8/130.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from cabby)
  Downloading colorlog-6.8.0-py3-none-any.whl (11 kB)
Collecting furl>=0.4.7 (from cabby)
  Downloading furl-2.1.3-py2.py3-none-any.whl (20 kB)
Collecting orderedmultidict>=1.0.1 (from furl>=0.4.7->cabby)
  Downloading orderedmultidict-1.0.1-py2.py3-none-any.whl (11 kB)
Installing collected packages: orderedmultidict, colorlog, libtaxii, furl, cabby
Successfully installed cabby-0.1.23 colorlog-6.8.0 furl-2.1.3 libtaxii-1.1.119 orderedmultidict-1.0.1
Collecting taxii2-client
  Downloading taxii2_client-2.3.0-py2.py3-none-any.whl (24 kB)
Installing collected packages: taxii2-client
Successfully installed taxii2-client-2.3.0


In [8]:
updating_avl_tree = AVLTree()
updating_avl_tree.build_and_save_new_tree()

Received Content Block:
aerosunelectric.com inserted in avl
Received Content Block:
nothing here
Received Content Block:
rs.tdsclinical.com inserted in avl
Received Content Block:
nothing here
Received Content Block:
j3qxmk6g5sk3zw62i2yhjnwmhm55rfz47fdyfkhaithlpelfjdokdxad.onion inserted in avl
Received Content Block:
nothing here
Received Content Block:
aifanul.yachts inserted in avl
Received Content Block:
nothing here
Received Content Block:
blessed-with-luck.space inserted in avl
Received Content Block:
getclouddoc.com inserted in avl
Received Content Block:
53help.org inserted in avl
Received Content Block:
archax.privymeet.com inserted in avl
Received Content Block:
nothing here
Received Content Block:
ssl.explorecell.com inserted in avl
Received Content Block:
anonbin.ir inserted in avl
Received Content Block:
wlynch.com inserted in avl
Received Content Block:
brolink2s.site inserted in avl
Received Content Block:
brhosting.net inserted in avl
Received Content Block:
infinityfre

In [9]:
updating_avl_tree.search_result('updt.ps')

updt.ps found!


In [10]:
updating_avl_tree.build_and_save_new_tree()

Received Content Block:
Already here not inserted
Received Content Block:
nothing here
Received Content Block:
Already here not inserted
Received Content Block:
nothing here
Received Content Block:
Already here not inserted
Received Content Block:
nothing here
Received Content Block:
Already here not inserted
Received Content Block:
nothing here
Received Content Block:
Already here not inserted
Received Content Block:
Already here not inserted
Received Content Block:
Already here not inserted
Received Content Block:
Already here not inserted
Received Content Block:
nothing here
Received Content Block:
Already here not inserted
Received Content Block:
Already here not inserted
Received Content Block:
Already here not inserted
Received Content Block:
Already here not inserted
Received Content Block:
Already here not inserted
Received Content Block:
Already here not inserted
Received Content Block:
nothing here
Received Content Block:
Already here not inserted
Received Content Block:
Alre

In [None]:
print(avl_tree_blacklist.search_domain("meet-with-wyattdowling.ca"))

<__main__.AVLNode object at 0x7f51ddc71180>


In [13]:
import time

file_path = '/content/drive/MyDrive/lists/blacklist.txt'

# Define the domain to search
domain_to_search = 'meet-with-wyattdowling.ca'

def search_domain_in_file(file_path, domain):
    try:
        with open(file_path, 'r') as file:
            lines = file.readlines()
            for line in lines:
                if domain in line:
                    return True
            return False
    except Exception as e:
        print(f"Error: {e}")
        return False

start_time = time.time()
# Search for the domain in the file
result = search_domain_in_file(file_path, domain_to_search)

# Print the result
if result:
    print(f"The domain {domain_to_search} is in the blacklist.")
else:
    print(f"The domain {domain_to_search} is not in the blacklist.")
end_time = time.time()
execution_time = end_time - start_time
print(f"Search took {execution_time:.9f} seconds")

The domain meet-with-wyattdowling.ca is in the blacklist.
Search took 2.722686291 seconds
