# In [None]:
import requests
from bs4 import BeautifulSoup
import json

# Download the Wikipedia page that lists intelligence agencies by country.
url = 'https://en.wikipedia.org/wiki/List_of_intelligence_agencies'
# requests waits indefinitely by default; a timeout keeps the script from
# hanging forever if the server stalls.
# NOTE(review): Wikimedia's User-Agent policy asks automated clients to send a
# descriptive User-Agent header - consider adding one if requests are rejected.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page into an empty result.
response.raise_for_status()

# Parse the downloaded HTML into a navigable tree.
soup = BeautifulSoup(response.content, 'html.parser')

# Map each country name (taken from a level-2 section heading) to its entry:
# a description, the scraped agency names and the source URL.
agencies_by_country = {}

# Every country is a level-2 section. Two heading markups are supported:
#   legacy:  <h2><span class="mw-headline">Country</span>...</h2>
#   current: <div class="mw-heading mw-heading2"><h2>Country</h2>...</div>
for h2 in soup.find_all('h2'):
    span = h2.find('span', {'class': 'mw-headline'})
    wrapper = h2.parent
    in_wrapper = (
        wrapper is not None
        and 'mw-heading' in (wrapper.get('class') or [])
    )
    if not span and not in_wrapper:
        # Not an article section heading (e.g. page chrome); ignore it.
        continue

    country = (span or h2).get_text().strip()
    if country == "See also":
        # Everything from "See also" onwards is no longer a country section.
        break

    # With the wrapper markup the section content follows the wrapper <div>,
    # not the <h2> itself, so start walking siblings from the right node.
    section_start = wrapper if in_wrapper else h2

    agencies = []
    for next_element in section_start.next_siblings:
        tag_name = next_element.name
        # Stop at the next level-2 heading, in either markup.
        if tag_name == 'h2':
            break
        if tag_name == 'div' and 'mw-heading2' in (next_element.get('class') or []):
            break
        # Only bullet lists carry agency names; sub-headings, paragraphs and
        # text nodes are skipped.
        if tag_name != 'ul':
            continue
        for li in next_element.find_all('li'):
            # Prefer the text of the first link; fall back to the whole item
            # when the agency has no link.
            agency = li.find('a')
            agencies.append({'value': (agency or li).text.strip()})

    agencies_by_country[country] = {
        'description': f"Intelligence agencies in {country}",
        'Galaxy': {'Threat-Actor': {'threat-actor': agencies}},
        'meta': {'source': url},
    }

# Assemble the MISP taxonomy document: fixed metadata plus one "values" entry
# per scraped country, in the order the countries were collected.
taxonomy = {
    "name": "Intelligence Agencies",
    "description": "Galaxy of intelligence agencies by country",
    "type": "taxonomy",
    "version": "1.1",
    "authors": ["Timon ECKERT - Nina KERN"],
    "values": [
        {
            "value": country_name,
            "description": entry['description'],
            "meta": entry['meta'],
            "Galaxy": entry['Galaxy'],
        }
        for country_name, entry in agencies_by_country.items()
    ],
}

# Persist the taxonomy as pretty-printed UTF-8 JSON (non-ASCII characters are
# written as-is rather than escaped), then report completion.
with open('intelligence_agencies.json', 'w', encoding='utf-8') as out_file:
    serialized = json.dumps(taxonomy, ensure_ascii=False, indent=4)
    out_file.write(serialized)

print("Taxonomy created successfully!")
