In [50]:
import google.generativeai as genai
import urllib.parse
import json
import os
import requests
import base64
from dotenv import load_dotenv


In [51]:


# Read settings (including the credentials below) from a local .env file
load_dotenv()

# OAuth2 client credentials. The fallback values are placeholders that only
# keep the names defined when the environment variables are missing.
CLIENT_ID = os.environ.get("BOLAGSVERKET_CLIENT_ID", "YOUR_CLIENT_ID")
CLIENT_SECRET = os.environ.get("BOLAGSVERKET_CLIENT_SECRET", "YOUR_CLIENT_SECRET")

# Bolagsverket endpoints: the OAuth2 token endpoint, and the base URL given in
# the documentation for the "Värdefulla datamängder" API
TOKEN_URL = "https://portal.api.bolagsverket.se/oauth2/token"
API_BASE_URL = "https://gw.api.bolagsverket.se/vardefulla-datamangder/v1"

def get_access_token(timeout=10):
    """Authenticate with Bolagsverket and return an OAuth2 access token.

    Sends a client-credentials grant to TOKEN_URL, identifying the client with
    an HTTP Basic header built from CLIENT_ID and CLIENT_SECRET.

    Args:
        timeout (float | tuple): Seconds to wait for the token endpoint before
            giving up (passed straight to requests). Defaults to 10.

    Returns:
        str | None: The "access_token" value from the JSON response, or None
        when the request fails, the server answers with an error status, or
        the body is not the expected JSON object. Failures are printed, not
        raised.
    """

    # Encode client_id:client_secret in base64 for the Basic Auth header
    creds = f"{CLIENT_ID}:{CLIENT_SECRET}"
    creds_b64 = base64.b64encode(creds.encode("utf-8")).decode("utf-8")

    headers = {
        "Authorization": f"Basic {creds_b64}",
        "Content-Type": "application/x-www-form-urlencoded"
    }

    data = {
        "grant_type": "client_credentials",
        # needed for read / ping access
        "scope": "vardefulla-datamangder:read vardefulla-datamangder:ping"
    }

    # Bound before the try: if requests.post() itself raises (DNS failure,
    # refused connection, timeout) there is no response object to inspect.
    response = None
    try:
        response = requests.post(TOKEN_URL, headers=headers, data=data, timeout=timeout)
        response.raise_for_status()
        token_data = response.json()
        return token_data["access_token"]
    except requests.exceptions.RequestException as e:
        print(f"‚ùå Error fetching token: {e}")
        # `is not None` rather than truthiness: a requests Response is falsy
        # for error statuses, which is exactly when the details are useful.
        if response is not None and response.content:
            print(f"Details: {response.content}")
        return None
    except (KeyError, TypeError, ValueError) as e:
        # 2xx answer whose body is not JSON, not an object, or has no token
        print(f"‚ùå Error fetching token: unexpected response body ({e!r})")
        return None


In [52]:
def check_api_status(token, timeout=10):
    """Check whether the API is reachable via its /isalive endpoint.

    Args:
        token (str): Bearer access token sent in the Authorization header.
        timeout (float | tuple): Seconds to wait for the endpoint before
            giving up, so an unresponsive server cannot hang the notebook.
            Defaults to 10.

    Returns:
        str | None: A human-readable status message when the server answered
        (success text for HTTP 200, otherwise a message carrying the response
        body), or None when the request itself failed.

    Note:
        This notebook also defines check_api_status in a later cell; whichever
        cell runs last provides the definition in the kernel.
    """

    # Endpoint: /isalive (Standard health check)
    url = f"{API_BASE_URL}/isalive"

    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json"
    }

    try:
        print(f"üì° Pinging: {url}")
        # The /isalive endpoint is a GET request and needs no payload
        response = requests.get(url, headers=headers, timeout=timeout)

        print(f"üì° Status Code: {response.status_code}")

        # Bolagsverket usually returns simple text or empty 200 OK for isalive
        if response.status_code == 200:
            return "‚úÖ API is ALIVE and working!"
        else:
            return f"‚ùå Unexpected status: {response.text}"

    except requests.exceptions.RequestException as e:
        print(f"‚ùå Error connecting to API: {e}")
        return None

# --- MAIN EXECUTION ---
# Smoke test: obtain a token, then ping the API health endpoint with it.
if __name__ == "__main__":
    print("1Ô∏è‚É£  Authenticating...")
    token = get_access_token()

    if not token:
        # get_access_token() returned a falsy value; nothing to call the API with
        print("üõë Could not proceed without token.")
    else:
        print("‚úÖ Access Token received!")

        print("2Ô∏è‚É£  Checking API Health...")
        status = check_api_status(token)
        print(status)

1Ô∏è‚É£  Authenticating...
‚úÖ Access Token received!
2Ô∏è‚É£  Checking API Health...
üì° Pinging: https://gw.api.bolagsverket.se/vardefulla-datamangder/v1/isalive
üì° Status Code: 200
‚úÖ API is ALIVE and working!


In [None]:

def search_company(org_number, token, timeout=10):
    """
    Fetches company information for an organisation number.

    The number is normalised before it is sent: it is converted to a string,
    surrounding whitespace and any hyphens are removed, and it is left-padded
    with zeros to 10 digits (so an int that lost its leading zeros still
    produces a 10-digit identifier).

    Args:
        org_number (str or int): The organisation number, e.g. "5560160680",
            "556016-0680" or 5560160680.
        token (str): The access token required for authentication.
        timeout (float | tuple): Seconds to wait for the API before giving up.
            Defaults to 10.
    Returns:
        dict: The parsed JSON body returned by the API, if the request is
            successful.
        None: If the request fails, the server answers with an error status,
            or the body is not valid JSON. The error is printed.
    Notes:
        - Relies on the module-level `API_BASE_URL`.
        - On failure the response body (when there is one) is printed to help
          debug schema issues.
    """

    url = f"{API_BASE_URL}/organisationer"

    # Digits only, zero-padded to 10 characters, always sent as a JSON string
    identity = str(org_number).strip().replace("-", "").zfill(10)
    payload = {
        "identitetsbeteckning": identity
    }

    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept": "application/json"
    }

    # Bound before the try: if requests.post() itself raises there is no
    # response object, and the except branch must not touch one.
    response = None
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        print(f"‚ùå Error fetching data: {e}")
        # Print detailed error to debug schema issues
        if response is not None:
            print(f"Response: {response.text}")
        return None
    

1Ô∏è‚É£  Authenticating...
‚úÖ Access Token received!
2Ô∏è‚É£  Searching for company: 9697802230...
‚úÖ Data received:
{'organisationer': [{'avregistreradOrganisation': {'avregistreringsdatum': '2018-01-31', 'dataproducent': 'Bolagsverket', 'fel': None}, 'avregistreringsorsak': {'dataproducent': 'Bolagsverket', 'fel': None, 'klartext': 'Anm√§lan om att verksamheten har upph√∂rt', 'kod': 'VERKUPP'}, 'juridiskForm': {'dataproducent': 'SCB', 'fel': {'felBeskrivning': 'Den efterfr√•gade informationen gick inte att hitta.', 'typ': 'ORGANISATION_FINNS_EJ'}, 'klartext': None, 'kod': None}, 'namnskyddslopnummer': None, 'naringsgrenOrganisation': {'dataproducent': 'SCB', 'fel': {'felBeskrivning': 'Den efterfr√•gade informationen gick inte att hitta.', 'typ': 'ORGANISATION_FINNS_EJ'}, 'sni': []}, 'organisationsdatum': {'dataproducent': 'Bolagsverket', 'fel': None, 'infortHosScb': None, 'registreringsdatum': '2016-07-07'}, 'organisationsform': {'dataproducent': 'Bolagsverket', 'fel': None, 'klart

In [None]:
def check_api_status(token, timeout=10):
    """Check whether the API is reachable via its /isalive endpoint.

    NOTE(review): check_api_status is also defined in an earlier cell of this
    notebook. The definition executed last wins in the kernel; consider
    keeping only one copy.

    Args:
        token (str): Bearer access token sent in the Authorization header.
        timeout (float | tuple): Seconds to wait for the endpoint before
            giving up, so an unresponsive server cannot hang the notebook.
            Defaults to 10.

    Returns:
        str | None: A human-readable status message when the server answered
        (success text for HTTP 200, otherwise a message carrying the response
        body), or None when the request itself failed.
    """

    # Endpoint: /isalive (Standard health check)
    url = f"{API_BASE_URL}/isalive"

    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json"
    }

    try:
        print(f"üì° Pinging: {url}")
        # The /isalive endpoint is a GET request and needs no payload
        response = requests.get(url, headers=headers, timeout=timeout)

        print(f"üì° Status Code: {response.status_code}")

        # Bolagsverket usually returns simple text or empty 200 OK for isalive
        if response.status_code == 200:
            return "‚úÖ API is ALIVE and working!"
        else:
            return f"‚ùå Unexpected status: {response.text}"

    except requests.exceptions.RequestException as e:
        print(f"‚ùå Error connecting to API: {e}")
        return None


1Ô∏è‚É£  Authenticating...
‚úÖ Access Token received!
2Ô∏è‚É£  Checking API Health...
üì° Pinging: https://gw.api.bolagsverket.se/vardefulla-datamangder/v1/isalive
üì° Status Code: 200
‚úÖ API is ALIVE and working!


In [None]:
# --- TEST EXECUTION ---
# Manual check: obtain a token, then look up one organisation number.
if __name__ == "__main__":

    print("1Ô∏è‚É£  Authenticating...")
    token = get_access_token()

    if not token:
        print("üõë Could not proceed without token.")
    else:
        print("‚úÖ Access Token received!")

        # Organisation number to look up, written as digits only (no hyphen).
        # For example, Bolagsverket's own number 202100-5489 would be 2021005489.
        test_org_number = 9697802230

        print(f"2Ô∏è‚É£  Searching for company: {test_org_number}...")
        data = search_company(test_org_number, token)

        # Nothing is printed here when the lookup returns a falsy result
        if data:
            print("‚úÖ Data received:")
            print(data)

In [55]:
def generate_bolagsverket_search_terms(user_input, llm=None):
    """
    Uses Gemini to convert a broad description into specific Bolagsverket search terms.
    Similar to the logic in 'src/agents/search_agent.py'.

    Args:
        user_input (str): Free-text description of what to search for.
        llm: Optional object exposing `generate_content(prompt)` whose result
            has a `.text` attribute. When omitted, the notebook-level `model`
            is used; that name must then have been defined by an earlier cell.

    Returns:
        list[str]: The non-empty string entries of the JSON list produced by
        the model, each stripped of surrounding whitespace. Falls back to
        `[user_input]` when the model is unavailable, the call fails, or the
        reply is not a JSON list containing at least one usable string.
    """

    # Prompt engineering: We ask for Swedish business terms and specific formatting
    prompt = f"""
    You are an AI assistant specialized in searching the Swedish Companies Registration Office (Bolagsverket).
    
    User Input: "{user_input}"
    
    Task:
    1. Translate the core concept to Swedish if it is in English.
    2. Identify specific business codes (SNI) or legal forms (Aktiebolag, Handelsbolag) if relevant.
    3. Generate 3 distinct search terms optimized for a database search.
    
    Return the result as a raw JSON list of strings. Example: ["Svenska AgriTech AB", "Jordbruksteknik", "H√•llbar odling"]
    """

    try:
        # Resolved inside the try on purpose: a missing global `model`
        # (NameError) takes the same best-effort fallback as an API failure.
        client = llm if llm is not None else model
        response = client.generate_content(prompt)
        # Strip Markdown code fences so the remainder parses as JSON
        cleaned_text = response.text.replace('```json', '').replace('```', '').strip()
        parsed = json.loads(cleaned_text)
    except Exception as e:
        print(f"Error generating terms: {e}")
        return [user_input] # Fallback to original input

    # Valid JSON is not necessarily the requested shape. Callers URL-encode
    # each term, so keep only non-empty strings from a list.
    if isinstance(parsed, list):
        search_terms = [
            term.strip() for term in parsed
            if isinstance(term, str) and term.strip()
        ]
        if search_terms:
            return search_terms

    print("Error generating terms: model did not return a non-empty JSON list of strings")
    return [user_input] # Fallback to original input

def search_bolagsverket(terms):
    """
    Prints a Bolagsverket search link for every term, following the URL
    pattern found in 'tavily_config.json'. No request is made and nothing is
    returned.

    Args:
        terms: Iterable of search strings; each is percent-encoded and placed
            in the `sokord` query parameter.
    """
    endpoint = "https://foretagsinfo.bolagsverket.se/sok-foretagsinformation-web/foretag"
    divider = "-" * 30

    print(f"\n--- Searching Bolagsverket for: {terms} ---\n")

    for keyword in terms:
        # Percent-encode the term (e.g. spaces become %20) and append it to
        # the endpoint as the search-word parameter
        link = endpoint + "?sokord=" + urllib.parse.quote(keyword)

        print(f"Term: '{keyword}'")
        print(f"üîó Link: {link}")
        print(divider)

# --- MAIN EXECUTION ---
# Demo: turn a free-text request into search terms, then print search links.
if __name__ == "__main__":
    # Example of what a user might type in the dashboard
    user_query = "Find me sustainable agriculture tech companies in Stockholm"

    print(f"User Query: {user_query}\nAsking Gemini for optimal search terms...")

    # Step 1: ask Gemini for search terms (falls back to the raw query on error)
    smart_terms = generate_bolagsverket_search_terms(user_query)

    # Step 2: print one Bolagsverket search link per term
    search_bolagsverket(smart_terms)

User Query: Find me sustainable agriculture tech companies in Stockholm
Asking Gemini for optimal search terms...
Error generating terms: name 'model' is not defined

--- Searching Bolagsverket for: ['Find me sustainable agriculture tech companies in Stockholm'] ---

Term: 'Find me sustainable agriculture tech companies in Stockholm'
üîó Link: https://foretagsinfo.bolagsverket.se/sok-foretagsinformation-web/foretag?sokord=Find%20me%20sustainable%20agriculture%20tech%20companies%20in%20Stockholm
------------------------------
