In [1]:
import weaviate
import os
import requests
import json

from markdownify import markdownify as md
from dotenv import load_dotenv

load_dotenv()
# Weaviate will only accept OPENAI_APIKEY as an env variable, so mirror the
# conventional OPENAI_API_KEY into that name. An already-set OPENAI_APIKEY is
# accepted as a fallback.
_openai_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY")
if not _openai_api_key:
    # os.environ only accepts strings; fail with a clear message instead of an
    # opaque TypeError when the key is missing.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to the environment or to the .env file"
    )
os.environ["OPENAI_APIKEY"] = _openai_api_key


In [2]:
# Smoke test: connect to the local Weaviate instance and report readiness.
client = weaviate.connect_to_local()

try:
    print(client.is_ready())  # Should print: `True`
finally:
    # Release the connection even if the readiness check raises.
    client.close()

True


In [26]:
# Download the OBP Swagger spec (static endpoints only) and report how many paths it has.
# Get endpoints for a certain tag
#tag = "Metric"

obp_base_url = "https://apisandbox.openbankproject.com"
print(obp_base_url)
obp_version = "v5.1.0"

# Seconds to wait for the sandbox before giving up, so a stalled server cannot hang the kernel
swagger_request_timeout = 60

#swagger_url = "{}/obp/v5.1.0/resource-docs/{}/swagger?tags={}".format(os.getenv("OBP_BASE_URL"), obp_version, tag)
swagger_url = "{}/obp/v5.1.0/resource-docs/{}/swagger?content=static".format(obp_base_url, obp_version)

# check validator is working

import logging
from swagger_spec_validator import validate_spec_url

#logging.basicConfig(level=logging.DEBUG)
print("Validating Swagger Spec")
try:
    validate_spec_url(swagger_url)
except Exception as e:
    # Validation is best-effort: report the problem and still attempt the download
    print(f"Could not validate swagger spec: {e}")

swagger_response = requests.get(swagger_url, timeout=swagger_request_timeout)

if swagger_response.status_code >= 300:
    # The error body is not guaranteed to be JSON; fall back to the raw text so
    # the real failure is reported instead of a JSON decoding error.
    try:
        error_detail = swagger_response.json()
    except ValueError:
        error_detail = swagger_response.text
    raise Exception(f"Swagger endpoint malfunction: {error_detail}")
swagger_json = swagger_response.json()

# Fix three very broken refs
#swagger_json['definitions']['ResourceDocJson']['properties']['success_response_body']['$ref'] = '#/definitions/EmptyClassJson'
#swagger_json['definitions']['ResourceDocJson']['properties']['example_request_body']['$ref'] = '#/definitions/EmptyClassJson'
#swagger_json['definitions']['ConsentJsonV500']['properties']['account_access']['$ref'] = '#/definitions/EmptyClassJson'
#swagger_json['definitions']['ConsentJWT']['properties']['entitlements']['items']['$ref'] = '#/definitions/EntitlementsJsonV400'

# Fix duplicate operationID
#swagger_json['paths']['/obp/v5.1.0/dynamic-registration/consumers']['post']['operationId'] = 'dynamicRegistrationCreateConsumer'

# Fix duplicate tags
#swagger_json['paths']['/obp/v5.1.0/banks']['get']['tags'] = list(set(swagger_json['paths']['/obp/v5.1.0/banks']['get']['tags']))

print(len(swagger_json['paths']))

#### IMPORTANT: UNTIL THE SWAGGER SPEC CHECKING ON OBP SIDE IS FIXED, DO NOT UNCOMMENT THIS
# We will do development with a static i.e. not dynamically updated spec until this is the case

#with open('swagger_spec.json', 'w') as f:
#    json.dump(swagger_json, f, indent =3)

https://apisandbox.openbankproject.com
Validating Swagger Spec
Could not validate swagger spec: ('The read operation timed out', TimeoutError('The read operation timed out'))


Exception: Swagger endpoint malfunction: {'code': 400, 'message': 'OBP-50000: Unknown Error. Can not convert internal swagger file. <- JArray type should not be empty. <- java.lang.RuntimeException: JArray type should not be empty.'}

In [3]:
from prance import ResolvingParser

def recursion_handler(recursion_limit, parsed_url, refs):
    """
    Callback handed to the prance parser as its `recursion_limit_handler`.

    Logs the three arguments it was called with and returns a `$ref` object
    pointing at the fragment of `parsed_url` (i.e. "#" followed by
    `parsed_url.fragment`).
    """
    print(recursion_limit, parsed_url, refs)
    pointer = "".join(("#", parsed_url.fragment))
    return {"$ref": pointer}

# Parse the locally saved spec file; recursion_handler is called by the parser
# when its reference recursion limit is reached.
parser = ResolvingParser(
    'swagger_spec.json',
    backend="swagger-spec-validator",
    recursion_limit_handler=recursion_handler,
)

  from jsonschema import RefResolver
  from jsonschema.validators import RefResolver
  from jsonschema.validators import RefResolver


In [4]:
# Specification produced by the parser; later cells index it by "paths".
# Presumably all `$ref`s are already resolved here (it comes from a ResolvingParser) - TODO confirm.
resolved_spec_dict = parser.specification

In [5]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Define a prompt -> model chain that summarizes an endpoint's HTML description.
# Invoke it with {"raw_description": <html description>}.
prompt = ChatPromptTemplate.from_template(
    "Summarise the following OpenAPI endpoint description (written in html) in plain text with less than 1024 characters:\n\n{raw_description}"
)
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Piping the prompt into the model yields a single runnable chain
endpoint_summary_chain = prompt | llm

  return cast(Any, type_)._evaluate(globalns, localns, recursive_guard=set())
  return cast(Any, type_)._evaluate(globalns, localns, recursive_guard=set())


In [6]:
from http.client import responses

def response_code_to_string(code: int) -> str:
    """
    Takes a HTTP response status code i.e. 200 and builds a key from its
    standard reason phrase (in camelCase) followed by the code itself, i.e.
    200 becomes ok_200
    404 becomes notFound_404
    203 becomes nonAuthoritativeInformation_203

    The result only contains letters, digits and a single underscore.

    Raises:
        KeyError: if `code` has no entry in `http.client.responses`.
    """
    descriptor = responses[code]

    # Treat every non-alphanumeric character (spaces, hyphens, apostrophes) as
    # a word separator so none of them leak into the generated key.
    words = "".join(ch if ch.isalnum() else " " for ch in descriptor).split()

    # Turn the descriptor into camelCase: first word lowercased, the rest title-cased
    camel_case_descriptor = words[0].lower() + "".join(w.title() for w in words[1:])

    return f"{camel_case_descriptor}_{code}"

# Quick sanity check of the helper over a handful of status codes
test_codes = [100, 200, 301, 202, 404, 500]
for code in test_codes:
    print(f"{code} -> {response_code_to_string(code)}")


100 -> continue_100
200 -> ok_200
301 -> movedPermanently_301
202 -> accepted_202
404 -> notFound_404
500 -> internalServerError_500


In [7]:
# Total number of (path, method) entries in the spec, this is for the progress bar
total_endpoints_count = sum(
    len(endpoint) for endpoint in resolved_spec_dict["paths"].values()
)

print(total_endpoints_count)

538


In [24]:
from pydantic import BaseModel, Field

# Schema for one endpoint entry: every field is required and carries a
# human-readable description. `parameters` and `responses` are plain strings.
class Endpoint(BaseModel):

    # Where the endpoint lives and how it is called
    path: str = Field(description="Path of endpoint on OBP. I.e. /obp/v5.1.0/banks")
    method: str = Field(description="HTTP method keyword for this endpoint i.e. GET or POST")

    # Categorization and identity
    tags: list = Field(
        description=(
            "Endpoint tags, giving more information about the endpoint and for use in categorization and filtering. "
            "Multiple tags allowed but should not have duplicate tags."
        ),
    )
    operationId: str = Field(description="Unique endpoint ID for this endpoint, can be used to create UUIDs/hashes")

    # Human-readable documentation
    summary: str = Field(description="Short summary of what the endpoint does.")
    description: str = Field(description="Full description in markdown of what the endpoint does.")

    # Input / output schemas
    parameters: str = Field(description="Json Schema for the endpoint input parameters.")
    responses: str = Field(description="Json Schema for the possible responses from OBP for this endpoint")

In [47]:
from weaviate.classes.config import Configure

# Create the 'Test' collection (vectorized with OpenAI embeddings) if it is missing.

# Defined in this cell so it does not depend on a later cell having been run first
headers = {
    "X-Openai-Api-Key": os.getenv("OPENAI_APIKEY")
}
client = weaviate.connect_to_local(
    headers=headers
)

try:
    # `collections.get()` hands back a collection handle whether or not the
    # collection exists, so it cannot be used as an existence test.
    if not client.collections.exists('Test'):

        client.collections.create(
            "Test",
            vectorizer_config=Configure.Vectorizer.text2vec_openai(),
        )
finally:
    # Release the connection even if the check or the creation fails
    client.close()

In [49]:
from weaviate.util import generate_uuid5

# Build one Endpoint per (path, method) of the resolved spec and upsert it into
# the 'Test' collection, keyed by a UUID derived from its operationId.
headers = {
    "X-Openai-Api-Key": os.getenv("OPENAI_APIKEY")
}
client = weaviate.connect_to_local(
    headers=headers
)

# Every Endpoint built below is also collected here for use in later cells
documents = []

try:
    test_collection = client.collections.get('Test')

    for path, endpoint in resolved_spec_dict["paths"].items():
        for method, properties in endpoint.items():

            # Skip dynamic resources (this will not be needed when the swagger endpoint is updated to have ?static=false)
            if 'Dynamic-Entity' in properties["tags"]:
                continue

            # Optional: summarize the description of the endpoint with the LLM chain
            #summary_chain_response = endpoint_summary_chain.invoke({"raw_description": properties['description']})
            #properties['description'] = summary_chain_response.content

            # All values must come from this iteration's `properties`
            endpoint_object = Endpoint(
                path=path,
                method=method,
                tags=properties["tags"],
                operationId=properties["operationId"],
                summary=properties["summary"],
                description=md(properties['description']), # Change description from HTML to markdown format
                parameters=json.dumps(properties['parameters']),
                responses=json.dumps(properties['responses'])
            )

            # Generate deterministic UUID from OperationID
            endpoint_uuid = generate_uuid5(properties['operationId'])

            # Weaviate does not like straight response codes as object keys i.e. 200 or 404.
            # Responses are stored as a JSON string above, so those keys are not object keys here;
            # if they are ever stored as a nested object, rename them with response_code_to_string first.

            documents.append(endpoint_object)

            if test_collection.data.exists(endpoint_uuid):
                print(f"UPDATE   {method.upper()} {path}")
                test_collection.data.update(
                    properties = endpoint_object.model_dump(),
                    uuid=endpoint_uuid,
                )

            else:
                print(f"ADD    {method.upper()} {path}")
                test_collection.data.insert(
                    properties = endpoint_object.model_dump(),
                    uuid=endpoint_uuid
                )
finally:
    # Release the connection even if one endpoint fails to convert or upload
    client.close()

UPDATE   POST /obp/v5.0.0/account/check/scheme/iban
UPDATE   GET /obp/v5.0.0/accounts/public
UPDATE   GET /obp/v5.0.0/adapter
UPDATE   GET /obp/v5.0.0/api-collections/API_COLLECTION_ID/api-collection-endpoints
UPDATE   GET /obp/v5.0.0/api-collections/featured
UPDATE   GET /obp/v5.0.0/api-collections/sharable/API_COLLECTION_ID
UPDATE   GET /obp/v5.0.0/api/glossary
UPDATE   GET /obp/v5.0.0/api/versions
UPDATE   GET /obp/v5.0.0/banks
UPDATE   POST /obp/v5.0.0/banks
UPDATE   PUT /obp/v5.0.0/banks
UPDATE   GET /obp/v5.0.0/banks/{BANK_ID}
UPDATE   DELETE /obp/v5.0.0/banks/{BANK_ID}/{CUSTOMER_ID}/attributes/CUSTOMER_ATTRIBUTE_ID
UPDATE   GET /obp/v5.0.0/banks/{BANK_ID}/account-applications
UPDATE   POST /obp/v5.0.0/banks/{BANK_ID}/account-applications
UPDATE   GET /obp/v5.0.0/banks/{BANK_ID}/account-applications/{ACCOUNT_APPLICATION_ID}
UPDATE   PUT /obp/v5.0.0/banks/{BANK_ID}/account-applications/{ACCOUNT_APPLICATION_ID}
UPDATE   POST /obp/v5.0.0/banks/{BANK_ID}/account-web-hooks
UPDATE   PU

In [56]:
# Bulk-import every collected Endpoint into the 'Test' collection.
# A bare expression in the middle of a cell is not displayed, so print the count.
print(len(documents))

client.connect()
try:
    # Same collection as the 'Test' used in the other cells
    test_collection = client.collections.get('Test')

    with test_collection.batch.dynamic() as batch:
        for endpoint in documents:
            # Deterministic UUID derived from the operationId
            obj_uuid = generate_uuid5(endpoint.operationId)
            print(f"ADD    {endpoint.method.upper()} {endpoint.path}  Hash: {obj_uuid}")
            batch.add_object(
                properties=endpoint.model_dump(),
                uuid=obj_uuid
            )

    # Batch errors are collected rather than raised, so surface them explicitly
    failed_objects = test_collection.batch.failed_objects
    if failed_objects:
        print(f"FAILED   {len(failed_objects)} objects were not imported; first error: {failed_objects[0]}")
finally:
    # Release the connection even if the import fails part-way
    client.close()

ADD    POST /obp/v5.0.0/account/check/scheme/iban  Hash: 8790c274-49b9-5505-9b1b-56eaa40bce78
ADD    GET /obp/v5.0.0/accounts/public  Hash: 39854fce-0273-53e3-8469-b5728ffbf575
ADD    GET /obp/v5.0.0/adapter  Hash: 3436a993-6d51-5249-9775-d772cb0547dc
ADD    GET /obp/v5.0.0/api-collections/API_COLLECTION_ID/api-collection-endpoints  Hash: ab6e05e5-0f9c-5d2e-b61c-f7e832c9a7f7
ADD    GET /obp/v5.0.0/api-collections/featured  Hash: c35c7588-d219-58fb-947d-e5d44dda9f64
ADD    GET /obp/v5.0.0/api-collections/sharable/API_COLLECTION_ID  Hash: b1b2eecb-003d-54ed-865c-85eaab18cb2c
ADD    GET /obp/v5.0.0/api/glossary  Hash: 893bb50e-0edb-59ed-8c8e-3a45058719fb
ADD    GET /obp/v5.0.0/api/versions  Hash: c28a568b-1e9d-5c4c-bcf9-f7a2b84e7f39
ADD    GET /obp/v5.0.0/banks  Hash: 12439603-2fc0-5783-b005-37eb6a0487c1
ADD    POST /obp/v5.0.0/banks  Hash: ed952b2a-20fd-5af8-b6b4-88247d2e216b
ADD    PUT /obp/v5.0.0/banks  Hash: e23472fc-05b7-5e81-9f48-8371cc75aece
ADD    GET /obp/v5.0.0/banks/{BANK_ID}  

In [57]:
import pprint

# Pretty-print the configuration of the 'Test' collection.
client.connect()
try:
    # Same collection as the 'Test' used in the other cells
    test_collection = client.collections.get('Test')

    config = test_collection.config.get()

    pprint.pp(config)
finally:
    # Release the connection even if fetching the configuration fails
    client.close()

_CollectionConfig(name='Test',
                  description=None,
                  generative_config=None,
                  inverted_index_config=_InvertedIndexConfig(bm25=_BM25Config(b=0.75,
                                                                              k1=1.2),
                                                             cleanup_interval_seconds=60,
                                                             index_null_state=False,
                                                             index_property_length=False,
                                                             index_timestamps=False,
                                                             stopwords=_StopwordsConfig(preset=<StopwordsPreset.EN: 'en'>,
                                                                                        additions=None,
                                                                                        removals=None)),
                  multi_tenancy_conf

In [66]:
# Fetch objects from the 'Test' collection and print their properties.
client.connect()
try:
    test_collection = client.collections.get('Test')

    # Alternative: semantic search instead of a plain listing
    #response = test_collection.query.near_text(query="How do I create a consent", limit=5, return_properties=["method", "path"])
    response = test_collection.query.fetch_objects()

    print(len(response.objects))
    for o in response.objects:
        print(o.properties)
finally:
    # Release the connection even if the query fails
    client.close()

5
{'path': '/obp/v5.0.0/consumer/consent-requests/CONSENT_REQUEST_ID/EMAIL/consents', 'method': 'post'}
{'path': '/obp/v5.0.0/consumer/consent-requests/CONSENT_REQUEST_ID/IMPLICIT/consents', 'method': 'post'}
{'path': '/obp/v5.0.0/banks/{BANK_ID}/my/consents/EMAIL', 'method': 'post'}
{'path': '/obp/v5.0.0/banks/{BANK_ID}/my/consents/IMPLICIT', 'method': 'post'}
{'path': '/obp/v5.0.0/consumer/consent-requests/CONSENT_REQUEST_ID/SMS/consents', 'method': 'post'}


NameError: name 'all_docs' is not defined