In [None]:
import json
import os
import numpy as np 
from azure.cosmos import CosmosClient, exceptions

In [8]:
def load_local_settings(path="local.settings.json"):
    """
    Load environment variables from local.settings.json Azure Functions style config.
    """
    try:
        with open(path, "r") as f:
            settings = json.load(f)
            values = settings.get("Values", {})
            for key, value in values.items():
                os.environ[key] = value
        print(f"Loaded settings from {path}")
    except FileNotFoundError:
        print(f"Warning: {path} not found. Make sure your config file is present.")

In [16]:
def main():
    # Load settings into env variables
    load_local_settings()

    # Fetch variables
    endpoint = os.getenv("COSMOS_ENDPOINT")
    key = os.getenv("COSMOS_KEY")

    # Hardcoded DB and container names (you can put these in your local.settings.json if you want)
    database_name = "ProcessedDB"
    container_name = "aecDataDB"

    # Basic validation
    if not all([endpoint, key, database_name, container_name]):
        print("Error: Missing one or more Cosmos DB config environment variables or hardcoded names!")
        return

    # Initialize Cosmos client
    try:
        client = CosmosClient(endpoint, key)
        database = client.get_database_client(database_name)
        container = database.get_container_client(container_name)
    except exceptions.CosmosHttpResponseError as e:
        print("Cosmos DB connection error:", e)
        return

    # Read some documents
    print("\nReading up to 5 documents from Cosmos DB container:")
    try:
        for i, item in enumerate(container.read_all_items()):
            print(f"\nDocument {i + 1}:")
            print(item)
            if i == 4:
                break
    except Exception as e:
        print("Error reading documents:", e)

if __name__ == "__main__":
    main()

Loaded settings from local.settings.json

Reading up to 5 documents from Cosmos DB container:

Document 1:
{'id': 'digitaltrends__1_pdf_0', 'category': 'mydataset1', 'filename': 'digitaltrends__1.pdf', 'text': "2025 REPORTDIGITAL TECHNOLOGY TRENDS IN THE AEC INDUSTRYVVECTORWORKS A NEMETSCHEK COMPANYFOREWORDBY DR. BIPLAB SARKAR, CEO, VECTORWORKSI believe it's our job to create tools that enhance workflows rather than limit or redefine them.The architecture, engineering, and construction (AEC) industry has been changing for a long time, in part due to technological advancement and in part because of the workflow pressure provided by increasing demands on public infrastructure. In tandem, these forces create an opportunity for AEC professionals to adjust their processes to deliver intelligent, climate-sensitive work.But I'll be the first to admit that changing established workflows is uncomfortable, especially because our livelihood is tied to the projects we deliver. However, as you'll r

In [20]:
keywords = [
    "BIM", "sustainable design", "AR", "VR", "climate-sensitive", 
    "digital technology", "workflow", "infrastructure", "vectorworks", 
    "AEC industry", "building information modeling", "tender", "contract"
]


In [21]:
query = """
SELECT c.id, c.filename, c.text
FROM c
WHERE """ + " OR ".join([f'CONTAINS(c.text, "{kw}")' for kw in keywords])


In [22]:
import re

container = main()

results = list(container.query_items(
    query=query,
    enable_cross_partition_query=True
))

for doc in results:
    text = doc["text"]
    print(f"Document ID: {doc['id']}")
    
    for kw in keywords:
        for match in re.finditer(re.escape(kw), text, re.IGNORECASE):
            start = match.start()
            snippet = text[max(0, start-30):start+30]
            print(f" - Found '{kw}' at pos {start}: ...{snippet}...")
    print("-" * 50)


Loaded settings from local.settings.json

Reading up to 5 documents from Cosmos DB container:

Document 1:
{'id': 'digitaltrends__1_pdf_0', 'category': 'mydataset1', 'filename': 'digitaltrends__1.pdf', 'text': "2025 REPORTDIGITAL TECHNOLOGY TRENDS IN THE AEC INDUSTRYVVECTORWORKS A NEMETSCHEK COMPANYFOREWORDBY DR. BIPLAB SARKAR, CEO, VECTORWORKSI believe it's our job to create tools that enhance workflows rather than limit or redefine them.The architecture, engineering, and construction (AEC) industry has been changing for a long time, in part due to technological advancement and in part because of the workflow pressure provided by increasing demands on public infrastructure. In tandem, these forces create an opportunity for AEC professionals to adjust their processes to deliver intelligent, climate-sensitive work.But I'll be the first to admit that changing established workflows is uncomfortable, especially because our livelihood is tied to the projects we deliver. However, as you'll r

AttributeError: 'NoneType' object has no attribute 'query_items'

In [25]:
from azure.storage.blob import BlobServiceClient
import os

connect_str = os.getenv("AzureWebJobsStorage")

# Create a BlobServiceClient using the connection string
blob_service_client = BlobServiceClient.from_connection_string(connect_str)

# Choose your container
container_name = "mydataset1"
container_client = blob_service_client.get_container_client(container_name)

# List blobs
blob_list = container_client.list_blobs()
for blob in blob_list:
    print(blob.name)


digitaltrends__1.pdf
digitaltrends__10.pdf
digitaltrends__11.pdf
digitaltrends__12.pdf
digitaltrends__13.pdf
digitaltrends__14.pdf
digitaltrends__15.pdf
digitaltrends__16.pdf
digitaltrends__2.pdf
digitaltrends__3.pdf
digitaltrends__4.pdf
digitaltrends__5.pdf
digitaltrends__6.pdf
digitaltrends__7.pdf
digitaltrends__8.pdf
digitaltrends__9.pdf
marketreport3__1.pdf
marketreport3__10.pdf
marketreport3__11.pdf
marketreport3__12.pdf
marketreport3__13.pdf
marketreport3__14.pdf
marketreport3__15.pdf
marketreport3__16.pdf
marketreport3__17.pdf
marketreport3__18.pdf
marketreport3__19.pdf
marketreport3__2.pdf
marketreport3__20.pdf
marketreport3__21.pdf
marketreport3__22.pdf
marketreport3__23.pdf
marketreport3__24.pdf
marketreport3__25.pdf
marketreport3__3.pdf
marketreport3__4.pdf
marketreport3__5.pdf
marketreport3__6.pdf
marketreport3__7.pdf
marketreport3__8.pdf
marketreport3__9.pdf
newsarticles.json
projects1__1.pdf
projects1__2.pdf
projects1__3.pdf
projects1__4.pdf
projects1__5.pdf
projects1__6.

In [None]:

import pandas as pd 

blob_name = "tenders.csv"

blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_client = blob_service_client.get_container_client(container_name)

# Download blob content as bytes
blob_client = container_client.get_blob_client(blob_name)
stream = blob_client.download_blob()
data = stream.readall()

# Load into pandas DataFrame
from io import BytesIO
df = pd.read_csv(BytesIO(data))

# Check dataframe head
print(df.head(10))


   tender_id                                       project_name  \
0  tid-20000            power plant in british columbia, canada   
1  tid-20001                        stadium in algeria, algeria   
2  tid-20002                 school in netherlands, netherlands   
3  tid-20003                         hospital in kuwait, kuwait   
4  tid-20004                       warehouse in alberta, canada   
5  tid-20005                  warehouse in singapore, singapore   
6  tid-20006                      bridge in victoria, australia   
7  tid-20007                        dam in indonesia, indonesia   
8  tid-20008  residential complex in western australia, aust...   
9  tid-20009                                dam in egypt, egypt   

           sector         region      country          subregion  value_usd_m  \
0  infrastructure  north america       canada   british columbia    893.89000   
1      healthcare         africa      algeria            algeria   1587.12000   
2       utilities  