In [2]:
pip install haystack-ai

Collecting haystack-ai
  Downloading haystack_ai-2.12.1-py3-none-any.whl.metadata (14 kB)
Collecting haystack-experimental (from haystack-ai)
  Downloading haystack_experimental-0.8.0-py3-none-any.whl.metadata (12 kB)
Collecting lazy-imports (from haystack-ai)
  Downloading lazy_imports-0.4.0-py3-none-any.whl.metadata (10 kB)
Collecting posthog!=3.12.0 (from haystack-ai)
  Downloading posthog-3.24.1-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting monotonic>=1.5 (from posthog!=3.12.0->haystack-ai)
  Downloading monotonic-1.6-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting backoff>=1.10.0 (from posthog!=3.12.0->haystack-ai)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Downloading haystack_ai-2.12.1-py3-none-any.whl (482 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m483.0/483.0 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading posthog-3.24.1-py2.py3-none-

In [4]:
pip install keyBERT

Collecting keyBERT
  Downloading keybert-0.9.0-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=0.3.8->keyBERT)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=0.3.8->keyBERT)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=0.3.8->keyBERT)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers>=0.3.8->keyBERT)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers>=0.3.8->keyBERT)
  Downloading nvi

In [25]:
from haystack.tools import Tool

class AutonomousTechExtractorAgent:
    def __init__(self):
        try:
            import spacy
            from keybert import KeyBERT
            from collections import defaultdict
            import os

            os.environ["TORCH_COMPILE_DEBUG"] = "1"
            os.environ["TORCH_COMPILE"] = "0"

            self.spacy_nlp = spacy.load("en_core_web_sm")
            self.kw_model = KeyBERT(model='paraphrase-MiniLM-L6-v2')

            self.tech_categories = {
                "Operations": ["ai", "data analytics", "predictive", "optimization", "forecasting", "automation", "bot"],
                "Customer Experience": ["membership", "personalized", "behavior analytics", "recommendation"],
                "Supply Chain": ["inventory", "tracking", "logistics", "delivery"],
                "Sustainability": ["eco", "green", "environment", "carbon", "sustainability"]
            }

        except Exception as e:
            print(f"Error loading libraries: {e}")

    def clean_text(self, text):
        doc = self.spacy_nlp(text)
        return " ".join([sent.text.strip() for sent in doc.sents if len(sent.text.strip()) > 20])

    def extract_keywords(self, text, top_n=15):
        return self.kw_model.extract_keywords(
            text,
            keyphrase_ngram_range=(1, 3),
            stop_words="english",
            use_maxsum=True,
            top_n=top_n
        )

    def categorize_keywords(self, keywords):
        categorized = defaultdict(list)
        tech_terms = []

        for phrase, _ in keywords:
            phrase_lower = phrase.lower()
            tech_terms.append(phrase)
            for category, keywords in self.tech_categories.items():
                if any(term in phrase_lower for term in keywords):
                    categorized[category].append(phrase)
                    break
        return categorized, tech_terms

    def find_relationships(self, text, tech_terms):
        relationships = []
        for sent in text.split('.'):
            matched = [term for term in tech_terms if term.lower() in sent.lower()]
            if len(matched) >= 2:
                relationships.append({
                    "technologies": matched,
                    "context": sent.strip()
                })
        return relationships

    def summarize(self, categorized):
        total = sum(len(v) for v in categorized.values())
        return {
            "Total Technologies": total,
            "Categories Covered": f"{len(categorized)}/{len(self.tech_categories)}"
        }

    def run(self, text: str) -> dict:
        cleaned = self.clean_text(text)
        keywords = self.extract_keywords(cleaned)
        categorized, tech_terms = self.categorize_keywords(keywords)
        relationships = self.find_relationships(cleaned, tech_terms)
        summary = self.summarize(categorized)

        return {
            "Technologies by Category": dict(categorized),
            "Technology Relationships": relationships,
            "Summary Metrics": summary
        }




In [26]:

# Creating the tool using the agent
tech_extractor = AutonomousTechExtractorAgent()

tech_extractor_tool = Tool(
    name="TechExtractorTool",
    description="Extracts and categorizes technology and novel terms from a document.",
    function=lambda **inputs: tech_extractor.run(inputs["text"]),
    parameters={
        "type": "object",
        "properties": {
            "text": {"type": "string"}
        },
        "required": ["text"]
    }
)

In [28]:
# Step 1: user input
input_text = input("Enter a paragraph to extract technologies from: ")

# Step 2: Invoke the tool with the input text
result = tech_extractor_tool.invoke(text=input_text)

# Step 3: Display the result in a structured format
print("\nüîç Technology Extraction Result:\n")

# Extracting and printing the result in a structured format
print(" Technologies by Category:")
for category, technologies in result["Technologies by Category"].items():
    print(f"  - {category}:")
    for tech in technologies:
        print(f"    * {tech}")

print("\n Technology Relationships:")
if result["Technology Relationships"]:
    for relationship in result["Technology Relationships"]:
        print(f"  - Context: {relationship['context']}")
        print(f"    Technologies: {', '.join(relationship['technologies'])}\n")
else:
    print("  No relationships found.")

print("\n Summary Metrics:")
summary = result["Summary Metrics"]
print(f"  Total Technologies: {summary['Total Technologies']}")
print(f"  Categories Covered: {summary['Categories Covered']}")


Enter a paragraph to extract technologies from: Blockchain and AI are transforming inventory management and customer analytics across logistics.

üîç Technology Extraction Result:

 Technologies by Category:
  - Operations:
    * ai
    * ai transforming
    * blockchain
    * blockchain ai
    * blockchain ai transforming
  - Supply Chain:
    * logistics
    * inventory
    * customer analytics logistics
    * transforming inventory
    * inventory management

 Technology Relationships:
  - Context: Blockchain and AI are transforming inventory management and customer analytics across logistics
    Technologies: customer, ai, management, analytics, customer analytics, logistics, inventory, blockchain, transforming inventory, inventory management


 Summary Metrics:
  Total Technologies: 10
  Categories Covered: 2/4
