<a href="https://colab.research.google.com/github/Sidhtang/fine-tuning-of-llms-/blob/main/graph_of_thought_impplementation_with_bert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import torch
from transformers import BertTokenizer, BertModel
import numpy as np
from typing import Dict, List, Tuple
from collections import defaultdict
import torch.nn.functional as F

class BertThoughtNode:
    """A single thought in the graph: its text, its embedding and its links.

    Attributes:
        content: The text of the thought.
        bert_embedding: Embedding tensor supplied by the caller.
        activation: Current activation level of the node.
        connections: Outgoing edges, mapping a neighbour node to its weight.
    """

    def __init__(self, content: str, bert_embedding: torch.Tensor, activation: float = 0.0):
        self.connections: Dict['BertThoughtNode', float] = {}
        self.activation = activation
        self.bert_embedding = bert_embedding  # BERT embedding vector
        self.content = content

    def connect(self, other_node: 'BertThoughtNode', weight: float = None):
        """Add (or overwrite) a directed edge from this node to ``other_node``.

        When ``weight`` is omitted, the edge weight is the cosine similarity
        between the two nodes' embeddings.
        """
        # An explicit weight wins; nothing needs to be computed.
        if weight is not None:
            self.connections[other_node] = weight
            return

        # Add a leading batch axis to both embeddings before comparing them.
        own_batch = self.bert_embedding.unsqueeze(0)
        other_batch = other_node.bert_embedding.unsqueeze(0)
        similarity = F.cosine_similarity(own_batch, other_batch)
        self.connections[other_node] = similarity.item()

    def update_activation(self, input_activation: float):
        """Fold incoming activation into this node, squashed with tanh."""
        combined = self.activation + input_activation
        self.activation = np.tanh(combined)

class BertGraphOfThoughts:
    """Graph of text "thoughts" linked by BERT-embedding similarity.

    Nodes are keyed by their text content. ``process_prompt`` seeds node
    activations from each node's similarity to a prompt and then spreads
    activation along the weighted connections for a number of steps.

    Attributes:
        tokenizer: Tokenizer loaded from ``bert-base-uncased``.
        model: BERT model loaded from ``bert-base-uncased`` (eval mode).
        nodes: Mapping from thought text to its node.
        activation_threshold: A node must exceed this activation to be
            reported as active and to pass activation to its neighbours.
        decay_rate: Fraction of activation every node loses per step.
    """

    def __init__(self):
        # Initialize BERT model and tokenizer
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.model = BertModel.from_pretrained('bert-base-uncased')
        self.model.eval()  # Set to evaluation mode

        self.nodes: Dict[str, 'BertThoughtNode'] = {}
        self.activation_threshold = 0.3
        self.decay_rate = 0.1

    def get_bert_embedding(self, text: str) -> torch.Tensor:
        """Return the BERT embedding used to represent ``text``.

        The embedding is the last hidden state at position 0 (the [CLS]
        token), computed without gradient tracking. For a single input
        string the leading batch dimension is squeezed away.
        """
        with torch.no_grad():
            inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
            outputs = self.model(**inputs)
            # Use the [CLS] token embedding as the text representation
            embedding = outputs.last_hidden_state[:, 0, :].squeeze(0)
            return embedding

    def add_node(self, content: str) -> 'BertThoughtNode':
        """Return the node for ``content``, creating and embedding it if new."""
        if content not in self.nodes:
            embedding = self.get_bert_embedding(content)
            self.nodes[content] = BertThoughtNode(content, embedding)
        return self.nodes[content]

    def connect_thoughts(self, content1: str, content2: str, weight: float = None):
        """Create bidirectional connections between two thoughts.

        Missing nodes are created first. When ``weight`` is ``None`` each
        direction's weight is derived by the node's own ``connect`` method.
        """
        node1 = self.add_node(content1)
        node2 = self.add_node(content2)
        node1.connect(node2, weight)
        node2.connect(node1, weight)

    def find_related_thoughts(self, prompt: str, threshold: float = 0.5) -> List[Tuple[str, float]]:
        """Find thoughts whose embedding is similar to the prompt's.

        Args:
            prompt: Text to compare against every node.
            threshold: Only nodes with cosine similarity strictly greater
                than this value are returned.

        Returns:
            ``(content, similarity)`` pairs sorted by descending similarity.
        """
        # Nothing to compare against: skip the model forward pass entirely.
        if not self.nodes:
            return []

        # The prompt's batch-shaped embedding is loop-invariant; build it once.
        prompt_embedding = self.get_bert_embedding(prompt).unsqueeze(0)
        related_thoughts = []

        for content, node in self.nodes.items():
            similarity = F.cosine_similarity(
                prompt_embedding,
                node.bert_embedding.unsqueeze(0)
            ).item()
            if similarity > threshold:
                related_thoughts.append((content, similarity))

        related_thoughts.sort(key=lambda pair: pair[1], reverse=True)
        return related_thoughts

    def process_prompt(self, prompt: str, steps: int = 5) -> List[List[Dict]]:
        """Process a prompt through the graph of thoughts using BERT.

        Args:
            prompt: Text used to seed the initial activations.
            steps: Number of propagation rounds to simulate.

        Returns:
            A list with ``steps + 1`` entries. Entry 0 holds the initially
            active thoughts; each later entry holds the active thoughts after
            one more propagation round, sorted by descending activation.
            Every thought is a dict with keys ``'content'`` and
            ``'activation'``.
        """
        # Find initially related thoughts
        related_thoughts = self.find_related_thoughts(prompt)

        # Start every run from a clean slate. Without this, nodes that are
        # not related to the current prompt would keep whatever activation
        # an earlier call left behind and leak it into this propagation.
        for node in self.nodes.values():
            node.activation = 0.0

        # Initialize activations
        active_thoughts = []
        for content, similarity in related_thoughts:
            self.nodes[content].activation = similarity
            if similarity > self.activation_threshold:
                active_thoughts.append({
                    'content': content,
                    'activation': similarity
                })

        thought_process = [active_thoughts]

        # Simulate thought propagation
        for _ in range(steps):
            # Collect all incoming activation first so that every node is
            # updated from the same snapshot of the previous step.
            new_activations = defaultdict(float)
            for node in self.nodes.values():
                if node.activation > self.activation_threshold:
                    for connected_node, weight in node.connections.items():
                        new_activations[connected_node] += node.activation * weight

            # Update activations and apply decay
            active_thoughts = []
            for node in self.nodes.values():
                if node in new_activations:
                    node.update_activation(new_activations[node])
                node.activation *= (1 - self.decay_rate)

                if node.activation > self.activation_threshold:
                    active_thoughts.append({
                        'content': node.content,
                        # Report a plain float even if the node stores a
                        # numpy scalar after its activation update.
                        'activation': float(node.activation)
                    })

            active_thoughts.sort(key=lambda entry: entry['activation'], reverse=True)
            thought_process.append(active_thoughts)

        return thought_process

def demonstrate_bert_got():
    """Build a small concept graph, run one prompt through it, print each step."""
    # Create a graph of thoughts with BERT
    graph = BertGraphOfThoughts()

    # Knowledge statements that become the nodes of the graph
    knowledge = [
        "Language models process text using neural networks",
        "BERT uses bidirectional context for understanding",
        "Transformers utilize self-attention mechanisms",
        "Natural language processing involves semantic analysis",
        "Deep learning models learn hierarchical representations",
        "Text generation predicts tokens based on context",
        "Neural networks process information in layers",
        "Machine learning models require training data",
        "Context understanding is crucial for language models",
        "Attention mechanisms focus on relevant information"
    ]

    # Link every unordered pair of statements exactly once
    pairs = [
        (first, second)
        for position, first in enumerate(knowledge)
        for second in knowledge[position + 1:]
    ]
    for first, second in pairs:
        graph.connect_thoughts(first, second)

    # Run a prompt through the graph
    prompt = "How do transformers understand context in language?"
    print(f"Processing prompt: {prompt}\n")
    history = graph.process_prompt(prompt)

    # Show which thoughts are active after each propagation step
    for step_num, thoughts in enumerate(history):
        print(f"Step {step_num}:")
        for thought in thoughts:
            print(f"  - {thought['content']} (activation: {thought['activation']:.3f})")
        print()

# Run the demonstration only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    demonstrate_bert_got()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Processing prompt: How do transformers understand context in language?

Step 0:
  - Context understanding is crucial for language models (activation: 0.924)
  - Machine learning models require training data (activation: 0.906)
  - Deep learning models learn hierarchical representations (activation: 0.904)
  - Language models process text using neural networks (activation: 0.903)
  - Attention mechanisms focus on relevant information (activation: 0.903)
  - Transformers utilize self-attention mechanisms (activation: 0.901)
  - BERT uses bidirectional context for understanding (activation: 0.900)
  - Neural networks process information in layers (activation: 0.894)
  - Natural language processing involves semantic analysis (activation: 0.886)
  - Text generation predicts tokens based on context (activation: 0.798)

Step 1:
  - Deep learning models learn hierarchical representations (activation: 0.900)
  - Machine learning models require training data (activation: 0.900)
  - Language mode