In [1]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain_community)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 k

In [3]:
import torch
import time
import json
import re
import logging
from typing import List, Dict, Any, Tuple, Optional
from dataclasses import dataclass
from pathlib import Path
import gc

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f" Using device: {device}")
if device == "cuda":
    print(f" GPU: {torch.cuda.get_device_name()}")
    print(f" GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

@dataclass
class Document:
    """Represents a source document."""
    id: str
    title: str
    content: str
    metadata: Optional[Dict[str, Any]] = None


@dataclass
class DocumentChunk:
    """Represents a chunk of text from a document."""
    id: str
    text: str
    source_doc_id: str
    chunk_index: int
    metadata: Dict[str, Any]


@dataclass
class Entity:
    """Represents an extracted entity."""
    name: str
    type: str
    description: str
    source_chunks: List[str]
    confidence: float = 1.0


@dataclass
class Relationship:
    """Represents a relationship between entities."""
    source_entity: str
    target_entity: str
    relationship_type: str
    description: str
    source_chunks: List[str]
    confidence: float = 1.0


# ============================================================================
# DOCUMENT PROCESSING
# ============================================================================

class DocumentProcessor:
    """Simplified document processor for Colab."""

    def __init__(self, chunk_size: int = 600, chunk_overlap: int = 100):
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        logger.info(f" Document processor initialized (chunk_size={chunk_size})")

    def chunk_document(self, document: Document) -> List[DocumentChunk]:
        """Split a document into chunks."""
        if not document.content.strip():
            return []

        text = document.content
        chunks = []

        # Simple chunking by character count
        start = 0
        chunk_index = 0

        while start < len(text):
            end = start + self.chunk_size

            # Try to break at sentence boundary
            if end < len(text):
                last_period = text.rfind('.', start, end)
                last_newline = text.rfind('\n', start, end)
                break_point = max(last_period, last_newline)

                if break_point > start:
                    end = break_point + 1

            chunk_text = text[start:end].strip()

            if chunk_text:
                chunk = DocumentChunk(
                    id=f"{document.id}_chunk_{chunk_index}",
                    text=chunk_text,
                    source_doc_id=document.id,
                    chunk_index=chunk_index,
                    metadata={'source_title': document.title}
                )
                chunks.append(chunk)
                chunk_index += 1

            # Move start position with overlap
            start = end - self.chunk_overlap
            if start <= 0:
                start = end

        logger.info(f" Created {len(chunks)} chunks from document '{document.title}'")
        return chunks


# ============================================================================
# MODEL MANAGEMENT
# ============================================================================

class ModelManager:
    """Simplified model manager for Colab."""

    def __init__(self, model_name: str = "Qwen/Qwen2.5-1.5B-Instruct"):
        self.model_name = model_name
        self.device = device
        self.model = None
        self.tokenizer = None

        logger.info(f" Model manager initialized with {model_name}")

    def load_model(self) -> bool:
        """Load the LLM model."""
        try:
            from transformers import AutoTokenizer, AutoModelForCausalLM

            logger.info(f" Loading model: {self.model_name}")
            start_time = time.time()

            # Load tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )

            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            # Load model
            model_kwargs = {
                'trust_remote_code': True,
                'torch_dtype': torch.float16 if self.device == "cuda" else torch.float32,
                'device_map': "auto" if self.device == "cuda" else None,
                'low_cpu_mem_usage': True
            }

            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                **model_kwargs
            )

            load_time = time.time() - start_time
            logger.info(f" Model loaded in {load_time:.1f}s")

            if self.device == "cuda":
                memory_used = torch.cuda.memory_allocated() / 1e9
                logger.info(f" GPU Memory used: {memory_used:.2f} GB")

            return True

        except Exception as e:
            logger.error(f"❌ Error loading model: {e}")
            return False

    def generate(self, prompt: str, max_tokens: int = 300, temperature: float = 0.1) -> str:
        """Generate text using the model."""
        if self.model is None:
            raise RuntimeError("Model not loaded")

        try:
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    do_sample=temperature > 0,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id
                )

            response = self.tokenizer.decode(
                outputs[0][inputs['input_ids'].shape[1]:],
                skip_special_tokens=True
            )

            return response.strip()

        except Exception as e:
            logger.error(f"❌ Generation error: {e}")
            return ""

    def cleanup(self):
        """Clean up model memory."""
        if self.model is not None:
            del self.model
            self.model = None

        if self.tokenizer is not None:
            del self.tokenizer
            self.tokenizer = None

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        gc.collect()
        logger.info(" Model memory cleaned up")


# ============================================================================
# ENTITY EXTRACTION
# ============================================================================

class EntityExtractor:

    def __init__(self, model_manager: SimpleModelManager):
        self.model_manager = model_manager
        self.tuple_delimiter = "<|>"
        self.record_delimiter = "##"
        self.completion_delimiter = "<|COMPLETE|>"

        self.entity_types = [
            "PERSON", "ORGANIZATION", "LOCATION", "EVENT",
            "CONCEPT", "TECHNOLOGY", "PRODUCT"
        ]

        logger.info("🔍 Entity extractor initialized")

    def create_extraction_prompt(self, text: str) -> str:
        """Create entity extraction prompt."""
        entity_types_str = ", ".join(self.entity_types)

        return f"""You are an AI assistant that extracts entities and relationships from text.

## Task
Extract the most important entities and relationships from the text below.

## Entity Types
Extract these types: {entity_types_str}

## Output Format
For entities: (name{self.tuple_delimiter}type{self.tuple_delimiter}description){self.record_delimiter}
For relationships: (source{self.tuple_delimiter}relation{self.tuple_delimiter}target{self.tuple_delimiter}description){self.record_delimiter}

## Guidelines
- Focus on the most important entities
- Use clear, descriptive names
- Keep descriptions concise
- Extract meaningful relationships

## Text:
{text}

## Extracted Entities and Relationships:
"""

    def parse_response(self, response: str, chunk_id: str) -> Tuple[List[Entity], List[Relationship]]:
        """Parse model response into entities and relationships."""
        entities = []
        relationships = []

        if not response:
            return entities, relationships

        # Clean response
        response = response.replace(self.completion_delimiter, "")
        records = response.split(self.record_delimiter)

        for record in records:
            record = record.strip()
            if not record or not (record.startswith('(') and record.endswith(')')):
                continue

            content = record[1:-1]  # Remove parentheses
            parts = content.split(self.tuple_delimiter)

            if len(parts) == 3:
                # Entity
                name, entity_type, description = [p.strip() for p in parts]
                if name and entity_type:
                    entities.append(Entity(
                        name=name,
                        type=entity_type.upper(),
                        description=description,
                        source_chunks=[chunk_id]
                    ))

            elif len(parts) == 4:
                # Relationship
                source, relation, target, description = [p.strip() for p in parts]
                if source and target and relation:
                    relationships.append(Relationship(
                        source_entity=source,
                        target_entity=target,
                        relationship_type=relation.upper(),
                        description=description,
                        source_chunks=[chunk_id]
                    ))

        return entities, relationships

    def extract_from_chunk(self, chunk: DocumentChunk) -> Tuple[List[Entity], List[Relationship]]:
        """Extract entities and relationships from a chunk."""
        prompt = self.create_extraction_prompt(chunk.text)
        response = self.model_manager.generate(prompt, max_tokens=400, temperature=0.1)

        entities, relationships = self.parse_response(response, chunk.id)

        logger.info(f"🔍 Chunk {chunk.id}: {len(entities)} entities, {len(relationships)} relationships")
        return entities, relationships

    def merge_entities(self, entities: List[Entity]) -> List[Entity]:
        """Merge duplicate entities."""
        if not entities:
            return []

        # Group by normalized name
        entity_groups = {}

        for entity in entities:
            normalized_name = entity.name.lower().strip()

            if normalized_name not in entity_groups:
                entity_groups[normalized_name] = []
            entity_groups[normalized_name].append(entity)

        # Merge groups
        merged_entities = []
        for group in entity_groups.values():
            if len(group) == 1:
                merged_entities.append(group[0])
            else:
                # Merge multiple entities
                merged = Entity(
                    name=group[0].name,  # Use first name
                    type=max(set(e.type for e in group), key=lambda x: sum(1 for e in group if e.type == x)),  # Most common type
                    description="; ".join(set(e.description for e in group if e.description)),
                    source_chunks=list(set(chunk for e in group for chunk in e.source_chunks)),
                    confidence=sum(e.confidence for e in group) / len(group)
                )
                merged_entities.append(merged)

        logger.info(f"🔄 Merged {len(entities)} entities into {len(merged_entities)} unique entities")
        return merged_entities

    def merge_relationships(self, relationships: List[Relationship]) -> List[Relationship]:
        """Merge duplicate relationships."""
        if not relationships:
            return []

        # Group by source, target, and type
        rel_groups = {}

        for rel in relationships:
            key = (
                rel.source_entity.lower().strip(),
                rel.target_entity.lower().strip(),
                rel.relationship_type
            )

            if key not in rel_groups:
                rel_groups[key] = []
            rel_groups[key].append(rel)

        # Merge groups
        merged_relationships = []
        for group in rel_groups.values():
            if len(group) == 1:
                merged_relationships.append(group[0])
            else:
                # Merge multiple relationships
                merged = Relationship(
                    source_entity=group[0].source_entity,
                    target_entity=group[0].target_entity,
                    relationship_type=group[0].relationship_type,
                    description="; ".join(set(r.description for r in group if r.description)),
                    source_chunks=list(set(chunk for r in group for chunk in r.source_chunks)),
                    confidence=sum(r.confidence for r in group) / len(group)
                )
                merged_relationships.append(merged)

        logger.info(f"🔄 Merged {len(relationships)} relationships into {len(merged_relationships)} unique relationships")
        return merged_relationships


# ============================================================================
# MAIN PIPELINE
# ============================================================================

class GraphRAGPipeline:
    """Main GraphRAG pipeline for Colab."""

    def __init__(self, model_name: str = "Qwen/Qwen2.5-1.5B-Instruct"):
        self.model_manager = ModelManager(model_name)
        self.doc_processor = DocumentProcessor()
        self.entity_extractor = EntityExtractor(self.model_manager)

        self.stats = {
            'documents_processed': 0,
            'chunks_created': 0,
            'entities_extracted': 0,
            'relationships_extracted': 0,
            'total_time': 0
        }

        logger.info("🚀 GraphRAG Pipeline initialized")

    def load_models(self) -> bool:
        """Load all required models."""
        return self.model_manager.load_model()

    def create_test_documents(self) -> List[Document]:
        """Create test documents for demonstration."""
        documents = [
            Document(
                id="ai_companies",
                title="AI Companies and Technology",
                content="""
                OpenAI is an artificial intelligence research company founded by Sam Altman, Elon Musk, and others in San Francisco in 2015.
                The company has developed several breakthrough AI models including GPT-3, GPT-4, and ChatGPT, which are large language models
                capable of understanding and generating human-like text. OpenAI's mission is to ensure that artificial general intelligence
                benefits all of humanity. The company has also created DALL-E, an AI system that can generate images from text descriptions.

                Google, led by CEO Sundar Pichai, has developed competing AI models like PaLM, Bard, and Gemini. Google's DeepMind division,
                headquartered in London, has made significant breakthroughs including AlphaGo, which defeated world champion Go players,
                and AlphaFold, which solved protein folding predictions. DeepMind was founded by Demis Hassabis and acquired by Google in 2014.

                Microsoft, under CEO Satya Nadella, has integrated OpenAI's technology into their products through a strategic partnership.
                Microsoft has invested billions in OpenAI and integrated GPT models into Microsoft Office, Bing search, and Azure cloud services.
                """
            ),
            Document(
                id="tech_leaders",
                title="Technology Leaders and Innovation",
                content="""
                Elon Musk, the CEO of Tesla and SpaceX, has been a major figure in technology innovation. Tesla, headquartered in Austin, Texas,
                has revolutionized the electric vehicle industry with models like the Model S, Model 3, and Model Y. The company's Gigafactories
                in Nevada, Texas, and other locations represent the future of sustainable manufacturing.

                Tim Cook serves as the CEO of Apple, continuing the legacy established by Steve Jobs. Apple, based in Cupertino, California,
                is known for innovative consumer products like the iPhone, iPad, and Mac computers. The company has also ventured into
                artificial intelligence with Siri and machine learning capabilities.

                Jeff Bezos founded Amazon in Seattle and served as CEO before transitioning to Executive Chairman. Amazon has expanded from
                e-commerce to cloud computing with Amazon Web Services (AWS), which has become a dominant force in cloud infrastructure.
                Andy Jassy, former head of AWS, now serves as Amazon's CEO.
                """
            ),
            Document(
                id="research_institutions",
                title="Research Institutions and Academia",
                content="""
                Stanford University, located in Silicon Valley, has been instrumental in training many technology leaders and fostering innovation.
                The Stanford AI Lab has contributed significantly to machine learning research, computer vision, and natural language processing.
                Notable alumni include the founders of Google, Yahoo, and many other tech companies.

                MIT (Massachusetts Institute of Technology) in Cambridge, Massachusetts, is renowned for its Computer Science and Artificial
                Intelligence Laboratory (CSAIL). Researchers there work on robotics, human-computer interaction, and distributed systems.
                MIT has produced numerous technology entrepreneurs and continues to be at the forefront of AI research.

                Carnegie Mellon University in Pittsburgh has a strong tradition in AI research, particularly in computer vision,
                natural language processing, and robotics. The university's partnerships with industry have led to numerous technological
                breakthroughs and successful startups.
                """
            )
        ]

        logger.info(f"Created {len(documents)} test documents")
        return documents

    def run_pipeline(self, documents: List[Document] = None) -> Dict[str, Any]:
        """Run the complete GraphRAG pipeline."""
        start_time = time.time()

        logger.info(" Starting GraphRAG pipeline")

        # Use test documents if none provided
        if documents is None:
            documents = self.create_test_documents()

        self.stats['documents_processed'] = len(documents)

        # Step 1: Process documents into chunks
        logger.info(" Processing documents into chunks:")
        all_chunks = []
        for doc in documents:
            chunks = self.doc_processor.chunk_document(doc)
            all_chunks.extend(chunks)

        self.stats['chunks_created'] = len(all_chunks)
        logger.info(f" Created {len(all_chunks)} total chunks")

        # Step 2: Extract entities and relationships
        logger.info("Extracting entities and relationships:")
        all_entities = []
        all_relationships = []

        for i, chunk in enumerate(all_chunks):
            logger.info(f"Processing chunk {i+1}/{len(all_chunks)}")
            entities, relationships = self.entity_extractor.extract_from_chunk(chunk)
            all_entities.extend(entities)
            all_relationships.extend(relationships)

        self.stats['entities_extracted'] = len(all_entities)
        self.stats['relationships_extracted'] = len(all_relationships)

        # Step 3: Merge duplicates
        logger.info("Merging duplicates:")
        merged_entities = self.entity_extractor.merge_entities(all_entities)
        merged_relationships = self.entity_extractor.merge_relationships(all_relationships)

        # Finalize
        end_time = time.time()
        self.stats['total_time'] = end_time - start_time

        logger.info("Pipeline completed successfully!")
        self.log_statistics()

        return {
            'entities': merged_entities,
            'relationships': merged_relationships,
            'statistics': self.stats
        }

    def log_statistics(self):
        """Log pipeline statistics."""
        stats = self.stats
        logger.info(" === Pipeline Statistics ===")
        logger.info(f"Total time: {stats['total_time']:.2f} seconds")
        logger.info(f"Documents processed: {stats['documents_processed']}")
        logger.info(f"Chunks created: {stats['chunks_created']}")
        logger.info(f"Entities extracted: {stats['entities_extracted']}")
        logger.info(f"Relationships extracted: {stats['relationships_extracted']}")

        if stats['chunks_created'] > 0:
            logger.info(f"Avg entities per chunk: {stats['entities_extracted'] / stats['chunks_created']:.1f}")
            logger.info(f"Avg relationships per chunk: {stats['relationships_extracted'] / stats['chunks_created']:.1f}")

    def display_results(self, results: Dict[str, Any]):
        """Display results in a nice format."""
        entities = results['entities']
        relationships = results['relationships']

        print("\n" + "="*80)
        print("GRAPHRAG EXTRACTION RESULTS")
        print("="*80)

        print(f"\n ENTITIES ({len(entities)}):")
        print("-" * 50)
        for i, entity in enumerate(entities[:20], 1):  # Show first 20
            print(f"{i:2d}. {entity.name} ({entity.type})")
            print(f"    {entity.description}")
            if len(entity.source_chunks) > 1:
                print(f"    Found in {len(entity.source_chunks)} chunks")

        if len(entities) > 20:
            print(f"    ... and {len(entities) - 20} more entities")

        print(f"\n RELATIONSHIPS ({len(relationships)}):")
        print("-" * 50)
        for i, rel in enumerate(relationships[:15], 1):  # Show first 15
            print(f"{i:2d}. {rel.source_entity} --[{rel.relationship_type}]--> {rel.target_entity}")
            print(f"    {rel.description}")

        if len(relationships) > 15:
            print(f"    ... and {len(relationships) - 15} more relationships")

        print(f"\n STATISTICS:")
        print("-" * 50)
        stats = results['statistics']
        print(f"Processing time: {stats['total_time']:.2f} seconds")
        print(f"Documents: {stats['documents_processed']}")
        print(f"Chunks: {stats['chunks_created']}")
        print(f"Total entities: {len(entities)}")
        print(f"Total relationships: {len(relationships)}")

        if device == "cuda":
            memory_used = torch.cuda.memory_allocated() / 1e9
            memory_total = torch.cuda.get_device_properties(0).total_memory / 1e9
            print(f"GPU Memory: {memory_used:.1f} / {memory_total:.1f} GB")

        print("\n" + "="*80)

    def save_results(self, results: Dict[str, Any], filename: str = None):
        """Save results to JSON file."""
        if filename is None:
            filename = f"graphrag_results_{int(time.time())}.json"

        # Convert dataclasses to dictionaries for JSON serialization
        json_results = {
            'entities': [
                {
                    'name': e.name,
                    'type': e.type,
                    'description': e.description,
                    'source_chunks': e.source_chunks,
                    'confidence': e.confidence
                }
                for e in results['entities']
            ],
            'relationships': [
                {
                    'source_entity': r.source_entity,
                    'target_entity': r.target_entity,
                    'relationship_type': r.relationship_type,
                    'description': r.description,
                    'source_chunks': r.source_chunks,
                    'confidence': r.confidence
                }
                for r in results['relationships']
            ],
            'statistics': results['statistics'],
            'timestamp': time.time(),
            'model': self.model_manager.model_name
        }

        with open(filename, 'w') as f:
            json.dump(json_results, f, indent=2)

        logger.info(f" Results saved to {filename}")
        return filename

    def cleanup(self):
        """Clean up resources."""
        self.model_manager.cleanup()


# ============================================================================
# MAIN EXECUTION
# ============================================================================

def main():

    model_options = {
        "small": "Qwen/Qwen2.5-1.5B-Instruct",
        "medium": "Qwen/Qwen2.5-3B-Instruct",
        "large": "Qwen/Qwen2.5-7B-Instruct"
    }

    if device == "cuda":
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
        if gpu_memory >= 14:
            model_size = "large"
        elif gpu_memory >= 8:
            model_size = "medium"
        else:
            model_size = "small"
    else:
        model_size = "small"

    print(f"Selected model size: {model_size}")
    print(f"Model: {model_options[model_size]}")

    # Initialize pipeline
    pipeline = GraphRAGPipeline(model_options[model_size])

    try:
        # Load models
        print("\n Loading models...")
        if not pipeline.load_models():
            print("Failed to load models")
            return

        print("Models loaded successfully!")

        # Run pipeline
        print("\n Running the pipeline...")
        results = pipeline.run_pipeline()

        # Display results
        pipeline.display_results(results)

        # Save results
        filename = pipeline.save_results(results)
        print(f"\n Complete results saved to: {filename}")

        return results

    except Exception as e:
        logger.error(f" Pipeline error: {e}")
        print(f" Error: {e}")

    finally:
        pipeline.cleanup()


if __name__ == "__main__":
    main()

 Using device: cuda
 GPU: Tesla T4
 GPU Memory: 15.8 GB
Selected model size: large
Model: Qwen/Qwen2.5-7B-Instruct

 Loading models...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]



Models loaded successfully!

 Running the pipeline...

GRAPHRAG EXTRACTION RESULTS

 ENTITIES (66):
--------------------------------------------------
 1. DALL-E (PRODUCT)
    An AI system for generating images from text descriptions
 2. Google (ORGANIZATION)
    Technology giant that acquired DeepMind; A tech company developing AI models; A tech company founded by Stanford alumni; Invested in OpenAI
    Found in 3 chunks
 3. Sundar Pichai (PERSON)
    CEO of Google
 4. PaLM (PRODUCT)
    One of Google's competing AI models
 5. Bard (PRODUCT)
    One of Google's competing AI models
 6. Gemini (PRODUCT)
    One of Google's competing AI models
 7. London (LOCATION)
    Headquarters of Google's DeepMind division
 8. DeepMind (ORGANIZATION)
    Google's AI research division; Acquired by Google; Artificial intelligence research company founded by Demis Hassabis and acquired by Google
    Found in 2 chunks
 9. Demis Hassabis (PERSON)
    Founder of DeepMind
    Found in 2 chunks
10. AlphaGo 