In [1]:
import os,sys,json
import yaml
from ipywidgets import widgets
from IPython.display import display, clear_output
from tqdm import tqdm

In [2]:
# Resolve the directory one level above the current working directory.
# NOTE: os.path.dirname('PsycoreTestRunner.py') is '' for a bare filename, so
# this path depends on where the kernel was started, not on where that file lives.
parent_dir = os.path.abspath(os.path.join(os.path.dirname('PsycoreTestRunner.py'), ".."))
# Keep exactly one entry at the front of sys.path so that re-running this cell
# does not pile up duplicate entries.
if parent_dir in sys.path:
    sys.path.remove(parent_dir)
sys.path.insert(0, parent_dir)

In [3]:
import PsycoreTestRunner
from src.results import ResultManager
import asyncio

In [4]:
class VariationType:
    """One config file plus the flags that decide how it is bucketed.

    The flags select the limitation category (``api_limited`` /
    ``hardware_limited``), the graph builder (``llm_graph``; anything else is
    filed under ``bert_graph``) and the embedding (``aws_embedding``; anything
    else is filed under ``clip_embedding``).
    """

    def __init__(self, bert_graph: bool, llm_graph: bool, aws_embedding: bool,
                 api_limited: bool, hardware_limited: bool, config_path: str):
        self.config_path = config_path
        self.hardware_limited = hardware_limited
        self.api_limited = api_limited
        self.aws_embedding = aws_embedding
        self.llm_graph = llm_graph
        self.bert_graph = bert_graph

    def __str__(self):
        # Render the fields in constructor order.
        field_names = (
            "bert_graph", "llm_graph", "aws_embedding",
            "api_limited", "hardware_limited", "config_path",
        )
        rendered = ", ".join(f"{name}={getattr(self, name)}" for name in field_names)
        return f"VariationType({rendered})"

    @staticmethod
    def split_config(variations: list['VariationType']):
        """Bucket config paths by category, graph type and embedding type.

        Returns a nested dict ``category -> graph type -> embedding type ->
        list of config paths``. Every bucket is present even when empty.
        Progress is printed to stdout.
        """
        config_structure = {
            category: {
                graph: {embedding: [] for embedding in ("aws_embedding", "clip_embedding")}
                for graph in ("llm_graph", "bert_graph")
            }
            for category in ("General", "API_LIMITED", "HARDWARE_LIMITED", "API_HARDWARE_LIMITED")
        }
        print("Starting split_config with variations:", [str(v) for v in variations])

        # (api_limited, hardware_limited) -> category name
        category_by_limits = {
            (False, False): "General",
            (True, False): "API_LIMITED",
            (False, True): "HARDWARE_LIMITED",
            (True, True): "API_HARDWARE_LIMITED",
        }

        for entry in variations:
            category = category_by_limits[(bool(entry.api_limited), bool(entry.hardware_limited))]
            print(f"\nProcessing variation: {entry}")
            print(f"Selected category: {category}")

            # Anything that is not an LLM graph is filed under the BERT graph.
            if entry.llm_graph:
                graph_type = "llm_graph"
            else:
                graph_type = "bert_graph"
            print(f"Selected graph_type: {graph_type}")

            # Anything that is not an AWS embedding is filed under CLIP.
            if entry.aws_embedding:
                embedding_type = "aws_embedding"
            else:
                embedding_type = "clip_embedding"
            print(f"Selected embedding_type: {embedding_type}")

            bucket = config_structure[category][graph_type][embedding_type]
            bucket.append(entry.config_path)
            print(f"Added config path {entry.config_path} to {category}.{graph_type}.{embedding_type}")
            print(f"Current state of that list: {bucket}")

        print("\nFinal config_structure:", json.dumps(config_structure, indent=2))
        return config_structure

    @staticmethod
    def group_by_preprocessing(config_structure: dict):
        """Flatten a ``split_config``-style dict into ``<graph>_<embedding>`` groups.

        Paths from every category are concatenated, in category order, into
        the group named after their graph and embedding type. Combinations
        outside the four known groups are ignored. Progress is printed to
        stdout.
        """
        preprocessing_groups = {
            f"{graph}_{embedding}": []
            for graph in ("llm_graph", "bert_graph")
            for embedding in ("aws_embedding", "clip_embedding")
        }

        print("Starting group_by_preprocessing with config_structure:", json.dumps(config_structure, indent=2))

        for category, per_graph in config_structure.items():
            print(f"\nProcessing category: {category}")
            for graph_type, per_embedding in per_graph.items():
                print(f"Processing graph_type: {graph_type}")
                for embedding_type, config_paths in per_embedding.items():
                    print(f"Processing embedding_type: {embedding_type} with paths: {config_paths}")
                    key = f"{graph_type}_{embedding_type}"
                    if key not in preprocessing_groups:
                        continue
                    preprocessing_groups[key].extend(config_paths)
                    print(f"Added paths to {key}: {config_paths}")

        print("\nFinal preprocessing_groups:", json.dumps(preprocessing_groups, indent=2))
        return preprocessing_groups

In [5]:
class TestConfigRunner:
    """Discover YAML config variations under a base folder and run the selected ones.

    On construction the folder tree below ``config_path`` is scanned for
    ``.yaml`` files (see ``create_variations``) and the results are bucketed
    with ``VariationType.split_config``. ``select_test_types`` shows the
    ipywidgets controls; ``run_selected_tests`` reads them and evaluates each
    selected configuration with ``PsycoreTestRunner``, storing the results
    through ``ResultManager``.
    """

    # Seconds to wait for PsycoreTestRunner construction before giving up.
    INIT_TIMEOUT_SECONDS = 30

    # Sub-folders of "API_and_Hardware_Intensive": (folder, log label, VariationType flags).
    _INTENSIVE_LAYOUT = (
        ("BERT_Graph_AWS_Embedding", "BERT AWS",
         dict(bert_graph=True, llm_graph=False, aws_embedding=True, api_limited=True, hardware_limited=True)),
        ("BERT_Graph_CLIP_Embedding", "BERT CLIP",
         dict(bert_graph=True, llm_graph=False, aws_embedding=False, api_limited=True, hardware_limited=True)),
        ("LLM_Graph_AWS_Embedding", "LLM AWS",
         dict(bert_graph=False, llm_graph=True, aws_embedding=True, api_limited=True, hardware_limited=True)),
        ("LLM_Graph_CLIP_Embedding", "LLM CLIP",
         dict(bert_graph=False, llm_graph=True, aws_embedding=False, api_limited=True, hardware_limited=True)),
    )

    # Folders directly below the base path: (folder, log label, VariationType flags).
    # Both are registered as LLM graph + AWS embedding.
    _FLAT_LAYOUT = (
        ("API_Limited", "API Limited",
         dict(bert_graph=False, llm_graph=True, aws_embedding=True, api_limited=True, hardware_limited=False)),
        ("General_Models", "General Models",
         dict(bert_graph=False, llm_graph=True, aws_embedding=True, api_limited=False, hardware_limited=False)),
    )

    def __init__(self, config_path: str):
        """Scan ``config_path`` for variations and prepare defaults.

        Args:
            config_path: Base folder that holds the variation sub-folders.
        """
        print(f"Initializing TestConfigRunner with config_path: {config_path}")
        self.config_path = config_path
        print("Creating variations...")
        self.variations = self.create_variations()
        print(f"Created {len(self.variations)} variations")
        print("Splitting config...")
        self.config_structure = VariationType.split_config(self.variations)
        print("Config structure created")
        self.result_manager = ResultManager()
        self.selection = None  # Widgets created by select_test_types()

        # Default configuration that will be applied to all tests.
        # It is merged ON TOP of each loaded file (see deep_merge), so each of
        # these top-level sections replaces the file's section of the same name.
        self.default_config = {
            "logger": {
                "level": "WARNING"
            },
            "document_range": {
                "enabled": True,
                "document_ids": [1]  # Default document IDs
            },
            "rag": {
                "text_similarity_threshold": 0.3
            },
            "iteration": {
                "loop_retries": 2,
                "pass_threshold": 0.1
            }
        }

        # Default prompts for testing
        self.default_prompts = [
            "What programs are there to enhance broadband?",
            "What does the Department of Digital, Culture, Media and Sport do?",
            "What is FTTC?"
        ]

    @staticmethod
    def deep_merge(dict1, dict2):
        """Return a new dict with ``dict2``'s top-level keys overriding ``dict1``'s.

        Despite the name this is a shallow merge: a nested dict in ``dict2``
        replaces the whole nested dict in ``dict1`` and is not merged into it.
        Neither input is modified.
        """
        merged = dict1.copy()
        merged.update(dict2)
        return merged

    def select_test_types(self):
        """Build and display the selection widgets, storing them in ``self.selection``."""
        # Create checkboxes for each test type
        test_types = {
            "General": widgets.Checkbox(value=False, description='General'),
            "API_LIMITED": widgets.Checkbox(value=False, description='API Limited'),
            "API_HARDWARE_LIMITED": widgets.Checkbox(value=False, description='API & Hardware Limited')
        }

        # Create preprocessing controls
        preprocessing_enabled = widgets.Checkbox(value=False, description='Enable Preprocessing')
        preprocessing_type = widgets.SelectMultiple(
            options=['llm_graph_aws_embedding', 'llm_graph_clip_embedding',
                    'bert_graph_aws_embedding', 'bert_graph_clip_embedding'],
            description='Preprocessing Types:',
            disabled=True,
            layout=widgets.Layout(width='50%', height='100px')
        )

        # Create overwrite checkbox
        overwrite_enabled = widgets.Checkbox(value=False, description='Allow Overwrite')

        # Create prompts input (one prompt per line)
        prompts_input = widgets.Textarea(
            value='\n'.join(self.default_prompts),
            description='Test Prompts:',
            layout=widgets.Layout(width='100%', height='100px')
        )

        # The type selector is only usable while preprocessing is enabled
        def on_preprocessing_change(change):
            preprocessing_type.disabled = not change['new']
        preprocessing_enabled.observe(on_preprocessing_change, names='value')

        # Create output widget for displaying results
        output = widgets.Output()

        # Store the widgets so run_selected_tests() can read them later
        self.selection = {
            'test_types': test_types,
            'preprocessing_enabled': preprocessing_enabled,
            'preprocessing_type': preprocessing_type,
            'overwrite_enabled': overwrite_enabled,
            'prompts_input': prompts_input,
            'output': output
        }

        # Display widgets
        display(widgets.VBox([
            widgets.HBox([v for v in test_types.values()]),
            widgets.HBox([preprocessing_enabled, preprocessing_type]),
            overwrite_enabled,
            prompts_input,
            output
        ]))

    def _log(self, message):
        """Replace the contents of the output widget with ``message``."""
        with self.selection['output']:
            clear_output()
            print(message)

    def _init_psycore_runner(self):
        """Construct ``PsycoreTestRunner`` on a worker thread with a timeout.

        Returns:
            The runner instance, or ``None`` when construction raised or did
            not finish within ``INIT_TIMEOUT_SECONDS`` (the reason is logged).
        """
        import threading

        outcome = {}

        def init_runner():
            try:
                outcome['runner'] = PsycoreTestRunner.PsycoreTestRunner(preprocess=False)
            except Exception as e:
                outcome['error'] = e

        # Daemon thread: a hung initialization must not keep the kernel alive.
        init_thread = threading.Thread(target=init_runner, daemon=True)
        init_thread.start()
        init_thread.join(self.INIT_TIMEOUT_SECONDS)

        if init_thread.is_alive():
            self._log(f"Error: PsycoreTestRunner initialization timed out after {self.INIT_TIMEOUT_SECONDS} seconds.\nPlease check your Pinecone credentials and connection.")
            return None
        if 'error' in outcome:
            self._log(f"Error initializing PsycoreTestRunner: {str(outcome['error'])}")
            return None
        return outcome['runner']

    def _load_config(self, config_path):
        """Load one YAML file; return its content, or ``None`` if empty or unparsable."""
        config_name = os.path.basename(config_path)
        self._log(f"Loading configuration from {config_name}...")
        try:
            with open(config_path, 'r') as f:
                config = yaml.safe_load(f)
        except yaml.YAMLError as e:
            self._log(f"Error parsing YAML file {config_name}: {str(e)}")
            return None
        if config is None:
            self._log(f"Warning: Empty or invalid YAML file: {config_name}")
            return None
        return config

    def run_selected_tests(self):
        """Run every configuration selected through the widgets.

        For each preprocessing group the configs are processed in order. Each
        file is loaded, overlaid with ``self.default_config``, skipped if a
        result for it already exists (unless overwrite is ticked), evaluated
        against the prompts, and the results are written out. With
        preprocessing enabled, the preprocess flag is passed as ``True`` to
        the first config of each group that actually reaches
        ``update_config``. A failing config does not stop the run; failures
        are listed in the final message.
        """
        if not self.selection:
            print("Please select test types first")
            return

        test_types = self.selection['test_types']
        preprocessing_enabled = self.selection['preprocessing_enabled']
        preprocessing_type = self.selection['preprocessing_type']
        overwrite_enabled = self.selection['overwrite_enabled']
        prompts_input = self.selection['prompts_input']
        log_message = self._log

        selected_types = [k for k, v in test_types.items() if v.value]
        if not selected_types:
            log_message("Please select at least one test type")
            return

        # Get the selected configurations
        selected_configs = {k: self.config_structure[k] for k in selected_types}

        # Group by preprocessing
        preprocessing_groups = VariationType.group_by_preprocessing(selected_configs)
        log_message(f"Selected test types: {selected_types}\nPreprocessing enabled: {preprocessing_enabled.value}")
        if preprocessing_enabled.value:
            log_message(f"Selected preprocessing types: {preprocessing_type.value}")

        # Filter by preprocessing type if enabled
        if preprocessing_enabled.value:
            if not preprocessing_type.value:
                log_message("Please select at least one preprocessing type")
                return

            log_message(f"Available preprocessing groups: {list(preprocessing_groups.keys())}")
            log_message(f"Selected preprocessing types: {preprocessing_type.value}")

            filtered_groups = {}
            for ptype in preprocessing_type.value:
                if ptype in preprocessing_groups:
                    filtered_groups[ptype] = preprocessing_groups[ptype]
                    log_message(f"Added {ptype} to filtered groups with {len(preprocessing_groups[ptype])} configs")
                else:
                    log_message(f"No configurations found for preprocessing type: {ptype}")

            if not filtered_groups:
                log_message("No valid preprocessing configurations found")
                return

            preprocessing_groups = filtered_groups
            log_message(f"Final filtered groups: {list(preprocessing_groups.keys())}")

        # Get prompts from input (one per non-empty line)
        prompts = [p.strip() for p in prompts_input.value.split('\n') if p.strip()]
        try:
            log_message("Initializing PsycoreTestRunner...")
            runner = self._init_psycore_runner()
            if runner is None:
                return
            log_message("PsycoreTestRunner initialized successfully")

            failed = []  # names of configs that could not be loaded or raised

            # Process each group
            for group, configs in preprocessing_groups.items():
                print(group)
                # Preprocess with the first config of the group that actually
                # runs. Tying this to the list index would skip preprocessing
                # whenever the first file is skipped or broken.
                preprocess_pending = bool(preprocessing_enabled.value)
                for config_path in configs:
                    config_name = os.path.basename(config_path)
                    try:
                        log_message(f"Testing configuration: {config_name}")

                        config = self._load_config(config_path)
                        if config is None:
                            failed.append(config_name)
                            continue

                        # Defaults are applied on top of the loaded file
                        merged_config = TestConfigRunner.deep_merge(config, self.default_config)
                        print(merged_config)
                        log_message(f"\nMerged config before preprocessing for {config_name}:")
                        log_message(json.dumps(merged_config, indent=2))

                        # Check if result already exists
                        exists, config_hash = self.result_manager.check_hash_exists(merged_config)
                        if exists and not overwrite_enabled.value:
                            log_message(f"Result already exists for {config_name} (hash: {config_hash}). Skipping...")
                            continue

                        # Update runner configuration
                        log_message(f"Updating runner configuration...")
                        runner.update_config(merged_config, preprocess_pending)
                        preprocess_pending = False

                        # Run tests
                        log_message(f"Running tests with prompts...")
                        results = runner.evaluate_prompts(prompts)

                        # Map results to prompts
                        prompt_results = {prompt: result for prompt, result in zip(prompts, results)}

                        # Save results
                        log_message(f"Saving results...")
                        self.result_manager.write_result(merged_config, prompt_results)

                        log_message(f"Completed testing: {config_name}")

                    except Exception as e:
                        import traceback
                        error_msg = f"Error processing {config_name}:\n{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
                        # Printed rather than logged: _log() clears the widget
                        # on every call and would wipe the traceback.
                        print(error_msg)
                        failed.append(config_name)
                        continue

            if failed:
                log_message(f"Tests completed with {len(failed)} failed configuration(s): {', '.join(failed)}")
            else:
                log_message("All tests completed successfully!")

        except Exception as e:
            import traceback
            error_msg = f"Critical error during test execution:\n{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
            log_message(error_msg)

    def run_test(self):
        """Show the selection widgets plus a button that triggers ``run_selected_tests``."""
        print("Starting run_test method")
        self.select_test_types()
        # Create a button to run the tests
        run_button = widgets.Button(description='Run Selected Tests')
        run_button.on_click(lambda b: self.run_selected_tests())
        display(run_button)

    @staticmethod
    def _collect_variations(directory, label, flags):
        """Return one ``VariationType`` per ``.yaml`` file directly inside ``directory``.

        Files are visited in sorted name order so the result does not depend
        on the platform's directory listing order. A missing directory (or a
        path that is not a directory) yields an empty list.
        """
        print(f"Checking {label} path: {directory}")
        print(f"{label} path exists: {os.path.exists(directory)}")
        if not os.path.isdir(directory):
            return []

        found = []
        for yaml_file in sorted(os.listdir(directory)):
            if not yaml_file.endswith('.yaml'):
                continue
            full_path = os.path.join(directory, yaml_file)
            print(f"Found {label} config: {full_path}")
            found.append(VariationType(config_path=full_path, **flags))
        return found

    def create_variations(self) -> "list[VariationType]":
        """Scan the known sub-folders of ``self.config_path`` for YAML configs.

        Returns:
            Variations in this order: the four folders listed in
            ``_INTENSIVE_LAYOUT`` (below ``API_and_Hardware_Intensive``),
            then the folders listed in ``_FLAT_LAYOUT``.
        """
        variations = []
        base_path = self.config_path
        print(f"Creating variations from base path: {base_path}")
        print(f"Base path exists: {os.path.exists(base_path)}")

        # API_and_Hardware_Intensive variations
        intensive_path = os.path.join(base_path, "API_and_Hardware_Intensive")
        print(f"Checking intensive path: {intensive_path}")
        print(f"Intensive path exists: {os.path.exists(intensive_path)}")
        if os.path.isdir(intensive_path):
            for folder, label, flags in self._INTENSIVE_LAYOUT:
                variations.extend(
                    self._collect_variations(os.path.join(intensive_path, folder), label, flags)
                )

        # API_Limited and General_Models variations
        for folder, label, flags in self._FLAT_LAYOUT:
            variations.extend(
                self._collect_variations(os.path.join(base_path, folder), label, flags)
            )

        print(f"Total variations found: {len(variations)}")
        return variations

In [6]:
print("Creating TestConfigRunner instance...")
# The variations folder is looked up below the kernel's current working directory.
current_dir = os.getcwd()
config_path = os.path.join(current_dir, "config_variations")
print(
    f"Using config path: {config_path}",
    f"Config path exists: {os.path.exists(config_path)}",
    sep="\n",
)
# Build the runner (this scans the folder tree) and show the selection widgets.
runner = TestConfigRunner(config_path)
print("TestConfigRunner instance created")
runner.select_test_types()

Creating TestConfigRunner instance...
Using config path: c:\Users\sebas\Documents\Primary_Documents\University\Year_M\Darwin\Code\Psycore-V2\jupyter_testing\config_variations
Config path exists: True
Initializing TestConfigRunner with config_path: c:\Users\sebas\Documents\Primary_Documents\University\Year_M\Darwin\Code\Psycore-V2\jupyter_testing\config_variations
Creating variations...
Creating variations from base path: c:\Users\sebas\Documents\Primary_Documents\University\Year_M\Darwin\Code\Psycore-V2\jupyter_testing\config_variations
Base path exists: True
Checking intensive path: c:\Users\sebas\Documents\Primary_Documents\University\Year_M\Darwin\Code\Psycore-V2\jupyter_testing\config_variations\API_and_Hardware_Intensive
Intensive path exists: True
Checking BERT AWS path: c:\Users\sebas\Documents\Primary_Documents\University\Year_M\Darwin\Code\Psycore-V2\jupyter_testing\config_variations\API_and_Hardware_Intensive\BERT_Graph_AWS_Embedding
BERT AWS path exists: True
Found BERT AWS 

VBox(children=(HBox(children=(Checkbox(value=False, description='General'), Checkbox(value=False, description=…

In [8]:
# Only an exact "y" answer starts the run; any other input leaves the cell idle.
run_confirmed = input("Run tests? (y/n)") == "y"
if run_confirmed:
    runner.run_selected_tests()

Starting group_by_preprocessing with config_structure: {
  "General": {
    "llm_graph": {
      "aws_embedding": [
        "c:\\Users\\sebas\\Documents\\Primary_Documents\\University\\Year_M\\Darwin\\Code\\Psycore-V2\\jupyter_testing\\config_variations\\General_Models\\config_variation_11.yaml",
        "c:\\Users\\sebas\\Documents\\Primary_Documents\\University\\Year_M\\Darwin\\Code\\Psycore-V2\\jupyter_testing\\config_variations\\General_Models\\config_variation_123.yaml",
        "c:\\Users\\sebas\\Documents\\Primary_Documents\\University\\Year_M\\Darwin\\Code\\Psycore-V2\\jupyter_testing\\config_variations\\General_Models\\config_variation_127.yaml",
        "c:\\Users\\sebas\\Documents\\Primary_Documents\\University\\Year_M\\Darwin\\Code\\Psycore-V2\\jupyter_testing\\config_variations\\General_Models\\config_variation_15.yaml",
        "c:\\Users\\sebas\\Documents\\Primary_Documents\\University\\Year_M\\Darwin\\Code\\Psycore-V2\\jupyter_testing\\config_variations\\General_Models\

2025-05-22 07:38:51,570 - PsycoreLogger - INFO - Initializing PineconeService with index name: psycore2
2025-05-22 07:38:52,068 - PsycoreLogger - INFO - Creating new Pinecone index: psycore2


llm_graph_aws_embedding


2025-05-22 07:39:02,389 - PsycoreLogger - INFO - Initializing PineconeService with index name: psycore2


Cleaning Pinecone index...


2025-05-22 07:39:13,520 - PsycoreLogger - INFO - Deleted index: psycore2
2025-05-22 07:39:21,148 - PsycoreLogger - INFO - Successfully recreated index: psycore2


Cleared 12 objects from nas666textsummary
Cleared 10 objects from nas666image
Cleared 2 objects from nas666graph


2025-05-22 07:39:22,253 - PsycoreLogger - INFO - Starting PDF extraction for C:\Users\sebas\AppData\Local\Temp\tmpe38uvql0.pdf
2025-05-22 07:39:22,266 - PsycoreLogger - INFO - Processing page 1/45
2025-05-22 07:39:22,284 - PsycoreLogger - INFO - Processing page 2/45
2025-05-22 07:39:22,325 - PsycoreLogger - INFO - Processing page 3/45
2025-05-22 07:39:22,328 - PsycoreLogger - INFO - Processing page 4/45
2025-05-22 07:39:22,338 - PsycoreLogger - INFO - Processing page 5/45
2025-05-22 07:39:22,343 - PsycoreLogger - INFO - Processing page 6/45
2025-05-22 07:39:22,349 - PsycoreLogger - INFO - Processing page 7/45
2025-05-22 07:39:22,359 - PsycoreLogger - INFO - Processing page 8/45
2025-05-22 07:39:22,374 - PsycoreLogger - INFO - Processing page 9/45
2025-05-22 07:39:22,391 - PsycoreLogger - INFO - Processing page 10/45
2025-05-22 07:39:22,399 - PsycoreLogger - INFO - Processing page 11/45
2025-05-22 07:39:22,404 - PsycoreLogger - INFO - Processing page 12/45
2025-05-22 07:39:22,421 - Psyc

deque(['What programs are there to enhance broadband?'], maxlen=6)


2025-05-22 07:43:08,106 - PsycoreLogger - INFO - Querying vector database with query: What programs, initiatives, or government policies are available from 2020 to 2024 to enhance, improve, or expand broadband internet access, connectivity, and infrastructure in urban, rural, and underserved areas? Include details on funding, eligibility, and implementation strategies., k=5
2025-05-22 07:43:10,691 - PsycoreLogger - INFO - [{'vector_id': '03ba4aa1-171b-41f2-884c-30845489dff3', 'score': 0.659548104, 'document_path': 's3://psycore-documents-445644858344/18-049091-01_LFFN_Wave_1_final_evaluation_TPI_project.pdf', 'graph_path': 's3://nas666graph/18-049091-01_LFFN_Wave_1_final_evaluation_TPI_project.pdf/graph.json', 'type': 'text', 'text': ' anticipated. These could include reduced commuting enabled by remote working, increases in leisure time or reducing the digital divide in the community through initiatives like digital education programmes. A summary of the initiative’s pathways to impac

(0.11764705882352941, [], [<src.kg.graph_creator.GraphRelation object at 0x0000012EC10F1890>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBE31E790>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC3546AD0>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC3547150>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC1A58BD0>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC1A58B50>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC1A58550>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC1A58350>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC1A59450>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC1A5A610>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC1A59B50>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBE367510>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC2ABBB10>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC3532C10>, <src.kg.graph_creator.GraphRelation

2025-05-22 07:43:40,391 - PsycoreLogger - INFO - Evaluating RAG result 0
INFO:PsycoreLogger:Evaluating RAG result 0
2025-05-22 07:43:40,392 - PsycoreLogger - INFO - Evaluating with GraphEvaluator
INFO:PsycoreLogger:Evaluating with GraphEvaluator
2025-05-22 07:43:45,813 - PsycoreLogger - INFO - Processing graph document 1 of 1
INFO:PsycoreLogger:Processing graph document 1 of 1
2025-05-22 07:43:45,814 - PsycoreLogger - INFO - Evaluating with BERTEvaluator
INFO:PsycoreLogger:Evaluating with BERTEvaluator
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2025-05-22 07:43:49,003 - PsycoreLogger - INFO - Evaluating with RougeEvaluator
INFO:PsycoreLogger:Evaluating with RougeEvaluator
2025-05-22 07:43:49,006 - PsycoreLogger - INFO - Evaluating RAG result

deque(['What does the Department of Digital, Culture, Media and Sport do?'], maxlen=6)


2025-05-22 07:44:14,423 - PsycoreLogger - INFO - Querying vector database with query: What are the roles, responsibilities, and functions of the UK Department of Digital, Culture, Media and Sport (DCMS) in 2023, including its impact on digital innovation, cultural development, media regulation, and sports promotion?, k=5
INFO:PsycoreLogger:Querying vector database with query: What are the roles, responsibilities, and functions of the UK Department of Digital, Culture, Media and Sport (DCMS) in 2023, including its impact on digital innovation, cultural development, media regulation, and sports promotion?, k=5
2025-05-22 07:44:20,306 - PsycoreLogger - INFO - [{'vector_id': 'e1404fdb-0b4a-4e90-9915-0d020ae7de22', 'score': 0.651252389, 'document_path': 's3://psycore-documents-445644858344/18-049091-01_LFFN_Wave_1_final_evaluation_TPI_project.pdf', 'graph_path': 's3://nas666graph/18-049091-01_LFFN_Wave_1_final_evaluation_TPI_project.pdf/graph.json', 'type': 'text', 'text': ' of the internat

(0.058823529411764705, [<src.kg.graph_creator.GraphRelation object at 0x0000012EC337EED0>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC13FE990>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC1A6A690>, <src.kg.graph_creator.GraphRelation object at 0x0000012F008C5510>, <src.kg.graph_creator.GraphRelation object at 0x0000012F008C4DD0>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E8D4A10>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E8D6110>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E8D7390>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E8D6C10>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E8D41D0>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E8D5C50>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E8D56D0>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E8D7C90>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E8D4950>, <src.kg.graph_creator.GraphRelation ob

2025-05-22 07:45:05,309 - PsycoreLogger - INFO - Evaluating RAG result 0
INFO:PsycoreLogger:Evaluating RAG result 0
2025-05-22 07:45:05,310 - PsycoreLogger - INFO - Evaluating with GraphEvaluator
INFO:PsycoreLogger:Evaluating with GraphEvaluator
2025-05-22 07:45:09,796 - PsycoreLogger - INFO - Processing graph document 1 of 1
INFO:PsycoreLogger:Processing graph document 1 of 1
2025-05-22 07:45:09,798 - PsycoreLogger - INFO - Evaluating with BERTEvaluator
INFO:PsycoreLogger:Evaluating with BERTEvaluator
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2025-05-22 07:45:13,161 - PsycoreLogger - INFO - Evaluating with RougeEvaluator
INFO:PsycoreLogger:Evaluating with RougeEvaluator
2025-05-22 07:45:13,168 - PsycoreLogger - INFO - Evaluating RAG result

deque(['What is FTTC?'], maxlen=6)


2025-05-22 07:45:41,675 - PsycoreLogger - INFO - Querying vector database with query: What is FTTC (Fiber to the Cabinet) in telecommunications, including its definition, technology, infrastructure, benefits, and differences from other broadband technologies like FTTH (Fiber to the Home) and FTTN (Fiber to the Node)?, k=5
INFO:PsycoreLogger:Querying vector database with query: What is FTTC (Fiber to the Cabinet) in telecommunications, including its definition, technology, infrastructure, benefits, and differences from other broadband technologies like FTTH (Fiber to the Home) and FTTN (Fiber to the Node)?, k=5
2025-05-22 07:45:45,576 - PsycoreLogger - INFO - [{'vector_id': '0d95f2c3-9615-47a9-b92c-d145bff073ed', 'score': 0.752231479, 'document_path': 's3://psycore-documents-445644858344/18-049091-01_LFFN_Wave_1_final_evaluation_TPI_project.pdf', 'graph_path': 's3://nas666graph/18-049091-01_LFFN_Wave_1_final_evaluation_TPI_project.pdf/graph.json', 'type': 'text', 'text': ' speeds3, such

(0.0, [<src.kg.graph_creator.GraphRelation object at 0x0000012EC33B1A10>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E80B110>, <src.kg.graph_creator.GraphRelation object at 0x0000012F00EDBB10>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E8522D0>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC2AAB790>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E856B50>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E857290>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC3532C10>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC2C60DD0>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBE78B1D0>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC2C6CC90>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC2C6EB90>, <src.kg.graph_creator.GraphRelation object at 0x0000012EC17D6CD0>, <src.kg.graph_creator.GraphRelation object at 0x0000012F0E83E250>, <src.kg.graph_creator.GraphRelation object at 0x0000012

2025-05-22 07:46:29,950 - PsycoreLogger - INFO - Evaluating RAG result 0
INFO:PsycoreLogger:Evaluating RAG result 0
2025-05-22 07:46:29,951 - PsycoreLogger - INFO - Evaluating with GraphEvaluator
INFO:PsycoreLogger:Evaluating with GraphEvaluator
2025-05-22 07:46:36,136 - PsycoreLogger - INFO - Processing graph document 1 of 1
INFO:PsycoreLogger:Processing graph document 1 of 1
2025-05-22 07:46:36,137 - PsycoreLogger - INFO - Evaluating with BERTEvaluator
INFO:PsycoreLogger:Evaluating with BERTEvaluator
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2025-05-22 07:46:38,790 - PsycoreLogger - INFO - Evaluating with RougeEvaluator
INFO:PsycoreLogger:Evaluating with RougeEvaluator
2025-05-22 07:46:38,795 - PsycoreLogger - INFO - Evaluating RAG result



2025-05-22 07:47:08,853 - PsycoreLogger - INFO - Initializing PineconeService with index name: psycore2
INFO:PsycoreLogger:Initializing PineconeService with index name: psycore2
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "g

deque(['What programs are there to enhance broadband?'], maxlen=6)
Error processing config_variation_123.yaml:
429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_r

2025-05-22 07:47:12,597 - PsycoreLogger - INFO - Initializing PineconeService with index name: psycore2
INFO:PsycoreLogger:Initializing PineconeService with index name: psycore2
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "g

deque(['What programs are there to enhance broadband?'], maxlen=6)
Error processing config_variation_127.yaml:
429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_c

2025-05-22 07:47:16,021 - PsycoreLogger - INFO - Initializing PineconeService with index name: psycore2
INFO:PsycoreLogger:Initializing PineconeService with index name: psycore2


deque(['What programs are there to enhance broadband?'], maxlen=6)


2025-05-22 07:47:18,716 - PsycoreLogger - INFO - Querying vector database with query: What programs are there to enhance broadband?, k=5
INFO:PsycoreLogger:Querying vector database with query: What programs are there to enhance broadband?, k=5
2025-05-22 07:47:20,082 - PsycoreLogger - INFO - [{'vector_id': '0d95f2c3-9615-47a9-b92c-d145bff073ed', 'score': 0.598774672, 'document_path': 's3://psycore-documents-445644858344/18-049091-01_LFFN_Wave_1_final_evaluation_TPI_project.pdf', 'graph_path': 's3://nas666graph/18-049091-01_LFFN_Wave_1_final_evaluation_TPI_project.pdf/graph.json', 'type': 'text', 'text': ' speeds3, such as Fibre to the Cabinet (FTTC). The Superfast Broadband Programme has significantly bolstered the coverage of FTTC networks4. However, these technologies are insufficient to meet the demands outlined above. Several factors were thought to have constrained the roll-out of full fibre networks in the UK in 2017. These included: ▪ Other countries having a greater share of th

(0.2222222222222222, [], [<src.kg.graph_creator.GraphRelation object at 0x0000012EBE32DAD0>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBE32D5D0>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBE2F5E50>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBF126B10>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBF126A90>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBEFDB790>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBEFD83D0>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBEFDBC90>, <src.kg.graph_creator.GraphRelation object at 0x0000012EBEFD9DD0>])


2025-05-22 07:47:40,567 - PsycoreLogger - INFO - Evaluating RAG result 0
INFO:PsycoreLogger:Evaluating RAG result 0
2025-05-22 07:47:40,568 - PsycoreLogger - INFO - Evaluating with GraphEvaluator
INFO:PsycoreLogger:Evaluating with GraphEvaluator
2025-05-22 07:47:42,898 - PsycoreLogger - INFO - Processing graph document 1 of 1
INFO:PsycoreLogger:Processing graph document 1 of 1
2025-05-22 07:47:42,900 - PsycoreLogger - INFO - Evaluating with BERTEvaluator
INFO:PsycoreLogger:Evaluating with BERTEvaluator
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2025-05-22 07:47:45,797 - PsycoreLogger - INFO - Evaluating with RougeEvaluator
INFO:PsycoreLogger:Evaluating with RougeEvaluator
2025-05-22 07:47:45,800 - PsycoreLogger - INFO - Evaluating RAG result

deque(['What does the Department of Digital, Culture, Media and Sport do?'], maxlen=6)


2025-05-22 07:47:58,933 - PsycoreLogger - INFO - Querying vector database with query: What does the Department of Digital, Culture, Media and Sport do?, k=5
INFO:PsycoreLogger:Querying vector database with query: What does the Department of Digital, Culture, Media and Sport do?, k=5
2025-05-22 07:47:59,777 - PsycoreLogger - INFO - [{'vector_id': '4465e9d5-3e91-4bbf-875f-c681b790b68a', 'score': 0.522936761, 'document_path': 's3://psycore-documents-445644858344/18-049091-01_LFFN_Wave_1_final_evaluation_TPI_project.pdf', 'graph_path': 's3://nas666graph/18-049091-01_LFFN_Wave_1_final_evaluation_TPI_project.pdf/graph.json', 'type': 'text', 'text': ' expertise in a particular part of the public sector, ensuring we have a detailed understanding of specific sectors and policy challenges. Combined with our methods and communications expertise, this helps ensure that our research makes a difference for decision makers and communities.'}, {'vector_id': '78d484fb-7f78-46eb-b587-f8bd19fa0221', 'sco

KeyboardInterrupt: 