In [198]:
from project_constants import DATABASE_PATH, LM_STUDIO_URL
from project_constants import EMBEDDING_MODEL_NAME
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
from langchain.vectorstores import Chroma
DATABASE_PATH = "./../"+DATABASE_PATH.split("/")[1]+"/"+DATABASE_PATH.split("/")[2]
import torch
embedding_model=EMBEDDING_MODEL_NAME


In [199]:
!deepeval set-local-model --model-name="meta-llama-3.1-8b-instruct" --base-url="http://localhost:4500/v1/" --api-key="test"

+--------------------- Traceback (most recent call last) ---------------------+
| D:\Programs\Anaconda\envs\RAGThesisEnv\Lib\site-packages\deepeval\cli\main. |
| py:303 in set_local_model_env                                               |
|                                                                             |
|   300         KEY_FILE_HANDLER.write_key(KeyValues.LOCAL_MODEL_FORMAT, form |
|   301     KEY_FILE_HANDLER.write_key(KeyValues.USE_LOCAL_MODEL, "YES")      |
|   302     KEY_FILE_HANDLER.write_key(KeyValues.USE_AZURE_OPENAI, "NO")      |
| > 303     print(                                                            |
|   304         ":raising_hands: Congratulations! You're now using a local mo |
|   305     )                                                                 |
|   306                                                                       |
|                                                                             |
| +----------------- locals ------------

In [200]:
import gc
def clean_up_memory():
    """
    Clean up memory by deleting the model and tokenizer.
    """
    global llm, tokenizer
    if 'llm' in globals():
        del llm
        print("Model deleted")
    if 'tokenizer' in globals():
        del tokenizer
        print("Tokenizer deleted")
    torch.cuda.empty_cache()
    print("Memory cleaned up")
    gc.collect()


In [201]:
def optimize_model_configuration(quantization=8, model_name="google/gemma-2-2b-it", force_gpu=True, lora_weights=None):
    """
    Optimize the model configuration for the given quantization and model name.
    Used when we directly load the model from the Hugging Face Hub.
    :param quantization: precision of the model
    :param model_name: name of the model
    :param force_gpu: whether to force the model to load on GPU
    :param lora_weights: path to LoRA adapter weights
    :return: optimized model configuration
    """
    print("Optimizing model configuration...")
   
    global tokenizer, llm
    if torch.cuda.is_available():
        print(f"CUDA available: {torch.cuda.get_device_name(0)}")
        print(f"CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    else:
        print("CUDA not available, will use CPU")
        force_gpu = False
        
    if quantization == 4:
        print("Optimizing for maximum speed (4-bit quantization)")
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            llm_int8_enable_fp32_cpu_offload=not force_gpu
        )
    elif quantization == 8:
        print("Using balanced settings (8-bit quantization)")
        quantization_config = BitsAndBytesConfig(
            load_in_8bit=True,
            llm_int8_enable_fp32_cpu_offload=not force_gpu,
            llm_int8_skip_modules=["lm_head"]
        )
    elif quantization == 16:
        print("Optimizing for quality (16-bit precision)")
        quantization_config = None
    else:
        raise ValueError("Invalid quantization value. Use 4, 8, or 16.")
    
    print("Loading model...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if not tokenizer.pad_token:
        tokenizer.pad_token = tokenizer.eos_token
    
    if force_gpu and torch.cuda.is_available():
        device_map = {"": 0} 
        print("Forcing model to load on GPU")
    else:
        device_map = "auto"
    
    model_kwargs = {
        "device_map": device_map,
        "torch_dtype": torch.float16,
    }
    
    if quantization_config:
        model_kwargs["quantization_config"] = quantization_config
        
    try:
        # Load base model
        if lora_weights:
            from peft import PeftModel, PeftConfig
            
            print(f"Loading base model with LoRA adapter from {lora_weights}")
            config = PeftConfig.from_pretrained(lora_weights)
            
            # First load the base model
            base_model = AutoModelForCausalLM.from_pretrained(
                model_name if config.base_model_name_or_path is None else config.base_model_name_or_path,
                **model_kwargs
            )
            
            # Then load the LoRA adapter
            llm = PeftModel.from_pretrained(base_model, lora_weights)
            print("LoRA adapter loaded successfully")
        else:
            # Regular model loading without LoRA
            llm = AutoModelForCausalLM.from_pretrained(
                model_name,
                **model_kwargs
            )
            print("Base model loaded successfully")
        
        device_location = next(llm.parameters()).device
        print(f"Model loaded on: {device_location}")
        
        if 'cuda' not in str(device_location) and torch.cuda.is_available():
            print("Warning: Model loaded on CPU despite CUDA being available")
            
    except Exception as e:
        print(f"Error loading model: {e}")
        if force_gpu:
            print("Falling back to CPU loading")
            return optimize_model_configuration(quantization, model_name, force_gpu=False, lora_weights=lora_weights)
    
    return {
        "model_size": sum(p.numel() for p in llm.parameters()) / 1e6,
        "model_device": next(llm.parameters()).device,
        "quantization": quantization,
        "lora_weights": lora_weights
    }

In [202]:
def load_huggingface_model(model_name="google/gemma-2-2b-it", quantization=8, force_gpu=True, lora_weights=None):
    """
    Load a model from Hugging Face Hub
    :param model_name: name of the model to load
    :param quantization: level of quantization (4, 8, or 16 bit)
    :param force_gpu: whether to force GPU usage
    :param lora_weights: path to LoRA adapter weights
    :return: model configuration
    """
    clean_up_memory()
    return optimize_model_configuration(quantization, model_name, force_gpu, lora_weights)


In [203]:
def load_lm_studio_connection(url=LM_STUDIO_URL, api_key="lm-studio"):
    """
    Test the connection to LM Studio
    :param url: URL of LM Studio API
    :param api_key: API key for LM Studio
    :return: connection info
    """
    from langchain_openai import ChatOpenAI
    
    try:
        model = ChatOpenAI(
            base_url=url,
            api_key=api_key,
            temperature=0.3,
            model="gemma-3-12b-it", 
        )
        
        response = model.invoke([{"role": "user", "content": "Hello, are you connected?"}])
        print(f"Successfully connected to LM Studio at {url}")
        print(f"Test response: {response.content[:50]}...")
        
        return {
            "status": "connected",
            "url": url,
            "model": model
        }
    except Exception as e:
        print(f"Error connecting to LM Studio: {e}")
        return {
            "status": "error",
            "error": str(e)
        }


# RAG Evaluation Functions

In [204]:
import time
import pandas as pd
import matplotlib.pyplot as plt
from embedding_model import EmbeddingModel
from langchain.vectorstores import Chroma
from langchain_openai import ChatOpenAI
def test_lm_studio_model(test_questions, model_name="gemma-3-12b-it"):
    """
    Test the model using LM Studio with your existing architecture
    :param test_questions: list of test questions
    :param model_name: name of the model in LM Studio
    :return: dict with results
    """
    from project_constants import LM_STUDIO_URL
    
    print(f"Testing LM Studio model: {model_name}")
    
    # Initialize embedding model and database
    embedding = EmbeddingModel()
    vectordb = Chroma(
        embedding_function=embedding,
        persist_directory=DATABASE_PATH,
    )
    print(f"Vector store loaded with {vectordb._collection.count()} documents")
    
    # Initialize LM Studio model
    model = ChatOpenAI(
        base_url=LM_STUDIO_URL,
        api_key="lm-studio",
        temperature=0.3,
        model=model_name,
    )
    
    results = []
    
    for i, question in enumerate(test_questions):
        print(f"[{i+1}/{len(test_questions)}] Processing: {question}")
        
        # Create chat history
        chat_history = [{"role": "user", "content": question}]
        
        # Start timing
        start_time = time.time()
        
        embedded_query = embedding.embed_query(question)
        retrieval_start = time.time()
        retrieved_chunks = vectordb.similarity_search_by_vector(embedded_query)
        retrieval_time = time.time() - retrieval_start
        
        # Format chunks
        formatted_chunks = ""
        for chunk in retrieved_chunks:
            formatted_chunks += f"- {chunk.page_content}\n"
        
        # Augment prompt
        augmented_prompt = f"{question}\nHere are some chunks of information that could help you, they might be out of order. You should use them only if they are relevant to my question.\nThe chunks are:{formatted_chunks}"
        chat_history[0]["content"] = augmented_prompt
        
        # Generate response
        generation_start = time.time()
        response = model.invoke(chat_history)
        generation_time = time.time() - generation_start
        
        # Calculate total time
        total_time = time.time() - start_time
        
        # Store result
        result = {
            "question": question,
            "response": response.content,
            "source": "lm_studio",
            "retrieved_chunks": retrieved_chunks,
            "retrieval_time": retrieval_time,
            "generation_time": generation_time,
            "total_time": total_time
        }
        
        print(f"Response: {result['response'][:100]}...")
        print(f"Time: {total_time:.2f}s (retrieval: {retrieval_time:.2f}s, generation: {generation_time:.2f}s)")
        
        results.append(result)
    
    return results

In [205]:
def test_huggingface_model(test_questions, model_name="google/gemma-2-2b-it", quantization=8, lora_weights=None):
    """
    Test the model loaded directly from Hugging Face
    :param test_questions: list of test questions
    :param model_name: name of the model in Hugging Face
    :param quantization: quantization level (4, 8, or 16)
    :param lora_weights: path to LoRA adapter weights
    :return: dict with results
    """
    print(f"Testing Hugging Face model: {model_name}" + (f" with LoRA adapter: {lora_weights}" if lora_weights else ""))
    
    # Load the HuggingFace model
    load_huggingface_model(model_name=model_name, quantization=quantization, lora_weights=lora_weights)
    
    global llm, tokenizer
    
    # Initialize embedding model and database
    embedding = EmbeddingModel()
    vectordb = Chroma(
        embedding_function=embedding,
        persist_directory=DATABASE_PATH,
    )
    print(f"Vector store loaded with {vectordb._collection.count()} documents")
    
    results = []
    
    for i, question in enumerate(test_questions):
        print(f"[{i+1}/{len(test_questions)}] Processing: {question}")
        
        # Start timing
        start_time = time.time()
        
        # Retrieval
        retrieval_start = time.time()
        embedded_query = embedding.embed_query(question)
        retrieved_chunks = vectordb.similarity_search_by_vector(embedded_query)
        retrieval_time = time.time() - retrieval_start
        
        # Format context
        context = ""
        for chunk in retrieved_chunks:
            context += f"{chunk.page_content}\n\n"
        
        prompt = f"""### Instruction: 
You are a knowledgeable history tutor. Answer the following question accurately based on the provided historical context.
Use ONLY the information provided in the context. If you don't know, say "I don't have enough information to answer that."

### Context:
{context}

### Question:
{question}

### Answer:
"""
        
        # Generate response
        generation_start = time.time()
        try:
            inputs = tokenizer(prompt, return_tensors="pt").to(llm.device)
            outputs = llm.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.3,
                do_sample=True
            )
            response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            print(f"Response: {response_text}...")
            
            # Extract just the answer part if possible
            if "### Answer:" in response_text:
                response_text = response_text.split("### Answer:")[1].strip()
            
            generation_time = time.time() - generation_start
        except Exception as e:
            print(f"Error generating response: {e}")
            response_text = f"Error: {str(e)}"
            generation_time = time.time() - generation_start
        
        # Calculate total time
        total_time = time.time() - start_time
        
        # Store result
        result = {
            "question": question,
            "response": response_text,
            "source": "huggingface_lora" if lora_weights else "huggingface",
            "model_name": model_name,
            "lora_weights": lora_weights,
            "retrieved_chunks": retrieved_chunks,
            "retrieval_time": retrieval_time,
            "generation_time": generation_time,
            "total_time": total_time
        }
        
        print(f"Response: {result['response'][:100]}...")
        print(f"Time: {total_time:.2f}s (retrieval: {retrieval_time:.2f}s, generation: {generation_time:.2f}s)")
        
        results.append(result)
    
    return results

In [206]:
def evaluate_with_deepeval(all_results, test_data,include_contextual=False):
    """
    Evaluate model results using DeepEval metrics
    :param all_results: List of results from different models
    :param test_data: List of dictionaries with questions and expected answers
    :return: Dictionary with evaluation results
    """
    global test_case, evaluation
    from deepeval import evaluate
    from deepeval.test_case import LLMTestCase
    from deepeval.metrics import AnswerRelevancyMetric, ContextualRelevancyMetric, HallucinationMetric
    
    print("Setting up DeepEval metrics...")
    metrics = [
        AnswerRelevancyMetric(threshold=0.7),
        HallucinationMetric(threshold=0.7)
    ]
    
    contextual_index = None
    if include_contextual:
        from deepeval.metrics import ContextualRelevancyMetric
        metrics.insert(1, ContextualRelevancyMetric(threshold=0.7))
        contextual_index = 1
    # Convert test_data to a lookup dictionary for easier access
    test_questions_dict = {item["question"]: item for item in test_data}
    
    # Group results by model source
    results_by_source = {}
    for result in all_results:
        source = result.get("source", "unknown")
        if source not in results_by_source:
            results_by_source[source] = []
        results_by_source[source].append(result)
    print(f"Found {len(results_by_source)} model sources")
    
    # Evaluate each model source
    evaluation_results = {}
    
    for source, source_results in results_by_source.items():
        print(f"\nEvaluating {source} with {len(source_results)} results...")
        source_evaluations = []
        
        for i, result in enumerate(source_results):
            # Get question and expected answer
            question = result.get("question", "")
            test_item = test_questions_dict.get(question, {})
            expected_answer = test_item.get("expected_answer", "")
            
            # Get model s response
            response = result.get("response", "")
            print(f"  Model response: {response[:30]}...")
            
            # Get context from retrieved chunks
            context = []
            if "retrieved_chunks" in result and result["retrieved_chunks"]:
                chunks = result["retrieved_chunks"]
                if hasattr(chunks[0], 'page_content'):
                    context = [chunk.page_content for chunk in chunks]
            
            print(f"  Test case {i+1}: Q={question[:30]}...")
            
            try:
                # Create test case with proper context and expected output
                test_case = LLMTestCase(
                    input=question,
                    actual_output=response,
                    expected_output=expected_answer,
                    retrieval_context=[context] if isinstance(context, str) else context,
                    context=[context] if isinstance(context, str) else context,
                )
                
                # Evaluate with metrics
                evaluation = evaluate(
                    test_cases=[test_case],
                    metrics=metrics
                )
                
                if evaluation and evaluation.test_results:
                    metrics_data = evaluation.test_results[0].metrics_data
                    
                    # Extract scores for each metric
                    result_item = {
                        "question_idx": i,
                        "answer_relevancy": metrics_data[0].score if len(metrics_data) > 0 else None,
                        "hallucination": metrics_data[-1].score if len(metrics_data) > 0 else None,
                    }
                    
                    # Only add contextual_relevancy if we included that metric
                    if include_contextual and contextual_index is not None:
                        result_item["contextual_relevancy"] = metrics_data[contextual_index].score if len(metrics_data) > contextual_index else None
                    else:
                        result_item["contextual_relevancy"] = None
                    
                    source_evaluations.append(result_item)
                    
                    print(f"    Answer Relevancy: {result_item['answer_relevancy']:.2f}")
                    if include_contextual:
                        print(f"    Contextual Relevancy: {result_item['contextual_relevancy']:.2f if result_item['contextual_relevancy'] is not None else 'N/A'}")
                    print(f"    Hallucination: {result_item['hallucination']:.2f}")
            except Exception as e:
                # e de la json -> print the json
                print(evaluation)
                # print(test_case.to_json())
                
                print(f"  Error evaluating case {i+1}: {str(e)}")
                continue 
        
        
         
        if source_evaluations:
            evaluation_results[source] = {
                "avg_answer_relevancy": sum(e["answer_relevancy"] for e in source_evaluations if e["answer_relevancy"] is not None) / 
                                       len([e for e in source_evaluations if e["answer_relevancy"] is not None]) if any(e["answer_relevancy"] is not None for e in source_evaluations) else 0,
                                       
                "avg_hallucination": sum(e["hallucination"] for e in source_evaluations if e["hallucination"] is not None) / 
                                    len([e for e in source_evaluations if e["hallucination"] is not None]) if any(e["hallucination"] is not None for e in source_evaluations) else 0,
                
                "details": source_evaluations,
                "num_evaluated": len(source_evaluations)
            }
            
            # Only calculate contextual_relevancy if we included that metric
            if include_contextual:
                valid_contextual = [e["contextual_relevancy"] for e in source_evaluations if e["contextual_relevancy"] is not None]
                if valid_contextual:
                    evaluation_results[source]["avg_contextual_relevancy"] = sum(valid_contextual) / len(valid_contextual)
                else:
                    evaluation_results[source]["avg_contextual_relevancy"] = 0
            else:
                evaluation_results[source]["avg_contextual_relevancy"] = None
            
            # Print average scores
            print(f"\n  {source} Average Results:")
            print(f"    Answer Relevancy: {evaluation_results[source]['avg_answer_relevancy']:.2f}")
            if include_contextual:
                print(f"    Contextual Relevancy: {evaluation_results[source]['avg_contextual_relevancy']:.2f if evaluation_results[source]['avg_contextual_relevancy'] is not None else 'N/A'}")
            print(f"    Hallucination: {evaluation_results[source]['avg_hallucination']:.2f}")
    
    return evaluation_results

In [207]:
def benchmark_performance(topic, model_source="huggingface", model_name=None, lora_weights=None):
    """
    Benchmark the performance of the RAG system
    :param topic: topic to query
    :param model_source: source of the model (huggingface or lm_studio)
    :param model_name: name of the model (if None, use default)
    :param lora_weights: path to LoRA adapter weights (only used if model_source is huggingface)
    :return: performance metrics
    """
    if model_name is None:
        model_name = "google/gemma-2-2b-it" if model_source == "huggingface" else "gemma-3-12b-it"
    
    # Initialize embedding model
    embedding = EmbeddingModel()
    vectordb = Chroma(
        embedding_function=embedding,
        persist_directory=DATABASE_PATH,
    )
    
    metrics = {
        "retrieval_time": 0,
        "generation_time": 0,
        "total_time": 0,
        "tokens_per_second": 0
    }
    
    # Measure overall time
    start_time = time.time()
    
    # Retrieve relevant documents
    retrieval_start = time.time()
    embedded_query = embedding.embed_query(topic)
    chunks = vectordb.similarity_search_by_vector(embedded_query, k=5)
    retrieval_time = time.time() - retrieval_start
    metrics["retrieval_time"] = retrieval_time
    
    if chunks:
        # Generate a response
        if model_source == "huggingface":
            # Load model with or without LoRA weights
            load_huggingface_model(model_name=model_name, lora_weights=lora_weights)
            global llm, tokenizer
            
            context = "\n\n".join([chunk.page_content for chunk in chunks])
            
            prompt = f"""### Instruction: 
You are a knowledgeable history tutor. Answer the following question accurately based on the provided historical context.
Use ONLY the information provided in the context. If you don't know, say "I don't have enough information to answer that."

### Context:
{context}

### Question:
{topic}

### Answer:
"""
            
            generation_start = time.time()
            inputs = tokenizer(prompt, return_tensors="pt").to(llm.device)
            outputs = llm.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.3,
                do_sample=True
            )
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            
            # Calculate token generation speed
            input_tokens = inputs.input_ids.shape[1]
            output_tokens = outputs.shape[1] - input_tokens
            
        else:  # lm_studio
            model = ChatOpenAI(
                base_url=LM_STUDIO_URL,
                api_key="lm-studio",
                temperature=0.3,
                model=model_name,
            )
            
            # Format chunks
            formatted_chunks = ""
            for chunk in chunks:
                formatted_chunks += f"- {chunk.page_content}\n"
            
            # Create augmented prompt
            augmented_prompt = f"{topic}\nHere are some chunks of information that could help you, they might be out of order. You should use them only if they are relevant to my question.\nThe chunks are:{formatted_chunks}"
            
            generation_start = time.time()
            chat_history = [{"role": "user", "content": augmented_prompt}]
            response = model.invoke(chat_history)
            
            # Estimate tokens for LM Studio (rough estimate)
            input_tokens = len(augmented_prompt.split())
            output_tokens = len(response.content.split())
        
        generation_time = time.time() - generation_start
        metrics["generation_time"] = generation_time
        
        # Calculate token generation metrics
        if output_tokens > 0 and generation_time > 0:
            metrics["tokens_per_second"] = output_tokens / generation_time
    
    # Calculate overall metrics
    metrics["total_time"] = time.time() - start_time
    
    return metrics

In [208]:
def visualize_evaluation_results(evaluation_results):
    """
    Visualize the evaluation results with improved distinction between models
    :param evaluation_results: evaluation results from evaluate_with_deepeval
    :return: None (displays plots)
    """
    import matplotlib.pyplot as plt
    import pandas as pd
    import numpy as np
    
    # Create dataframe for visualization
    data = []
    for source, results in evaluation_results.items():
        data.append({
            "Model": source,
            "Answer Relevancy": results.get("avg_answer_relevancy", 0),
            "Contextual Relevancy": results.get("avg_contextual_relevancy", 0) if results.get("avg_contextual_relevancy") is not None else 0,
            "Hallucination": results.get("avg_hallucination", 0)
        })
    
    df = pd.DataFrame(data)
    
    # Generate distinct colors for each model
    colors = plt.cm.viridis(np.linspace(0, 1, len(df)))
    
    # Plot with multiple subplots for better visibility
    fig, axes = plt.subplots(1, 2, figsize=(18, 8))
    
    # Bar chart for average scores
    ax1 = axes[0]
    
    # Use different pattern styles for each metric
    patterns = ['/', '\\', 'o']
    
    # Plot each metric with its own color scheme
    metrics = ["Answer Relevancy", "Contextual Relevancy", "Hallucination"]
    
    # Compute bar positions
    models = df["Model"].tolist()
    x = np.arange(len(models))
    width = 0.25
    
    # Plot each metric separately with distinct colors
    for i, metric in enumerate(metrics):
        ax1.bar(x + (i - 1) * width, df[metric], width, label=metric, 
                color=plt.cm.tab10(i), hatch=patterns[i % len(patterns)])
        
        # Add value labels on the bars
        for j, v in enumerate(df[metric]):
            ax1.text(x[j] + (i - 1) * width, v + 0.02, f'{v:.2f}', 
                    ha='center', va='bottom', fontsize=8, rotation=0)
    
    ax1.set_title("Model Performance by Metric")
    ax1.set_ylabel("Score")
    ax1.set_ylim(0, 1.1)  # Give some space for the labels
    ax1.set_xticks(x)
    ax1.set_xticklabels(models, rotation=45, ha='right')
    ax1.legend(loc='upper right')
    ax1.grid(axis='y', linestyle='--', alpha=0.6)
    
    # Radar chart for model comparison
    ax2 = axes[1]
    
    # Create angles for each metric
    N = len(metrics)
    angles = [n / float(N) * 2 * np.pi for n in range(N)]
    angles += angles[:1]  # Close the loop
    
    # Plot in polar coordinates
    ax2 = plt.subplot(122, polar=True)
    
    # Add lines for each model with distinct colors
    for i, row in df.iterrows():
        values = [row[metric] for metric in metrics]
        values += values[:1]  # Close the loop
        
        ax2.plot(angles, values, linewidth=2, linestyle='solid', marker='o', 
                 label=row["Model"], color=colors[i])
        ax2.fill(angles, values, alpha=0.2, color=colors[i])
    
    # Set labels on the axes
    ax2.set_xticks(angles[:-1])
    ax2.set_xticklabels(metrics)
    
    # Add legend with unique marker for each model
    ax2.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
    
    ax2.set_title("Model Comparison Radar Chart")
    
    plt.tight_layout()
    plt.savefig('model_evaluation_results.png', dpi=300, bbox_inches='tight')
    plt.show()
    
    # Print summary table
    print("\nEvaluation Results Summary:")
    print(df.to_string(index=False))

In [209]:
def create_test_questions_json():
    """Create a JSON file with test questions and expected answers in historical summary style."""
    import json
    import os
    
    test_questions = [
        {
            "question": "What were the main causes of World War I?",
            "expected_answer": """The main causes of World War I can be traced to several interconnected factors. Nationalism played a significant role, particularly in the Balkans where ethnic tensions were high. This was closely tied to imperialism, as European powers competed for colonies and resources globally, creating rivalries and tensions. The complex system of alliances that developed in Europe meant that when conflicts arose, they quickly escalated as countries were obligated to support their allies. Germany's alliance with Austria-Hungary and Russia's commitment to Serbia exemplified this problem.

Militarism was another crucial factor, with European powers engaging in a significant arms race in the years preceding the war. The rapid build-up of armies and navies, particularly between Britain and Germany, created an atmosphere of mutual suspicion and fear.

The immediate trigger was the assassination of Archduke Franz Ferdinand of Austria-Hungary in Sarajevo on June 28, 1914, by a Serbian nationalist. This event set off a chain reaction through the alliance systems. Austria-Hungary declared war on Serbia, Russia mobilized to defend Serbia, Germany honored its alliance with Austria-Hungary, and soon most of Europe was drawn into the conflict. What might have remained a localized conflict became a global war due to these underlying tensions and entanglements."""
        },
        {
            "question": "How did the Industrial Revolution change society?",
            "expected_answer": """The Industrial Revolution fundamentally transformed society in ways that continue to shape our world today. Beginning in Britain in the late 18th century and spreading across Europe and North America, this period saw a shift from agrarian, handicraft economies to ones dominated by machine manufacturing.

Society experienced profound urbanization as people migrated from rural areas to cities seeking factory work. This led to the rapid growth of industrial centers and new social challenges. Working conditions in early factories were often harsh, with long hours, dangerous conditions, and child labor being common. These conditions eventually sparked labor movements and calls for reform.

The revolution created new social classes. The industrial middle class of factory owners and businessmen gained economic power, while a new working class formed from the factory workforce. Traditional social hierarchies based on land ownership were disrupted as wealth from manufacturing created new paths to social status.

Daily life changed dramatically with new technologies. Steam power, mechanized textile production, improved iron production, and eventually electricity transformed both manufacturing and domestic life. Transportation revolutions with railways and steamships connected previously isolated areas and facilitated trade.

Family structures evolved as work moved outside the home. Women's roles changed, with many working-class women employed in factories while middle-class women often became more confined to domestic spheres. Education expanded to meet the needs of an industrializing society, with basic literacy becoming more important.

The environmental impact was significant, with pollution, resource depletion, and urban crowding creating new health challenges. Overall, the Industrial Revolution marked a turning point in human history, accelerating economic growth while fundamentally altering social structures and daily life."""
        },
        {
            "question": "What was the significance of the French Revolution?",
            "expected_answer": """The French Revolution, which began in 1789, stands as one of history's most influential political events. Its significance stems from how it radically transformed not just France, but political thinking worldwide.

The Revolution overthrew the absolute monarchy that had ruled France for centuries, establishing the principle that government authority comes from the people rather than divine right. The Declaration of the Rights of Man and of the Citizen articulated revolutionary principles of universal rights, liberty, and equality that continue to inspire democratic movements globally.

Socially, the Revolution challenged the rigid class system of the Old Regime. The abolition of feudal privileges in August 1789 marked a decisive break with medieval social structures, promoting the ideal of a society based on merit rather than birth. The revolutionary slogan "Liberty, Equality, Fraternity" encapsulated these aspirations.

The Revolution's relationship with religion was complex and consequential. The Catholic Church's privileged position was dismantled, church lands were nationalized, and efforts were made to subordinate religion to the state. This created new understandings of secularism in public life that remain influential.

Politically, the Revolution introduced new concepts and vocabulary. Terms like "left wing" and "right wing" originated from the seating arrangements in the National Assembly. Revolutionary France also pioneered modern nationalism, transforming subjects into citizens with shared identity and direct relationship to the nation.

The Revolution spread beyond France through both conquest and inspiration. Napoleon's subsequent campaigns spread revolutionary principles across Europe, while colonial territories and other nations were inspired to seek their own revolutionary changes.

Despite the Terror and the eventual rise of Napoleon, the Revolution's ideals of liberty, equality, popular sovereignty, and nationalism profoundly shaped modern political thought and continue to resonate in contemporary society."""
        },

        {
            "question": "How did the Cold War affect international relations?",
            "expected_answer": """The Cold War fundamentally reshaped international relations from 1947 until 1991, creating patterns and institutions that continue to influence global politics today. At its core, this conflict between the United States and Soviet Union transformed a previously multipolar world into a bipolar one, with most nations aligning with one of the two superpowers.

This bipolar structure created a unique form of international stability through the concept of mutually assured destruction. As both superpowers developed extensive nuclear arsenals, direct conflict became unthinkable, leading to what historians call "the long peace" among major powers. However, this nuclear standoff also generated tremendous anxiety and required new diplomatic approaches to manage the risk of catastrophic war.

While direct confrontation was avoided, the Cold War spawned numerous proxy conflicts across Asia, Africa, and Latin America. Civil wars and regional conflicts in Korea, Vietnam, Angola, and elsewhere became battlegrounds for superpower competition, with devastating consequences for local populations. These conflicts were often intensified and prolonged by superpower involvement.

The period saw the emergence of formal alliance systems that institutionalized the division. NATO (1949) unified Western nations under American leadership, while the Warsaw Pact (1955) formalized Soviet control over Eastern Europe. These military alliances changed how nations conceptualized security, emphasizing collective defense and integrated military planning.

Decolonization occurred largely during the Cold War, with newly independent nations pressured to align with one bloc or another. The Non-Aligned Movement emerged as an attempt to maintain independence from superpower domination, though these countries often found themselves courted or coerced by both sides.

International organizations like the United Nations became forums for Cold War rivalries, with the Security Council often paralyzed by superpower vetoes. Yet these organizations also provided essential venues for dialogue, conflict management, and development assistance.

The rivalry extended to economic systems, with capitalism and communism presented as competing models for development. This ideological dimension influenced domestic policies worldwide as countries adopted aspects of either model, often with significant foreign assistance or pressure.

When the Soviet Union collapsed in 1991, the bilateral framework that had organized international relations for nearly half a century suddenly vanished, creating both new opportunities for cooperation and new sources of instability that continue to shape our world today."""
        },
        {
            "question": "How did Romania transition from communism to democracy in 1989?",
            "expected_answer": """Romania's transition from communism to democracy in 1989 stands out as one of Eastern Europe's most violent and complex revolutionary moments. Unlike the relatively peaceful transitions in neighboring countries, Romania's revolution involved significant bloodshed and dramatic public events that continue to shape its national memory.

Under Nicolae Ceaușescu, Romania experienced one of the harshest communist regimes in the Eastern Bloc. By the late 1980s, Romanians faced severe food shortages, energy rationing, and brutal repression from the Securitate secret police. While other Eastern European nations were already embracing reforms through Gorbachev's policies of glasnost and perestroika, Ceaușescu steadfastly maintained his totalitarian grip.

The revolution began in mid-December 1989 in Timișoara, when authorities attempted to evict Hungarian Reformed Church pastor László Tőkés, who had spoken against the regime. Local citizens gathered to protect him, triggering demonstrations that quickly spread. Security forces responded with violence, killing numerous protesters and creating martyrs for the revolutionary cause.

The decisive turning point came on December 21st, when Ceaușescu organized a mass rally in Bucharest to demonstrate his control. In an unprecedented moment broadcast live on television, the crowd began booing and chanting anti-government slogans. Ceaușescu's visible shock and confusion—captured on camera—shattered the aura of invincibility surrounding his regime. Within hours, full-scale revolution engulfed Bucharest.

As violence escalated, key military leaders defected to the revolutionary side, dramatically shifting the power balance. Ceaușescu and his wife Elena attempted to flee but were captured, subjected to a hasty trial, and executed on Christmas Day, with footage of their bodies shown on national television.

The National Salvation Front (NSF), led by Ion Iliescu, a former Communist Party official, quickly filled the power vacuum. While claiming revolutionary legitimacy, the NSF's composition—largely former communist officials—raised questions about the authenticity of Romania's break with its communist past. This "stolen revolution" narrative persists in Romanian political discourse.

Romania's post-revolutionary path proved challenging. Unlike some former Eastern Bloc countries that implemented rapid economic reforms, Romania experienced a more gradual, often painful transition. Economic hardship, corruption, and questions about justice for revolution-era crimes complicated its democratic development. Furthermore, the exact events of December 1989—including who was responsible for much of the violence—remain contested, with some historians suggesting elements of both genuine popular revolution and internal coup.

Nevertheless, Romania eventually established democratic institutions, joined NATO in 2004 and the European Union in 2007, marking significant milestones in its post-communist journey. Despite ongoing challenges with corruption and political instability, Romania's dramatic 1989 revolution remains a defining moment in its modern history—the violent end to four decades of communist rule and the beginning of its democratic transition."""
        },
        {
            "question": "What were the key events of the Spanish Civil War (1936-1939)?",
            "expected_answer": """The Spanish Civil War (1936-1939) represents one of Europe's most devastating domestic conflicts, serving as both a national tragedy and an international ideological battleground that foreshadowed World War II. This complex struggle set Republican forces loyal to the democratically elected government against Nationalist rebels led by General Francisco Franco.

The war emerged from Spain's polarized political landscape following the 1931 establishment of the Second Spanish Republic, which initiated ambitious reforms threatening traditional power centers. Land redistribution alienated large landowners, anticlericalism antagonized the Catholic Church, and military reforms unsettled army officers, while labor reforms and regional autonomy statutes further divided society. By 1936, Spain was essentially split between progressive, urban, secular forces and conservative, rural, Catholic traditionalists.

The conflict formally began in July 1936 when military garrisons in Spanish Morocco, led by General Franco, launched a pronunciamiento (military rebellion) against the Republican government. The rebels expected quick victory, but Spain split geographically and ideologically – Nationalists controlled Spain's northwest and south, while Republicans held Madrid, Barcelona, Valencia, and industrial regions. This division ensured a prolonged, brutal conflict.

The war quickly internationalized. Nazi Germany and Fascist Italy provided crucial military support to Franco, including the infamous bombing of Guernica in 1937 that inspired Picasso's masterpiece. The Soviet Union supported the Republic with arms and advisors, while approximately 40,000 foreign volunteers from 52 countries formed the International Brigades to defend the Republic. Western democracies, fearing wider conflict, adopted a policy of non-intervention that effectively disadvantaged the Republican side.

The conflict became characterized by atrocities on both sides. Nationalists conducted systematic purges in territories they captured, executing left-wing political opponents, intellectuals, and labor organizers. Republicans committed their own violence, particularly early in the war, with attacks against clergy and landowners. The war saw the tactical development of modern warfare, with the urban siege of Madrid, the introduction of coordinated air and ground attacks, and the devastating targeting of civilian populations.

By early 1939, Franco's forces had secured most of Spain. Barcelona fell in January, Madrid surrendered in March, and the war officially ended on April 1, 1939. The human cost was staggering – approximately 500,000 deaths from combat, execution, disease, and malnutrition, with another half-million refugees fleeing to France and elsewhere.

Franco established a dictatorship that lasted until his death in 1975. His regime initially aligned with the Axis powers but maintained official neutrality during World War II. The regime was characterized by political repression, censorship, economic autarky, and Catholic traditionalism. Only after Franco's death did Spain successfully transition to democracy.

The Spanish Civil War's significance extends beyond Spain's borders. Internationally, it represented the battleground between fascism and democracy at a time when Western powers were pursuing appeasement. For intellectuals and artists worldwide, it became the defining moral cause of the era, inspiring works from Hemingway, Orwell, and Picasso. Most significantly, the conflict served as a military testing ground for World War II tactics and technologies, making it, in many ways, the first battle of the larger war that would soon engulf Europe."""
        },
        {
            "question": "How did the unification of Germany in the 19th century reshape European politics?",
            "expected_answer": """The unification of Germany in the 19th century fundamentally transformed European politics, reconfiguring power dynamics that had persisted since the Peace of Westphalia in 1648. This process, culminating in 1871, created a powerful new nation-state in Central Europe that dramatically altered the continental balance of power and set the stage for intense rivalries that would eventually contribute to World War I.

Prior to unification, "Germany" existed as a cultural and linguistic concept encompassing dozens of independent states within the loose Germanic Confederation, with Austria and Prussia as the dominant powers. This fragmentation had long served the interests of neighboring countries, particularly France, which benefited from Central European disunity. The unification process challenged this established order through three critical phases, orchestrated largely by Prussian Minister President Otto von Bismarck's realpolitik approach—a pragmatic pursuit of power politics over ideological considerations.

The first decisive step came with the Danish War of 1864, where Prussia and Austria cooperatively seized the duchies of Schleswig and Holstein. This partnership proved short-lived, as Bismarck engineered the Austro-Prussian War of 1866, resulting in Prussia's swift victory and Austria's exclusion from German affairs. The North German Confederation formed under Prussian leadership, while Prussia annexed several smaller states, significantly expanding its territory.

Unification culminated during the Franco-Prussian War of 1870-71. Bismarck's edited version of the Ems Telegram provoked France into declaring war, enabling him to portray Prussia as defending German interests against foreign aggression. The overwhelming Prussian victory, including the humiliating capture of Emperor Napoleon III, created a wave of pan-German nationalism. On January 18, 1871, in the Hall of Mirrors at Versailles—the symbolic heart of French power—the German Empire was proclaimed with Prussia's King Wilhelm I as Kaiser.

The immediate geopolitical consequence was the emergence of a dominant new power in Europe. The German Empire possessed the continent's most powerful army, a rapidly industrializing economy, and Europe's largest population after Russia. This new entity fundamentally altered the European balance of power, ending the multipolarity that had characterized international relations and creating a system increasingly defined by alliance blocs.

For France, German unification represented a catastrophic strategic defeat. Beyond losing the provinces of Alsace and Lorraine, France faced a powerful, unified neighbor where previously a fragmented collection of states had provided a strategic buffer. The French desire for revanche (revenge) became a constant factor in European diplomacy.

German unification also reshaped Central and Eastern European politics. Austria-Hungary, excluded from German affairs, redirected its ambitions toward the Balkans, creating tensions with Russia that would contribute to the outbreak of World War I. Meanwhile, the demonstration of German strength encouraged Italy to complete its own unification by seizing Rome.

The domestic political configuration of unified Germany had profound implications for European stability. Although possessing democratic elements through the Reichstag, real power resided with the Kaiser and military leaders. This structure, combined with rapid industrialization and militarism, created a state capable of channeling national resources toward strategic objectives in ways other European powers found difficult to match.

Ultimately, German unification shattered the balance of power system that had largely maintained European stability since the Napoleonic Wars. The resulting security dilemmas, arms races, and alliance structures created a more rigid international system that proved unable to contain the crisis of 1914, linking the 1871 unification directly to the cataclysm of World War I and the subsequent reshaping of Europe in the 20th century."""
        },
        {
            "question": "What was the significance of the Hungarian Revolution of 1956?",
            "expected_answer": """The Hungarian Revolution of 1956 stands as one of the most significant challenges to Soviet authority in Eastern Europe during the Cold War, becoming a defining moment in Hungary's national identity and revealing the limits of de-Stalinization within the Soviet bloc. Though brutally crushed after just twelve days, this spontaneous uprising against communist rule profoundly influenced Cold War dynamics, Western perceptions of Soviet communism, and ultimately the path toward Eastern European independence.

The revolution emerged from Hungary's complex post-World War II experience. Soviet forces installed a communist government that implemented harsh Stalinist policies under Mátyás Rákosi, including collectivization, religious persecution, and political purges. After Stalin's death in 1953 and Khrushchev's "Secret Speech" denouncing Stalinist excesses in February 1956, reformist currents began flowing through Eastern Europe. In neighboring Poland, successful protests had just forced political concessions, inspiring Hungarian hopes.

On October 23, 1956, a student demonstration in Budapest calling for democratic reforms grew into mass protests when Hungarian State Security (ÁVH) officers fired on the crowd. The situation escalated rapidly, with citizens toppling the massive Stalin statue in Budapest's City Park—producing iconic images of revolution. Initially, Soviet forces stationed in Hungary attempted military intervention but were overwhelmed by fierce civilian resistance using improvised weapons and tactics.

By October 28, Soviet forces temporarily withdrew from Budapest, and a new government under reformist communist Imre Nagy took power. Nagy's government instituted sweeping changes—announcing Hungary's withdrawal from the Warsaw Pact, declaring neutrality, introducing democratic reforms, and releasing political prisoners. Cardinal József Mindszenty, a symbol of religious resistance imprisoned since 1949, was freed and quickly became an emblematic figure of the revolution.

The revolution's peak saw extraordinary expressions of freedom—independent newspapers flourished, non-communist political parties reemerged, and workers' councils exercised genuine democratic control in factories. Revolutionary committees effectively governed at local levels, demonstrating Hungarians' remarkable capacity for self-organization amid crisis. This period, though brief, represented the authentic democratic will of the Hungarian people.

The Soviet response, when it came, was overwhelming. On November 4, over 1,000 Soviet tanks entered Budapest, engaging in ferocious urban combat against lightly armed civilians and military defectors. Despite valiant resistance and desperate appeals for Western assistance, the revolution was crushed. The fighting killed approximately 2,500 Hungarians and 700 Soviet troops, with thousands more wounded.

In the aftermath, reprisals were severe. The Soviets installed János Kádár as Hungary's new leader, who presided over mass arrests, secret trials, and executions—including Nagy himself in 1958, despite promises of safe passage. Approximately 200,000 Hungarians fled as refugees, creating a substantial diaspora community that kept revolutionary memory alive internationally. Inside Hungary, the revolution entered a decades-long period as an unmentionable topic, referred to euphemistically as "the unfortunate events."

The international impact was profound. The uprising exposed deep fissures in Western communist parties, with many members resigning in protest against Soviet actions. For the United States and its allies, despite rhetoric about "rolling back" communism, the revolution painfully demonstrated the limits of Western willingness to directly challenge Soviet control in its sphere of influence. The timing—coinciding with the Suez Crisis—further complicated the international response, dividing Western attention and providing the Soviets with a simultaneous distraction.

The Hungarian Revolution's significance extended beyond its immediate failure. Kádár's subsequent "Goulash Communism" offered Hungarians greater economic freedom and reduced political repression in exchange for accepting Soviet dominance in foreign affairs—a pragmatic compromise emerging directly from 1956's lessons. More broadly, the revolution revealed that Eastern bloc stability rested on coercion rather than popular consent, a fundamental weakness that would ultimately contribute to communism's collapse in 1989, when Hungary led the way in opening its borders and dismantling the Iron Curtain."""
        },
        {
            "question": "How did Bulgaria's experience under Ottoman rule shape its national identity?",
            "expected_answer": """Bulgaria's nearly five-century experience under Ottoman rule (1396-1878) profoundly shaped its national identity, creating historical narratives, cultural patterns, and geopolitical orientations that continue to influence modern Bulgarian society. This lengthy period under Islamic imperial governance both preserved and transformed Bulgarian identity, producing complex legacies that would later fuel national revival and continue to resonate in contemporary politics.

The Ottoman conquest dramatically altered Bulgaria's medieval trajectory. The Second Bulgarian Empire, centered at Tarnovo, fell to Ottoman forces in 1396, ending an independent state that had been a significant regional power. Bulgarian elites faced systematic disempowerment—nobility was eliminated, the autocephalous Bulgarian Orthodox Church lost its independence to the Greek-dominated Constantinople Patriarchate, and local governance structures were replaced by Ottoman administrative systems.

Religious identity became the primary marker of difference under the Ottoman millet system, which organized subjects by faith rather than ethnicity. Bulgarians belonged to the Orthodox Christian millet, administered by Greek clergy who often suppressed Bulgarian language in liturgy and education. This created a dual subordination for Bulgarians—politically under Ottoman authorities and culturally under Greek ecclesiastical control—that would later influence Bulgarian nationalism's development against both Turkish and Greek influences.

The Ottoman period saw significant demographic changes in Bulgarian lands. Some regions experienced Turkish colonization, particularly in the east and along major transport routes. Urban centers developed distinct Muslim quarters, with mosques and other Islamic architecture permanently altering the built environment. Periodic conversion to Islam occurred, both forced and voluntary, creating communities like the Pomaks (Bulgarian-speaking Muslims) whose complex identities continue to challenge simplistic national narratives today.

Economic exploitation characterized much of Ottoman governance in Bulgarian territories. Heavy taxation, particularly the devshirme (child levy) that conscripted Christian boys into the Janissary corps, created enduring historical traumas. Agricultural production shifted toward Ottoman imperial needs, while Bulgarian peasants increasingly retreated to mountain villages to avoid excessive burdens. However, by the 18th century, some Bulgarians found economic opportunities within Ottoman trade networks, creating a merchant class that would later fund national revival efforts.

The 18th and early 19th centuries saw Ottoman power decline while Bulgarian cultural consciousness strengthened. This National Revival period (Възраждане/Vazrazhdane) was characterized by the spread of secular education in Bulgarian, the struggle for an independent Bulgarian church, and the development of a distinctive national literature. Figures like Paisii Hilendarski, whose "Slavonic-Bulgarian History" (1762) emphasized glorious medieval Bulgarian achievements, helped construct a national narrative centered on Ottoman oppression and the need for liberation.

Armed resistance periodically challenged Ottoman rule, from the hajduk tradition of mountain outlaws to major uprisings like the April Uprising of 1876. Though brutally suppressed—with massacres that horrified international opinion—these rebellions created national martyrs and eventually precipitated Russian intervention, leading to Bulgaria's liberation in 1878 following the Russo-Turkish War.

Post-liberation Bulgaria constructed national identity largely in opposition to Ottoman legacies. Historical narratives emphasized a "Turkish yoke" of oppression and resistance, while urban renewal often removed Ottoman architectural elements. The new state pursued policies of cultural homogenization, encouraging Turkish and Muslim emigration while assimilating remaining minority populations. However, Ottoman administrative practices, legal traditions, and cultural influences persisted, creating tensions between nationalist ideology and lived reality.

Today, Bulgaria's Ottoman past remains contentious in national discourse. Historical interpretations range from viewing the period as one of unmitigated oppression to more nuanced perspectives recognizing cultural synthesis and pragmatic coexistence. Ottoman architectural heritage faces challenges of preservation amid nationalist sentiments, while Turkish and Muslim minorities continue navigating complex identity politics. Bulgaria's geopolitical orientation—between East and West—similarly reflects the enduring impact of its Ottoman experience, as the nation continues defining itself in relation to this formative historical period that simultaneously represents both cultural trauma and inescapable heritage."""
        },
        {
            "question": "What role did Spain play in the European colonization of the Americas?",
            "expected_answer": """Spain played a pivotal, pioneering role in the European colonization of the Americas, establishing the first transatlantic empire and creating colonial patterns that would influence subsequent European imperial projects. From Columbus's arrival in 1492 until independence movements in the early 19th century, Spanish colonization fundamentally transformed the Western Hemisphere's demographic, cultural, religious, and political landscapes while simultaneously reshaping Spain itself and the global economy.

The Spanish colonial enterprise began with Christopher Columbus's voyages, sponsored by the Catholic Monarchs Ferdinand and Isabella shortly after Spain's unification and the completion of the Reconquista. Columbus's accidental encounter with the Caribbean initiated explosive Spanish expansion—within three decades, the Aztec and Inca empires had fallen to small contingents of conquistadors led by Hernán Cortés and Francisco Pizarro, respectively. By the mid-16th century, Spain controlled territories from modern-day California and Florida to Chile and Argentina, establishing the first empire where "the sun never set."

The early conquest phase was characterized by the encomienda system, which granted conquistadors authority over indigenous populations, ostensibly to Christianize them while extracting labor and tribute. This exploitative system, though eventually reformed under pressure from critics like Bartolomé de las Casas, set patterns of economic extraction and racial hierarchy that would define Spanish colonialism. The catastrophic demographic collapse of indigenous populations—primarily through disease but also through violence and exploitation—represents the conquest's most profound consequence, with population declines exceeding 90% in many regions.

Spanish colonialism's institutional framework developed distinct characteristics. The viceroyalty system initially established administrative centers in Mexico and Peru, later expanded to New Granada and Río de la Plata. The Council of the Indies in Spain maintained centralized control, while the Casa de Contratación regulated transatlantic commerce. This elaborate bureaucratic structure represented the most sophisticated colonial administration of its era, though in practice, the principle of "obedezco pero no cumplo" (I obey but do not comply) often allowed local authorities to adapt or ignore metropolitan directives.

The extraction of wealth—particularly silver from massive mining operations at Potosí (Bolivia) and Zacatecas (Mexico)—fundamentally altered global economics. Spain channeled unprecedented mineral wealth into European markets, fueling price revolutions, financing Habsburg imperial ambitions, and increasingly integrating global trade networks. Ironically, much of this wealth ultimately flowed to Spain's creditors and trade partners rather than developing domestic industries, contributing to Spain's long-term economic challenges despite its imperial dominance.

Religious conversion stood as a primary justification for Spanish colonialism. The Catholic Church established extensive mission systems, built monumental cathedrals, and created educational institutions throughout the colonies. Religious orders—particularly Franciscans, Dominicans, and later Jesuits—played crucial roles in colonial society, sometimes advocating for indigenous rights while simultaneously dismantling native belief systems. The resulting religious landscape blended Catholic orthodoxy with indigenous spiritual elements, creating distinctive syncretic practices that persist today.

Spanish colonialism produced complex new social structures. The sistema de castas categorized people according to racial ancestry, with peninsulares (Spanish-born) and criollos (American-born Spaniards) at the apex. Mestizos (mixed Spanish-indigenous), mulatos (mixed Spanish-African), indigenous peoples, and enslaved and free Africans occupied intermediate and lower positions, though considerable social fluidity existed in practice. These racial categorizations, while less rigid than later colonial systems, established frameworks for social stratification that long outlasted Spanish rule.

Urban development characterized Spanish colonialism, with hundreds of cities established in regular grid patterns reflecting Renaissance ideals. These cities served as centers of Spanish political control, economic activity, and cultural influence, with central plazas typically featuring government buildings and cathedrals symbolizing the alliance of crown and church. This urban emphasis contrasted with some later European colonial models and left an enduring imprint on Latin American settlement patterns.

By the late 18th century, Bourbon reforms attempted to modernize and rationalize colonial administration while extracting greater revenue, creating tensions that contributed to independence movements. Inspired by Enlightenment principles and the examples of American and French revolutions, Latin American independence movements led by figures like Simón Bolívar and José de San Martín ended most Spanish control by the 1820s, though Cuba and Puerto Rico remained Spanish until 1898.

Spain's colonial legacy remains profoundly embedded in Latin America's languages, legal systems, religious practices, architectural styles, and social structures. The complex heritage of conquest, cultural exchange, and resistance continues shaping identities and politics throughout the region, while debates about historical responsibility for colonial violence and exploitation remain relevant to contemporary relations between Spain and its former colonies."""
        },
        {
            "question": "How did the Hungarian Revolution of 1848 relate to broader European revolutionary movements?",
            "expected_answer": """The Hungarian Revolution of 1848 represented one of the most significant and complex episodes within the "Springtime of Peoples"—the wave of revolutionary movements that swept across Europe in 1848-49. While sharing ideological currents with other European uprisings, Hungary's revolution developed distinctive characteristics due to its unique position within the Habsburg Empire, its constitutional traditions, and its complex ethnic composition. This revolution demonstrates both the interconnectedness of European nationalist movements and their divergent trajectories based on local conditions.

The Hungarian Revolution emerged from the same fundamental causes that triggered uprisings across Europe—the rising power of liberalism and nationalism against autocratic systems, socioeconomic tensions as industrialization began transforming traditional societies, and the immediate catalyst of economic crisis. News of the February Revolution in Paris and subsequent uprising in Vienna in early March 1848 provided the spark for Hungarian action. Young radicals in Pest, led by poet Sándor Petőfi, articulated the "Twelve Points" demanding constitutional government, civil liberties, legal equality, and national autonomy within the Habsburg framework.

Unlike many European revolutions, Hungary's initially achieved remarkable bloodless success. The Habsburg authorities, facing multiple crises throughout their domains, conceded to many Hungarian demands. Emperor Ferdinand V sanctioned the "April Laws," which transformed Hungary from a semi-feudal system into a constitutional monarchy with its own ministry responsible to a parliamentary government. These reforms abolished serfdom, established civic equality, expanded suffrage, ended aristocratic tax exemptions, and created a Hungarian National Guard. The revolutionary government, led by moderate liberal Count Lajos Batthyány with Lajos Kossuth as its most dynamic voice, initially worked within a legal framework maintaining nominal allegiance to the Habsburg monarch as King of Hungary.

Hungary's revolution developed unique characteristics related to its historical constitution and territorial claims. Unlike other revolutionary movements demanding new rights, Hungarian leaders framed their actions as defending ancient constitutional liberties dating back centuries but increasingly violated by Habsburg centralization. They sought not to dismantle existing legal structures but to adapt and modernize them while reasserting Hungary's historic autonomy. This included claiming authority over Transylvania, Croatia, and other territories within the traditional lands of the Crown of St. Stephen, setting the stage for ethnic conflicts that would complicate revolutionary aims.

Indeed, Hungary's relationship with non-Magyar nationalities within its claimed territories created the revolution's most profound internal contradiction. While advancing liberal principles regarding individual rights, the revolutionary government maintained Magyar primacy in politics and administration, resisting demands for collective rights from Romanians, Slovaks, Serbs, and Croats. These minorities, constituting roughly 60% of Hungary's population, increasingly saw Hungarian nationalism as threatening their own national aspirations. By summer 1848, armed conflicts erupted between Hungarian forces and Croatian, Serbian, and Romanian movements, creating the paradoxical situation where Hungarian revolutionaries fighting for national liberation simultaneously suppressed other national movements within their borders.

The Habsburg monarchy exploited these ethnic tensions in its counterrevolutionary strategy. After regaining control in Vienna by autumn 1848, imperial forces under Croatian Ban Josip Jelačić invaded Hungary. The revolutionary government, pushed toward increasingly radical positions by these existential threats, moved beyond its original constitutional framework. In December 1848, Ferdinand abdicated in favor of Franz Joseph, but the Hungarian Diet refused to recognize the new monarch. By April 1849, the Hungarian National Assembly issued the Declaration of Independence, formally dethroning the Habsburgs and establishing Kossuth as governor-president of an independent Hungarian state—moving from reform to complete revolution.

The Hungarian revolutionary army, led by talented generals like Artúr Görgey, achieved remarkable military successes against imperial forces, controlling most of the country by spring 1849. This represented a level of revolutionary success unmatched elsewhere in Europe, where most uprisings had already been suppressed. However, this success prompted Tsar Nicholas I of Russia to intervene at Habsburg request. The entry of 200,000 Russian troops in June 1849 made Hungary's position untenable despite fierce resistance. By August 1849, Görgey surrendered at Világos, effectively ending the revolution.

The aftermath was severe. Hungarian revolutionary leaders who didn't escape into exile faced execution, including Prime Minister Batthyány and thirteen generals hanged at Arad. Hungary lost its constitutional autonomy, subjected to direct rule from Vienna until the Austro-Hungarian Compromise of 1867 restored much of its self-governance. Nevertheless, the revolution left enduring legacies—the abolition of serfdom remained permanent, and the experience became central to Hungarian national identity and political culture.

Within the broader European context, the Hungarian Revolution illustrated both the strength and limitations of the 1848 movements. It demonstrated liberalism's capacity to unite different social classes temporarily around shared political goals, yet also revealed the tensions between liberal universalism and exclusive nationalism. The revolution's ultimate failure highlighted the persistent strength of established powers and the disadvantages revolutionaries faced when divided by competing national claims. Unlike in Western Europe, where class conflict increasingly defined post-1848 politics, in Hungary and elsewhere in Central Europe, national questions remained paramount—a distinction that would shape these regions' divergent historical trajectories into the 20th century."""
        },
        {
            "question": "What impact did the Franco regime have on Spanish society and culture?",
            "expected_answer": """The Franco regime (1939-1975) profoundly transformed Spanish society and culture through nearly four decades of authoritarian rule, creating impacts that continue to reverberate in contemporary Spain. Following his victory in the Spanish Civil War, General Francisco Franco established a dictatorship characterized by nationalism, Catholic traditionalism, and anti-communism that sought to remake Spain according to conservative values while suppressing alternative visions. This extensive period of authoritarian control created deep cultural divides and collective traumas that Spain continues to navigate.

The regime's social agenda centered on what Franco termed "National-Catholicism"—a fusion of Spanish nationalism with conservative Catholicism that permeated all aspects of society. The Catholic Church received extensive privileges, regaining control over education and marriage while Catholic rituals and symbols dominated public life. This represented a dramatic reversal of the Second Republic's secularization efforts and reinforced traditional gender roles and family structures. Women faced particular restrictions, with Franco-era legislation eliminating many Republican-era rights, promoting domesticity, and establishing legal subordination to male authority.

Cultural expression underwent systematic control through extensive censorship. Literature, film, theater, and media required government approval, with works contradicting Catholic morality, questioning the regime, or expressing regional identities frequently banned. Many of Spain's most talented artists and intellectuals had fled into exile after the Civil War, creating a "Two Spains"—the official culture inside and the exiled culture abroad. Those who remained faced the choice between conformity, coded critique, or silence.

Economic policy evolved significantly over the regime's duration. The early period (1939-1950s) pursued autarky—economic self-sufficiency through state intervention and protectionism—contributing to devastating poverty and hunger known as "the years of hunger." By the late 1950s, technocrats associated with Opus Dei implemented liberalization measures, producing the "Spanish Miracle" of economic growth in the 1960s. This transformation from rural, agricultural society to urban, industrial one created massive internal migration, urbanization, and rising consumerism that gradually undermined traditional social structures the regime sought to preserve.

The regime's approach to Spain's cultural diversity proved particularly consequential. Franco imposed Castilian Spanish as the sole official language, suppressing Catalan, Basque, and Galician languages and regional identities. This centralization policy, framed as promoting Spanish unity, instead fueled regionalist and separatist sentiments that would surge after Franco's death. Cultural practices associated with regional identities were either prohibited, folklorized for tourism, or reframed as expressions of a unified Spanish identity rather than distinct cultural traditions.

Internationally, Spain experienced isolation following World War II due to Franco's Axis sympathies, though the Cold War eventually brought reintegration as Western powers prioritized anti-communism over democratic principles. This gradual international acceptance, alongside growing tourism from the 1960s, exposed Spaniards to foreign influences despite regime efforts to maintain cultural control. The resulting contrast between Spain's authoritarian system and neighboring democracies contributed to growing internal reform pressures.

Youth culture emerged as a critical battleground. By the late Franco period, younger generations without direct Civil War experience increasingly rejected regime values. Universities became centers of resistance, while new musical forms, artistic movements, and social behaviors challenged traditional norms. This generational shift created what sociologists termed "sociological Francoism"—formal adherence to regime structures alongside growing private rejection of its values.

The regime's final years witnessed both intensified repression and growing liberalization tensions. While the state executed political opponents as late as 1975, societal changes had already undermined many traditional structures. Franco's death initiated Spain's remarkable Transition to democracy, characterized by negotiated change rather than revolutionary rupture. This approach facilitated peaceful democratization but left many regime structures and personnel in place while establishing what some critics call a "pact of forgetting" regarding the dictatorship's crimes.

Contemporary Spain continues grappling with Franco's legacy. The 2007 Historical Memory Law addressed Civil War and dictatorship victims, but debates persist regarding proper historical reckoning. Franco's 2019 exhumation from his monumental Valley of the Fallen mausoleum symbolized ongoing efforts to redefine national memory. Meanwhile, aspects of Francoist ideology have experienced partial revival through right-wing populism, while regional separatism—particularly in Catalonia—reflects unresolved tensions regarding Spain's national identity and governance.

Ultimately, the Franco regime's impact on Spanish society and culture was profound and complex. While failing in its attempt to permanently reshape Spain according to its traditionalist vision, the dictatorship created enduring social divisions, psychological traumas, and institutional patterns that continue influencing contemporary Spain's political culture, regional relations, and collective memory—demonstrating how authoritarian systems produce consequences that outlast their formal existence."""
        }
    ]
    test_question=test_questions[0:2]
    # Save to JSON file
    with open('test_questions_with_answers.json', 'w') as f:
        json.dump(test_questions, f, indent=2)
    
    print(f"Created test questions JSON with {len(test_questions)} questions")
    return test_questions

In [210]:
import json
import os

os.environ["DEEPEVAL_USE_LOCAL_MODEL"] = "YES"

try:
    with open('test_questions_with_answers.json', 'r') as f:
        test_data = json.load(f)
except FileNotFoundError:
    test_data = create_test_questions_json()

# Extract just the questions for testing
test_questions = [item["question"] for item in test_data]

# Test with different model
print("Testing LM Studio model...")
lm_studio_results = test_lm_studio_model(test_questions, model_name="gemma-3-12b-it")

print("\nTesting base Hugging Face model...")
base_hf_results = test_huggingface_model(test_questions, model_name="google/gemma-2-2b-it", quantization=8)

print("\nTesting LoRA fine-tuned Hugging Face model...")
lora_path = "../models/with_current_prompt/models_r16_8_final"
lora_hf_results = test_huggingface_model(
    test_questions, 
    model_name="google/gemma-2-2b-it", 
    quantization=8,
    lora_weights=lora_path
)

#  Combine results for evaluation
all_results = lm_studio_results + base_hf_results + lora_hf_results

#  Evaluate with DeepEval
print("\nEvaluating results with DeepEval...")
evaluation_results = evaluate_with_deepeval(all_results, test_data)

# Visualize the results
visualize_evaluation_results(evaluation_results)

#  Benchmark performance
print("\nBenchmarking performance...")
lm_studio_benchmark = benchmark_performance("Europe History", model_source="lm_studio")
base_hf_benchmark = benchmark_performance("Europe History", model_source="huggingface")
lora_hf_benchmark = benchmark_performance("Europe History", model_source="huggingface", lora_weights=lora_path)

print("\nLM Studio benchmark results:")
for metric, value in lm_studio_benchmark.items():
    if isinstance(value, float):
        print(f"{metric}: {value:.2f}")
    else:
        print(f"{metric}: {value}")

print("\nBase Hugging Face benchmark results:")
for metric, value in base_hf_benchmark.items():
    if isinstance(value, float):
        print(f"{metric}: {value:.2f}")
    else:
        print(f"{metric}: {value}")

print("\nLoRA Hugging Face benchmark results:")
for metric, value in lora_hf_benchmark.items():
    if isinstance(value, float):
        print(f"{metric}: {value:.2f}")
    else:
        print(f"{metric}: {value}")

Testing LM Studio model...
Testing LM Studio model: gemma-3-12b-it
Vector store loaded with 4168 documents
[1/12] Processing: What were the main causes of World War I?
Response: The main causes of World War I can be summarized as follows, based on the provided information:

*  ...
Time: 34.84s (retrieval: 0.10s, generation: 32.61s)
[2/12] Processing: How did the Industrial Revolution change society?
Response: The Industrial Revolution brought about profound changes to society, fundamentally reshaping its str...
Time: 42.09s (retrieval: 0.06s, generation: 39.68s)
[3/12] Processing: What was the significance of the French Revolution?
Response: The French Revolution (1789-1799) was profoundly significant for France and Europe, marking a period...
Time: 35.52s (retrieval: 0.08s, generation: 33.38s)
[4/12] Processing: How did the Cold War affect international relations?
Response: The Cold War profoundly reshaped international relations, and the provided chunks of information ill...
Time: 63

KeyboardInterrupt: 

In [197]:



# run a prompt to see how the models generate
prompt= "What happend in the 20th century in Romania?"
# run the prompt on the models
lm_studio_model = "gemma-3-12b-it"


def run_lm_studio_model(question, model_name):
    embedding = EmbeddingModel()
    vectordb = Chroma(
        embedding_function=embedding,
        persist_directory=DATABASE_PATH,
    )
    print(f"Vector store loaded with {vectordb._collection.count()} documents")
    
    # Initialize LM Studio model
    model = ChatOpenAI(
        base_url=LM_STUDIO_URL,
        api_key="lm-studio",
        temperature=0.3,
        model=model_name,
    )
        
    chat_history = [{"role": "user", "content": question}]
    
    
    embedded_query = embedding.embed_query(question)
    retrieved_chunks = vectordb.similarity_search_by_vector(embedded_query)
    
    formatted_chunks = ""
    for chunk in retrieved_chunks:
        formatted_chunks += f"- {chunk.page_content}\n"
    
    # Augment prompt
    augmented_prompt = f"{question}\nHere are some chunks of information that could help you, they might be out of order. You should use them only if they are relevant to my question.\nThe chunks are:{formatted_chunks}"
    chat_history[0]["content"] = augmented_prompt
    
    response = model.invoke(chat_history)
    return response

def run_hf_model(question,model_name="google/gemma-2-2b-it", quantization=8, lora_weights=None):
    load_huggingface_model(model_name=model_name, quantization=quantization, lora_weights=lora_weights)
    
    global llm, tokenizer
    
    # Initialize embedding model and database
    embedding = EmbeddingModel()
    vectordb = Chroma(
        embedding_function=embedding,
        persist_directory=DATABASE_PATH,
    )
    print(f"Vector store loaded with {vectordb._collection.count()} documents")
    
    results = []
    
    embedded_query = embedding.embed_query(question)
    retrieved_chunks = vectordb.similarity_search_by_vector(embedded_query)
    
    # Format context
    context = ""
    for chunk in retrieved_chunks:
        context += f"{chunk.page_content}\n\n"
    
    prompt = f"""### Instruction: 
You are a knowledgeable history tutor. Answer the following question accurately based on the provided historical context.
Use ONLY the information provided in the context. If you don't know, say "I don't have enough information to answer that."

### Context:
{context}

### Question:
{question}

### Answer:
"""
    
    # Generate response
    try:
        inputs = tokenizer(prompt, return_tensors="pt").to(llm.device)
        outputs = llm.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.3,
            do_sample=True
        )
        response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
    except Exception as e:
        print(f"Error generating response: {e}")
        response_text = f"Error: {str(e)}"
    
    # Calculate total time
    return response_text


lm_studio_response = run_lm_studio_model(prompt, model_name=lm_studio_model)
huggin_face_response= run_hf_model(prompt, model_name="google/gemma-2-2b-it", quantization=8, lora_weights=None)
lora_path = "../models/with_current_prompt/models_r16_8_final"
hugging_face_response_lora= run_hf_model(prompt, model_name="google/gemma-2-2b-it", quantization=8, lora_weights=lora_path)

print("\nLM Studio response:")
print(lm_studio_response)
print("\nHugging Face response:")
print(huggin_face_response)
print("\nHugging Face response with LoRA:")
print(hugging_face_response_lora)




Vector store loaded with 4168 documents
Model deleted
Tokenizer deleted
Memory cleaned up
Optimizing model configuration...
CUDA available: NVIDIA GeForce RTX 3080 Laptop GPU
CUDA memory: 8.59 GB
Using balanced settings (8-bit quantization)
Loading model...
Forcing model to load on GPU


Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.54s/it]


Base model loaded successfully
Model loaded on: cuda:0
Vector store loaded with 4168 documents
Model deleted
Tokenizer deleted
Memory cleaned up
Optimizing model configuration...
CUDA available: NVIDIA GeForce RTX 3080 Laptop GPU
CUDA memory: 8.59 GB
Using balanced settings (8-bit quantization)
Loading model...
Forcing model to load on GPU
Loading base model with LoRA adapter from ../models/with_current_prompt/models_r16_8_final


Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.56s/it]


LoRA adapter loaded successfully
Model loaded on: cuda:0
Vector store loaded with 4168 documents

LM Studio response:
content='Here\'s a breakdown of what happened in Romania during the 20th century, based on the provided information:\n\n**Early 20th Century (Up to World War I):**\n\n*   **Territorial Disputes:** A significant conflict with Bulgaria over the Dobruja region led to war in 1913. Romania emerged victorious, gaining Southern Dobruja through the Treaty of Bucharest. This dispute continued and was resolved with the Treaty of Craiova in 1940, which involved a population exchange.\n*   **Political Instability:** Following World War I, Romania experienced significant political instability. The constitution granted the king considerable power, leading to frequent government changes (over 25 in one decade). This period was marked by the rise of antisemitic, ultra-nationalist, and quasi-fascist parties.\n*   **Economic Shifts:** French economic and political influence initially dom