# Installing Pacakages to Scrape Wikipedia Data in textual format

In [2]:
!pip install wikipedia-api

Collecting wikipedia-api
  Downloading wikipedia_api-0.8.1.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wikipedia-api
  Building wheel for wikipedia-api (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia-api: filename=Wikipedia_API-0.8.1-py3-none-any.whl size=15383 sha256=76df1f21aad979e87183c71df245b591b41f757bc94c93ae7c734d62e3369588
  Stored in directory: /root/.cache/pip/wheels/0b/0f/39/e8214ec038ccd5aeb8c82b957289f2f3ab2251febeae5c2860
Successfully built wikipedia-api
Installing collected packages: wikipedia-api
Successfully installed wikipedia-api-0.8.1


In [3]:
import wikipediaapi
import os

In [7]:

output_dir = "/content/drive/MyDrive/Datasets/Data for RAG"
os.makedirs(output_dir, exist_ok=True)
wiki = wikipediaapi.Wikipedia(
    language='en',
    user_agent='rag-ww2-project/1.0'
)
battle_titles = [
    "Genocide of Poles by Nazi Germany",
    "Romani Holocaust",
    "Aftermath of World War II",
     "Battle of France",
    "Battle of Britain",
    "Operation Barbarossa",
    "Siege of Leningrad",
    "Battle of Moscow",
    "Battle of El Alamein",
    "Battle of Stalingrad",
    "Operation Torch",
    "Battle of Kursk",
    "Italian Campaign (World War II)",
    "Normandy landings",
    "Battle for Caen",
    "Operation Market Garden",
    "Battle of the Bulge",
    "Battle of Berlin",
    "Attack on Pearl Harbor",
    "Battle of Midway",
    "Battle of Guadalcanal",
    "Battle of Iwo Jima",
    "Battle of Okinawa",
    "World War II",
    "Timeline of World War II (1939)",
    "Timeline of World War II (1940)",
    "Timeline of World War II (1941)",
    "Timeline of World War II (1942)",
    "Timeline of World War II (1943)",
    "Timeline of World War II (1944)",
    "Timeline of World War II (1945)",
    "List of World War II battles",
    "Military history of the United States during World War II",
      "End of World War II in Europe",
    "End of World War II in Asia",
    "List of World War II infantry weapons",
    "German military technology during World War II",
    "Allied technological cooperation during World War II",
    "Technology during World War II",
    "Genocide of Poles by Nazi Germany",
]
def extract_sections(sections, level=0):
    content = ""
    for section in sections:
        heading = f"{'#' * (level + 2)} {section.title}\n\n"
        body = section.text.strip() + "\n\n"
        content += heading + body
        content += extract_sections(section.sections, level + 1)
    return content
for title in battle_titles:
    page = wiki.page(title)
    if not page.exists():
        print(f"Page not found: {title}")
        continue
    print(f"Fetching: {title}")
    battle_text = f"# {page.title}\n\n"
    battle_text += f"## Summary\n\n{page.summary}\n\n"
    battle_text += extract_sections(page.sections)
    filename = title.replace(" ", "_").lower() + ".txt"
    with open(os.path.join(output_dir, filename), "w", encoding="utf-8") as f:
        f.write(battle_text)

print(" Wikipedia battle pages saved to:", output_dir)


Fetching: Genocide of Poles by Nazi Germany
Fetching: Romani Holocaust
Fetching: Aftermath of World War II
 Wikipedia battle pages saved to: /content/drive/MyDrive/Datasets/Data for RAG


# Setting up Open AI Environment

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tf_keras as keras

In [5]:
pip install openai



In [6]:
from openai import OpenAI

## Insert you Open AI API key here

In [7]:
os.environ['OPENAI_API_KEY'] =''

In [8]:
client = OpenAI()

In [9]:
model_list = client.models.list()

In [10]:
for model in model_list:
  print(model.id,model.created)

gpt-4-0613 1686588896
gpt-4 1687882411
gpt-3.5-turbo 1677610602
gpt-5-nano 1754426384
gpt-5 1754425777
gpt-5-mini-2025-08-07 1754425867
gpt-5-mini 1754425928
gpt-5-nano-2025-08-07 1754426303
davinci-002 1692634301
babbage-002 1692634615
gpt-3.5-turbo-instruct 1692901427
gpt-3.5-turbo-instruct-0914 1694122472
dall-e-3 1698785189
dall-e-2 1698798177
gpt-4-1106-preview 1698957206
gpt-3.5-turbo-1106 1698959748
tts-1-hd 1699046015
tts-1-1106 1699053241
tts-1-hd-1106 1699053533
text-embedding-3-small 1705948997
text-embedding-3-large 1705953180
gpt-4-0125-preview 1706037612
gpt-4-turbo-preview 1706037777
gpt-3.5-turbo-0125 1706048358
gpt-4-turbo 1712361441
gpt-4-turbo-2024-04-09 1712601677
gpt-4o 1715367049
gpt-4o-2024-05-13 1715368132
gpt-4o-mini-2024-07-18 1721172717
gpt-4o-mini 1721172741
gpt-4o-2024-08-06 1722814719
chatgpt-4o-latest 1723515131
o1-mini-2024-09-12 1725648979
o1-mini 1725649008
gpt-4o-realtime-preview-2024-10-01 1727131766
gpt-4o-audio-preview-2024-10-01 1727389042
gpt-4o-

## Splitting and Chunking using Langchain

In [138]:
!pip install -q langchain
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [139]:
my_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1200,
    chunk_overlap=100,
    length_function=len
)

## Processing txt, pdf, docx files from dataset directory




In [141]:
!pip install PyMuPDF



In [142]:
!pip install python-docx



In [151]:

import fitz
import unicodedata
from docx import Document

folder_path = '/content/drive/MyDrive/Datasets/Wiki data/New pdfs and docs'
all_chunks = []

def read_txt_file(filepath):
    with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
        return f.read()

def read_pdf_file(filepath):
    doc = fitz.open(filepath)
    return "\n".join([page.get_text() for page in doc])

def read_docx_file(filepath):
    doc = Document(filepath)
    return "\n".join([para.text for para in doc.paragraphs])

def clean_text(text):
    text = unicodedata.normalize('NFKD', text)
    return text.replace('�', '-').replace('\u200b', '')

for filename in os.listdir(folder_path):
    filepath = os.path.join(folder_path, filename)

    if filename.endswith(".txt"):
        raw_text = read_txt_file(filepath)
    elif filename.endswith(".pdf"):
        raw_text = read_pdf_file(filepath)
    elif filename.endswith(".docx"):
        raw_text = read_docx_file(filepath)
    else:
        continue

    raw_text = clean_text(raw_text)

    chunks = my_splitter.split_text(raw_text)

    for i, chunk in enumerate(chunks):
        all_chunks.append({
            "text": chunk,
            "source_file": filename,
            "metadata": {
                "battle": filename.rsplit('.', 1)[0].replace('_', ' ').title()
            }
        })

print(f" Total Chunks Created: {len(all_chunks)}")


✅ Total Chunks Created: 3612


In [152]:
def generate_ids(number, size):
  import string, random
  ids=[]
  for i in range(number):
    res = ''.join(random.choices(string.ascii_letters, k=size))
    ids.append(res)
    if len(set(ids)) != i+1:
      i-=1
      ids.pop(-1)

  return ids


In [3]:
# Empty Dataframe to store the chunked data in the required format
pre_upsert_df = pd.DataFrame(columns=['id', 'values', 'metadata'])

In [155]:
# To get embedding of the chunks
def get_embeddings(text, model='text-embedding-3-small'):
  text = text.replace("\n"," ")
  return client.embeddings.create(input=text, model = model).data[0].embedding

In [156]:
# Generating Id's and embeddings for each chunk and storing in dataframe
def load_chunks(df, split_text):
    ids = generate_ids(len(split_text), 7)
    for i, chunk in enumerate(split_text):
        text = chunk["text"]
        embedding = get_embeddings(text, model="text-embedding-3-small")
        df.loc[i] = [ids[i], embedding, {"text": text, **chunk["metadata"]}]
    return df


In [157]:
my_index = load_chunks(pre_upsert_df, all_chunks)

In [158]:
# Storing dataframe to be on safe side
my_index.to_json('/content/drive/MyDrive/all_chunks.json', orient='records', lines=True)
my_index.to_csv('/content/drive/MyDrive/all_chunks.csv',index=False)

# Upserting chunks in Pinecone Vector Database

In [14]:
!pip install pinecone

Collecting pinecone
  Downloading pinecone-7.3.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pinecone-plugin-assistant<2.0.0,>=1.6.0 (from pinecone)
  Downloading pinecone_plugin_assistant-1.7.0-py3-none-any.whl.metadata (28 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Collecting packaging<25.0,>=24.2 (from pinecone-plugin-assistant<2.0.0,>=1.6.0->pinecone)
  Downloading packaging-24.2-py3-none-any.whl.metadata (3.2 kB)
Downloading pinecone-7.3.0-py3-none-any.whl (587 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.6/587.6 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pinecone_plugin_assistant-1.7.0-py3-none-any.whl (239 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pinecone_plugin_interface-0.0.7-py3-none-any.whl (6.2 kB)
Downloading packagin

In [15]:
from pinecone import Pinecone

### Insert your Pinecone API here

In [16]:
os.environ['PINECONE_API_KEY'] = ''
pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))
index=pc.Index('rag-for-ww2')
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 6686}},
 'total_vector_count': 6686,
 'vector_type': 'dense'}

In [17]:
import pandas as pd
my_index_from_CSV = pd.read_csv('/content/drive/MyDrive/all_chunks.csv')

In [18]:
my_index_from_CSV

Unnamed: 0,id,values,metadata
0,aaLxBLN,"[-0.010026052594184875, 0.014568242244422436, ...",{'text': 'march 1933 a new chancellor of germa...
1,MxyoGGO,"[-0.02874813601374626, 0.03906797990202904, 0....","{'text': ""millions of souls standing behind th..."
2,SYFEAxX,"[-0.02210053615272045, 0.015012932941317558, 0...","{'text': ""italy and america met to decide on a..."
3,jQpiakn,"[0.007050313986837864, 0.025964835658669472, 0...",{'text': 'versailles settlement insisted that ...
4,ksOWUwN,"[0.00039436976658180356, 0.027033722028136253,...",{'text': 'economy and population intact and wi...
...,...,...,...
3607,GzgbiUw,"[-0.055723611265420914, 0.04352152347564697, 0...","{'text': ""bloodless invasion within hours Hitl..."
3608,qknCjeA,"[-0.0536881685256958, 0.05542891100049019, 0.0...",{'text': 'Hitler twice the Nazi dictator assur...
3609,vFlgzRA,"[-0.031805768609046936, 0.03596195578575134, 0...",{'text': 'which was ethnically different to th...
3610,iloFmQr,"[-0.045744240283966064, -0.010574365966022015,...","{'text': ""and indecisive opponents in Russia s..."


In [19]:
# Preprocessing dataframe
def prepare_DF(df):
  import json,ast
  try: df=df.drop('Unnamed: 0',axis=1)
  except: print('Unnamed Not Found')
  df['values']=df['values'].apply(lambda x: np.array([float(i) for i in x.replace("[",'').replace("]",'').split(',')]))
  df['metadata']=df['metadata'].apply(lambda x: ast.literal_eval(x))
  return df

In [20]:
index_df=prepare_DF(my_index_from_CSV)

Unnamed Not Found


In [21]:
def convert_data(chunk):
 'Converts a pandas dataframe to be a simple list of tuples, formatted how the `upsert()` method in the Pinecone Python client expects.'
 data = []
 for i in chunk.to_dict('records'):
  data.append(i)
 return data

In [22]:
def load_chunker(seq, size):
 'Yields a series of slices of the original iterable, up to the limit of what size is.'
 for pos in range(0, len(seq), size):
   yield seq.iloc[pos:pos + size]

In [168]:
#Upserting
for load_chunk in load_chunker(index_df,200 ):
  index.upsert(vectors=convert_data(load_chunk))

In [169]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 6686}},
 'total_vector_count': 6686,
 'vector_type': 'dense'}

## Sample testing

In [26]:
sample_text = "Who is churchil?"

In [27]:

test_embeddings=get_embeddings(sample_text, model='text-embedding-3-small')
index.query(vector=test_embeddings,top_k=5, include_metadata=True)

{'matches': [{'id': 'LeHgiyv',
              'metadata': {'battle': 'Axelrod A. Encyclopedia Of World War Ii '
                                     '(2007), Ocr (1)',
                           'text': 'the same year in which Churchill resigned '
                                   'his \n'
                                   'army commission to enter politics. '
                                   'Defeated in his \n'
                                   'first bid for Parliament, he took up '
                                   'journalism \n'
                                   'once again, sailing to South Africa on '
                                   'assignment \n'
                                   'for the Morning Post to cover the Second '
                                   'Boer War. \n'
                                   'Always eager to put himself in danger, '
                                   'Churchill \n'
                                   'was captured by the Boers but mana

In [28]:
test_embeddings=get_embeddings(sample_text, model='text-embedding-3-small')

In [29]:
contexts = [item['metadata']['text'] for item in index.query(vector=test_embeddings, top_k=5, include_metadata=True)['matches']]

In [172]:
contexts

['With victory, the initiative firmly passed to the Red Army. For the remainder of the war the Germans were limited to reacting to Soviet advances, and were never able to regain the initiative or launch a major offensive on the Eastern Front. British historian Robin Cross stated that "the Red Army had turned the tide at Stalingrad; it had seized the psychological advantage for the first time. But it was in the terrible killing grounds near Kursk that Hitler\'s panzers, and his ambitions, were dealt a blow from which they never recovered" and that "Failure at Kursk had dealt the Ostheer heavier psychological and material blows than it had suffered at Stalingrad". Further, the Western Allied landings in Italy opened a new front, further diverting German resources and attention.\nSoviet air forces dropped about 20,000 tons of bombs during the battle, 4 times more than in the Battle of Moscow, and 2.5 times more than in the Battle of Stalingrad.',
 "Both sides placed great strategic import

## Supporting functions for generating responses

In [30]:
# To get contexts
def get_context(query, embed_model='text-embedding-3-small',k=5):
  query_embeddings = get_embeddings(query, model=embed_model)
  pinecone_response = index.query(vector =query_embeddings, top_k=k, include_metadata=True)
  contexts =[item['metadata']['text'] for item in pinecone_response['matches']]
  return contexts, query

In [31]:
# To generate responses using GPT model
def ask_gpt(system_prompt, user_prompt, model="gpt-3.5-turbo", temp=0.7):
  temperature=temp
  completion = client.chat.completions.create(
      model=model,
      temperature=temp,
      messages=[
          {"role":"system",#this sets the personality and instructions
           "content":system_prompt},
          {"role":"user",
           "content":user_prompt#Query goes here
           }])
  lines = (completion.choices[0].message.content).split("\n")
  lists = (textwrap.TextWrapper(width=90, break_long_words=False).wrap(line) for line in lines)
  return "\n".join("\n".join(list) for list in lists)



In [None]:
# Embedder
def get_embeddings(text, model='text-embedding-ada-002'):
  text = text.replace("\n"," ")
  return client.embeddings.create(input=text, model = model).data[0].embedding

In [56]:
# Augmented query
def augmented_query(user_query, embed_model="text-embedding-3-small", k=5):
  contexts, query = get_context(user_query, embed_model=embed_model, k=k)
  return"\n\n ------ \n\n".join(contexts)+"\n\n ---- \n\n"+query

## Generating responses, Primer help build system prompt to control and guard rail output responses

In [33]:
import textwrap

In [61]:
# Try your prompts according to your use case
primer = f"""
You are a specialized World War II military historian with comprehensive knowledge of battles, campaigns, and combat operations from 1939-1945. You provide detailed, accurate responses based strictly on the provided historical documents.

Your expertise includes:
- Battle tactics, strategies, and outcomes
- Military unit movements and engagements
- Key commanders and their decisions
- Casualty figures and operational statistics
- Geographic and temporal context of battles
- Equipment, weapons, and technology used

Guidelines for responses:
- Answer questions using only the information from the provided  documents.
- stick relevant to the question asked, do not add irrelevant information just to fill the gap
- Provide specific details when available (dates, unit names, casualty numbers, locations
- If a question contains historical inaccuracies, politely correct them with accurate information from the sources
- When discussing battles, include relevant context about objectives, forces involved, and strategic significance
- If the specific information cannot be found in the battle documents, respond: "Oops! This information is not available in the provided my records at this point of time."
- Never speculate or add information not present in the source documents
- Maintain the authoritative tone of a military historian while being accessible to general audiences
Focus on delivering comprehensive, well-structured responses that demonstrate deep understanding of WWII military operations based on the documentary evidence provided.

"""

In [62]:
ask_gpt(system_prompt=primer, user_prompt = augmented_query(" Who is Mahatma Gandhi?"))

"Mahatma Gandhi, born Mohandas Karamchand Gandhi, was a prominent leader of the Indian\nindependence movement against British colonial rule. He advocated for nonviolent civil\ndisobedience as a means to achieve independence. Gandhi's strategy of nonviolent\nresistance, known as Satyagraha, inspired movements for civil rights and freedom across\nthe world. Gandhi's influence extended beyond India and he became an internationally\nrecognized symbol of peace and nonviolent protest."

##RAGAS Evaluation

In [38]:
%pip -q install ragas pandas datasets

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/190.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m184.3/190.6 kB[0m [31m14.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.6/70.6 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [39]:
import getpass

In [41]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision
)
from datasets import Dataset
ragas_data  = [
    {
        "question": "How did the Treaty of Versailles contribute to World War II?",
        "contexts": ['profoundly tragic documents. It created the politi-\ncal, economic, and emotional climate that pro-\nmoted the rise of Adolf Hitler and the Nazi \nParty, making a second world war virtually inevi-\ntable. Almost immediately Germany flouted the \ntreaty and began to rearm. The 100,000-man limi-\ntation put on the army was used by German mili-\ntary planners to create an elite, all-volunteer \nFührerheer, an “army of leaders,” which would \nbecome the core of the formidable army with \nwhich Hitler fought World War II. After Hitler \nassumed the office of chancellor in 1933, rearma-\nment became progressively more blatant, and the \ntimid, war-weary former Allies did nothing to \nenforce the provisions of the Treaty of Versailles.\nFurther reading: Boemeke, Manfred F., ed. The Treaty \nof Versailles: A Reassessment after 75 Years. New York: \nCambridge University Press, 1998; Kleine-Ahlbrandt, \nW. Laird. The Burden of Victory: France, Britain, and the \nEnforcement of the Versailles Peace, 1919–1925. Lanham, \nMd.: University Press of America, 1995.\nVichy government\nOn June 10, as the Battle of France was coming \nto its climax with the Germans closing in on Paris,',
 'the subjoined Covenant of the League of Nations, \nfounded that international body. The treaty, how-\never, levied excessively punitive conditions against \nGermany, which created the economic and cultural \nclimate in which the outbreak of another world \nwar was virtually assured.\nThe Treaty of Versailles was signed on June 28, \n1919, at Versailles, France, and was largely the work \nof the so-called Big Four, U.S. president Woodrow \nWilson, French premier Georges Clemenceau, Brit-\nish prime minister David Lloyd George, and Italy’s \nVersailles, Treaty of  859',
 'On May 7, 1919, the treaty was presented to a \nGerman delegation headed by Foreign Minister \nUlrich Graf von Brockdorff-Rantzau. The delega-\ntion denounced it, protesting that it abrogated the \nFourteen Points, which had been the basis of the \narmistice on November 11, 1918. Brockdorff-Rant-\nzau further declared that Germany was unable to \npay the reparations demanded. Germany’s chancel-\nlor, Philipp Scheidemann, likewise denounced the \ntreaty when it was presented to him. In response, \nthe Allies initiated a naval blockade of Germany. \nScheidemann and Brockdorff-Rantzau resigned in \nprotest on June 21, and that same day at Scapa Flow, \nGerman sailors scuttled all 50 warships of High \nSeas Fleet to keep the vessels from becoming Allied \nprizes. A new German chancellor, Gustav Bauer, \nsent another delegation to Versailles and, on June \n28, signed the document under protest, informing \nthe Allies that the treaty was being accepted only to \nend the hardships (mostly severe food shortages) \ncaused by the “inhuman” naval blockade.\nThe Treaty of Versailles is one of history’s most \nprofoundly tragic documents. It created the politi-\ncal, economic, and emotional climate that pro-',
 'and to partisan, guerrilla, and resistance activity.\nWorld War II devastated Europe and Asia and \nleft a world-shaping legacy in its turbulent wake. \nAs a result of the war, the power of the Soviet \nUnion was extended to many nations of eastern \nEurope, and communism also triumphed in China \nand established footholds in parts of Korea and \nVietnam. The world experienced a profound shift \nin power and influence away from the old states \nof western Europe and toward the United States \nand the Soviet Union, which, through some five \ndecades following the war, were the only global \nsuperpowers, each armed with another momen-\ntous product of the war: nuclear (and, later, ther-\nmonuclear) weapons.\nWorld War II is best understood as an exten-\nsion of the earlier global cataclysm that was World \nWar I (1914–18), which left many territorial issues \nunresolved even as it created a host of new cultural \nand economic incentives for war. The article entitled \n“Causes of World War II” and the articles treat-\ning France, Germany, Italy, Japan, and the United \nStates in this encyclopedia provide discussion of the \nbackground against which World War II developed,',
 'hatreds. It ended with the Treaty of Versailles, \nwhich imposed on Germany nationally humiliat-\ning and economically ruinous terms, creating the \ndesperate conditions in which a charismatic dicta-\ntor could readily find acceptance for a political, \ncultural, and mythic program that promised \nnational and racial regeneration. Moreover, the \nGreat War had created such general devastation \nthat the putative victors suffered as much as the \nvanquished. The collective sentiment prevailing \namong the Western democracies was antiwar. This \ngave Hitler and the Japanese militarists the leeway \nthey needed to establish the early phases of their \nexpansionist programs, including rearmament and \nactual conquest, virtually unopposed.\nIf World War I caused general hardship and \npolitical instability in Europe, it also changed the \npolitical structure of much of the rest of the world \nbeyond Europe by substantially undermining the \nold colonial order and stimulating a wide variety of \nnationalist and independence movements. Between \nthe wars, much of the world oscillated violently \namong competing political ideologies. There was \ncontinual crisis, crisis that was greatly exacerbated'],
        "answer": "The Treaty of Versailles contributed to World War II in the following ways:- The treaty imposed excessively punitive conditions against Germany, creating an economic and cultural climate that virtually assured the outbreak of another world war. - The limitations put on the German army by the treaty were used to create an elite, all-volunteer army that became the core of the formidable army with which Hitler fought World War II. - After Hitler became chancellor in 1933, rearmament became progressively more blatant as Germany flouted the treaty's restrictions, and the former Allies did not enforce the provisions of the Treaty of Versailles. - The treaty created conditions in which a charismatic dictator like Hitler could find acceptance for a political,cultural, and mythic program that promised national and racial regeneration.",
        "ground_truths": ["The Treaty of Versailles contributed to World War II by imposing harsh penalties on Germany after World War I, including territorial losses, military restrictions, and heavy reparations. These conditions led to severe economic hardship, national humiliation, and political instability in Germany, creating fertile ground for the rise of Adolf Hitler and the Nazi Party. Hitler exploited public resentment toward the treaty, promising to overturn its terms, which became a central theme in his expansionist policies that led to World War II."]
    },
    {
        "question": "What was the purpose of the D-Day invasion?",
        "contexts":['The Normandy landings were the landing operations and associated airborne operations on 6 June 1944 of the Allied invasion of Normandy in Operation Overlord during the Second World War. Codenamed Operation Neptune and often referred to as D-Day (after the military term), it is the largest seaborne invasion in history. The operation began the liberation of France, and the rest of Western Europe, and laid the foundations of the Allied victory on the Western Front.',
 'Normandy landings (D-day)\nThe Allied invasion of western Europe, launched \non June 6, 1944, was the implementation of Oper-\nation Overlord (and the phase of Overlord \ndevoted specifically to the initial assault, Operation \nNeptune), the product of some two years of plan-\nning, training, and buildup of personnel, equip-\nment, and supplies.\nBy May 1944, 47 divisions—about 800,000 \ncombat troops—had been assembled at embarka-\ntion points in Britain, ready to cross the English \nChannel to designated beaches in Normandy, \nFrance. The operation was under the overall com-\nmand of U.S. general Dwight David Eisenhower, \nsupreme Allied commander, Europe, whose head-\nquarters was designated Supreme Headquarters, \nAllied Expeditionary Forces (SHAEF). Directly \nunder Eisenhower was British general Bernard \nLaw Montgomery, who had field command of all \nAllied ground forces.\nThe Allies had chosen to land along a 50-mile \nexpanse of Norman coast, from Caen west to the \nbase of the Cotentin Peninsula. Tactically, this area \nwas divided into five beaches, code-named, from \neast to west, Sword (to be assaulted by the British \n3rd Division), Juno (Canadian 3rd Division), Gold',
 'Planning for the operation began in 1943. In the months leading up to the invasion, the Allies conducted a substantial military deception, codenamed Operation Bodyguard, to mislead the Germans as to the date and location of the main Allied landings. The weather on the day selected for D-Day was not ideal, and the operation had to be delayed 24 hours; a further postponement would have meant a delay of at least two weeks, as the planners had requirements for the phase of the moon, the tides, and time of day, that meant only a few days each month were deemed suitable. German leader Adolf Hitler placed Field Marshal Erwin Rommel in command of German forces and developing fortifications along the Atlantic Wall in anticipation of an invasion. US president Franklin D. Roosevelt placed Major General Dwight D. Eisenhower in command of Allied forces.',
 'The Normandy landings were the largest seaborne invasion in history, with nearly 5,000 landing and assault craft, 289 escort vessels, and 277 minesweepers participating. Nearly 160,000 troops crossed the English Channel on D-Day, with 875,000 men disembarking by the end of June. Allied casualties on the first day were at least 10,000, with 4,414 confirmed dead and the Germans had 4,000-9,000 casualties (killed, wounded, missing, or captured). The Germans never achieved Hitler\'s stated aim of "throwing the Allies back into the sea" on D-Day or at any time thereafter.',
 'Operation Overlord was the name assigned to the establishment of a large-scale lodgement on the continent. The first phase, the amphibious invasion and establishment of a secure foothold, was codenamed Operation Neptune. To gain the air superiority needed to ensure a successful invasion, the Allies undertook a bombing campaign (codenamed Operation Pointblank) that targeted German aircraft production, fuel supplies, and airfields. Elaborate deceptions, codenamed Operation Bodyguard, were undertaken in the months leading up to the invasion to prevent the Germans from learning the timing and location of the invasion.'],
        "answer": "The D-Day invasion, also known as Operation Overlord, aimed to liberate Western Europe from Nazi occupation by establishing a major Allied foothold in Normandy, France.",
        "ground_truths": ["The purpose of the D-Day invasion was to implement Operation Overlord, specifically the phase known as Operation Neptune, which aimed at establishing a large-scale lodgement on the continent of Europe. This operation was intended to begin the liberation of France and the rest of Western Europe, laying the foundations for the Allied victory on the Western Front during World War II."]
        },
    {
        "question": " What was the Holocaust and how many people were killed?",
        "contexts":['California Press, 1990.\nHolocaust, the\nHolocaust is a derivation of the Greek word holo-\nkauston, which, in turn, is a translation from \nHebrew ’olah, or burnt sacrifice. The word reflects \nthe ultimate fate of those killed in German con-\ncentration and extermination camps, crema-\ntion. In modern usage, the Holocaust is sometimes \nreferred to by the Hebrew word Shoah.\nAs discussed in the Final Solution, the Holo-\ncaust was the product of Adolf Hitler’s extreme \nanti-Semitism as it was manifested in the policies \nof the Nazi Party (NSDAP). For Hitler and the \nNazis, purging the Jews from German life, and, \nultimately, from Europe and the rest of the world, \nwas necessary to the advancement of the German \npeople. Jews were deemed Untermenschen (“sub-\nhumans”), and, therefore, a menace to the German, \nor Aryan, “race,” which aspired to the status of \nUbermenschen (“supermen”). Hitler became chan-\ncellor of Germany on January 30, 1933. On April 1, \nhe instituted a nationwide boycott of Jewish busi-\nnesses, which was followed, days later, by the \nremoval of Jews from the civil service, and, days \nafter this, by restrictions on Jewish attendance at',
 '### Genocide, concentration camps, and slave labour\n\nNazi Germany, under the dictatorship of Adolf Hitler, was responsible for murdering about 6 million Jews in what is now known as the Holocaust. They also murdered an additional 4 million others who were deemed "unworthy of life" (including the disabled and mentally ill, Soviet prisoners of war, Romani, homosexuals, Freemasons, and Jehovah\'s Witnesses) as part of a program of deliberate extermination, in effect becoming a "genocidal state". Soviet POWs were kept in especially unbearable conditions, and 3.6 million Soviet POWs out of 5.7 million died in Nazi camps during the war. In addition to concentration camps, death camps were created in Nazi Germany to exterminate people on an industrial scale. Nazi Germany extensively used forced labourers; about 12 million Europeans from German-occupied countries were abducted and used as a slave work force in German industry, agriculture and war economy.',
 'Holocaust, in which approximately 6 million \nJews perished during World War II. It must be rec-\nognized that implementation of the Final Solu-\ntion, the genocide of the Jews, was not merely an \naspect, let alone side effect, of World War II, but \nwas, for Germany, a cause and a war aim, for only \nin the context of world war and conquest could \nthe Holocaust called for by the Final Solution be \nperpetrated.\nThe Final Solution to the “Jewish Question” \ngrew out of Hitler’s pledge to “free” Germany of \nJews and Jewish influence (which Hitler deliber-\nately confounded with Marxism and communism). \nHitler conflated German nationalism with a doc-\ntrine of German “Aryanism,” a heritage of superior \nracial purity, which the Jewish “race” threatened to \npollute. He and other Nazis demonized Jews as \nalien, subversive, and generally dangerous. Hitler \nposed to the German people the Jewish Question \n(Judenfrage): What was to be done to make Ger-\nmany “Jew-free” (Judenrein)? The initial “answer” \nwas internal exile, the expulsion of Jews from rural \nGermany, from villages and small towns, and their \nconcentration in the larger cities. The next “answer”',
 'Organised crimes against civilians, including women and children, were carried out on a huge scale by the German police and military forces, as well as the local collaborators. Under the command of the Reich Security Main Office, the Einsatzgruppen killing squads conducted large-scale massacres of Jews and communists in conquered Soviet territories. Holocaust historian Raul Hilberg puts the number of Jews murdered by "mobile killing operations" at 1,400,000. The original instructions to kill "Jews in party and state positions" were broadened to include "all male Jews of military age" and then expanded once more to "all male Jews regardless of age". By the end of July, the Germans were regularly killing women and children. On 18 December 1941, Himmler and Hitler discussed the "Jewish question", and Himmler noted the meeting\'s result in his appointment book: "To be annihilated as partisans". According to Christopher Browning, "annihilating Jews and solving the so-called \'Jewish question\' under the cover of killing partisans was the agreed-upon convention between Hitler and Himmler". In accordance with Nazi policies against "inferior" Asian peoples, Turkmens were also persecuted.',
 'Kovno (now Kaunas), Lithuania, on October 28, \nwhere 9,000 Jews were killed, including some 5,000 \nchildren. Historians believe that Einsatzgruppen \nkilled more than 1 million people, the vast major-\nity Jews, all by shooting.\nDespite the staggering numbers killed in the \nSoviet Union, it was decided that shootings on \nsuch a massive scale were both impractical and too \npublic. (Indeed, when the Red Army counterat-\ntacked in the Ukraine, the Germans hastily \nattempted to dig up the mass graves of those \nslaughtered, so that the bodies could be burned \nand the evidence of the atrocity thereby destroyed.) \nTherefore, on January 20, 1942, Reinhard Heydrich \nconvened the Wannsee Conference to begin the \nimplementation of an efficient and more secretive \nmechanism for genocide. What followed was the \nconstruction of death camps in Poland, to which \nJews and others were transported, usually by rail, in \nbox cars or cattle cars, for mass execution. The first \ncamp dedicated to extermination was at Chelmno, \nPoland, which used mobile gas vans to kill victims. \nThe condemned were packed into the vehicles, \nwhich had their exhaust rerouted into the cargo'],
        "answer": "The Holocaust was the genocide of approximately 6 million Jews during World War II, carried out by\nNazi Germany. - In addition to the Jews, an additional 4 million others were murdered by Nazi\nGermany, including disabled individuals, mentally ill, Soviet prisoners of war, Romani, homosexuals,\nFreemasons, and Jehovah's Witnesses. - Historians believe that Einsatzgruppen killed more than 1\nmillion people, the majority of whom were Jews, through shooting executions",
        "ground_truths":["The Holocaust (1933–1945) was the systematic, state-sponsored persecution and murder of six million European Jews by the Nazi German regime and its allies and collaborators."]
        },
    {
        "question": " What happened at the Yalta Conference?",
        "contexts":["they had assigned crude percentages of influence  \nbritain was to have ninety percent of the \ninfluence in greece bulgaria and romania  \nwere to be ninety percent soviet yalta agreed the \nshape of the future united nations organization  \nyalta agreed to divide germany into zones \nof occupation [Music] the conference also  \ndecided the post-war fate of poland poland's \nfreedom was the original cause of the war  \npoland had been britain's ally at a time when \nthe soviet union of nazi germany had made pacts  \nof friendship as the leaders met polish troops \nwere dying and fighting alongside the armies of  \namerica and britain yet at yalta in return for a \npromise by stalin to enter the war against japan  \nthe soviet union was to be given \neastern poland the polish government  \nthat had fled in 1939 was to be abandoned \nin favor of a communist puppet government  \nat yalta the alliance was already dissolving \ninto self-interest roosevelt made a secret deal  \nwith stalin hidden from churchill to reward \nthe ussr even more with territory in asia  \nyalta was to condemn many thousands to die \nstalin demanded the return of any soviet  \ncitizens in the west prisoners of war soviet",
 '883\n★\nYalta Conference and Agreement\nThe Yalta Conference took place in February 1945 \nat the Soviet Black Sea resort of Yalta, among the \nBig Three: Franklin D. Roosevelt, Winston \nChurchill, and Joseph Stalin. As the war \nagainst Germany was coming to a close, the \nUnited States and Great Britain sought to bring \nthe Soviet Union into the still-unfinished war \nagainst Japan. The contest there was no longer \nseriously in doubt; the Japanese had been virtu-\nally defeated militarily. Nevertheless, they contin-\nued to fight, inflicting terrible casualties on the \nAmericans and, to a lesser extent, on British and \nCommonwealth troops. Indeed, more Americans \nwere being killed—by a defeated enemy no less—\nin this closing phase of the Pacific war than in all \nthe combat between December 7, 1941, and the \nbeginning of 1945.\nUp to this point, with his hands more than full \nfighting the German invasion of the Soviet \nUnion then prosecuting an offensive against Ger-\nmany, Stalin had avoided war with Japan. At Yalta, \nhowever, Roosevelt persuaded him to agree to \ndeclare war against Japan “two or three months” \nafter the surrender of Germany. In return, the',
 'in the Berlin suburb of Potsdam, its principal par-\nticipants were President Harry S. Truman, Soviet \npremier Joseph Stalin, and (at the beginning of \nthe conference) British prime minister Winston \nChurchill, whose place was later taken by Clem-\nent Attlee, when he replaced Churchill as prime \nminister.\nThe major subjects of the conference were the \nEuropean peace settlements; the urgently pressing \nissue of administering a defeated and substantially \ndestroyed Germany; the determination of Polish \nboundaries; the terms of the occupation of Austria; \nthe Soviet role in Eastern Europe; reparations; and, \nnot least, the continued prosecution of the war \nagainst Japan.\nThe conference produced the Potsdam Declara-\ntion. With regard to Germany, the declaration \nasserted the Allies’ intention to give the “German \npeople . . . the opportunity to prepare for the even-\ntual reconstruction of their life on a democratic \nand peaceful basis.” Four zones of occupation were \ndemarcated in Germany, each to be administered \nby military governments under the commanders in \nchief of the U.S., British, Soviet, and French armies \nof occupation. Austria was also divided into four',
 'of January 1, 1942, a document that stated Allied \nwar aims.\nAs for planning the “United Nations” as an \ninternational body to replace the defunct League \nof Nations, Roosevelt, Churchill, and the Soviet \nUnion’s Joseph Stalin took the lead. The first \nconcrete step in the creation of the organization \ncame in during August 21–October 7, 1944, at \nthe Dumbarton Oaks Conference, in Washington, \nD.C., at the Dumbarton Oaks Estate. Here diplo-\nmats and international scholars from the United \nStates, Great Britain, the Soviet Union, and China \nhammered out the contours of the United Nations. \nThey agreed on its purpose, its general structure, \nand, in principle, its function, but had serious dis-\nagreements over membership and voting—mainly \nbecause the Soviets insisted that each constituent \nrepublic of the USSR be given an individual mem-\nbership and a vote—something that would have \nyielded to the Soviets’ disproportionate control \nover the decisions of the UN. At the Yalta Con-\nference, held in the Crimean Black Sea resort \ntown of Yalta during February 1945, Roosevelt, \nChurchill, and Stalin laid out the nature and \nauthority of the Security Council and also reached',
 'Churchill, and Stalin laid out the nature and \nauthority of the Security Council and also reached \na tentative compromise on the number of Soviet \nrepublics to be granted independent memberships \nin the UN. They also agreed that the UN would \ninclude a trusteeship system to succeed the League \nof Nations mandate system. The Yalta decisions \nwere combined with the Dumbarton Oaks pro-\nposals as the basis for discussion at the United \nNations Conference on International Organiza-\ntion (UNCIO), which convened—even before \nWorld War II had ended—at San Francisco on \nApril 25, 1945.\nAttended by representatives of 50 countries, the \nconference produced the final Charter of the \nUnited Nations. That document was signed on \nJune 26 and promulgated on October 24, 1945, a \nlittle more than a month after the Japanese surren-\nder ended the war.\nFurther reading: Eichelberger, Clark M. Organizing for \nPeace: A Personal History of the Founding of the United \nNations. New York: Harper & Row, 1977; Schlesinger, \nStephen C. Act of Creation—The Founding of the United \nNations: A Story of Superpowers, Secret Agents, Wartime \nAllies and Enemies, and Their Quest for a Peaceful World.'],
        "answer": "The Yalta Conference took place in February 1945 at the Soviet Black Sea resort of Yalta. - The\nBig Three, Franklin D. Roosevelt, Winston Churchill, and Joseph Stalin, were the main participants.\n- At Yalta, the United States and Great Britain sought to bring the Soviet Union into the war\nagainst Japan. - Roosevelt persuaded Stalin to agree to declare war against Japan after the\nsurrender of Germany. - During the conference, they discussed the post-war fate of Poland and the\ndivision of Germany into zones of occupation. - The Yalta decisions laid out the nature and\nauthority of the United Nations Security Council and included a trusteeship system to succeed the\nLeague of Nations mandate system.",
        "ground_truths":["The Holocaust (1933–1945) was the systematic, state-sponsored persecution and murder of six million European Jews by the Nazi German regime and its allies and collaborators."]
        }
]



In [43]:
for item in ragas_data:
    item["reference"] = "\n".join(item["contexts"])
ds = Dataset.from_list(ragas_data)

In [45]:

from ragas import evaluate
result = evaluate(ds, metrics=[
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall
])
result


Evaluating:   0%|          | 0/16 [00:00<?, ?it/s]

{'faithfulness': 1.0000, 'answer_relevancy': 0.9504, 'context_precision': 1.0000, 'context_recall': 0.8023}