In [19]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_community.llms import HuggingFacePipeline

# Load tokenizer and model (this runs locally)
model_name = "google/flan-t5-large"  # or "google/flan-t5-xl" if you have RAM

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Create HF pipeline
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7
)

# Wrap in LangChain
llm = HuggingFacePipeline(pipeline=pipe)

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cpu
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  llm = HuggingFacePipeline(pipeline=pipe)


In [23]:
print(llm.invoke("What is anarchism?"))

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


anarcho-syndicalism


In [6]:
from datasets import load_dataset
from itertools import islice

# Load streaming dataset without slicing
dataset_streaming = load_dataset(
    "wiki_snippets",
    name="wikipedia_en_100_0",
    split="train",
    streaming=True
)

# Manually take the first 500 records
dataset = list(islice(dataset_streaming, 500))

Resolving data files:   0%|          | 0/26 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/53 [00:00<?, ?it/s]

In [7]:
sample = next(iter(dataset))
print(sample)
print(sample.keys())

{'_id': '{"datasets_id": 0, "wiki_id": "Anarchism", "sp": 0, "sc": 0, "ep": 2, "ec": 129}', 'datasets_id': 0, 'wiki_id': 'Anarchism', 'start_paragraph': 0, 'start_character': 0, 'end_paragraph': 2, 'end_character': 129, 'article_title': 'Anarchism', 'section_title': 'Start', 'passage_text': 'Anarchism is a political philosophy and movement that is sceptical of authority and rejects all involuntary, coercive forms of hierarchy. Anarchism calls for the abolition of the state, which it holds to be unnecessary, undesirable, and harmful. As a historically left-wing movement, placed on the farthest left of the political spectrum, it is usually described alongside communalism and libertarian Marxism as the libertarian wing (libertarian socialism) of the socialist movement, and has a strong historical association with anti-capitalism and socialism.  Humans lived in societies without formal hierarchies long before the establishment of formal states, realms, or empires. With the'}
dict_keys(['_i

In [8]:
from langchain.docstore.document import Document

documents = []

for row in dataset:
    row_dict = dict(row)
    content = row_dict.get("passage_text", "")
    title = row_dict.get("article_title", "unknown")
    section = row_dict.get("section_title", "unknown")

    if content:  # only keep non-empty content
        documents.append(
            Document(
                page_content=content,
                metadata={"title": title, "section": section}
            )
        )

In [9]:
from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_documents(documents)

In [10]:
print(f"Number of chunks created: {len(docs)}")
print(docs[0])

Number of chunks created: 500
page_content='Anarchism is a political philosophy and movement that is sceptical of authority and rejects all involuntary, coercive forms of hierarchy. Anarchism calls for the abolition of the state, which it holds to be unnecessary, undesirable, and harmful. As a historically left-wing movement, placed on the farthest left of the political spectrum, it is usually described alongside communalism and libertarian Marxism as the libertarian wing (libertarian socialism) of the socialist movement, and has a strong historical association with anti-capitalism and socialism.  Humans lived in societies without formal hierarchies long before the establishment of formal states, realms, or empires. With the' metadata={'title': 'Anarchism', 'section': 'Start'}


In [11]:
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS

# Embedding model
embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Build FAISS vector store from chunks
vectorstore = FAISS.from_documents(docs, embedding)

In [12]:
retriever = vectorstore.as_retriever()

In [32]:
vectorstore.save_local("faiss_index")

In [35]:
vectorstore = FAISS.load_local(
    "faiss_index",
    embedding,
    allow_dangerous_deserialization=True
)

In [24]:
from langchain.chains import RetrievalQA

# Create the RAG pipeline (retriever + flan-t5-xl)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

In [27]:
def ask_question(question):
    """
    Pass a question through the RAG pipeline and return the answer.
    """
    result = qa_chain.invoke({"query": question})
    
    print(f"Question: {question}")
    print(f"Answer: {result['result']}")
    
    # Optionally print where the answer was retrieved from
    print("\nSource documents:")
    for doc in result['source_documents']:
        print(f"- Title: {doc.metadata['title']}, Section: {doc.metadata['section']}")
        print(f"  Chunk: {doc.page_content[:200]}...\n")
    
    return result

In [28]:
ask_question("What are the key principles of anarchism?")
ask_question("Explain Abraham Lincoln's views on Native American policy using historical examples.")
ask_question("What is the purpose of the United Nations?")

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Question: What are the key principles of anarchism?
Answer: There is no consensus on which principles are core and commentators describe multiple anarchisms, rather than a singular anarchism, in which common principles are shared between schools of anarchism while each group prioritizes those principles differently.

Source documents:
- Title: Anarchism, Section: Start
  Chunk: Anarchism is a political philosophy and movement that is sceptical of authority and rejects all involuntary, coercive forms of hierarchy. Anarchism calls for the abolition of the state, which it holds...

- Title: Anarchism, Section: Anarchism by country
  Chunk: Anarchism by country  Governance without government  List of anarchist political ideologies  List of books about anarchism...

- Title: Anarchism, Section: Modern era
  Chunk: exist today, making it difficult to describe the contemporary anarchist movement. While theorists and activists have established "relatively stable constellations of anarchist pri

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Question: Explain Abraham Lincoln's views on Native American policy using historical examples.
Answer: During his presidency, Lincoln's policy toward Indians was driven by politics. He used the Indian Bureau as a source of patronage, making appointments to his loyal followers in Minnesota and Wisconsin. He he focused on the moral basis of republicanism.

Source documents:
- Title: Abraham Lincoln, Section: Republican values
  Chunk: of republican values has been stressed by historians such as John Patrick Diggins, Harry V. Jaffa, Vernon Burton, Eric Foner, and Herman J. Belz. Lincoln called the Declaration of Independence—which e...

- Title: Abraham Lincoln, Section: General McClellan & Native American policy
  Chunk: Eric Foner argues that:  Native American policy Lincoln's experience with Indians followed the death of his grandfather Abraham by Indian assailants, in the presence of his father and uncles. Lincoln ...

- Title: Abraham Lincoln, Section: Republican values
  Chunk: he f

{'query': 'What is the purpose of the United Nations?',
 'result': 'unanswerable',
 'source_documents': [Document(id='09c53873-e9dc-4de8-b646-95735f82c3d8', metadata={'title': 'Anarchism', 'section': 'Evolutionary tactics'}, page_content='without taking part in the discussion themselves or promoting a specific point. Minorities usually accept rough consensus, except when they feel the proposal contradicts anarchist ethics, goals and values. Anarchists usually form small groups (5–20 individuals) to enhance autonomy and friendships among their members. These kinds of groups more often than not interconnect with each other, forming larger networks. Anarchists still support and participate in strikes, especially wildcat strikes as these are leaderless strikes not organised centrally by a syndicate.  As in the past, newspapers and journals are used, and anarchists have gone online in the World Wide Web'),
  Document(id='287037cd-d051-4759-aa1c-7b1962f178a5', metadata={'title': 'Anarchism',

In [31]:
from openpyxl import Workbook

sample_questions = [
    "What is anarchism?",
    "What are the criticisms of anarchism?",
    "What was Abraham Lincoln’s Native American policy?",
    "What are the natural wonders in Alabama?",
    "Who discovered black holes?",
    "How has anarchism evolved in the modern era?",
    "What was the significance of the 1999 Seattle WTO protests?",
    "Who won the FIFA World Cup in 2022?"
]

wb = Workbook()
ws = wb.active
ws.append(["Question", "Answer"])

for q in sample_questions:
    result = ask_question(q)
    ws.append([q, result["result"]])

wb.save("rag_chatbot_output.xlsx")

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Question: What is anarchism?
Answer: political philosophy and movement that is sceptical of authority and rejects all involuntary, coercive forms of hierarchy

Source documents:
- Title: Anarchism, Section: Start
  Chunk: Anarchism is a political philosophy and movement that is sceptical of authority and rejects all involuntary, coercive forms of hierarchy. Anarchism calls for the abolition of the state, which it holds...

- Title: Anarchism, Section: Start
  Chunk: contrasted to socialist forms which are state-oriented or from above. Scholars of anarchism generally highlight anarchism's socialist credentials and criticise attempts at creating dichotomies between...

- Title: Anarchism, Section: Start
  Chunk: human society. Criticism of anarchism include claims that it is internally inconsistent, violent, or utopian.  Etymology, terminology, and definition   The etymological origin of anarchism is from the...

- Title: Anarchism, Section: Evolutionary tactics
  Chunk: schools of thoug

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Question: What are the criticisms of anarchism?
Answer: Major definitional Marxist Criticism that "anarchism cannot win", believing that it lacks the ability to properly implement its ideas

Source documents:
- Title: Anarchism, Section: Start
  Chunk: Anarchism is a political philosophy and movement that is sceptical of authority and rejects all involuntary, coercive forms of hierarchy. Anarchism calls for the abolition of the state, which it holds...

- Title: Anarchism, Section: Start
  Chunk: contrasted to socialist forms which are state-oriented or from above. Scholars of anarchism generally highlight anarchism's socialist credentials and criticise attempts at creating dichotomies between...

- Title: Anarchism, Section: Analysis & Anarchism by country
  Chunk: Marxist Criticism that "anarchism cannot win", believing that it lacks the ability to properly implement its ideas. The Marxist criticism of anarchism is that it has a utopian character because all in...

- Title: Anarchism

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Question: What are the natural wonders in Alabama?
Answer: Horseshoe Bend National Military Park near Alexander City; Little River Canyon National Preserve near Fort Payne; Russell Cave National Monument in Bridgeport; Tuskegee Airmen National Historic Site in Tuskegee; and Tuskegee Institute National Historic Site near Tuskegee

Source documents:
- Title: Alabama, Section: Etymology
  Chunk: Ridge-and-Valley Appalachians of the north to the Piedmont, Canebrake, and Black Belt of the central region to the Gulf Coastal Plain and beaches along the Gulf of Mexico in the south. The state is us...

- Title: Alabama, Section: Etymology
  Chunk: Alabama's land consists of  of forest or 67% of the state's total land area. Suburban Baldwin County, along the Gulf Coast, is the largest county in the state in both land area and water area.  Areas ...

- Title: Alabama, Section: Industry
  Chunk: is home to various attractions, natural features, parks and events that attract visitors from around th

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Question: Who discovered black holes?
Answer: unanswerable

Source documents:
- Title: Alabama, Section: Etymology
  Chunk: the Wetumpka crater, the site of "Alabama's greatest natural disaster". A -wide meteorite hit the area about 80 million years ago. The hills just east of downtown Wetumpka showcase the eroded remains ...

- Title: Abraham Lincoln, Section: General McClellan & Native American policy
  Chunk: Eric Foner argues that:  Native American policy Lincoln's experience with Indians followed the death of his grandfather Abraham by Indian assailants, in the presence of his father and uncles. Lincoln ...

- Title: Alabama, Section: Etymology
  Chunk: contains the Natchez Trace Parkway, the Selma To Montgomery National Historic Trail, and the Trail of Tears National Historic Trail.  Notable natural wonders include: the "Natural Bridge" rock, the lo...

- Title: Alabama, Section: Etymology
  Chunk: Phil Campbell, home to two waterfalls, six natural bridges and allegedly served as

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Question: How has anarchism evolved in the modern era?
Answer: During the French Revolution, partisan groups such as the Enragés and the saw a turning point in the fermentation of anti-state and federalist sentiments

Source documents:
- Title: Anarchism, Section: Start
  Chunk: Anarchism is a political philosophy and movement that is sceptical of authority and rejects all involuntary, coercive forms of hierarchy. Anarchism calls for the abolition of the state, which it holds...

- Title: Anarchism, Section: Before the establishment of towns and cities, an established authority did not exist. It was after the creation of institutions of authority that anarchistic ideas espoused as a reaction. The most notable precursors to anarchism in the ancient world were in China and Greece. In China, philosophical anarchism (the discussion on the legitimacy of the state) was delineated by Taoist philosophers Zhuang Zhou and Laozi. Alongside Stoicism, Taoism has been said to have had "significant a

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Question: What was the significance of the 1999 Seattle WTO protests?
Answer: rioting, property destruction and violent confrontations with the police

Source documents:
- Title: Anarchism, Section: Modern era
  Chunk: anarchism began to take form at this time and influenced anarchism's move from a Eurocentric demographic. This coincided with its failure to gain traction in Northern Europe and its unprecedented heig...

- Title: Anarchism, Section: Modern era
  Chunk: Other organisational tactics pioneered in this time include affinity groups, security culture and the use of decentralised technologies such as the Internet. A significant event of this period was the...

- Title: Alabama, Section: Start
  Chunk: Crow laws to disenfranchise and discriminate against African Americans from the late 19th century up until the 1960s.   In the early 20th century, despite the growth of major industries and urban cent...

- Title: Alabama, Section: Etymology
  Chunk: were plagued with allegations