### Import RAG Evaluation Utilities

In [19]:
from rag_evaluation import(
    load_pipeline,
    generate_predictions,
    run_evaluation,
    write_results,
    compute_accuracy,
    check_faithfulness,
    results_to_csv,
    results_to_csv_flat
)

from langchain_core.tracers.context import tracing_v2_enabled


In [20]:
# Build the QA chain and chat model used for prediction and grading.
# Source documents are not requested here; a second pipeline with
# return_source_documents=True is loaded further down for the faithfulness check.
qa_chain, chat_model = load_pipeline(return_source_documents=False)

In [21]:
import logging

# Install the default root handler first so DEBUG records have somewhere to go.
# With no arguments, basicConfig() attaches a StreamHandler that writes to
# stderr (Jupyter renders it in the cell output area).
logging.basicConfig()

# Turn on verbose logging for the LangSmith client internals only;
# other loggers keep their existing levels.
langsmith_logger = logging.getLogger("langsmith")
langsmith_logger.setLevel(logging.DEBUG)

In [23]:
# Hand-written evaluation set: each item pairs a question ("query") with its
# reference answer ("answer").
# The reference answers are kept free of citation markers such as
# ":contentReference[oaicite:N]{index=N}" so that only the actual answer text
# is compared against the model's prediction.
# The last four questions are deliberately unanswerable: their reference answer
# states that the information is not in the transcript.
examples = [
    {
        "query": "What does the speaker say about tips and tricks for IELTS listening?",
        "answer": "Tips and tricks do not work and only prolong your IELTS journey."
    },
    {
        "query": "How much English do most students actually listen to each week?",
        "answer": "Very few students even listen for one hour per week."
    },
    {
        "query": "What is 'active listening' according to the video?",
        "answer": "Focusing on one aspect at a time—e.g., writing down unfamiliar words and checking their spelling—to improve that specific skill."
    },
    {
        "query": "Why must your spelling be 100% correct when transferring answers?",
        "answer": "Any single-letter mistake makes the answer wrong, so you must be very careful."
    },
    {
        "query": "What is the purpose of keeping a notebook during practice?",
        "answer": "To record mistakes, spot patterns on review, and correct them over time."
    },
    {
        "query": "Besides listening, what other abilities does the listening test assess?",
        "answer": "It also tests reading, thinking, spelling, grammar, and vocabulary."
    },
    {
        "query": "Why is improving vocabulary essential for reaching band 8?",
        "answer": "Without strong vocabulary, no strategy or tips will get you to near-native level."
    },
    {
        "query": "What does the speaker recommend combining with daily listening?",
        "answer": "Daily reading practice to reinforce vocabulary growth."
    },
    {
        "query": "What does 'practicing strategically' mean?",
        "answer": "Focusing on targeted strategies rather than simply doing endless practice tests to avoid wasting time."
    },
    {
        "query": "Why have a strategy for each question type?",
        "answer": "So you automatically know what to do on test day, reducing cognitive load and boosting accuracy."
    },
    {
        "query": "What activity helps improve spelling through active listening?",
        "answer": "Writing down words you’re unsure of during listening, then checking and reviewing their spelling."
    },
    {
        "query": "What does the speaker say about using translations to learn vocabulary?",
        "answer": "Translations waste time; you must learn word usage, collocations, and grammar in English."
    },
    {
        "query": "When should you read the questions during the listening test?",
        "answer": "Before the recording starts, so you already have an idea of possible answers."
    },
    {
        "query": "Why listen to a range of accents?",
        "answer": "To get comfortable with different accents like Scottish or Canadian, as all can appear in the test."
    },
    {
        "query": "What’s wrong with cramming 12 hours of listening right before the test?",
        "answer": "Inconsistent marathon sessions are far less effective than consistent daily practice."
    },
    {
        "query": "What should you do if you’re unfamiliar with a certain accent?",
        "answer": "Find podcasts or videos featuring that accent—e.g., Canadian news for Canadian accent."
    },
    {
        "query": "Which resources does the speaker suggest for daily listening?",
        "answer": "Podcasts, radio, YouTube, TED Talks, or real conversations if you’re in an English-speaking country."
    },
    {
        "query": "Besides vocabulary, what else can active listening improve?",
        "answer": "Grammar, pronunciation, and accent awareness by focusing on specific language features."
    },
    {
        "query": "How can your phone help you improve listening habits?",
        "answer": "Many apps and iPhone updates track your listening time, helping you build daily consistency."
    },
    {
        "query": "What is the speaker’s view on using books for listening practice?",
        "answer": "Books won’t help your listening skills; you must use actual audio content."
    },
    {
        "query": "Why is using synonyms important in IELTS?",
        "answer": "This information is not mentioned in the transcript of the video."
    },
    {
        "query": "What is a synonym for the word 'say' mentioned in the video?",
        "answer": "This information is not mentioned in the transcript of the video."
    },
    {
        "query": "How can you paraphrase 'give' in a more polite way?",
        "answer": "This information is not mentioned in the transcript of the video."
    },
    {
        "query": "What advice does the video give about using the word 'for example'?",
        "answer": "This information is not mentioned in the transcript of the video."
    }
]


In [None]:
# Disabled alternative: auto-generate the QA examples from the transcript with
# QAGenerateChain instead of using the hand-written `examples` list.
# NOTE: if re-enabled, this cell rebinds `examples`, replacing the list defined earlier.
# from langchain.evaluation.qa import QAEvalChain, QAGenerateChain
# TRANSCRIPT_FILE= './transcripts/How_to_get_a_Band_8_in_IELTS_listening.txt'
# # 1. Load the full transcript text
# with open(TRANSCRIPT_FILE, 'r', encoding='utf-8') as f:
#         transcript = f.read()

# # 2. Initialize the generation chain
# gen_chain = QAGenerateChain.from_llm(chat_model)

# # 3. Generate QA pairs in a single call (one input: the whole transcript)
# inputs = [{"doc": transcript}]
# examples = gen_chain.generate(inputs)

# examples

In [24]:
# Run the QA chain over every example with LangSmith tracing enabled.
# No bare `predictions` expression is placed inside the `with` block: IPython
# only displays the last *top-level* expression of a cell, so a nested one is
# silently ignored. The predictions are displayed by the following cell.
with tracing_v2_enabled():
    predictions = generate_predictions(qa_chain, examples)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m

In [25]:
# Display the generated predictions. The rendered output shows a list of
# {'prediction': ...} dicts whose text ends with a "Relevance score: N" line.
predictions

[{'prediction': 'The speaker mentions that tips or tricks do not work for improving IELTS listening skills. \n\nRelevance score: 5'},
 {'prediction': 'Most students actually listen to even one hour per week. \n\nRelevance score: 5'},
 {'prediction': "Active listening is when you focus on one thing at a time and do something while you're listening. It involves writing down words you're unsure of the spelling, checking them in a dictionary, and actively working to improve specific areas like vocabulary, grammar, accent, pronunciation, and spelling. \n\nRelevance score: 5"},
 {'prediction': 'Your spelling must be 100% correct when transferring answers because even one letter out of place will be considered wrong. \n\nRelevance score: 5'},
 {'prediction': 'The purpose of keeping a notebook during practice is to record mistakes, review them, identify patterns, and correct them. \n\nRelevance score: 5'},
 {'prediction': 'The listening test also assesses reading, thinking, spelling, grammar, 

In [26]:
# Grade the predictions against the reference answers, using the chat model.
# NOTE(review): the name `results` is reused further down for the faithfulness
# check output, so the cells that consume it must be run in order.
results = run_evaluation(chat_model, examples, predictions)

In [None]:
# Inspect the raw evaluation results (this cell has no execution count in the saved run).
# If run after the faithfulness cell, `results` holds the faithfulness output instead.
results

In [27]:
# Aggregate the graded results into a single accuracy figure and display it.
# The saved output (95.83...) is consistent with a percentage, e.g. 23 of 24
# examples graded correct — presumably; confirm against compute_accuracy.
accuracy = compute_accuracy(results)
accuracy

95.83333333333334

In [None]:
# Persist the examples, predictions and evaluation results.
# NOTE(review): this cell has no execution count in the saved run. Run it before
# the faithfulness cell; afterwards `results` is rebound to the faithfulness
# output and this call would write the wrong data.
write_results(examples, predictions, results)

In [28]:
# Load a second pipeline that also returns the retrieved source documents,
# which is what the faithfulness check below is given as `qa_chain`.
# NOTE: this rebinds `chat_model` from the earlier load_pipeline() call.
qa_chain_2, chat_model = load_pipeline(return_source_documents=True)

In [29]:
# Run the faithfulness check over all examples with LangSmith tracing enabled.
# NOTE(review): this rebinds `results`, discarding the run_evaluation() output
# from the earlier cell; a distinct name (e.g. `faithfulness_results`) would
# avoid out-of-order mistakes, but needs the two cells below updated together.
with tracing_v2_enabled():  
    results = check_faithfulness(examples= examples, qa_chain=qa_chain_2, llm= chat_model)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m

In [30]:
# Display the faithfulness results. The rendered output shows dicts with at
# least 'query', 'answer' and 'context' keys (output is truncated in the saved run).
results

[{'query': 'What does the speaker say about tips and tricks for IELTS listening?',
  'answer': 'The speaker says that tips or tricks do not work for IELTS listening. \n\nRelevance score: 5',
  'context': "# Transcript for How_to_get_a_Band_8_in_IELTS_listeningyou improve your IELTS score or which podcast will help you improve your IELTS score or which TED Talks. It's about consistently doing it every single day. Ten minutes a day or 30 minutes a day or one hour a day is way more powerful and more impactful than panicking a week before your test and listening to 12 hours of podcasts or 12 hours of TED Talks a week before your test. It's not going to help you. And also the great thing about everything being on our mobiles, everything being on our phones is you can listen to this anywhere. On the way to work, on the way back from work, at lunch time,He is giving us simple. Good, good advice. Yeah, because it's really difficult for me to find questions when all of you are just having littl

In [31]:
# Export the faithfulness results to CSV.
# Assumes the ./evaluation directory already exists or is created by the helper — TODO confirm.
results_to_csv_flat(results, './evaluation/faithfulness_results.csv')