In [8]:
import os
import json
import uuid
import time
import requests
from tqdm import tqdm
import pandas as pd

In [10]:
def run_notebook_test(run_label, api_url, course_id, input_file, moodle_token=None, output_folder=None, mode=None):
    #output_folder = "philosophy"
    os.makedirs(output_folder, exist_ok=True)
    file_tag = f"_{run_label}" if run_label else ""
    filename = f"results{file_tag}.json"
    output_path = os.path.join(output_folder, filename)
    
    print(f"Test Start: '{run_label}'")
    print(f"Results will be saved in: {output_path}")
    
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            test_data = json.load(f)
    except FileNotFoundError:
        print(f"ERROR: Input file not found: {input_file}")
        return None

    results = []
    headers = {"Content-Type": "application/json"}
    if moodle_token:
        headers["Authorization"] = f"Bearer {moodle_token}"
    
    n = 0
    for item in tqdm(test_data, desc="Test progress"):
        n += 1
        question = item.get("question")
        ground_truth = item.get("ground_truth")
        
        if not question: 
            continue

        session_id = str(uuid.uuid4())
        
        payload = {
            "message": question,
            "courseId": str(course_id), 
            "sessionId": session_id,
            "history": [],
            "mode": mode
        }

        start_time = time.time()
        bot_answer = ""
        contexts = []
        error_msg = None
        duration = 0 

        try:
            response = requests.post(api_url, json=payload, headers=headers, timeout=180)
            
            if response.status_code == 200:
                duration = time.time() - start_time
                resp_data = response.json()
                bot_answer = resp_data.get("response", "")
                raw_sources = resp_data.get("sources", [])
                if raw_sources:
                    for src in raw_sources:
                        text_content = src.get("chunkText", "")
                        if not text_content:
                            text_content = str(src)
                        contexts.append(text_content)
            else:
                error_msg = f"HTTP {response.status_code}"
                bot_answer = f"ERROR: {error_msg}"
                if response.status_code != 422: 
                    print(f"\nSERVER ERROR: {response.status_code} for question: {question}")

        except Exception as e:
            error_msg = str(e)
            bot_answer = "ERROR: Timeout/Connection"
            duration = 180
            print(f"\n Timeout or Connection Error: {e}")
        
        if n == 1:
            print(f"\n First context preview (truncated): {str(contexts)[:200]}...")

        results.append({
            "config_label": run_label,
            "question": question,
            "answer": bot_answer,
            "contexts": contexts,
            "ground_truth": ground_truth,
            "latency": round(duration, 2),
            "error": error_msg
        })

        time.sleep(2.0)

    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        print(f"Success! JSON saved at: {output_path}")
    except Exception as e:
        print(f"Error saving JSON file: {e}")

    return results

In [11]:
def run_pipeline_test(run_label, api_url, course_id, input_file, moodle_token=None, output_folder="philosophy"):
    results_list = run_notebook_test(
        run_label=run_label,
        api_url=api_url,
        course_id=course_id,
        input_file=input_file,
        moodle_token=moodle_token,
        output_folder=output_folder
    )
    if results_list is not None:
        df_results = pd.DataFrame(results_list)
        print(f"\nŚredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
        display(df_results[['question', 'answer', 'latency', 'contexts']].head())
    return df_results    

# statistics course

In [None]:
API_URL = "https://chatbot-backend-production-4b66.up.railway.app/chat"  # Lub adres Railway
TEST_COURSE_ID = "11"                      # ID kursu z materiałami
MOODLE_TOKEN = ''               # Opcjonalnie token
INPUT_FILE = "question_answer_statistics.json"

In [23]:
RUN_LABEL = "statistics_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist08_temp01" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist08_temp01'
Results will be saved in: statistics\results_statistics_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist08_temp01.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['cw.mit.edu/fairuse.\n\n−yif(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A ...


Test progress: 100%|██████████| 15/15 [11:23<00:00, 45.56s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist08_temp01.json

Średni czas odpowiedzi: 43.55s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"According to the course materials, Yule revive...",22.25,[cw.mit.edu/fairuse.\n\n−yif(xi)\n• “logistic ...
1,What are the three components that make up the...,"Okay, let's break down the components of the e...",25.38,[and out-of-sample) for supervised learning\n\...
2,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",49.64,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,"According to the course materials, the `e1071`...",150.84,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
4,What specific node impurity measure is used by...,"Based on the provided context, the CART (Class...",35.05,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_1000_overlap_200_voya...,Who revived the term 'logistic' in 1925 to des...,"According to the course materials, Yule revive...",[cw.mit.edu/fairuse.\n\n−yif(xi)\n• “logistic ...,The name 'logistic' was revived by Yule in a p...,22.25,
1,statistics_results_chunk_1000_overlap_200_voya...,What are the three components that make up the...,"Okay, let's break down the components of the e...",[and out-of-sample) for supervised learning\n\...,According to the Bias-Variance decomposition t...,25.38,
2,statistics_results_chunk_1000_overlap_200_voya...,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,49.64,
3,statistics_results_chunk_1000_overlap_200_voya...,Which R package is required to implement Suppo...,"According to the course materials, the `e1071`...",[http://ocw.mit.edu\n\n15.097 Prediction: Mach...,The 'e1071' package is the additional R packag...,150.84,
4,statistics_results_chunk_1000_overlap_200_voya...,What specific node impurity measure is used by...,"Based on the provided context, the CART (Class...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...","For classification splitting, the CART algorit...",35.05,
5,statistics_results_chunk_1000_overlap_200_voya...,Explain the two iterative steps of the K-Means...,"Okay, I can help you understand the K-Means al...",[tall the arules package and load it. See Sect...,"Before starting, the user must choose the valu...",47.48,
6,statistics_results_chunk_1000_overlap_200_voya...,How does the Naïve Bayes algorithm handle high...,"Okay, I can help you understand how the Naïve ...",[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,29.94,
7,statistics_results_chunk_1000_overlap_200_voya...,Describe the logic behind using the 'Gain Rati...,The course material explains that while Inform...,[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,Information Gain tends to favor attributes tha...,31.03,
8,statistics_results_chunk_1000_overlap_200_voya...,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down why Lemma 1 tells us th...",[Clustering\n\nMIT 15.097 Course Notes Cynthia...,Lemma 1 proves that for any set C and any repr...,50.57,
9,statistics_results_chunk_1000_overlap_200_voya...,What are the three options C4.5 considers when...,"Okay, I can help you with that! According to t...",[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,"At each node, C4.5 recursively evaluates three...",16.1,


In [24]:
RUN_LABEL = "statistics_results_chunk_1000_overlap_200_voyagelarge2_gemini20_flash_topk10_dist08_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_1000_overlap_200_voyagelarge2_gemini20_flash_topk10_dist08_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_1000_overlap_200_voyagelarge2_gemini20_flash_topk10_dist08_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['http://ocw.mit.edu\n\n15.097 Prediction: Machine Learning and Statistics\n\nSpring 2012\n\nFor information about citing these materials or our Terms of Use, visit: http://ocw.mit.edu/terms.', 'Regul...


Test progress: 100%|██████████| 15/15 [11:28<00:00, 45.88s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_1000_overlap_200_voyagelarge2_gemini20_flash_topk10_dist08_temp03.json

Średni czas odpowiedzi: 43.87s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, Yule revived th...",17.09,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
1,What are the three components that make up the...,"Okay, I can help you with that!\n\nBased on th...",19.15,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
2,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",51.54,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,Based on the provided text:\n\n* **Support V...,151.3,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,"Based on the provided context, the CART (Class...",25.25,"[o play tennis, analyze C-section risk, etc.\n..."


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_1000_overlap_200_voya...,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, Yule revived th...",[http://ocw.mit.edu\n\n15.097 Prediction: Mach...,The name 'logistic' was revived by Yule in a p...,17.09,
1,statistics_results_chunk_1000_overlap_200_voya...,What are the three components that make up the...,"Okay, I can help you with that!\n\nBased on th...",[http://ocw.mit.edu\n\n15.097 Prediction: Mach...,According to the Bias-Variance decomposition t...,19.15,
2,statistics_results_chunk_1000_overlap_200_voya...,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,51.54,
3,statistics_results_chunk_1000_overlap_200_voya...,Which R package is required to implement Suppo...,Based on the provided text:\n\n* **Support V...,[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,151.3,
4,statistics_results_chunk_1000_overlap_200_voya...,What specific node impurity measure is used by...,"Based on the provided context, the CART (Class...","[o play tennis, analyze C-section risk, etc.\n...","For classification splitting, the CART algorit...",25.25,
5,statistics_results_chunk_1000_overlap_200_voya...,Explain the two iterative steps of the K-Means...,"Okay, I can help you understand the K-Means al...",[ic regression. Using the same notation as in ...,"Before starting, the user must choose the valu...",54.52,
6,statistics_results_chunk_1000_overlap_200_voya...,How does the Naïve Bayes algorithm handle high...,That's a great question! You're right to focus...,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,33.78,
7,statistics_results_chunk_1000_overlap_200_voya...,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the benefit of using 'Gain...","[o play tennis, analyze C-section risk, etc.\n...",Information Gain tends to favor attributes tha...,30.15,
8,statistics_results_chunk_1000_overlap_200_voya...,"According to Lemma 1 in the clustering notes, ...",Lemma 1 provides a way to understand why placi...,[¯y = Ey[y|x].\n\nSo we know for each x what t...,Lemma 1 proves that for any set C and any repr...,48.9,
9,statistics_results_chunk_1000_overlap_200_voya...,What are the three options C4.5 considers when...,C4.5 considers three options when deciding whe...,"[o play tennis, analyze C-section risk, etc.\n...","At each node, C4.5 recursively evaluates three...",15.84,


In [25]:
RUN_LABEL = "statistics_results_chunk_1000_overlap_200_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_1000_overlap_200_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_1000_overlap_200_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['cw.mit.edu/fairuse.\n\n−yif(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A ...


Test progress: 100%|██████████| 15/15 [09:21<00:00, 37.46s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_1000_overlap_200_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03.json

Średni czas odpowiedzi: 35.45s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, Yule revived the n...",18.17,[cw.mit.edu/fairuse.\n\n−yif(xi)\n• “logistic ...
1,What are the three components that make up the...,"Okay, I can help you with that!\n\nBased on th...",24.68,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
2,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",47.59,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,Based on the provided course materials:\n\n* ...,28.66,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",35.56,[Example of how K-Means could converge to the ...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_1000_overlap_200_text...,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, Yule revived the n...",[cw.mit.edu/fairuse.\n\n−yif(xi)\n• “logistic ...,The name 'logistic' was revived by Yule in a p...,18.17,
1,statistics_results_chunk_1000_overlap_200_text...,What are the three components that make up the...,"Okay, I can help you with that!\n\nBased on th...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...",According to the Bias-Variance decomposition t...,24.68,
2,statistics_results_chunk_1000_overlap_200_text...,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,47.59,
3,statistics_results_chunk_1000_overlap_200_text...,Which R package is required to implement Suppo...,Based on the provided course materials:\n\n* ...,[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,28.66,
4,statistics_results_chunk_1000_overlap_200_text...,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",[Example of how K-Means could converge to the ...,"For classification splitting, the CART algorit...",35.56,
5,statistics_results_chunk_1000_overlap_200_text...,Explain the two iterative steps of the K-Means...,"Okay, I can definitely help you understand the...",[tall the arules package and load it. See Sect...,"Before starting, the user must choose the valu...",45.0,
6,statistics_results_chunk_1000_overlap_200_text...,How does the Naïve Bayes algorithm handle high...,That's a great question! You're right to think...,[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,30.28,
7,statistics_results_chunk_1000_overlap_200_text...,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the logic behind using the...",[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,Information Gain tends to favor attributes tha...,39.35,
8,statistics_results_chunk_1000_overlap_200_text...,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down how Lemma 1 justifies t...",[Clustering\n\nMIT 15.097 Course Notes Cynthia...,Lemma 1 proves that for any set C and any repr...,38.54,
9,statistics_results_chunk_1000_overlap_200_text...,What are the three options C4.5 considers when...,C4.5 considers these three options when decidi...,[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,"At each node, C4.5 recursively evaluates three...",37.43,


In [26]:
RUN_LABEL = "statistics_results_chunk_1000_overlap_200_text_to_embedding_large_gemini20_flash_topk10_dist08_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_1000_overlap_200_text_to_embedding_large_gemini20_flash_topk10_dist08_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_1000_overlap_200_text_to_embedding_large_gemini20_flash_topk10_dist08_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['cw.mit.edu/fairuse.\n\n−yif(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A ...


Test progress: 100%|██████████| 15/15 [09:18<00:00, 37.20s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_1000_overlap_200_text_to_embedding_large_gemini20_flash_topk10_dist08_temp03.json

Średni czas odpowiedzi: 35.19s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, Yule revived the n...",20.61,[cw.mit.edu/fairuse.\n\n−yif(xi)\n• “logistic ...
1,What are the three components that make up the...,"Okay, I can help you with that!\n\nBased on th...",30.65,[Rtest is also called the true risk or the tes...
2,"In the context of the Apriori algorithm, what ...","The downward closure property of support, in t...",39.23,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,Based on the provided course materials:\n\n* ...,30.26,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",49.3,[I have to warn you that C4.5 and CART are not...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_1000_overlap_200_text...,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, Yule revived the n...",[cw.mit.edu/fairuse.\n\n−yif(xi)\n• “logistic ...,The name 'logistic' was revived by Yule in a p...,20.61,
1,statistics_results_chunk_1000_overlap_200_text...,What are the three components that make up the...,"Okay, I can help you with that!\n\nBased on th...",[Rtest is also called the true risk or the tes...,According to the Bias-Variance decomposition t...,30.65,
2,statistics_results_chunk_1000_overlap_200_text...,"In the context of the Apriori algorithm, what ...","The downward closure property of support, in t...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,39.23,
3,statistics_results_chunk_1000_overlap_200_text...,Which R package is required to implement Suppo...,Based on the provided course materials:\n\n* ...,[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,30.26,
4,statistics_results_chunk_1000_overlap_200_text...,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",[I have to warn you that C4.5 and CART are not...,"For classification splitting, the CART algorit...",49.3,
5,statistics_results_chunk_1000_overlap_200_text...,Explain the two iterative steps of the K-Means...,"Okay, I can definitely help you understand the...",[tall the arules package and load it. See Sect...,"Before starting, the user must choose the valu...",24.59,
6,statistics_results_chunk_1000_overlap_200_text...,How does the Naïve Bayes algorithm handle high...,The course materials explain that Naïve Bayes ...,[K-Nearest Neighbors\n\n• Classify using the m...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,43.95,
7,statistics_results_chunk_1000_overlap_200_text...,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the reasoning behind using...",[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,Information Gain tends to favor attributes tha...,27.61,
8,statistics_results_chunk_1000_overlap_200_text...,"According to Lemma 1 in the clustering notes, ...",Lemma 1 provides a way to understand why placi...,[Clustering\n\nMIT 15.097 Course Notes Cynthia...,Lemma 1 proves that for any set C and any repr...,43.45,
9,statistics_results_chunk_1000_overlap_200_text...,What are the three options C4.5 considers when...,C4.5 considers three options when deciding whe...,"[o play tennis, analyze C-section risk, etc.\n...","At each node, C4.5 recursively evaluates three...",35.16,


In [27]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:48<00:00, 39.26s/it]


Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03.json

Średni czas odpowiedzi: 37.24s


Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, Yule revived the n...",32.37,[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...
1,What are the three components that make up the...,"Okay, I can help you understand the decomposit...",24.86,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
2,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",48.59,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",32.7,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",39.0,[istance) to the centroid (want it to be small...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, Yule revived the n...",[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...,The name 'logistic' was revived by Yule in a p...,32.37,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Okay, I can help you understand the decomposit...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...",According to the Bias-Variance decomposition t...,24.86,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,48.59,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,32.7,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",[istance) to the centroid (want it to be small...,"For classification splitting, the CART algorit...",39.0,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,"Okay, I can definitely help you understand the...","[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...","Before starting, the user must choose the valu...",37.71,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,"That's a great question! You're right, high-di...",[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,42.76,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,The course materials explain that while Inform...,"[-section risk, etc.\n\nHow to build a decisio...",Information Gain tends to favor attributes tha...,23.46,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down how Lemma 1 shows that ...",[an objects assigned to different clusters. We...,Lemma 1 proves that for any set C and any repr...,46.54,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"Okay, I can help you with that! According to t...","[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",27.14,


In [28]:
RUN_LABEL = "statistics_results_chunk_2000_overlap_400_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_2000_overlap_400_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_2000_overlap_400_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['> norm_vec <- rnorm(n=10, mean=5, sd=2)\n> exp_vec <- rexp(n=100, rate=3)\n> pois_vec <- rpois(n=50, lambda=6)\n> unif_vec <- runif(n=20, min=1, max=9)\n> bin_vec <- rbinom(n=20, size=1000, prob=0.7...


Test progress: 100%|██████████| 15/15 [09:44<00:00, 38.99s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_2000_overlap_400_text_to_embedding_small_gemini20_flash_topk10_dist08_temp03.json

Średni czas odpowiedzi: 36.97s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, the name ""logistic...",18.84,"[> norm_vec <- rnorm(n=10, mean=5, sd=2)\n> ex..."
1,What are the three components that make up the...,"Okay, great question!\n\nAccording to the cour...",30.9,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
2,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",51.76,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,Based on the provided course materials:\n\n* ...,21.22,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,"Based on the course materials, the CART (Class...",46.03,[Rule Mining and the Apriori Algorithm\n\nMIT ...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_2000_overlap_400_text...,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, the name ""logistic...","[> norm_vec <- rnorm(n=10, mean=5, sd=2)\n> ex...",The name 'logistic' was revived by Yule in a p...,18.84,
1,statistics_results_chunk_2000_overlap_400_text...,What are the three components that make up the...,"Okay, great question!\n\nAccording to the cour...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...",According to the Bias-Variance decomposition t...,30.9,
2,statistics_results_chunk_2000_overlap_400_text...,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,51.76,
3,statistics_results_chunk_2000_overlap_400_text...,Which R package is required to implement Suppo...,Based on the provided course materials:\n\n* ...,[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,21.22,
4,statistics_results_chunk_2000_overlap_400_text...,What specific node impurity measure is used by...,"Based on the course materials, the CART (Class...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,"For classification splitting, the CART algorit...",46.03,
5,statistics_results_chunk_2000_overlap_400_text...,Explain the two iterative steps of the K-Means...,"Okay, I can definitely help you understand the...",[ost of the following algorithms (as well as l...,"Before starting, the user must choose the valu...",24.86,
6,statistics_results_chunk_2000_overlap_400_text...,How does the Naïve Bayes algorithm handle high...,That's a great question! The Naïve Bayes algor...,[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,49.5,
7,statistics_results_chunk_2000_overlap_400_text...,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the rationale behind using...",[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,Information Gain tends to favor attributes tha...,28.21,
8,statistics_results_chunk_2000_overlap_400_text...,"According to Lemma 1 in the clustering notes, ...","Lemma 1 states: cost(C; z) = cost(C, mean(C)) ...",[It turns out that the f ∗ that minimizes the ...,Lemma 1 proves that for any set C and any repr...,54.84,
9,statistics_results_chunk_2000_overlap_400_text...,What are the three options C4.5 considers when...,C4.5 considers these three options when decidi...,[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,"At each node, C4.5 recursively evaluates three...",22.87,


In [29]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk15_dist08_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk15_dist08_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk15_dist08_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:45<00:00, 39.02s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk15_dist08_temp03.json

Średni czas odpowiedzi: 37.01s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, the name ""logis...",19.59,[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...
1,What are the three components that make up the...,"Okay, I can help you with that!\n\nBased on th...",32.35,"[ β1X1 + β2X2 + · · · + βkXk + ε,\n\nwhere ε i..."
2,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",44.22,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,Based on the provided text:\n\n* **Support V...,33.96,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",46.11,[Rule Mining and the Apriori Algorithm\n\nMIT ...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, the name ""logis...",[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...,The name 'logistic' was revived by Yule in a p...,19.59,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Okay, I can help you with that!\n\nBased on th...","[ β1X1 + β2X2 + · · · + βkXk + ε,\n\nwhere ε i...",According to the Bias-Variance decomposition t...,32.35,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,44.22,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,Based on the provided text:\n\n* **Support V...,[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,33.96,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,"For classification splitting, the CART algorit...",46.11,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,"Okay, I can definitely help you understand the...","[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...","Before starting, the user must choose the valu...",27.3,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,The course material explains that Naïve Bayes ...,[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,41.34,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the logic behind using the...",[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,Information Gain tends to favor attributes tha...,35.93,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...",Lemma 1 provides a way to understand why placi...,[Clustering\n\nMIT 15.097 Course Notes Cynthia...,Lemma 1 proves that for any set C and any repr...,38.52,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"Okay, I can help you with that! According to t...",[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,"At each node, C4.5 recursively evaluates three...",25.25,


In [None]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk5_dist08_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk5_dist08_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk5_dist08_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer’s “The Origin of Logistic Regression”\n\nOrigins: 19th Century.\n\n• Studying growth of pop...


Test progress: 100%|██████████| 15/15 [09:16<00:00, 37.13s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk5_dist08_temp03.json

Średni czas odpowiedzi: 35.12s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, the name ""logis...",23.37,[Logistic Regression\n\nMIT 15.097 Course Note...
1,What are the three components that make up the...,"Okay, I can help you understand the decomposit...",23.41,[It turns out that the f ∗ that minimizes the ...
2,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",46.94,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",21.53,"[nclude “base,” “utils,” “graphics,” and “stat..."
4,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",50.0,[I have to warn you that C4.5 and CART are not...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, the name ""logis...",[Logistic Regression\n\nMIT 15.097 Course Note...,The name 'logistic' was revived by Yule in a p...,23.37,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Okay, I can help you understand the decomposit...",[It turns out that the f ∗ that minimizes the ...,According to the Bias-Variance decomposition t...,23.41,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,46.94,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...","[nclude “base,” “utils,” “graphics,” and “stat...",The 'e1071' package is the additional R packag...,21.53,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",[I have to warn you that C4.5 and CART are not...,"For classification splitting, the CART algorit...",50.0,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,"Okay, I can help you understand the K-Means al...",[an objects assigned to different clusters. We...,"Before starting, the user must choose the valu...",29.53,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,"Okay, I can help you understand how the Naïve ...",[Na¨ıve Bayes\n\nMIT 15.097 Course Notes Cynth...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,36.32,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the rationale behind using...",[Two examples of de cision trees. Opti on\n\nf...,Information Gain tends to favor attributes tha...,34.93,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down why Lemma 1 tells us th...","[While we’re analyzing, we’ll need to consider...",Lemma 1 proves that for any set C and any repr...,36.82,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"Okay, I can help you with that! Based on the c...","[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",30.85,


In [31]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist06_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist06_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist06_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer’s “The Origin of Logistic Regression”\n\nOrigins: 19th Century.\n\n• Studying growth of pop...


Test progress: 100%|██████████| 15/15 [09:48<00:00, 39.21s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist06_temp03.json

Średni czas odpowiedzi: 37.19s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, the name ""logistic...",19.03,[Logistic Regression\n\nMIT 15.097 Course Note...
1,What are the three components that make up the...,"Okay, I can help you with that! Based on the c...",31.28,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
2,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",51.82,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",26.53,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",49.83,[I have to warn you that C4.5 and CART are not...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, the name ""logistic...",[Logistic Regression\n\nMIT 15.097 Course Note...,The name 'logistic' was revived by Yule in a p...,19.03,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Okay, I can help you with that! Based on the c...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...",According to the Bias-Variance decomposition t...,31.28,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,51.82,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,26.53,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",[I have to warn you that C4.5 and CART are not...,"For classification splitting, the CART algorit...",49.83,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,"Okay, I can help you understand the K-Means al...","[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...","Before starting, the user must choose the valu...",32.22,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,"Okay, I can explain how the Naïve Bayes algori...",[Na¨ıve Bayes\n\nMIT 15.097 Course Notes Cynth...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,42.49,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,The course materials explain that while Inform...,"[-section risk, etc.\n\nHow to build a decisio...",Information Gain tends to favor attributes tha...,24.77,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down how Lemma 1 shows that ...",[an objects assigned to different clusters. We...,Lemma 1 proves that for any set C and any repr...,52.12,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"Okay, I can explain the three options C4.5 con...","[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",21.51,


In [32]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist05_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist05_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist05_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer’s “The Origin of Logistic Regression”\n\nOrigins: 19th Century.\n\n• Studying growth of pop...


Test progress: 100%|██████████| 15/15 [09:46<00:00, 39.07s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist05_temp03.json

Średni czas odpowiedzi: 37.06s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, the name ""logis...",32.7,[Logistic Regression\n\nMIT 15.097 Course Note...
1,What are the three components that make up the...,"Okay, let's break down the decomposition of th...",24.16,[It turns out that the f ∗ that minimizes the ...
2,"In the context of the Apriori algorithm, what ...",The downward closure property of support is a ...,48.32,[We want to find all strong rules. These are r...
3,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",34.54,"[nclude “base,” “utils,” “graphics,” and “stat..."
4,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,30.95,[CART - Classification and Regression Trees (B...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, the name ""logis...",[Logistic Regression\n\nMIT 15.097 Course Note...,The name 'logistic' was revived by Yule in a p...,32.7,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Okay, let's break down the decomposition of th...",[It turns out that the f ∗ that minimizes the ...,According to the Bias-Variance decomposition t...,24.16,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...",The downward closure property of support is a ...,[We want to find all strong rules. These are r...,Downward closure is a monotonicity property st...,48.32,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...","[nclude “base,” “utils,” “graphics,” and “stat...",The 'e1071' package is the additional R packag...,34.54,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,[CART - Classification and Regression Trees (B...,"For classification splitting, the CART algorit...",30.95,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,"Alright, let's break down the K-Means algorith...",[The K-Means Algorithm\n\nChoose the value of ...,"Before starting, the user must choose the valu...",37.63,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,"Okay, I can help you understand how the Naive ...",[s\nover an unknown distribution over X × Y.\n...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,44.35,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the rationale behind using...","[-section risk, etc.\n\nHow to build a decisio...",Information Gain tends to favor attributes tha...,31.14,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down why Lemma 1 tells us th...","[While we’re analyzing, we’ll need to consider...",Lemma 1 proves that for any set C and any repr...,39.09,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"Okay, I can clarify the options C4.5 considers...","[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",42.1,


In [33]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist045_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist045_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist045_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['He published in 3 papers between 1838 and 1847. The first paper demonstrated that the curve agrees very well with the actual course of the population in France, Belgium, Essex, and Russia for period...


Test progress: 100%|██████████| 15/15 [09:23<00:00, 37.60s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist045_temp03.json

Średni czas odpowiedzi: 35.59s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, Yule revived th...",19.5,[He published in 3 papers between 1838 and 184...
1,What are the three components that make up the...,"Okay, I can help you understand the decomposit...",30.24,[lp us figure out how to minimize it.\nThis is...
2,"In the context of the Apriori algorithm, what ...","The 'downward closure' property of support, in...",56.44,[We want to find all strong rules. These are r...
3,Which R package is required to implement Suppo...,Based on the material provided:\n\nTo implemen...,19.13,"[nclude “base,” “utils,” “graphics,” and “stat..."
4,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,48.8,[]


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, Yule revived th...",[He published in 3 papers between 1838 and 184...,The name 'logistic' was revived by Yule in a p...,19.5,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Okay, I can help you understand the decomposit...",[lp us figure out how to minimize it.\nThis is...,According to the Bias-Variance decomposition t...,30.24,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...","The 'downward closure' property of support, in...",[We want to find all strong rules. These are r...,Downward closure is a monotonicity property st...,56.44,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,Based on the material provided:\n\nTo implemen...,"[nclude “base,” “utils,” “graphics,” and “stat...",The 'e1071' package is the additional R packag...,19.13,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,[],"For classification splitting, the CART algorit...",48.8,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,"Okay, I can help you understand the iterative ...",[The K-Means Algorithm\n\nChoose the value of ...,"Before starting, the user must choose the valu...",20.34,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,"That's a great question! You're right, high-di...",[e random process) of how x is generated.\n\nN...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,47.08,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the logic behind using the...",[Actually Patrons has the highest gain among t...,Information Gain tends to favor attributes tha...,36.96,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","Okay, I can help you understand why the mean i...","[While we’re analyzing, we’ll need to consider...",Lemma 1 proves that for any set C and any repr...,44.14,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"Okay, I can help you with that!\n\nBased on th...","[Gin\n\nf p bet\n\nclassifica\n\ny,\n\nrop\n\n...","At each node, C4.5 recursively evaluates three...",28.35,


In [6]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist07_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist07_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist07_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:08<00:00, 36.58s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist07_temp03.json

Średni czas odpowiedzi: 34.57s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, Yule revived the n...",22.69,[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...
1,What are the three components that make up the...,"Okay, I can help you understand the three comp...",24.58,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
2,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",44.48,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",18.88,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",43.6,[istance) to the centroid (want it to be small...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, Yule revived the n...",[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...,The name 'logistic' was revived by Yule in a p...,22.69,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Okay, I can help you understand the three comp...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...",According to the Bias-Variance decomposition t...,24.58,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support, in...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,44.48,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,18.88,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",[istance) to the centroid (want it to be small...,"For classification splitting, the CART algorit...",43.6,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,"Okay, I can definitely help you understand the...","[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...","Before starting, the user must choose the valu...",40.88,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,That's a great question! It touches on a pract...,[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,36.9,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,The course materials explain that while Inform...,"[-section risk, etc.\n\nHow to build a decisio...",Information Gain tends to favor attributes tha...,25.03,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down why Lemma 1 tells us th...",[an objects assigned to different clusters. We...,Lemma 1 proves that for any set C and any repr...,47.81,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"Okay, I can help you with the options C4.5 con...","[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",22.41,


In [7]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_llama_3_1_8B_instruct_topk10_dist08_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_llama_3_1_8B_instruct_topk10_dist08_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_llama_3_1_8B_instruct_topk10_dist08_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [10:02<00:00, 40.17s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_llama_3_1_8B_instruct_topk10_dist08_temp03.json

Średni czas odpowiedzi: 38.16s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"According to the course materials, the term ""l...",35.87,[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...
1,What are the three components that make up the...,"Based on the provided course materials, the th...",25.02,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
2,"In the context of the Apriori algorithm, what ...","In the context of the Apriori algorithm, the d...",46.82,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,"According to the course materials, the require...",20.85,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,"Based on the course materials, the specific no...",48.67,[istance) to the centroid (want it to be small...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"According to the course materials, the term ""l...",[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...,The name 'logistic' was revived by Yule in a p...,35.87,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Based on the provided course materials, the th...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...",According to the Bias-Variance decomposition t...,25.02,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...","In the context of the Apriori algorithm, the d...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,46.82,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"According to the course materials, the require...",[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,20.85,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,"Based on the course materials, the specific no...",[istance) to the centroid (want it to be small...,"For classification splitting, the CART algorit...",48.67,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,I'd be happy to explain the two iterative step...,"[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...","Before starting, the user must choose the valu...",39.09,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,"According to the course materials, when dealin...",[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,43.84,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,The Gain Ratio is an alternative to Informatio...,"[-section risk, etc.\n\nHow to build a decisio...",Information Gain tends to favor attributes tha...,23.88,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","According to Lemma 1, the cost function for a ...",[an objects assigned to different clusters. We...,Lemma 1 proves that for any set C and any repr...,50.85,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"According to the course materials, when decidi...","[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",24.49,


In [8]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_mistral_nemo_topk10_dist08_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_mistral_nemo_topk10_dist08_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_mistral_nemo_topk10_dist08_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [10:07<00:00, 40.52s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_mistral_nemo_topk10_dist08_temp03.json

Średni czas odpowiedzi: 38.51s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,The term 'logistic' was revived in 1925 by Udn...,26.84,[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...
1,What are the three components that make up the...,"Based on the provided context, the decompositi...",35.27,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
2,"In the context of the Apriori algorithm, what ...",The 'downward closure' property of support in ...,57.92,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,"Based on the provided course materials, here a...",30.35,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,The specific node impurity measure used by the...,40.43,[istance) to the centroid (want it to be small...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,The term 'logistic' was revived in 1925 by Udn...,[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...,The name 'logistic' was revived by Yule in a p...,26.84,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Based on the provided context, the decompositi...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...",According to the Bias-Variance decomposition t...,35.27,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...",The 'downward closure' property of support in ...,[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,57.92,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"Based on the provided course materials, here a...",[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,30.35,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,The specific node impurity measure used by the...,[istance) to the centroid (want it to be small...,"For classification splitting, the CART algorit...",40.43,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,The K-Means algorithm is an iterative clusteri...,"[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...","Before starting, the user must choose the valu...",41.91,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,The Naïve Bayes algorithm handles this situati...,[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,29.82,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,The logic behind using the 'Gain Ratio' instea...,"[-section risk, etc.\n\nHow to build a decisio...",Information Gain tends to favor attributes tha...,36.7,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","According to Lemma 1 in the clustering notes, ...",[an objects assigned to different clusters. We...,Lemma 1 proves that for any set C and any repr...,48.71,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,The three options that C4.5 considers when dec...,"[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",21.92,


In [9]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gpt4o_mini_topk10_dist08_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gpt4o_mini_topk10_dist08_temp03'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gpt4o_mini_topk10_dist08_temp03.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [10:34<00:00, 42.31s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gpt4o_mini_topk10_dist08_temp03.json

Średni czas odpowiedzi: 40.30s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,The term 'logistic' was revived by Yule in a p...,24.95,[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...
1,What are the three components that make up the...,The decomposition of the expected prediction e...,31.04,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
2,"In the context of the Apriori algorithm, what ...",The 'downward closure' property of support is ...,49.02,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,"To implement Support Vector Machines (SVM), yo...",31.1,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,45.9,[istance) to the centroid (want it to be small...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,The term 'logistic' was revived by Yule in a p...,[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...,The name 'logistic' was revived by Yule in a p...,24.95,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,The decomposition of the expected prediction e...,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...",According to the Bias-Variance decomposition t...,31.04,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...",The 'downward closure' property of support is ...,[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,49.02,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"To implement Support Vector Machines (SVM), yo...",[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,31.1,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,[istance) to the centroid (want it to be small...,"For classification splitting, the CART algorit...",45.9,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,The K-Means algorithm consists of two main ite...,"[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...","Before starting, the user must choose the valu...",33.15,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,The Naïve Bayes algorithm addresses the challe...,[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,46.49,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,The logic behind using the Gain Ratio instead ...,"[-section risk, etc.\n\nHow to build a decisio...",Information Gain tends to favor attributes tha...,34.51,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","According to Lemma 1, the mean is the optimal ...",[an objects assigned to different clusters. We...,Lemma 1 proves that for any set C and any repr...,38.48,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,C4.5 considers three options when deciding whe...,"[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",39.64,


In [10]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp01" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp01'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp01.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): []...


Test progress: 100%|██████████| 15/15 [17:04<00:00, 68.28s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp01.json

Średni czas odpowiedzi: 66.27s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the materials I have, it was Raymond ...",60.83,[]
1,What are the three components that make up the...,"Okay, I can help you with that! Based on the c...",75.6,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
2,"In the context of the Apriori algorithm, what ...","The downward closure property of support, in t...",65.0,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",64.31,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",80.2,[istance) to the centroid (want it to be small...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"Based on the materials I have, it was Raymond ...",[],The name 'logistic' was revived by Yule in a p...,60.83,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Okay, I can help you with that! Based on the c...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...",According to the Bias-Variance decomposition t...,75.6,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...","The downward closure property of support, in t...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,65.0,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,64.31,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,"For classification tasks, the CART (Classifica...",[istance) to the centroid (want it to be small...,"For classification splitting, the CART algorit...",80.2,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,"Okay, I can help you understand the K-Means al...","[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...","Before starting, the user must choose the valu...",58.3,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,That's a great question! It touches on a pract...,[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,64.26,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,The course materials explain that while Inform...,"[-section risk, etc.\n\nHow to build a decisio...",Information Gain tends to favor attributes tha...,51.02,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down how Lemma 1 shows that ...",[an objects assigned to different clusters. We...,Lemma 1 proves that for any set C and any repr...,82.46,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"Okay, I can help you with that! According to t...","[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",81.29,


In [11]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp05" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp05'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp05.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [16:03<00:00, 64.21s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp05.json

Średni czas odpowiedzi: 62.20s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"According to the course materials, the name ""l...",70.63,[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...
1,What are the three components that make up the...,"Okay, I can help you with that! Based on the m...",66.52,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
2,"In the context of the Apriori algorithm, what ...","The downward closure property of support, in t...",68.1,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",54.97,[R for Machine Learning\n\nAllison Chang\n\n1 ...
4,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,47.87,[]


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"According to the course materials, the name ""l...",[f(xi)\n• “logistic loss” log2 1 + e\n⇐= logis...,The name 'logistic' was revived by Yule in a p...,70.63,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Okay, I can help you with that! Based on the m...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...",According to the Bias-Variance decomposition t...,66.52,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...","The downward closure property of support, in t...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,68.1,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"Okay, I can help you with that!\n\nBased on th...",[R for Machine Learning\n\nAllison Chang\n\n1 ...,The 'e1071' package is the additional R packag...,54.97,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,[],"For classification splitting, the CART algorit...",47.87,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,"Okay, I can definitely help you understand the...","[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...","Before starting, the user must choose the valu...",52.8,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,That's a great question! It touches on a pract...,[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,62.82,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,The course materials explain that while Inform...,"[-section risk, etc.\n\nHow to build a decisio...",Information Gain tends to favor attributes tha...,78.47,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down how Lemma 1 shows that ...",[an objects assigned to different clusters. We...,Lemma 1 proves that for any set C and any repr...,53.8,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"Okay, I can explain the three options C4.5 con...","[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",52.98,


In [12]:
RUN_LABEL = "statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp07" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp07'
Results will be saved in: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp07.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): []...


Test progress: 100%|██████████| 15/15 [12:37<00:00, 50.49s/it]

Success! JSON saved at: statistics\results_statistics_results_chunk_512_overlap_100_text_to_embedding_small_gemini20_flash_topk10_dist08_temp07.json

Średni czas odpowiedzi: 48.47s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the materials I have, it was Raymond ...",92.86,[]
1,What are the three components that make up the...,"Okay, I can help you with that!\n\nThe decompo...",52.5,[]
2,"In the context of the Apriori algorithm, what ...","The 'downward closure' property of support, in...",54.19,[]
3,Which R package is required to implement Suppo...,"Based on the tutorial content, the `e1071` pac...",43.79,[]
4,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,42.09,[]


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,statistics_results_chunk_512_overlap_100_text_...,Who revived the term 'logistic' in 1925 to des...,"Based on the materials I have, it was Raymond ...",[],The name 'logistic' was revived by Yule in a p...,92.86,
1,statistics_results_chunk_512_overlap_100_text_...,What are the three components that make up the...,"Okay, I can help you with that!\n\nThe decompo...",[],According to the Bias-Variance decomposition t...,52.5,
2,statistics_results_chunk_512_overlap_100_text_...,"In the context of the Apriori algorithm, what ...","The 'downward closure' property of support, in...",[],Downward closure is a monotonicity property st...,54.19,
3,statistics_results_chunk_512_overlap_100_text_...,Which R package is required to implement Suppo...,"Based on the tutorial content, the `e1071` pac...",[],The 'e1071' package is the additional R packag...,43.79,
4,statistics_results_chunk_512_overlap_100_text_...,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,[],"For classification splitting, the CART algorit...",42.09,
5,statistics_results_chunk_512_overlap_100_text_...,Explain the two iterative steps of the K-Means...,"Okay, I can certainly help you understand the ...",[],"Before starting, the user must choose the valu...",22.65,
6,statistics_results_chunk_512_overlap_100_text_...,How does the Naïve Bayes algorithm handle high...,That's a great question! It touches on a commo...,[],Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,62.8,
7,statistics_results_chunk_512_overlap_100_text_...,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the logic behind using Gai...",[],Information Gain tends to favor attributes tha...,54.52,
8,statistics_results_chunk_512_overlap_100_text_...,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down why Lemma 1 in the clus...",[],Lemma 1 proves that for any set C and any repr...,22.25,
9,statistics_results_chunk_512_overlap_100_text_...,What are the three options C4.5 considers when...,"Okay, I can help you with that!\n\nBased on th...",[],"At each node, C4.5 recursively evaluates three...",46.88,


## Chat prompts tests
fixed parameters: CHAT_MODEL="google/gemini-2.0-flash-001", EMBED_MODEL="text-embedding-3-small", MAX_TOKENS="1024", RAG_CHUNK_OVERLAP="100", RAG_CHUNK_SIZE="512", RAG_DISTANCE_THRESHOLD="0.8"
,RAG_TOP_K="10", TEMPERATURE="0.3"


In [5]:
RUN_LABEL = "chat_prompt_baseline" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts"
)

Test Start: 'chat_prompt_baseline'
Results will be saved in: statistics/prompts\results_chat_prompt_baseline.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:49<00:00, 39.31s/it]


Success! JSON saved at: statistics/prompts\results_chat_prompt_baseline.json


TypeError: list indices must be integers or slices, not str

In [7]:
RUN_LABEL = "chat_prompt_fewshot" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts"
)

  output_folder="statistics\prompts"


Test Start: 'chat_prompt_fewshot'
Results will be saved in: statistics\prompts\results_chat_prompt_fewshot.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:37<00:00, 38.48s/it]

Success! JSON saved at: statistics\prompts\results_chat_prompt_fewshot.json





In [8]:
RUN_LABEL = "chat_prompt_cot" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts"
)

  output_folder="statistics\prompts"


Test Start: 'chat_prompt_cot'
Results will be saved in: statistics\prompts\results_chat_prompt_cot.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:37<00:00, 38.48s/it]

Success! JSON saved at: statistics\prompts\results_chat_prompt_cot.json





In [9]:
RUN_LABEL = "chat_prompt_socratic" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts"
)

  output_folder="statistics\prompts"


Test Start: 'chat_prompt_socratic'
Results will be saved in: statistics\prompts\results_chat_prompt_socratic.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:22<00:00, 37.52s/it]

Success! JSON saved at: statistics\prompts\results_chat_prompt_socratic.json





In [10]:
RUN_LABEL = "chat_prompt_retrieval-optimized" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts"
)

  output_folder="statistics\prompts"


Test Start: 'chat_prompt_retrieval-optimized'
Results will be saved in: statistics\prompts\results_chat_prompt_retrieval-optimized.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:37<00:00, 38.51s/it]

Success! JSON saved at: statistics\prompts\results_chat_prompt_retrieval-optimized.json





In [11]:
RUN_LABEL = "chat_prompt_error_correcting" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts"
)

  output_folder="statistics\prompts"


Test Start: 'chat_prompt_error_correcting'
Results will be saved in: statistics\prompts\results_chat_prompt_error_correcting.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:37<00:00, 38.47s/it]

Success! JSON saved at: statistics\prompts\results_chat_prompt_error_correcting.json





In [12]:
RUN_LABEL = "chat_prompt_adaptive" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts"
)

  output_folder="statistics\prompts"


Test Start: 'chat_prompt_adaptive'
Results will be saved in: statistics\prompts\results_chat_prompt_adaptive.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:34<00:00, 38.31s/it]

Success! JSON saved at: statistics\prompts\results_chat_prompt_adaptive.json





In [13]:
RUN_LABEL = "chat_prompt_analogy" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts"
)

  output_folder="statistics\prompts"


Test Start: 'chat_prompt_analogy'
Results will be saved in: statistics\prompts\results_chat_prompt_analogy.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['f(xi)\n• “logistic loss” log2 1 + e\n⇐= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 — tj <0)\nexp)\n3 04,1 +exp yo)\n—— max(0,1-¥"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090...


Test progress: 100%|██████████| 15/15 [09:50<00:00, 39.34s/it]

Success! JSON saved at: statistics\prompts\results_chat_prompt_analogy.json





In [5]:
RUN_LABEL = "chat_prompt_mode_quick" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts",
    mode="quick"
)

  output_folder="statistics\prompts",


Test Start: 'chat_prompt_mode_quick'
Results will be saved in: statistics\prompts\results_chat_prompt_mode_quick.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer’s “The Origin of Logistic Regression”\n\nOrigins: 19th Century.\n\n• Studying growth of pop...


Test progress:  40%|████      | 6/15 [05:26<08:26, 56.32s/it]


SERVER ERROR: 502 for question: How does the Naïve Bayes algorithm handle high-dimensional data where a specific feature value might not appear in the training set for a given class?


Test progress: 100%|██████████| 15/15 [12:35<00:00, 50.38s/it]

Success! JSON saved at: statistics\prompts\results_chat_prompt_mode_quick.json





In [6]:
RUN_LABEL = "chat_prompt_mode_deep" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts",
    mode="deep"
)

  output_folder="statistics\prompts",


Test Start: 'chat_prompt_mode_deep'
Results will be saved in: statistics\prompts\results_chat_prompt_mode_deep.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer’s “The Origin of Logistic Regression”\n\nOrigins: 19th Century.\n\n• Studying growth of pop...


Test progress: 100%|██████████| 15/15 [14:47<00:00, 59.16s/it]

Success! JSON saved at: statistics\prompts\results_chat_prompt_mode_deep.json





In [7]:
RUN_LABEL = "chat_prompt_mode_coach" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics\prompts",
    mode="coach"
)

  output_folder="statistics\prompts",


Test Start: 'chat_prompt_mode_coach'
Results will be saved in: statistics\prompts\results_chat_prompt_mode_coach.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer’s “The Origin of Logistic Regression”\n\nOrigins: 19th Century.\n\n• Studying growth of pop...


Test progress: 100%|██████████| 15/15 [14:28<00:00, 57.93s/it]

Success! JSON saved at: statistics\prompts\results_chat_prompt_mode_coach.json





# final configuration test after reranker implementation

In [13]:
RUN_LABEL = "final_configuration" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'final_configuration'
Results will be saved in: statistics\results_final_configuration.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['es of the limiting population Ωof 6.6 million for Belgium and 40 million\nfor France were a little off- these populations are now 11 million for Belgium\nand 65 million for France. In another paper,...


Test progress: 100%|██████████| 15/15 [13:46<00:00, 55.10s/it]

Success! JSON saved at: statistics\results_final_configuration.json

Średni czas odpowiedzi: 53.09s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, the name ""logis...",33.51,[es of the limiting population Ωof 6.6 million...
1,What are the three components that make up the...,"Okay, let's break down the decomposition of th...",51.09,[xpectation is taken with respect to a new ran...
2,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support is ...",58.53,[We want to find all strong rules. These are r...
3,Which R package is required to implement Suppo...,"Based on the provided text, the `e1071` packag...",51.15,[4.6 Na¨ıve Bayes\n\nInstall and load the e107...
4,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,55.78,[]


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,final_configuration,Who revived the term 'logistic' in 1925 to des...,"Based on the course materials, the name ""logis...",[es of the limiting population Ωof 6.6 million...,The name 'logistic' was revived by Yule in a p...,33.51,
1,final_configuration,What are the three components that make up the...,"Okay, let's break down the decomposition of th...",[xpectation is taken with respect to a new ran...,According to the Bias-Variance decomposition t...,51.09,
2,final_configuration,"In the context of the Apriori algorithm, what ...","The ""downward closure"" property of support is ...",[We want to find all strong rules. These are r...,Downward closure is a monotonicity property st...,58.53,
3,final_configuration,Which R package is required to implement Suppo...,"Based on the provided text, the `e1071` packag...",[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,The 'e1071' package is the additional R packag...,51.15,
4,final_configuration,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,[],"For classification splitting, the CART algorit...",55.78,
5,final_configuration,Explain the two iterative steps of the K-Means...,"Okay, I can help you understand the K-Means al...",[an objects assigned to different clusters. We...,"Before starting, the user must choose the valu...",55.29,
6,final_configuration,How does the Naïve Bayes algorithm handle high...,That's a great question! It relates to a commo...,[],Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,64.67,
7,final_configuration,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the reasoning behind using...",[ log2(1 −p).\n\nIf the probabilities were [1/...,Information Gain tends to favor attributes tha...,49.88,
8,final_configuration,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down why Lemma 1 in the clus...",[],Lemma 1 proves that for any set C and any repr...,59.6,
9,final_configuration,What are the three options C4.5 considers when...,"Okay, I can help you with that!\n\nBased on th...","[Gin\n\nf p bet\n\nclassifica\n\ny,\n\nrop\n\n...","At each node, C4.5 recursively evaluates three...",52.18,


In [14]:
RUN_LABEL = "final_configuration_improved_reranker" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'final_configuration_improved_reranker'
Results will be saved in: statistics\results_final_configuration_improved_reranker.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['onomer turned statistician,\nknew it produced impossible values and asked his pupil Pierre-Fran¸cois Verhulst (1804-1849) to help him work on a more realistic model. They chose\n\nd W(t) = βW(t) −Φ(...


Test progress: 100%|██████████| 15/15 [13:00<00:00, 52.04s/it]

Success! JSON saved at: statistics\results_final_configuration_improved_reranker.json

Średni czas odpowiedzi: 50.03s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"According to the course materials, Yule revive...",30.78,"[onomer turned statistician,\nknew it produced..."
1,What are the three components that make up the...,"Okay, let's break down the components of the e...",42.4,[lp us figure out how to minimize it.\nThis is...
2,"In the context of the Apriori algorithm, what ...","The downward closure property of support, in t...",55.04,[Rule Mining and the Apriori Algorithm\n\nMIT ...
3,Which R package is required to implement Suppo...,Based on the course materials provided:\n\n* ...,58.96,"[nclude “base,” “utils,” “graphics,” and “stat..."
4,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,47.35,"[A decisi\n\non tree for t\n\nant exampl e, us..."


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,final_configuration_improved_reranker,Who revived the term 'logistic' in 1925 to des...,"According to the course materials, Yule revive...","[onomer turned statistician,\nknew it produced...",The name 'logistic' was revived by Yule in a p...,30.78,
1,final_configuration_improved_reranker,What are the three components that make up the...,"Okay, let's break down the components of the e...",[lp us figure out how to minimize it.\nThis is...,According to the Bias-Variance decomposition t...,42.4,
2,final_configuration_improved_reranker,"In the context of the Apriori algorithm, what ...","The downward closure property of support, in t...",[Rule Mining and the Apriori Algorithm\n\nMIT ...,Downward closure is a monotonicity property st...,55.04,
3,final_configuration_improved_reranker,Which R package is required to implement Suppo...,Based on the course materials provided:\n\n* ...,"[nclude “base,” “utils,” “graphics,” and “stat...",The 'e1071' package is the additional R packag...,58.96,
4,final_configuration_improved_reranker,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,"[A decisi\n\non tree for t\n\nant exampl e, us...","For classification splitting, the CART algorit...",47.35,
5,final_configuration_improved_reranker,Explain the two iterative steps of the K-Means...,"Okay, I can help you understand the K-Means al...",[an objects assigned to different clusters. We...,"Before starting, the user must choose the valu...",39.01,
6,final_configuration_improved_reranker,How does the Naïve Bayes algorithm handle high...,That's a great question! It touches on a pract...,[s\nover an unknown distribution over X × Y.\n...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,65.66,
7,final_configuration_improved_reranker,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the logic behind using the...",[ log2(1 −p).\n\nIf the probabilities were [1/...,Information Gain tends to favor attributes tha...,40.04,
8,final_configuration_improved_reranker,"According to Lemma 1 in the clustering notes, ...","Okay, I can help you understand why the mean i...",[The K-Means Algorithm\n\nChoose the value of ...,Lemma 1 proves that for any set C and any repr...,53.16,
9,final_configuration_improved_reranker,What are the three options C4.5 considers when...,"Okay, I can help you with that! Based on the c...","[-section risk, etc.\n\nHow to build a decisio...","At each node, C4.5 recursively evaluates three...",55.54,


In [15]:
RUN_LABEL = "final_configuration_threshold_minus_5" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'final_configuration_threshold_minus_5'
Results will be saved in: statistics\results_final_configuration_threshold_minus_5.json


Test progress:   0%|          | 0/15 [00:00<?, ?it/s]


 First context preview (truncated): ['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer’s “The Origin of Logistic Regression”\n\nOrigins: 19th Century.\n\n• Studying growth of pop...


Test progress:  13%|█▎        | 2/15 [01:33<10:14, 47.29s/it]


SERVER ERROR: 504 for question: In the context of the Apriori algorithm, what is the 'downward closure' property of support?


Test progress:  67%|██████▋   | 10/15 [10:04<05:03, 60.63s/it]


SERVER ERROR: 502 for question: Compare the fundamental goals and requirements of supervised learning versus unsupervised learning using examples from the text.


Test progress: 100%|██████████| 15/15 [14:01<00:00, 56.07s/it]

Success! JSON saved at: statistics\results_final_configuration_threshold_minus_5.json

Średni czas odpowiedzi: 46.65s





Unnamed: 0,question,answer,latency,contexts
0,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, Yule revived the n...",41.32,[Logistic Regression\n\nMIT 15.097 Course Note...
1,What are the three components that make up the...,"Okay, let's break down the decomposition of th...",48.04,[lp us figure out how to minimize it.\nThis is...
2,"In the context of the Apriori algorithm, what ...",ERROR: HTTP 504,0.0,[]
3,Which R package is required to implement Suppo...,Based on the provided text:\n\n* **Support V...,35.62,"[nclude “base,” “utils,” “graphics,” and “stat..."
4,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,38.27,"[A decisi\n\non tree for t\n\nant exampl e, us..."


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,final_configuration_threshold_minus_5,Who revived the term 'logistic' in 1925 to des...,"Based on the provided text, Yule revived the n...",[Logistic Regression\n\nMIT 15.097 Course Note...,The name 'logistic' was revived by Yule in a p...,41.32,
1,final_configuration_threshold_minus_5,What are the three components that make up the...,"Okay, let's break down the decomposition of th...",[lp us figure out how to minimize it.\nThis is...,According to the Bias-Variance decomposition t...,48.04,
2,final_configuration_threshold_minus_5,"In the context of the Apriori algorithm, what ...",ERROR: HTTP 504,[],Downward closure is a monotonicity property st...,0.0,HTTP 504
3,final_configuration_threshold_minus_5,Which R package is required to implement Suppo...,Based on the provided text:\n\n* **Support V...,"[nclude “base,” “utils,” “graphics,” and “stat...",The 'e1071' package is the additional R packag...,35.62,
4,final_configuration_threshold_minus_5,What specific node impurity measure is used by...,The CART (Classification and Regression Trees)...,"[A decisi\n\non tree for t\n\nant exampl e, us...","For classification splitting, the CART algorit...",38.27,
5,final_configuration_threshold_minus_5,Explain the two iterative steps of the K-Means...,"Okay, I can help you understand the K-Means al...",[an objects assigned to different clusters. We...,"Before starting, the user must choose the valu...",68.39,
6,final_configuration_threshold_minus_5,How does the Naïve Bayes algorithm handle high...,"Okay, I can help you understand how the Naïve ...",[4.6 Na¨ıve Bayes\n\nInstall and load the e107...,Naïve Bayes uses 'Laplace smoothing' (a Bayesi...,45.53,
7,final_configuration_threshold_minus_5,Describe the logic behind using the 'Gain Rati...,"Okay, I can explain the logic behind using the...","[-section risk, etc.\n\nHow to build a decisio...",Information Gain tends to favor attributes tha...,84.53,
8,final_configuration_threshold_minus_5,"According to Lemma 1 in the clustering notes, ...","Okay, let's break down why Lemma 1 implies tha...",[Clustering\n\nMIT 15.097 Course Notes Cynthia...,Lemma 1 proves that for any set C and any repr...,64.98,
9,final_configuration_threshold_minus_5,What are the three options C4.5 considers when...,"Okay, I can help you with that! According to t...",[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,"At each node, C4.5 recursively evaluates three...",46.78,


In [16]:
INPUT_FILE_EXTENDED = "extenede_question_answers.json"

In [17]:
RUN_LABEL = "extended_test_statistics" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE_EXTENDED,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'extended_test_statistics'
Results will be saved in: statistics\results_extended_test_statistics.json


Test progress:   0%|          | 0/50 [00:00<?, ?it/s]


 First context preview (truncated): ['upervised learning\n\nTraining: training data are input, and model f is the output.\n\ni=1 =⇒ Algorithm =⇒ f.\n\n{(xi, yi)}m\n\nTesting: You want to predict y for a new x, where (x, y) comes from th...


Test progress:  34%|███▍      | 17/50 [13:46<26:52, 48.85s/it]


SERVER ERROR: 502 for question: Compare Decision Trees and Association Rule Mining in terms of the fundamental machine learning categories (Supervised vs. Unsupervised) they belong to.


Test progress:  38%|███▊      | 19/50 [14:38<20:13, 39.13s/it]


SERVER ERROR: 502 for question: Based on the course materials, how would you implement a CART model in R and evaluate its performance on new data?


Test progress:  64%|██████▍   | 32/50 [24:42<15:08, 50.46s/it]


SERVER ERROR: 502 for question: Which R command is used to load a package like 'e1071' into your current session, and how does this process differ from installation?


Test progress: 100%|██████████| 50/50 [38:47<00:00, 46.56s/it]

Success! JSON saved at: statistics\results_extended_test_statistics.json

Średni czas odpowiedzi: 44.50s





Unnamed: 0,question,answer,latency,contexts
0,What is the fundamental difference between sup...,"Okay, I can help you understand the difference...",30.2,[upervised learning\n\nTraining: training data...
1,How is 'training error' defined in the course ...,"Based on the course materials, training error ...",43.56,"[performs (on average) on {(xi, yi)}i.\n\nm m ..."
2,What are the three specific components that ma...,"Okay, I can help you with that!\n\nBased on th...",56.4,[It turns out that the f ∗ that minimizes the ...
3,Explain the concept of 'overfitting' and descr...,"Okay, I can certainly help you understand the ...",50.38,[Illustration\n\nIn one of the figures in the ...
4,How does the 'Structural Risk Minimization' (S...,"Okay, I can help you understand how Structural...",43.82,[Illustration\n\nIn one of the figures in the ...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,extended_test_statistics,What is the fundamental difference between sup...,"Okay, I can help you understand the difference...",[upervised learning\n\nTraining: training data...,Hi there! I'd be happy to help you distinguish...,30.2,
1,extended_test_statistics,How is 'training error' defined in the course ...,"Based on the course materials, training error ...","[performs (on average) on {(xi, yi)}i.\n\nm m ...",That's a great question to start with! Trainin...,43.56,
2,extended_test_statistics,What are the three specific components that ma...,"Okay, I can help you with that!\n\nBased on th...",[It turns out that the f ∗ that minimizes the ...,"This is a bit complex, but let's break it down...",56.4,
3,extended_test_statistics,Explain the concept of 'overfitting' and descr...,"Okay, I can certainly help you understand the ...",[Illustration\n\nIn one of the figures in the ...,Overfitting is a very important concept to und...,50.38,
4,extended_test_statistics,How does the 'Structural Risk Minimization' (S...,"Okay, I can help you understand how Structural...",[Illustration\n\nIn one of the figures in the ...,I'm glad you asked! Structural Risk Minimizati...,43.82,
5,extended_test_statistics,"In the context of Decision Trees, how does the...","Okay, I can explain how pruning relates to ove...",[Illustration\n\nIn one of the figures in the ...,Pruning is a wonderful example of managing the...,53.36,
6,extended_test_statistics,Explain the Bias-Variance tradeoff using the e...,"Okay, I can explain the bias-variance tradeoff...",[Computational learning theory addresses how t...,This is such a helpful analogy! Imagine you ar...,44.11,
7,extended_test_statistics,Why can't we simply calculate the bias or vari...,It's a great question to ask why we can't dire...,[Computational learning theory addresses how t...,You've hit on a very practical challenge! In r...,55.21,
8,extended_test_statistics,How does the 'Regularized Learning Expression'...,"Okay, I can help you understand how the Regula...",[Illustration\n\nIn one of the figures in the ...,I'd love to explain this 'omnipresent' formula...,42.56,
9,extended_test_statistics,Compare how K-Nearest Neighbors (K-NN) and Dec...,"Okay, I can help you understand how K-NN and D...","[K-NN\n\n15.097 MIT, Spring 2012, Cynthia Rudi...","Both algorithms have ways to handle noise, but...",51.89,


In [18]:
RUN_LABEL = "extended_test_statistics_threshold_08" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE_EXTENDED,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: 'extended_test_statistics_threshold_08'
Results will be saved in: statistics\results_extended_test_statistics_threshold_08.json


Test progress:   0%|          | 0/50 [00:00<?, ?it/s]


 First context preview (truncated): ['Fundamentals of Learning\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nImportant Problems in Data Mining\n\n1. Finding patterns (correlations) in large datasets\n-e.g. (Diapers → Beer). Use Apriori!\n...


Test progress:   8%|▊         | 4/50 [03:19<37:41, 49.16s/it]


SERVER ERROR: 502 for question: How does the 'Structural Risk Minimization' (SRM) framework suggest we avoid overfitting?


Test progress:  62%|██████▏   | 31/50 [26:06<16:28, 52.04s/it]


SERVER ERROR: 502 for question: What is a 'centroid' in the K-Means algorithm, and how is its location mathematically determined for a cluster C?


Test progress:  86%|████████▌ | 43/50 [35:45<06:02, 51.78s/it]


SERVER ERROR: 502 for question: How does the K-Means algorithm ensure that its cost function monotonically decreases during its execution steps?


Test progress: 100%|██████████| 50/50 [41:11<00:00, 49.43s/it]

Success! JSON saved at: statistics\results_extended_test_statistics_threshold_08.json

Średni czas odpowiedzi: 47.39s





Unnamed: 0,question,answer,latency,contexts
0,What is the fundamental difference between sup...,"Okay, I can certainly help you understand the ...",45.49,[Fundamentals of Learning\n\nMIT 15.097 Course...
1,How is 'training error' defined in the course ...,"In the course materials, ""training error"" is d...",47.73,[upervised learning\n\nTraining: training data...
2,What are the three specific components that ma...,"Okay, I can help you understand the decomposit...",59.33,"[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n..."
3,Explain the concept of 'overfitting' and descr...,"Okay, I can certainly help you understand the ...",38.91,[upervised learning\n\nTraining: training data...
4,How does the 'Structural Risk Minimization' (S...,ERROR: HTTP 502,0.0,[]


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,extended_test_statistics_threshold_08,What is the fundamental difference between sup...,"Okay, I can certainly help you understand the ...",[Fundamentals of Learning\n\nMIT 15.097 Course...,Hi there! I'd be happy to help you distinguish...,45.49,
1,extended_test_statistics_threshold_08,How is 'training error' defined in the course ...,"In the course materials, ""training error"" is d...",[upervised learning\n\nTraining: training data...,That's a great question to start with! Trainin...,47.73,
2,extended_test_statistics_threshold_08,What are the three specific components that ma...,"Okay, I can help you understand the decomposit...","[• Input: {(xi, yi)}m\n\ni=1, xi ∈X , yi ∈ R\n...","This is a bit complex, but let's break it down...",59.33,
3,extended_test_statistics_threshold_08,Explain the concept of 'overfitting' and descr...,"Okay, I can certainly help you understand the ...",[upervised learning\n\nTraining: training data...,Overfitting is a very important concept to und...,38.91,
4,extended_test_statistics_threshold_08,How does the 'Structural Risk Minimization' (S...,ERROR: HTTP 502,[],I'm glad you asked! Structural Risk Minimizati...,0.0,HTTP 502
5,extended_test_statistics_threshold_08,"In the context of Decision Trees, how does the...","Okay, I can help you understand how pruning re...",[Illustration\n\nIn one of the figures in the ...,Pruning is a wonderful example of managing the...,59.99,
6,extended_test_statistics_threshold_08,Explain the Bias-Variance tradeoff using the e...,"Okay, I can explain the bias-variance tradeoff...",[Computational learning theory addresses how t...,This is such a helpful analogy! Imagine you ar...,40.36,
7,extended_test_statistics_threshold_08,Why can't we simply calculate the bias or vari...,It's a great question to ask why we can't just...,[Computational learning theory addresses how t...,You've hit on a very practical challenge! In r...,56.45,
8,extended_test_statistics_threshold_08,How does the 'Regularized Learning Expression'...,"Okay, I can help you understand how the Regula...",[Illustration\n\nIn one of the figures in the ...,I'd love to explain this 'omnipresent' formula...,51.24,
9,extended_test_statistics_threshold_08,Compare how K-Nearest Neighbors (K-NN) and Dec...,"Okay, I can help you understand how K-NN and D...","[K-NN\n\n15.097 MIT, Spring 2012, Cynthia Rudi...","Both algorithms have ways to handle noise, but...",57.42,


In [19]:
INPUT_FILE_EXTENDED = "30_questions_answers_statistics.json"

In [20]:
RUN_LABEL ="30_questions_answers_statistics_threshold08"
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE_EXTENDED,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: '30_questions_answers_statistics_threshold08'
Results will be saved in: statistics\results_30_questions_answers_statistics_threshold08.json


Test progress:   0%|          | 0/31 [00:00<?, ?it/s]


 First context preview (truncated): ['some practice, this kind of environment makes it easy to quickly code scripts and functions for various statistical purposes. To get the most out of this tutorial, follow the examples\nby typing the...


Test progress: 100%|██████████| 31/31 [21:19<00:00, 41.29s/it]

Success! JSON saved at: statistics\results_30_questions_answers_statistics_threshold08.json

Średni czas odpowiedzi: 39.28s





Unnamed: 0,question,answer,latency,contexts
0,How do I create a vector in R?,"Okay, I can help you with that! Based on the m...",19.99,"[some practice, this kind of environment makes..."
1,What is the difference between installing and ...,"Okay, I can certainly explain the difference b...",26.76,[ and new algorithms are constantly added to t...
2,When should I use a data frame instead of a ma...,"Based on the course materials, here's when you...",39.41,[You are encouraged to download your own datas...
3,"In K-Means clustering, what is the mathematica...",The K-Means algorithm aims to find cluster cen...,48.84,"[ in other clusters.\n\n• Kmeans, Kmedians\n\n..."
4,Can you explain Lemma 1 from the clustering no...,"Okay, let's break down Lemma 1 from the cluste...",31.05,[Clustering\n\nMIT 15.097 Course Notes Cynthia...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,30_questions_answers_statistics_threshold08,How do I create a vector in R?,"Okay, I can help you with that! Based on the m...","[some practice, this kind of environment makes...",According to the 'R for Machine Learning' docu...,19.99,
1,30_questions_answers_statistics_threshold08,What is the difference between installing and ...,"Okay, I can certainly explain the difference b...",[ and new algorithms are constantly added to t...,"Based on the R tutorial, you only need to inst...",26.76,
2,30_questions_answers_statistics_threshold08,When should I use a data frame instead of a ma...,"Based on the course materials, here's when you...",[You are encouraged to download your own datas...,A data frame is similar to a matrix but is pre...,39.41,
3,30_questions_answers_statistics_threshold08,"In K-Means clustering, what is the mathematica...",The K-Means algorithm aims to find cluster cen...,"[ in other clusters.\n\n• Kmeans, Kmedians\n\n...",The goal of K-Means is to minimize the cost de...,48.84,
4,30_questions_answers_statistics_threshold08,Can you explain Lemma 1 from the clustering no...,"Okay, let's break down Lemma 1 from the cluste...",[Clustering\n\nMIT 15.097 Course Notes Cynthia...,Lemma 1 states that for any set $C$ and any re...,31.05,
5,30_questions_answers_statistics_threshold08,What are the two main steps of the K-Means alg...,The K-Means algorithm iteratively refines clus...,"[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...",The algorithm repeats two steps: 1. Assignment...,44.42,
6,30_questions_answers_statistics_threshold08,Why does the cost in K-Means monotonically dec...,"Okay, let's break down why the cost in the K-M...",[an objects assigned to different clusters. We...,"According to Lemma 3, the cost decreases becau...",50.08,
7,30_questions_answers_statistics_threshold08,What is a major disadvantage of K-Means that H...,"Based on the provided context, a major disadva...","[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...",A major issue with K-Means is that as the valu...,28.02,
8,30_questions_answers_statistics_threshold08,What are the three measures of 'node impurity'...,"Okay, I can help you with that!\n\nBased on th...",[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,The notes list three measures: 1. Entropy: $-p...,38.02,
9,30_questions_answers_statistics_threshold08,How does C4.5 calculate 'Information Gain' for...,"Okay, I can explain how C4.5 calculates Inform...",[ause it has a lot of “information”:\n\nA deci...,Information Gain is the expected reduction in ...,43.21,


In [21]:
RUN_LABEL ="30_questions_answers_statistics_threshold065"
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE_EXTENDED,
    moodle_token=MOODLE_TOKEN,
    output_folder="statistics")

Test Start: '30_questions_answers_statistics_threshold065'
Results will be saved in: statistics\results_30_questions_answers_statistics_threshold065.json


Test progress:   0%|          | 0/31 [00:00<?, ?it/s]


 First context preview (truncated): ['some practice, this kind of environment makes it easy to quickly code scripts and functions for various statistical purposes. To get the most out of this tutorial, follow the examples\nby typing the...


Test progress: 100%|██████████| 31/31 [21:20<00:00, 41.30s/it]

Success! JSON saved at: statistics\results_30_questions_answers_statistics_threshold065.json

Średni czas odpowiedzi: 39.29s





Unnamed: 0,question,answer,latency,contexts
0,How do I create a vector in R?,"Okay, I can help you with that! Based on the m...",22.09,"[some practice, this kind of environment makes..."
1,What is the difference between installing and ...,"Okay, I can definitely clarify the difference ...",32.36,[ and new algorithms are constantly added to t...
2,When should I use a data frame instead of a ma...,"Based on the course materials, here's when you...",33.65,[You are encouraged to download your own datas...
3,"In K-Means clustering, what is the mathematica...",The K-Means algorithm aims to find cluster cen...,44.39,"[ in other clusters.\n\n• Kmeans, Kmedians\n\n..."
4,Can you explain Lemma 1 from the clustering no...,"Okay, I can definitely help you understand Lem...",34.78,[Clustering\n\nMIT 15.097 Course Notes Cynthia...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,30_questions_answers_statistics_threshold065,How do I create a vector in R?,"Okay, I can help you with that! Based on the m...","[some practice, this kind of environment makes...",According to the 'R for Machine Learning' docu...,22.09,
1,30_questions_answers_statistics_threshold065,What is the difference between installing and ...,"Okay, I can definitely clarify the difference ...",[ and new algorithms are constantly added to t...,"Based on the R tutorial, you only need to inst...",32.36,
2,30_questions_answers_statistics_threshold065,When should I use a data frame instead of a ma...,"Based on the course materials, here's when you...",[You are encouraged to download your own datas...,A data frame is similar to a matrix but is pre...,33.65,
3,30_questions_answers_statistics_threshold065,"In K-Means clustering, what is the mathematica...",The K-Means algorithm aims to find cluster cen...,"[ in other clusters.\n\n• Kmeans, Kmedians\n\n...",The goal of K-Means is to minimize the cost de...,44.39,
4,30_questions_answers_statistics_threshold065,Can you explain Lemma 1 from the clustering no...,"Okay, I can definitely help you understand Lem...",[Clustering\n\nMIT 15.097 Course Notes Cynthia...,Lemma 1 states that for any set $C$ and any re...,34.78,
5,30_questions_answers_statistics_threshold065,What are the two main steps of the K-Means alg...,The K-Means algorithm iteratively refines clus...,"[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...",The algorithm repeats two steps: 1. Assignment...,45.51,
6,30_questions_answers_statistics_threshold065,Why does the cost in K-Means monotonically dec...,"Okay, let's break down why the cost in K-Means...",[an objects assigned to different clusters. We...,"According to Lemma 3, the cost decreases becau...",43.9,
7,30_questions_answers_statistics_threshold065,What is a major disadvantage of K-Means that H...,"Based on the provided context, a major disadva...","[ame(cbind(y,x1,x2)))\n\n4.4\nK-Means Clusteri...",A major issue with K-Means is that as the valu...,42.28,
8,30_questions_answers_statistics_threshold065,What are the three measures of 'node impurity'...,"Okay, I can help you with that!\n\nBased on th...",[Decision Trees\n\nMIT 15.097 Course Notes Cyn...,The notes list three measures: 1. Entropy: $-p...,41.28,
9,30_questions_answers_statistics_threshold065,How does C4.5 calculate 'Information Gain' for...,"Okay, I can explain how C4.5 calculates Inform...",[ause it has a lot of “information”:\n\nA deci...,Information Gain is the expected reduction in ...,34.37,


## quiz prompts

In [10]:
QUIZ_API_URL = "https://chatbot-backend-production-4b66.up.railway.app/quiz/generate/11"
QUIZ_COURSE_ID = "11"
QUIZ_NUM_QUESTIONS = 10

In [11]:
import datetime as dt

In [12]:
def run_quiz_test_final(run_label, api_url, num_questions=10, topic=None, moodle_token=None, material_ids=None):
    """
    Final quiz test function - tests general (full course), topic-specific, lub quiz z wybranych materiałów.
    Args:
        topic: quiz tematyczny (None = quiz ogólny)
        material_ids: lista ID materiałów (np. [5,6]) - jeśli podana, quiz tylko z tych materiałów
    """
    import os

    file_tag = f"_{run_label}" if run_label else ""
    output_folder = "prompts_quizzes"
    os.makedirs(output_folder, exist_ok=True)
    output_filename = os.path.join(output_folder, f"quiz_results{file_tag}.json")
    print(f"Quiz Test Start '{run_label}'")
    print(f"Results will be saved in file: {output_filename}")
    results = []
    headers = {"Content-Type": "application/json"}
    if moodle_token:
        headers["Authorization"] = f"Bearer {moodle_token}"

    # Quiz z wybranych materiałów (np. 5 i 6)
    if material_ids is not None:
        print(f"Generating quiz ONLY for materials: {material_ids} ...")
        payload = {
            "materialIds": material_ids,
            "numQuestions": num_questions
        }
        start_time = time.time()
        quiz_data = None
        error_msg = None
        duration = 0
        try:
            response = requests.post(api_url, json=payload, headers=headers, timeout=300)
            if response.status_code == 200:
                duration = time.time() - start_time
                quiz_data = response.json()
            else:
                error_msg = f"HTTP {response.status_code}"
                print(f"SERVER ERROR: {response.status_code} for materials {material_ids} quiz")
        except Exception as e:
            error_msg = str(e)
            duration = 300
            print(f"\nTimeout for materials {material_ids} quiz")
        results.append({
            "config_label": run_label,
            "test_type": f"materials_{'_'.join(map(str, material_ids))}",
            "materialIds": material_ids,
            "quiz_data": quiz_data,
            "num_questions_requested": num_questions,
            "num_questions_generated": len(quiz_data.get("questions", [])) if quiz_data else 0,
            "latency": round(duration, 2),
            "error": error_msg
        })
        print("Generating quiz 2/3: Full Course (General)...")
        payload_general = {
            "materialIds": [],
            "topic": topic,
            "numQuestions": num_questions
        }
        start_time = time.time()
        quiz_data = None
        error_msg = None
        duration = 0
        try:
            response = requests.post(api_url, json=payload_general, headers=headers, timeout=300)
            if response.status_code == 200:
                duration = time.time() - start_time
                quiz_data = response.json()
            else:
                error_msg = f"HTTP {response.status_code}"
                print(f"SERVER ERROR: {response.status_code} for general quiz")
        except Exception as e:
            error_msg = str(e)
            duration = 300
            print(f"\nTimeout for general quiz")
        results.append({
            "config_label": run_label,
            "test_type": "full_course",
            "topic": None,
            "quiz_data": quiz_data,
            "num_questions_requested": num_questions,
            "num_questions_generated": len(quiz_data.get("questions", [])) if quiz_data else 0,
            "latency": round(duration, 2),
            "error": error_msg
        })
        time.sleep(3.0)
        # Test 2: Topic-specific quiz
        if topic:
            print(f"Generating quiz 3/3: Topic '{topic}' ...")
            payload_topic = {
                "materialIds": [],
                "topic": topic,
                "numQuestions": num_questions
            }
            start_time = time.time()
            quiz_data = None
            error_msg = None
            duration = 0
            try:
                response = requests.post(api_url, json=payload_topic, headers=headers, timeout=300)
                if response.status_code == 200:
                    duration = time.time() - start_time
                    quiz_data = response.json()
                else:
                    error_msg = f"HTTP {response.status_code}"
                    print(f"SERVER ERROR: {response.status_code} for topic: {topic}")
            except Exception as e:
                error_msg = str(e)
                duration = 300
                print(f"\nTimeout for topic: {topic}")
            results.append({
                "config_label": run_label,
                "test_type": "topic",
                "topic": topic,
                "quiz_data": quiz_data,
                "num_questions_requested": num_questions,
                "num_questions_generated": len(quiz_data.get("questions", [])) if quiz_data else 0,
                "latency": round(duration, 2),
                "error": error_msg
            })
            time.sleep(3.0)

    with open(output_filename, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    df = pd.DataFrame(results)
    try:
        import openpyxl
        excel_name = output_filename.replace(".json", ".xlsx")
        df.to_excel(excel_name, index=False)
        print(f"Success! Results saved in file: {excel_name}")
    except Exception as e:
        print(f"Error saving Excel file: {e}")
    return df


In [29]:
FINAL_TOPIC = "decision trees"
FINAL_NUM_QUESTIONS = 15

In [30]:
RUN_LABEL = "quiz_prompt_baseline" 
df_results = run_quiz_test_final(
    run_label = RUN_LABEL,
    api_url=QUIZ_API_URL, 
    num_questions=FINAL_NUM_QUESTIONS, 
    topic=FINAL_TOPIC, 
    moodle_token=MOODLE_TOKEN, 
    material_ids=[5,6])

Quiz Test Start 'quiz_prompt_baseline'
Results will be saved in file: prompts_quizzes\quiz_results_quiz_prompt_baseline.json
Generating quiz ONLY for materials: [5, 6] ...
Generating quiz 2/3: Full Course (General)...
Generating quiz 3/3: Topic 'decision trees' ...
Success! Results saved in file: prompts_quizzes\quiz_results_quiz_prompt_baseline.xlsx


In [31]:
RUN_LABEL = "quiz_prompt_constrained" 
df_results = run_quiz_test_final(
    run_label = RUN_LABEL,
    api_url = QUIZ_API_URL, 
    num_questions=FINAL_NUM_QUESTIONS, 
    topic=FINAL_TOPIC, 
    moodle_token=MOODLE_TOKEN, 
    material_ids=[5,6])

Quiz Test Start 'quiz_prompt_constrained'
Results will be saved in file: prompts_quizzes\quiz_results_quiz_prompt_constrained.json
Generating quiz ONLY for materials: [5, 6] ...
Generating quiz 2/3: Full Course (General)...
Generating quiz 3/3: Topic 'decision trees' ...
Success! Results saved in file: prompts_quizzes\quiz_results_quiz_prompt_constrained.xlsx


In [32]:
RUN_LABEL = "quiz_prompt_socratic" 
df_results = run_quiz_test_final(
    run_label = RUN_LABEL,
    api_url = QUIZ_API_URL, 
    num_questions=FINAL_NUM_QUESTIONS, 
    topic=FINAL_TOPIC, 
    moodle_token=MOODLE_TOKEN, 
    material_ids=[5,6])

Quiz Test Start 'quiz_prompt_socratic'
Results will be saved in file: prompts_quizzes\quiz_results_quiz_prompt_socratic.json
Generating quiz ONLY for materials: [5, 6] ...
Generating quiz 2/3: Full Course (General)...
Generating quiz 3/3: Topic 'decision trees' ...
Success! Results saved in file: prompts_quizzes\quiz_results_quiz_prompt_socratic.xlsx


In [33]:
RUN_LABEL = "quiz_prompt_verbose" 
df_results = run_quiz_test_final(
    run_label = RUN_LABEL,
    api_url = QUIZ_API_URL, 
    num_questions=FINAL_NUM_QUESTIONS, 
    topic=FINAL_TOPIC, 
    moodle_token=MOODLE_TOKEN, 
    material_ids=[5,6])

Quiz Test Start 'quiz_prompt_verbose'
Results will be saved in file: prompts_quizzes\quiz_results_quiz_prompt_verbose.json
Generating quiz ONLY for materials: [5, 6] ...
Generating quiz 2/3: Full Course (General)...
Generating quiz 3/3: Topic 'decision trees' ...
Success! Results saved in file: prompts_quizzes\quiz_results_quiz_prompt_verbose.xlsx


In [34]:
RUN_LABEL = "quiz_prompt_blooms_taxonomy" 
df_results = run_quiz_test_final(
    run_label = RUN_LABEL,
    api_url = QUIZ_API_URL, 
    num_questions=FINAL_NUM_QUESTIONS, 
    topic=FINAL_TOPIC, 
    moodle_token=MOODLE_TOKEN, 
    material_ids=[5,6])

Quiz Test Start 'quiz_prompt_blooms_taxonomy'
Results will be saved in file: prompts_quizzes\quiz_results_quiz_prompt_blooms_taxonomy.json
Generating quiz ONLY for materials: [5, 6] ...
Generating quiz 2/3: Full Course (General)...
Generating quiz 3/3: Topic 'decision trees' ...
Success! Results saved in file: prompts_quizzes\quiz_results_quiz_prompt_blooms_taxonomy.xlsx


In [35]:
RUN_LABEL = "quiz_prompt_quiz_scenario" 
df_results = run_quiz_test_final(
    run_label = RUN_LABEL,
    api_url = QUIZ_API_URL, 
    num_questions=FINAL_NUM_QUESTIONS, 
    topic=FINAL_TOPIC, 
    moodle_token=MOODLE_TOKEN, 
    material_ids=[5,6])

Quiz Test Start 'quiz_prompt_quiz_scenario'
Results will be saved in file: prompts_quizzes\quiz_results_quiz_prompt_quiz_scenario.json
Generating quiz ONLY for materials: [5, 6] ...
Generating quiz 2/3: Full Course (General)...
Generating quiz 3/3: Topic 'decision trees' ...
Success! Results saved in file: prompts_quizzes\quiz_results_quiz_prompt_quiz_scenario.xlsx


In [37]:
RUN_LABEL = "quiz_prompt_quiz_misconception" 
df_results = run_quiz_test_final(
    run_label = RUN_LABEL,
    api_url = QUIZ_API_URL, 
    num_questions=FINAL_NUM_QUESTIONS, 
    topic=FINAL_TOPIC, 
    moodle_token=MOODLE_TOKEN, 
    material_ids=[5,6])

Quiz Test Start 'quiz_prompt_quiz_misconception'
Results will be saved in file: prompts_quizzes\quiz_results_quiz_prompt_quiz_misconception.json
Generating quiz ONLY for materials: [5, 6] ...
SERVER ERROR: 404 for materials [5, 6] quiz
Generating quiz 2/3: Full Course (General)...
Generating quiz 3/3: Topic 'decision trees' ...
SERVER ERROR: 404 for topic: decision trees
Success! Results saved in file: prompts_quizzes\quiz_results_quiz_prompt_quiz_misconception.xlsx


# philosophy course

In [None]:
API_URL = "https://chatbot-backend-production-4b66.up.railway.app/chat"  # Lub adres Railway
TEST_COURSE_ID = "15"                      # ID kursu z materiałami
MOODLE_TOKEN = ''               # Opcjonalnie token
INPUT_FILE = "question_answer_philosophy.json"

## test embbedding model 

In [23]:
RUN_LABEL = "philosophy_results_chunk_1000_overlap_200_text_to_embedding_small_gemini20_flash_topk10_dist065_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="philosophy")

Test Start: 'philosophy_results_chunk_1000_overlap_200_text_to_embedding_small_gemini20_flash_topk10_dist065_temp03'
Results will be saved in: philosophy\results_philosophy_results_chunk_1000_overlap_200_text_to_embedding_small_gemini20_flash_topk10_dist065_temp03.json


Test progress:   0%|          | 0/20 [00:00<?, ?it/s]


 First context preview (truncated): []...


Test progress: 100%|██████████| 20/20 [11:50<00:00, 35.52s/it]

Success! JSON saved at: philosophy\results_philosophy_results_chunk_1000_overlap_200_text_to_embedding_small_gemini20_flash_topk10_dist065_temp03.json

Średni czas odpowiedzi: 33.51s





Unnamed: 0,question,answer,latency,contexts
0,What does the term 'eudaimonia' mean in Aristo...,"Okay, that's a great question! Let's break dow...",25.32,[]
1,When was René Descartes born and when did he d...,"Okay, I can certainly help you with that!\n\nB...",33.74,[]
2,Aristotle describes a 'Function Argument' in B...,"I am sorry, but the provided text does not con...",54.92,[ologia’ which means defense. (As you surely n...
3,How does Socrates explain why someone might ap...,Socrates addresses this by distinguishing betw...,30.7,[ll harm what is most valuable. (How does inju...
4,Can you explain the three levels of potentiali...,"Okay, I can certainly clarify Aristotle's thre...",34.66,[24.01: Classics of Western Philosophy Prof. S...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,philosophy_results_chunk_1000_overlap_200_text...,What does the term 'eudaimonia' mean in Aristo...,"Okay, that's a great question! Let's break dow...",[],The term 'eudaimonia' is usually translated as...,25.32,
1,philosophy_results_chunk_1000_overlap_200_text...,When was René Descartes born and when did he d...,"Okay, I can certainly help you with that!\n\nB...",[],René Descartes was born in 1596 and died in 16...,33.74,
2,philosophy_results_chunk_1000_overlap_200_text...,Aristotle describes a 'Function Argument' in B...,"I am sorry, but the provided text does not con...",[ologia’ which means defense. (As you surely n...,The three stages of the Function Argument are:...,54.92,
3,philosophy_results_chunk_1000_overlap_200_text...,How does Socrates explain why someone might ap...,Socrates addresses this by distinguishing betw...,[ll harm what is most valuable. (How does inju...,Socrates argues that those who do not know thi...,30.7,
4,philosophy_results_chunk_1000_overlap_200_text...,Can you explain the three levels of potentiali...,"Okay, I can certainly clarify Aristotle's thre...",[24.01: Classics of Western Philosophy Prof. S...,The three levels are: (i) First potentiality (...,34.66,
5,philosophy_results_chunk_1000_overlap_200_text...,What is Descartes' reason for undertaking a pr...,Descartes undertakes the project of systematic...,[No doubt we have to reject the belief which s...,Descartes believes that to establish anything ...,39.05,
6,philosophy_results_chunk_1000_overlap_200_text...,Define the 'articulation condition' as it appl...,The 'articulation condition' in the context of...,[ologia’ which means defense. (As you surely n...,The articulation condition states that if a pe...,24.11,
7,philosophy_results_chunk_1000_overlap_200_text...,Aristotle uses the examples of an axe and an e...,The comparisons of the axe and the eye are use...,[d) Socrates knows that he does not have knowl...,The comparisons illustrate that the soul is a ...,33.9,
8,philosophy_results_chunk_1000_overlap_200_text...,Why does Socrates maintain that 'care of the s...,"Based on the course materials, Socrates believ...",[heses Socrates realized that his defense was ...,Socrates believes that a cared-for soul is the...,27.72,
9,philosophy_results_chunk_1000_overlap_200_text...,"In Meditation 1, why does Descartes think that...","Okay, let's break down why Descartes considers...",[Cartesian Doubt in Meditation 1\n\nI. Backgro...,Optical illusions are 'external interference' ...,35.63,


In [24]:
RUN_LABEL = "philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist065_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="philosophy")

Test Start: 'philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist065_temp03'
Results will be saved in: philosophy\results_philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist065_temp03.json


Test progress:   0%|          | 0/20 [00:00<?, ?it/s]


 First context preview (truncated): [' with members of the Pythagorean “cult” in Italy. In the 380s, he returned to Athens and founded a school: the Academy.\n\nii. At the Academy, Plato taught philosophy, but probably also mathematics1...


Test progress: 100%|██████████| 20/20 [10:42<00:00, 32.10s/it]

Success! JSON saved at: philosophy\results_philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist065_temp03.json

Średni czas odpowiedzi: 30.10s





Unnamed: 0,question,answer,latency,contexts
0,What does the term 'eudaimonia' mean in Aristo...,"Based on the provided text, in Aristotle's phi...",22.57,[ with members of the Pythagorean “cult” in It...
1,When was René Descartes born and when did he d...,Hello! I can certainly help you with that.\n\n...,20.33,[https://ocw.mit.edu\n\n24.01 Classics of West...
2,Aristotle describes a 'Function Argument' in B...,"According to the course materials, Aristotle's...",37.96,[heses Socrates realized that his defense was ...
3,How does Socrates explain why someone might ap...,Socrates addresses this apparent contradiction...,25.23,[24.01: Classics of Western Philosophy Prof. S...
4,Can you explain the three levels of potentiali...,"Okay, I can certainly help clarify Aristotle's...",36.83,"[ conclusions about piety? If so, what are the..."


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,philosophy_results_chunk_1000_overlap_200_voya...,What does the term 'eudaimonia' mean in Aristo...,"Based on the provided text, in Aristotle's phi...",[ with members of the Pythagorean “cult” in It...,The term 'eudaimonia' is usually translated as...,22.57,
1,philosophy_results_chunk_1000_overlap_200_voya...,When was René Descartes born and when did he d...,Hello! I can certainly help you with that.\n\n...,[https://ocw.mit.edu\n\n24.01 Classics of West...,René Descartes was born in 1596 and died in 16...,20.33,
2,philosophy_results_chunk_1000_overlap_200_voya...,Aristotle describes a 'Function Argument' in B...,"According to the course materials, Aristotle's...",[heses Socrates realized that his defense was ...,The three stages of the Function Argument are:...,37.96,
3,philosophy_results_chunk_1000_overlap_200_voya...,How does Socrates explain why someone might ap...,Socrates addresses this apparent contradiction...,[24.01: Classics of Western Philosophy Prof. S...,Socrates argues that those who do not know thi...,25.23,
4,philosophy_results_chunk_1000_overlap_200_voya...,Can you explain the three levels of potentiali...,"Okay, I can certainly help clarify Aristotle's...","[ conclusions about piety? If so, what are the...",The three levels are: (i) First potentiality (...,36.83,
5,philosophy_results_chunk_1000_overlap_200_voya...,What is Descartes' reason for undertaking a pr...,Descartes undertakes a project of systematic d...,[heses Socrates realized that his defense was ...,Descartes believes that to establish anything ...,19.57,
6,philosophy_results_chunk_1000_overlap_200_voya...,Define the 'articulation condition' as it appl...,"Okay, I can help with that!\n\nBased on the co...",[ with members of the Pythagorean “cult” in It...,The articulation condition states that if a pe...,37.34,
7,philosophy_results_chunk_1000_overlap_200_voya...,Aristotle uses the examples of an axe and an e...,The axe and eye analogies are used to explain ...,[heses Socrates realized that his defense was ...,The comparisons illustrate that the soul is a ...,19.21,
8,philosophy_results_chunk_1000_overlap_200_voya...,Why does Socrates maintain that 'care of the s...,"Based on the course materials, Socrates believ...",[heses Socrates realized that his defense was ...,Socrates believes that a cared-for soul is the...,36.44,
9,philosophy_results_chunk_1000_overlap_200_voya...,"In Meditation 1, why does Descartes think that...","Okay, I can help you understand why Descartes ...",[Cartesian Doubt in Meditation 1\n\nI. Backgro...,Optical illusions are 'external interference' ...,20.75,


## test chunk size

In [25]:
RUN_LABEL = "philosophy_results_chunk_512_overlap_100_voyage3lite_gemini20_flash_topk10_dist06_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="philosophy")

Test Start: 'philosophy_results_chunk_512_overlap_100_voyage3lite_gemini20_flash_topk10_dist06_temp03'
Results will be saved in: philosophy\results_philosophy_results_chunk_512_overlap_100_voyage3lite_gemini20_flash_topk10_dist06_temp03.json


Test progress:   0%|          | 0/20 [00:00<?, ?it/s]


 First context preview (truncated): ['wever, about what he thought. The main source of evidence for his views include a historian (Xenophon), a playwright (Aristophanes), and a philosopher (Plato). In fact, in most of Plato’s dialogues,...


Test progress: 100%|██████████| 20/20 [10:39<00:00, 31.96s/it]

Success! JSON saved at: philosophy\results_philosophy_results_chunk_512_overlap_100_voyage3lite_gemini20_flash_topk10_dist06_temp03.json

Średni czas odpowiedzi: 29.94s





Unnamed: 0,question,answer,latency,contexts
0,What does the term 'eudaimonia' mean in Aristo...,"Based on the provided text, here's what we can...",17.85,"[wever, about what he thought. The main source..."
1,When was René Descartes born and when did he d...,Based on the provided text:\n\n* René Descar...,20.35,[https://ocw.mit.edu\n\n24.01 Classics of West...
2,Aristotle describes a 'Function Argument' in B...,"Okay, I can help you understand the three stag...",37.25,"[3. Socrates then argues, and the interlocutor..."
3,How does Socrates explain why someone might ap...,Socrates explains that people might appear to ...,21.96,[reasons)?\n\n2. Socrates claims to have a div...
4,Can you explain the three levels of potentiali...,"Okay, I can certainly help you understand Aris...",35.33,"[3. Socrates then argues, and the interlocutor..."


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,philosophy_results_chunk_512_overlap_100_voyag...,What does the term 'eudaimonia' mean in Aristo...,"Based on the provided text, here's what we can...","[wever, about what he thought. The main source...",The term 'eudaimonia' is usually translated as...,17.85,
1,philosophy_results_chunk_512_overlap_100_voyag...,When was René Descartes born and when did he d...,Based on the provided text:\n\n* René Descar...,[https://ocw.mit.edu\n\n24.01 Classics of West...,René Descartes was born in 1596 and died in 16...,20.35,
2,philosophy_results_chunk_512_overlap_100_voyag...,Aristotle describes a 'Function Argument' in B...,"Okay, I can help you understand the three stag...","[3. Socrates then argues, and the interlocutor...",The three stages of the Function Argument are:...,37.25,
3,philosophy_results_chunk_512_overlap_100_voyag...,How does Socrates explain why someone might ap...,Socrates explains that people might appear to ...,[reasons)?\n\n2. Socrates claims to have a div...,Socrates argues that those who do not know thi...,21.96,
4,philosophy_results_chunk_512_overlap_100_voyag...,Can you explain the three levels of potentiali...,"Okay, I can certainly help you understand Aris...","[3. Socrates then argues, and the interlocutor...",The three levels are: (i) First potentiality (...,35.33,
5,philosophy_results_chunk_512_overlap_100_voyag...,What is Descartes' reason for undertaking a pr...,Descartes undertakes a project of systematic d...,[24.01: Classics of Western Philosophy Prof. S...,Descartes believes that to establish anything ...,20.51,
6,philosophy_results_chunk_512_overlap_100_voyag...,Define the 'articulation condition' as it appl...,"Okay, I can certainly help you with understand...",[areness of how little he does know. If we all...,The articulation condition states that if a pe...,34.36,
7,philosophy_results_chunk_512_overlap_100_voyag...,Aristotle uses the examples of an axe and an e...,"Okay, let's break down Aristotle's comparisons...",[rtance and the route to happiness. In contras...,The comparisons illustrate that the soul is a ...,24.34,
8,philosophy_results_chunk_512_overlap_100_voyag...,Why does Socrates maintain that 'care of the s...,"Based on the course materials, Socrates believ...",[d) Socrates knows that he does not have knowl...,Socrates believes that a cared-for soul is the...,36.5,
9,philosophy_results_chunk_512_overlap_100_voyag...,"In Meditation 1, why does Descartes think that...","Okay, let's break down why Descartes considers...",[Cartesian Doubt in Meditation 1\n\nI. Backgro...,Optical illusions are 'external interference' ...,26.13,


In [26]:
RUN_LABEL = "philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist06_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="philosophy")


Test Start: 'philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist06_temp03'
Results will be saved in: philosophy\results_philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist06_temp03.json


Test progress:   0%|          | 0/20 [00:00<?, ?it/s]


 First context preview (truncated): [' with members of the Pythagorean “cult” in Italy. In the 380s, he returned to Athens and founded a school: the Academy.\n\nii. At the Academy, Plato taught philosophy, but probably also mathematics1...


Test progress: 100%|██████████| 20/20 [13:27<00:00, 40.40s/it]

Success! JSON saved at: philosophy\results_philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk10_dist06_temp03.json

Średni czas odpowiedzi: 38.38s





Unnamed: 0,question,answer,latency,contexts
0,What does the term 'eudaimonia' mean in Aristo...,"Based on the provided text, in Aristotle's phi...",21.04,[ with members of the Pythagorean “cult” in It...
1,When was René Descartes born and when did he d...,Based on the provided text:\n\n* René Descar...,34.96,[https://ocw.mit.edu\n\n24.01 Classics of West...
2,Aristotle describes a 'Function Argument' in B...,"According to the course materials, Aristotle's...",27.54,[heses Socrates realized that his defense was ...
3,How does Socrates explain why someone might ap...,Socrates addresses this apparent contradiction...,43.5,[24.01: Classics of Western Philosophy Prof. S...
4,Can you explain the three levels of potentiali...,"Okay, I can definitely clarify Aristotle's thr...",33.4,"[ conclusions about piety? If so, what are the..."


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,philosophy_results_chunk_1000_overlap_200_voya...,What does the term 'eudaimonia' mean in Aristo...,"Based on the provided text, in Aristotle's phi...",[ with members of the Pythagorean “cult” in It...,The term 'eudaimonia' is usually translated as...,21.04,
1,philosophy_results_chunk_1000_overlap_200_voya...,When was René Descartes born and when did he d...,Based on the provided text:\n\n* René Descar...,[https://ocw.mit.edu\n\n24.01 Classics of West...,René Descartes was born in 1596 and died in 16...,34.96,
2,philosophy_results_chunk_1000_overlap_200_voya...,Aristotle describes a 'Function Argument' in B...,"According to the course materials, Aristotle's...",[heses Socrates realized that his defense was ...,The three stages of the Function Argument are:...,27.54,
3,philosophy_results_chunk_1000_overlap_200_voya...,How does Socrates explain why someone might ap...,Socrates addresses this apparent contradiction...,[24.01: Classics of Western Philosophy Prof. S...,Socrates argues that those who do not know thi...,43.5,
4,philosophy_results_chunk_1000_overlap_200_voya...,Can you explain the three levels of potentiali...,"Okay, I can definitely clarify Aristotle's thr...","[ conclusions about piety? If so, what are the...",The three levels are: (i) First potentiality (...,33.4,
5,philosophy_results_chunk_1000_overlap_200_voya...,What is Descartes' reason for undertaking a pr...,Descartes undertakes a project of systematic d...,[heses Socrates realized that his defense was ...,Descartes believes that to establish anything ...,26.65,
6,philosophy_results_chunk_1000_overlap_200_voya...,Define the 'articulation condition' as it appl...,The 'articulation condition' in the context of...,[ with members of the Pythagorean “cult” in It...,The articulation condition states that if a pe...,40.15,
7,philosophy_results_chunk_1000_overlap_200_voya...,Aristotle uses the examples of an axe and an e...,The axe and eye comparisons are used to illust...,[heses Socrates realized that his defense was ...,The comparisons illustrate that the soul is a ...,19.58,
8,philosophy_results_chunk_1000_overlap_200_voya...,Why does Socrates maintain that 'care of the s...,"Based on the course materials, Socrates believ...",[heses Socrates realized that his defense was ...,Socrates believes that a cared-for soul is the...,37.06,
9,philosophy_results_chunk_1000_overlap_200_voya...,"In Meditation 1, why does Descartes think that...","Okay, I can help you understand why Descartes ...",[Cartesian Doubt in Meditation 1\n\nI. Backgro...,Optical illusions are 'external interference' ...,23.4,


In [27]:
RUN_LABEL = "philosophy_results_chunk_2000_overlap_400_voyage3lite_gemini20_flash_topk10_dist06_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="philosophy")

Test Start: 'philosophy_results_chunk_2000_overlap_400_voyage3lite_gemini20_flash_topk10_dist06_temp03'
Results will be saved in: philosophy\results_philosophy_results_chunk_2000_overlap_400_voyage3lite_gemini20_flash_topk10_dist06_temp03.json


Test progress:   0%|          | 0/20 [00:00<?, ?it/s]


 First context preview (truncated): ['24.01: Classics of Western Philosophy Prof. Sally Haslanger\n\nThe Apology and Crito\n\n1. Background on Socrates and Plato\n\n(a) Socrates (469–399 BCE)\n\ni. His mother may have been a midwife, hi...


Test progress: 100%|██████████| 20/20 [23:06<00:00, 69.33s/it]

Success! JSON saved at: philosophy\results_philosophy_results_chunk_2000_overlap_400_voyage3lite_gemini20_flash_topk10_dist06_temp03.json

Średni czas odpowiedzi: 67.32s





Unnamed: 0,question,answer,latency,contexts
0,What does the term 'eudaimonia' mean in Aristo...,"Okay, I can help you understand ""eudaimonia"" i...",66.42,[24.01: Classics of Western Philosophy Prof. S...
1,When was René Descartes born and when did he d...,Based on the course materials:\n\n* René Des...,85.78,[https://ocw.mit.edu\n\n24.01 Classics of West...
2,Aristotle describes a 'Function Argument' in B...,"Okay, I can explain the three stages of Aristo...",75.61,[ologia’ which means defense. (As you surely n...
3,How does Socrates explain why someone might ap...,Socrates addresses this in the *Meno* by disti...,78.84,[d) Socrates knows that he does not have knowl...
4,Can you explain the three levels of potentiali...,"Okay, I can definitely clarify Aristotle's thr...",49.13,[24.01: Classics of Western Philosophy Prof. S...


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,philosophy_results_chunk_2000_overlap_400_voya...,What does the term 'eudaimonia' mean in Aristo...,"Okay, I can help you understand ""eudaimonia"" i...",[24.01: Classics of Western Philosophy Prof. S...,The term 'eudaimonia' is usually translated as...,66.42,
1,philosophy_results_chunk_2000_overlap_400_voya...,When was René Descartes born and when did he d...,Based on the course materials:\n\n* René Des...,[https://ocw.mit.edu\n\n24.01 Classics of West...,René Descartes was born in 1596 and died in 16...,85.78,
2,philosophy_results_chunk_2000_overlap_400_voya...,Aristotle describes a 'Function Argument' in B...,"Okay, I can explain the three stages of Aristo...",[ologia’ which means defense. (As you surely n...,The three stages of the Function Argument are:...,75.61,
3,philosophy_results_chunk_2000_overlap_400_voya...,How does Socrates explain why someone might ap...,Socrates addresses this in the *Meno* by disti...,[d) Socrates knows that he does not have knowl...,Socrates argues that those who do not know thi...,78.84,
4,philosophy_results_chunk_2000_overlap_400_voya...,Can you explain the three levels of potentiali...,"Okay, I can definitely clarify Aristotle's thr...",[24.01: Classics of Western Philosophy Prof. S...,The three levels are: (i) First potentiality (...,49.13,
5,philosophy_results_chunk_2000_overlap_400_voya...,What is Descartes' reason for undertaking a pr...,Descartes undertakes a project of systematic d...,[24.01: Classics of Western Philosophy Prof. S...,Descartes believes that to establish anything ...,58.09,
6,philosophy_results_chunk_2000_overlap_400_voya...,Define the 'articulation condition' as it appl...,"The articulation condition, in the context of ...",[24.01: Classics of Western Philosophy Prof. S...,The articulation condition states that if a pe...,63.39,
7,philosophy_results_chunk_2000_overlap_400_voya...,Aristotle uses the examples of an axe and an e...,Aristotle uses the examples of an axe and an e...,[24.01: Classics of Western Philosophy Prof. S...,The comparisons illustrate that the soul is a ...,63.86,
8,philosophy_results_chunk_2000_overlap_400_voya...,Why does Socrates maintain that 'care of the s...,Socrates believes that caring for the soul is ...,[ologia’ which means defense. (As you surely n...,Socrates believes that a cared-for soul is the...,61.04,
9,philosophy_results_chunk_2000_overlap_400_voya...,"In Meditation 1, why does Descartes think that...",Descartes thinks that dreaming provides a stro...,[d) Socrates knows that he does not have knowl...,Optical illusions are 'external interference' ...,55.98,


In [28]:
RUN_LABEL = "philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk5_dist06_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="philosophy")

Test Start: 'philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk5_dist06_temp03'
Results will be saved in: philosophy\results_philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk5_dist06_temp03.json


Test progress:   0%|          | 0/20 [00:00<?, ?it/s]


 First context preview (truncated): []...


Test progress: 100%|██████████| 20/20 [13:24<00:00, 40.21s/it]

Success! JSON saved at: philosophy\results_philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk5_dist06_temp03.json

Średni czas odpowiedzi: 38.20s





Unnamed: 0,question,answer,latency,contexts
0,What does the term 'eudaimonia' mean in Aristo...,"Okay, I can help you understand ""eudaimonia"" i...",83.26,[]
1,When was René Descartes born and when did he d...,"Okay, I can certainly help you with those ques...",64.46,[]
2,Aristotle describes a 'Function Argument' in B...,"Okay, I can certainly help you break down Aris...",53.17,[]
3,How does Socrates explain why someone might ap...,"Okay, that's a great question! It gets to the ...",66.86,[]
4,Can you explain the three levels of potentiali...,Absolutely! Let's break down Aristotle's level...,49.63,[]


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,philosophy_results_chunk_1000_overlap_200_voya...,What does the term 'eudaimonia' mean in Aristo...,"Okay, I can help you understand ""eudaimonia"" i...",[],The term 'eudaimonia' is usually translated as...,83.26,
1,philosophy_results_chunk_1000_overlap_200_voya...,When was René Descartes born and when did he d...,"Okay, I can certainly help you with those ques...",[],René Descartes was born in 1596 and died in 16...,64.46,
2,philosophy_results_chunk_1000_overlap_200_voya...,Aristotle describes a 'Function Argument' in B...,"Okay, I can certainly help you break down Aris...",[],The three stages of the Function Argument are:...,53.17,
3,philosophy_results_chunk_1000_overlap_200_voya...,How does Socrates explain why someone might ap...,"Okay, that's a great question! It gets to the ...",[],Socrates argues that those who do not know thi...,66.86,
4,philosophy_results_chunk_1000_overlap_200_voya...,Can you explain the three levels of potentiali...,Absolutely! Let's break down Aristotle's level...,[],The three levels are: (i) First potentiality (...,49.63,
5,philosophy_results_chunk_1000_overlap_200_voya...,What is Descartes' reason for undertaking a pr...,Descartes undertakes the project of systematic...,[Cartesian Doubt in Meditation 1\n\nI. Backgro...,Descartes believes that to establish anything ...,33.78,
6,philosophy_results_chunk_1000_overlap_200_voya...,Define the 'articulation condition' as it appl...,"Okay, I can certainly help with that!\n\nBased...",[24.01: Classics of Western Philosophy Prof. S...,The articulation condition states that if a pe...,37.39,
7,philosophy_results_chunk_1000_overlap_200_voya...,Aristotle uses the examples of an axe and an e...,Aristotle uses the examples of an axe and an e...,[t the form/matter in statues is just an\nanal...,The comparisons illustrate that the soul is a ...,21.16,
8,philosophy_results_chunk_1000_overlap_200_voya...,Why does Socrates maintain that 'care of the s...,"Based on the course materials, Socrates believ...",[heses Socrates realized that his defense was ...,Socrates believes that a cared-for soul is the...,34.49,
9,philosophy_results_chunk_1000_overlap_200_voya...,"In Meditation 1, why does Descartes think that...","Okay, let's break down why Descartes considers...",[Cartesian Doubt in Meditation 1\n\nI. Backgro...,Optical illusions are 'external interference' ...,22.5,


In [29]:
RUN_LABEL = "philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk15_dist06_temp03" 
run_pipeline_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN,
    output_folder="philosophy")

Test Start: 'philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk15_dist06_temp03'
Results will be saved in: philosophy\results_philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk15_dist06_temp03.json


Test progress:   0%|          | 0/20 [00:00<?, ?it/s]


 First context preview (truncated): [' with members of the Pythagorean “cult” in Italy. In the 380s, he returned to Athens and founded a school: the Academy.\n\nii. At the Academy, Plato taught philosophy, but probably also mathematics1...


Test progress: 100%|██████████| 20/20 [10:24<00:00, 31.21s/it]

Success! JSON saved at: philosophy\results_philosophy_results_chunk_1000_overlap_200_voyage3lite_gemini20_flash_topk15_dist06_temp03.json

Średni czas odpowiedzi: 29.20s





Unnamed: 0,question,answer,latency,contexts
0,What does the term 'eudaimonia' mean in Aristo...,"In Aristotle's philosophy, ""eudaimonia"" refers...",21.72,[ with members of the Pythagorean “cult” in It...
1,When was René Descartes born and when did he d...,Based on the provided text:\n\n* René Descar...,34.33,[24.01: Classics of Western Philosophy Prof. S...
2,Aristotle describes a 'Function Argument' in B...,"Okay, I can help you understand the three stag...",28.36,[heses Socrates realized that his defense was ...
3,How does Socrates explain why someone might ap...,Socrates addresses the issue of why someone mi...,36.29,[heses Socrates realized that his defense was ...
4,Can you explain the three levels of potentiali...,"Okay, I can certainly help clarify Aristotle's...",25.17,"[ conclusions about piety? If so, what are the..."


Unnamed: 0,config_label,question,answer,contexts,ground_truth,latency,error
0,philosophy_results_chunk_1000_overlap_200_voya...,What does the term 'eudaimonia' mean in Aristo...,"In Aristotle's philosophy, ""eudaimonia"" refers...",[ with members of the Pythagorean “cult” in It...,The term 'eudaimonia' is usually translated as...,21.72,
1,philosophy_results_chunk_1000_overlap_200_voya...,When was René Descartes born and when did he d...,Based on the provided text:\n\n* René Descar...,[24.01: Classics of Western Philosophy Prof. S...,René Descartes was born in 1596 and died in 16...,34.33,
2,philosophy_results_chunk_1000_overlap_200_voya...,Aristotle describes a 'Function Argument' in B...,"Okay, I can help you understand the three stag...",[heses Socrates realized that his defense was ...,The three stages of the Function Argument are:...,28.36,
3,philosophy_results_chunk_1000_overlap_200_voya...,How does Socrates explain why someone might ap...,Socrates addresses the issue of why someone mi...,[heses Socrates realized that his defense was ...,Socrates argues that those who do not know thi...,36.29,
4,philosophy_results_chunk_1000_overlap_200_voya...,Can you explain the three levels of potentiali...,"Okay, I can certainly help clarify Aristotle's...","[ conclusions about piety? If so, what are the...",The three levels are: (i) First potentiality (...,25.17,
5,philosophy_results_chunk_1000_overlap_200_voya...,What is Descartes' reason for undertaking a pr...,Descartes undertakes the project of systematic...,[heses Socrates realized that his defense was ...,Descartes believes that to establish anything ...,37.01,
6,philosophy_results_chunk_1000_overlap_200_voya...,Define the 'articulation condition' as it appl...,"Okay, I can certainly help with that!\n\nBased...",[ with members of the Pythagorean “cult” in It...,The articulation condition states that if a pe...,25.41,
7,philosophy_results_chunk_1000_overlap_200_voya...,Aristotle uses the examples of an axe and an e...,The comparisons of the axe and the eye are mea...,[heses Socrates realized that his defense was ...,The comparisons illustrate that the soul is a ...,25.95,
8,philosophy_results_chunk_1000_overlap_200_voya...,Why does Socrates maintain that 'care of the s...,"Based on the course materials, Socrates believ...",[heses Socrates realized that his defense was ...,Socrates believes that a cared-for soul is the...,35.79,
9,philosophy_results_chunk_1000_overlap_200_voya...,"In Meditation 1, why does Descartes think that...",Descartes thinks dreaming provides a stronger ...,[heses Socrates realized that his defense was ...,Optical illusions are 'external interference' ...,25.23,
