# wywoulujemy dwie poni≈ºsze kom√≥rki i jedziemy na d√≥≈Ç pliku

In [None]:
import json
import openpyxl
import requests
import pandas as pd
import time
import uuid
from tqdm import tqdm 
from datetime import datetime

In [11]:
def run_notebook_test(run_label, api_url, course_id, input_file, moodle_token=None):
    
    timestamp = datetime.now().strftime('%Y%m%d_%H%M')
    file_tag = f"_{run_label}" if run_label else ""
    output_filename = f"results{file_tag}.json"
    
    print(f"Test Start '{run_label}'")
    print(f"Results will be saved in file: {output_filename}")
    
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            test_data = json.load(f)
    except FileNotFoundError:
        print(f" ERROR: file not found {input_file}")
        return None

    results = []
    headers = {"Content-Type": "application/json"}
    if moodle_token:
        headers["Authorization"] = f"Bearer {moodle_token}"
    n=0
    for item in tqdm(test_data, desc="test progress"):
        n+=1
        question = item.get("question")
        ground_truth = item.get("ground_truth")
        
        if not question: continue

        session_id = str(uuid.uuid4())
        
        payload = {
            "message": question,
            "courseId": str(course_id), 
            "sessionId": session_id,
            "history": [] 
        }

        start_time = time.time()
        bot_answer = ""
        contexts = []
        error_msg = None
        duration = 0 

        try:
            response = requests.post(api_url, json=payload, headers=headers, timeout=180)
            
            if response.status_code == 200:
                duration = time.time() - start_time
                resp_data = response.json()
                bot_answer = resp_data.get("response", "")
                raw_sources = resp_data.get("sources", [])
                if raw_sources:
                    for src in raw_sources:
                        text_content = src.get("chunkText", "")
                        if not text_content:
                            text_content = str(src)
                            
                        contexts.append(text_content)
                # if raw_sources:
                #     for src in raw_sources:
                #         src_text = json.dumps(src, ensure_ascii=False)
                #         contexts.append(src_text)
            else:
                error_msg = f"HTTP {response.status_code}"
                bot_answer = f"ERROR: {error_msg}"
                if response.status_code != 422: 
                    print(f"SEREVER ERROR: {response.status_code} for question: {question}")

        except Exception as e:
            error_msg = str(e)
            bot_answer = "ERROR: Timeout/Connection"
            duration = 180
            print(f"\n Timeout...")
        if n == 1:
            print(contexts)    
        
        results.append({
            "config_label": run_label,
            "question": question,
            "answer": bot_answer,
            "contexts": contexts,
            "ground_truth": ground_truth,
            "latency": round(duration, 2),
            "error": error_msg
        })
        time.sleep(2.0)

    with open(output_filename, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
        
    df = pd.DataFrame(results)
    try:
        import openpyxl 
        df['contexts_str'] = df['contexts'].apply(lambda x: "\n\n".join(x) if x else "") 
        excel_name = output_filename.replace(".json", ".xlsx")
        df.to_excel(excel_name, index=False)
        print(f" Sucess! Results saved in file: {excel_name}")
    except ImportError:
        print("Only json results saved. To save Excel files, please install 'openpyxl' package.")
    except Exception as e:
        print(f" Error saving Excel file: {e}")

    return df

# KONFIGURACJA
- MODEL DO EMBEDDING√ìW: voyage-3-lite
- LLM: gpt-4o-mini
- TEMPERTAURE 0.1

## chunk size = 512, chunk_overlap= 100

In [3]:
API_URL = "https://chatbot-backend-production-9ee8.up.railway.app/chat"  # Lub adres Railway
TEST_COURSE_ID = "11"                      # ID kursu z materia≈Çami
MOODLE_TOKEN = '9f66de570d71c9b474cce459dd02a2bd'               # Opcjonalnie token
INPUT_FILE = "question_answer_claude.json"

In [3]:
# --- KONFIGURACJA TESTU ---

# 1. Opis tego testu (to trafi do nazwy pliku!)
#    Zmieniaj to za ka≈ºdym razem jak zmieniasz co≈õ na Railway
RUN_LABEL = "chunk_512_overlap_100" 
# --- URUCHOMIENIE ---
df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

# PodglƒÖd wynik√≥w od razu pod kom√≥rkƒÖ
if df_results is not None:
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

NameError: name 'run_notebook_test' is not defined

## chunk size = 1000, chunk_overlap= 200


In [None]:

RUN_LABEL = "chunk_1000_overlap_200" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

if df_results is not None:
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

NameError: name 'run_notebook_test' is not defined

## chunk size = 2000, chunk_overlap= 400

In [6]:

RUN_LABEL = "chunk_2000_overlap_400" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

if df_results is not None:
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

üé¨ Rozpoczynam test: 'chunk_2000_overlap_400'
üìÇ Wyniki trafiƒÖ do: results_chunk_2000_overlap_400_20260106_0057.json


Postƒôp testu:   0%|          | 0/30 [00:00<?, ?it/s]

['{"chunkId": 6, "materialId": 1, "fileName": "Rule Mining and the Apriori Algorithm.pdf", "chunkText": "http://ocw.mit.edu\\n\\n15.097 Prediction: Machine Learning and Statistics\\n\\nSpring 2012\\n\\nFor information about citing these materials or our Terms of Use, visit: http://ocw.mit.edu/terms.", "metadata": {"material_id": 1, "course_id": 11, "file": "Rule Mining and the Apriori Algorithm.pdf", "page": 6}}', '{"chunkId": 7, "materialId": 2, "fileName": "R for ML.pdf", "chunkText": "R for Machine Learning\\n\\nAllison Chang\\n\\n1 Introduction\\n\\nIt is common for today‚Äôs scientific and business industries to collect large amounts of data, and the ability to analyze the data and learn from it is critical to making informed decisions. Familiarity with software such as R allows users to visualize data, run statistical tests, and apply machine learning algorithms. Even if you already know other software, there are still good reasons to learn R:\\n\\n1. R is free. If your future em

Postƒôp testu: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [30:50<00:00, 61.69s/it]


‚úÖ Sukces! Wyniki w pliku: results_chunk_2000_overlap_400_20260106_0057.xlsx

≈öredni czas odpowiedzi: 58.68s


Unnamed: 0,question,answer,latency,contexts
0,What is the logistic function and how did it o...,The logistic function is a mathematical model ...,35.64,"[{""chunkId"": 6, ""materialId"": 1, ""fileName"": ""..."
1,"In logistic regression, why do we model the lo...","In logistic regression, we model the log odds ...",40.82,"[{""chunkId"": 6, ""materialId"": 1, ""fileName"": ""..."
2,How does maximum likelihood estimation work fo...,Maximum Likelihood Estimation (MLE) is a funda...,51.42,"[{""chunkId"": 6, ""materialId"": 1, ""fileName"": ""..."
3,What are the key advantages of decision trees ...,The course notes highlight several key advanta...,31.21,"[{""chunkId"": 6, ""materialId"": 1, ""fileName"": ""..."
4,Explain how Information Gain is calculated for...,**Information Gain Calculation for Decision Tr...,69.2,"[{""chunkId"": 1, ""materialId"": 1, ""fileName"": ""..."


# KONFIGURACJA
- MODEL DO EMBEDDING√ìW: voyage-2-large
- chunk overlap: 100
- chunk size: 512
- LLM: gpt-4o-mini
- TEMPERTAURE 0.1
- top_k = 10
- rag distance threshold = 0.55

In [17]:

RUN_LABEL = "chunk_512_overlap_100_voyagelarge2_gptmini_topk10_dist055" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

if df_results is not None:
    print(f"\nmean response time: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

Test Start 'chunk_512_overlap_100_voyagelarge2_gptmini_topk10_dist055'
Results will be saved in file: results_chunk_512_overlap_100_voyagelarge2_gptmini_topk10_dist055_20260106_2258.json


test progress:   0%|          | 0/30 [00:00<?, ?it/s]

['f(xi)\n‚Ä¢ ‚Äúlogistic loss‚Äù log2 1 + e\n‚áê= logistic regression\n\n6\n\n[IMAGE_OCR 1]\n35 ‚Äî tj <0)\nexp)\n3 04,1 +exp yo)\n‚Äî‚Äî max(0,1-¬•"09)\n25 2 15 1\nu = SS\n==)\no\n-2 AS A -0.5 o 0.5 1 15 2\ny* F090.', '., low degree polynomials). We then try to fit the data well using those functions. Hope\xad fully this strikes the right balance of wiggliness (variance) and capturing the mean of the data (bias).\n\n‚Ä¢ One thing we like to do is make assumptions on the distribution D, or at least on the class of functions that might be able to fit well. Those assumptions each lead to a different algorithm (i.e. model). How well the algorithm works or not depends on how true the assumption is.\n\n‚Ä¢ Even when we‚Äôre not ...', 'Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer‚Äôs ‚ÄúThe Origin of Logistic Regression‚Äù\n\nOrigins: 19th Century.\n\n‚Ä¢ Studying growth of populations and the course of chemical reactions using\n\

test progress: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [21:19<00:00, 42.65s/it]


 Sucess! Results saved in file: results_chunk_512_overlap_100_voyagelarge2_gptmini_topk10_dist055_20260106_2258.xlsx

mean response time: 39.64s


Unnamed: 0,question,answer,latency,contexts
0,What is the logistic function and how did it o...,The logistic function is a mathematical model ...,25.92,[f(xi)\n‚Ä¢ ‚Äúlogistic loss‚Äù log2 1 + e\n‚áê= logis...
1,"In logistic regression, why do we model the lo...","In logistic regression, we model the log odds ...",33.43,"[\ny, x1, x2, and x3 are also column names in ..."
2,How does maximum likelihood estimation work fo...,Maximum likelihood estimation (MLE) is a metho...,46.72,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
3,What are the key advantages of decision trees ...,The course notes highlight several key advanta...,30.89,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
4,Explain how Information Gain is calculated for...,### Information Gain Calculation\n\nInformatio...,49.93,"[-section risk, etc.\n\nHow to build a decisio..."


# KONFIGURACJA
- MODEL DO EMBEDDING√ìW: voyage-3-lite
- chunk overlap: 100
- chunk size: 512
- LLM: gpt-4o-mini
- TEMPERTAURE 0.1
- top_k = 10
- rag distance threshold = 0.55

In [18]:
RUN_LABEL = "chunk_512_overlap_100_voyage3lite_gptmini_topk10_dist055" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

if df_results is not None:
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

Test Start 'chunk_512_overlap_100_voyage3lite_gptmini_topk10_dist055'
Results will be saved in file: results_chunk_512_overlap_100_voyage3lite_gptmini_topk10_dist055_20260106_2325.json


test progress:   0%|          | 0/30 [00:00<?, ?it/s]

['http://ocw.mit.edu\n\n15.097 Prediction: Machine Learning and Statistics\n\nSpring 2012\n\nFor information about citing these materials or our Terms of Use, visit: http://ocw.mit.edu/terms.', 'http://ocw.mit.edu\n\n15.097 Prediction: Machine Learning and Statistics\n\nSpring 2012\n\nFor information about citing these materials or our Terms of Use, visit: http://ocw.mit.edu/terms.', 'http://ocw.mit.edu\n\n15.097 Prediction: Machine Learning and Statistics\n\nSpring 2012\n\nFor information about citing these materials or our Terms of Use, visit: http://ocw.mit.edu/terms.', 'http://ocw.mit.edu\n\n15.097 Prediction: Machine Learning and Statistics\n\nSpring 2012\n\nFor information about citing these materials or our Terms of Use, visit: http://ocw.mit.edu/terms.', 'http://ocw.mit.edu\n\n15.097 Prediction: Machine Learning and Statistics\n\nSpring 2012\n\nFor information about citing these materials or our Terms of Use, visit: http://ocw.mit.edu/terms.', 'http://ocw.mit.edu\n\n15.097 Pred

test progress: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [20:07<00:00, 40.24s/it]

 Sucess! Results saved in file: results_chunk_512_overlap_100_voyage3lite_gptmini_topk10_dist055_20260106_2325.xlsx

≈öredni czas odpowiedzi: 37.23s





Unnamed: 0,question,answer,latency,contexts
0,What is the logistic function and how did it o...,The logistic function is a mathematical model ...,34.41,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
1,"In logistic regression, why do we model the lo...","Great question! In logistic regression, we mod...",35.25,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
2,How does maximum likelihood estimation work fo...,Maximum likelihood estimation (MLE) is a metho...,47.27,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
3,What are the key advantages of decision trees ...,The course notes highlight several key advanta...,25.29,[4.6 Na¬®ƒ±ve Bayes\n\nInstall and load the e107...
4,Explain how Information Gain is calculated for...,**Information Gain Calculation for Decision Tr...,49.6,[Decision Trees\n\nMIT 15.097 Course Notes Cyn...


# KONFIGURACJA
- MODEL DO EMBEDDING√ìW: text embedding 3 small
- chunk overlap: 100
- chunk size: 512
- LLM: gpt-4o-mini
- TEMPERTAURE 0.1
- top_k = 10
- rag distance threshold = 0.55

In [20]:
RUN_LABEL = "chunk_512_overlap_100_textembedding3small_gptmini_topk10_dist055" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

if df_results is not None:
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

Test Start 'chunk_512_overlap_100_textembedding3small_gptmini_topk10_dist055'
Results will be saved in file: results_chunk_512_overlap_100_textembedding3small_gptmini_topk10_dist055.json


test progress:   0%|          | 0/30 [00:00<?, ?it/s]

['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer‚Äôs ‚ÄúThe Origin of Logistic Regression‚Äù\n\nOrigins: 19th Century.\n\n‚Ä¢ Studying growth of populations and the course of chemical reactions using\n\nd W(t) = Œ≤W(t)\ndt ‚áí\nW(t) = AeŒ≤t\n\nwhich is a good model for unopposed growth, like the US population‚Äôs growth at the time.\n\n‚Ä¢ Adolphe Quetelet (1796 - 1874), Belgian astronomer turned statistician,\nknew it produced impossible values and asked his pupil Pierr...', 'ho were studying population growth of the US. They also tried to fit the logistic function to population growth, and estimated ‚Ñ¶for the US to be 197 million (the current population is 312 million). Actually, Pearl and collaborators spent 20 years applying the logistic growth curve to almost any living population (fruit flies, humans in North Africa, cantaloupes).\n\nVerhulst‚Äôs work was rediscovered just after Pearl and Reed‚Äôs first paper in 1920, b

test progress:  10%|‚ñà         | 3/30 [01:58<18:10, 40.40s/it]

SEREVER ERROR: 502 for question: What are the key advantages of decision trees mentioned in the course notes?


test progress: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [19:30<00:00, 39.00s/it]

 Sucess! Results saved in file: results_chunk_512_overlap_100_textembedding3small_gptmini_topk10_dist055.xlsx

≈öredni czas odpowiedzi: 35.99s





Unnamed: 0,question,answer,latency,contexts
0,What is the logistic function and how did it o...,The logistic function is a mathematical model ...,27.08,[Logistic Regression\n\nMIT 15.097 Course Note...
1,"In logistic regression, why do we model the lo...","In logistic regression, we model the log odds ...",44.54,"[Here Y takes either 0 or 1, but we need a ¬±1 ..."
2,How does maximum likelihood estimation work fo...,Maximum Likelihood Estimation (MLE) is a stati...,37.43,"[Here Y takes either 0 or 1, but we need a ¬±1 ..."
3,What are the key advantages of decision trees ...,ERROR: HTTP 502,0.0,[]
4,Explain how Information Gain is calculated for...,### Information Gain Calculation\n\nInformatio...,29.41,"[-section risk, etc.\n\nHow to build a decisio..."


# KONFIGURACJA
- MODEL DO EMBEDDING√ìW: text embedding 3 large
- chunk overlap: 100
- chunk size: 512
- LLM: gpt-4o-mini
- TEMPERTAURE 0.1
- top_k = 10
- rag distance threshold = 0.55

In [4]:
RUN_LABEL = "chunk_512_overlap_100_textembedding3large_gptmini_topk10_dist055" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

if df_results is not None:
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

Test Start 'chunk_512_overlap_100_textembedding3large_gptmini_topk10_dist055'
Results will be saved in file: results_chunk_512_overlap_100_textembedding3large_gptmini_topk10_dist055.json


test progress:   0%|          | 0/30 [00:00<?, ?it/s]

['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer‚Äôs ‚ÄúThe Origin of Logistic Regression‚Äù\n\nOrigins: 19th Century.\n\n‚Ä¢ Studying growth of populations and the course of chemical reactions using\n\nd W(t) = Œ≤W(t)\ndt ‚áí\nW(t) = AeŒ≤t\n\nwhich is a good model for unopposed growth, like the US population‚Äôs growth at the time.\n\n‚Ä¢ Adolphe Quetelet (1796 - 1874), Belgian astronomer turned statistician,\nknew it produced impossible values and asked his pupil Pierr...', 'onomer turned statistician,\nknew it produced impossible values and asked his pupil Pierre-Fran¬∏cois Verhulst (1804-1849) to help him work on a more realistic model. They chose\n\nd W(t) = Œ≤W(t) ‚àíŒ¶(W(t))\ndt\n\nto resist further growth, and with the choice of Œ¶ to be a quadratic function, they got:\n\nd W(t) = Œ≤W(t)(‚Ñ¶\ndt ‚àíW(t)),\n\nW(t)\nwhere ‚Ñ¶is the saturation limit of W. Writing P(t) =\nas the proportion of ‚Ñ¶ saturation limit:\nd P(t) 

test progress: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [21:33<00:00, 43.12s/it]


 Sucess! Results saved in file: results_chunk_512_overlap_100_textembedding3large_gptmini_topk10_dist055.xlsx

≈öredni czas odpowiedzi: 40.11s


Unnamed: 0,question,answer,latency,contexts
0,What is the logistic function and how did it o...,The logistic function is a mathematical model ...,31.09,[Logistic Regression\n\nMIT 15.097 Course Note...
1,"In logistic regression, why do we model the lo...","Great question! In logistic regression, we mod...",29.3,"[Here Y takes either 0 or 1, but we need a ¬±1 ..."
2,How does maximum likelihood estimation work fo...,Maximum likelihood estimation (MLE) is a metho...,37.52,"[Here Y takes either 0 or 1, but we need a ¬±1 ..."
3,What are the key advantages of decision trees ...,The course notes highlight several key advanta...,36.26,[Decision Trees\n\nMIT 15.097 Course Notes Cyn...
4,Explain how Information Gain is calculated for...,Certainly! Let's break down the concept of Inf...,38.72,"[-section risk, etc.\n\nHow to build a decisio..."


# KONFIGURACJA
- MODEL DO EMBEDDING√ìW: voyage-3-lite
- chunk overlap: 100
- chunk size: 512
- LLM: gpt-4o-mini
- TEMPERTAURE 0.1
- top_k = 10
- rag distance threshold = 0.45

In [4]:
RUN_LABEL = "chunk_512_overlap_100_voyage3lite_gptmini_topk10_dist045" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

if df_results is not None:
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

Test Start 'chunk_512_overlap_100_voyage3lite_gptmini_topk10_dist045'
Results will be saved in file: results_chunk_512_overlap_100_voyage3lite_gptmini_topk10_dist045.json


test progress:   0%|          | 0/30 [00:00<?, ?it/s]

['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer‚Äôs ‚ÄúThe Origin of Logistic Regression‚Äù\n\nOrigins: 19th Century.\n\n‚Ä¢ Studying growth of populations and the course of chemical reactions using\n\nd W(t) = Œ≤W(t)\ndt ‚áí\nW(t) = AeŒ≤t\n\nwhich is a good model for unopposed growth, like the US population‚Äôs growth at the time.\n\n‚Ä¢ Adolphe Quetelet (1796 - 1874), Belgian astronomer turned statistician,\nknew it produced impossible values and asked his pupil Pierr...', 'earl and Reed‚Äôs first paper in 1920, but they didn‚Äôt acknowledge him in their second paper, and only in a footnote in a third paper (by Pearl) in 1922. They cited him in 1923, but didn‚Äôt use his terminology and called his papers ‚Äúlong since forgotten.‚Äù The name logistic was revived by Yule, in a presidential address to the Royal Statistical Society in 1925.\n\nThere was a lot of debate over whether the logistic function could replace the cdf of

test progress:  30%|‚ñà‚ñà‚ñà       | 9/30 [05:50<13:07, 37.50s/it]

SEREVER ERROR: 502 for question: Explain why Laplace smoothing is used in Na√Øve Bayes and how it works.


test progress: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [19:46<00:00, 39.53s/it]


 Sucess! Results saved in file: results_chunk_512_overlap_100_voyage3lite_gptmini_topk10_dist045.xlsx

≈öredni czas odpowiedzi: 36.51s


Unnamed: 0,question,answer,latency,contexts
0,What is the logistic function and how did it o...,The logistic function is a mathematical model ...,34.39,[Logistic Regression\n\nMIT 15.097 Course Note...
1,"In logistic regression, why do we model the lo...","In logistic regression, we model the log odds ...",26.49,[Logistic Regression\n\nMIT 15.097 Course Note...
2,How does maximum likelihood estimation work fo...,Maximum likelihood estimation (MLE) is a metho...,46.07,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
3,What are the key advantages of decision trees ...,The course notes highlight several key advanta...,27.2,[Decision Trees\n\nMIT 15.097 Course Notes Cyn...
4,Explain how Information Gain is calculated for...,Certainly! Let's break down the concept of Inf...,43.46,"[-section risk, etc.\n\nHow to build a decisio..."


# KONFIGURACJA
- MODEL DO EMBEDDING√ìW: voyage-3-lite
- chunk overlap: 100
- chunk size: 512
- LLM: Llama 3.3 70B
- TEMPERTAURE 0.1
- top_k = 10
- rag distance threshold = 0.45

In [7]:
RUN_LABEL = "chunk_512_overlap_100_voyage3lite_llama3370B_topk10_dist045" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

if df_results is not None:
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

Test Start 'chunk_512_overlap_100_voyage3lite_llama3370B_topk10_dist045'
Results will be saved in file: results_chunk_512_overlap_100_voyage3lite_llama3370B_topk10_dist045.json


test progress:   0%|          | 0/30 [00:00<?, ?it/s]

['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer‚Äôs ‚ÄúThe Origin of Logistic Regression‚Äù\n\nOrigins: 19th Century.\n\n‚Ä¢ Studying growth of populations and the course of chemical reactions using\n\nd W(t) = Œ≤W(t)\ndt ‚áí\nW(t) = AeŒ≤t\n\nwhich is a good model for unopposed growth, like the US population‚Äôs growth at the time.\n\n‚Ä¢ Adolphe Quetelet (1796 - 1874), Belgian astronomer turned statistician,\nknew it produced impossible values and asked his pupil Pierr...', 'earl and Reed‚Äôs first paper in 1920, but they didn‚Äôt acknowledge him in their second paper, and only in a footnote in a third paper (by Pearl) in 1922. They cited him in 1923, but didn‚Äôt use his terminology and called his papers ‚Äúlong since forgotten.‚Äù The name logistic was revived by Yule, in a presidential address to the Royal Statistical Society in 1925.\n\nThere was a lot of debate over whether the logistic function could replace the cdf of

test progress: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [41:01<00:00, 82.04s/it]

 Sucess! Results saved in file: results_chunk_512_overlap_100_voyage3lite_llama3370B_topk10_dist045.xlsx

≈öredni czas odpowiedzi: 17.03s





Unnamed: 0,question,answer,latency,contexts
0,What is the logistic function and how did it o...,The logistic function originated in the 19th c...,16.73,[Logistic Regression\n\nMIT 15.097 Course Note...
1,"In logistic regression, why do we model the lo...","In logistic regression, we model the log odds ...",27.86,[Logistic Regression\n\nMIT 15.097 Course Note...
2,How does maximum likelihood estimation work fo...,Maximum likelihood estimation is a method used...,18.79,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
3,What are the key advantages of decision trees ...,"According to the course notes, the key advanta...",19.13,[Decision Trees\n\nMIT 15.097 Course Notes Cyn...
4,Explain how Information Gain is calculated for...,I'd be happy to explain how Information Gain i...,15.51,"[-section risk, etc.\n\nHow to build a decisio..."


# KONFIGURACJA
- MODEL DO EMBEDDING√ìW: voyage-3-lite
- chunk overlap: 100
- chunk size: 512
- LLM: gemini-2.0-flash
- TEMPERTAURE 0.1
- top_k = 10
- rag distance threshold = 0.45

In [10]:
RUN_LABEL = "chunk_512_overlap_100_voyage3lite_gemini20_flash_topk10_dist045" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

if df_results is not None:
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

Test Start 'chunk_512_overlap_100_voyage3lite_gemini20_flash_topk10_dist045'
Results will be saved in file: results_chunk_512_overlap_100_voyage3lite_gemini20_flash_topk10_dist045.json


test progress:   0%|          | 0/30 [00:00<?, ?it/s]

['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer‚Äôs ‚ÄúThe Origin of Logistic Regression‚Äù\n\nOrigins: 19th Century.\n\n‚Ä¢ Studying growth of populations and the course of chemical reactions using\n\nd W(t) = Œ≤W(t)\ndt ‚áí\nW(t) = AeŒ≤t\n\nwhich is a good model for unopposed growth, like the US population‚Äôs growth at the time.\n\n‚Ä¢ Adolphe Quetelet (1796 - 1874), Belgian astronomer turned statistician,\nknew it produced impossible values and asked his pupil Pierr...', 'earl and Reed‚Äôs first paper in 1920, but they didn‚Äôt acknowledge him in their second paper, and only in a footnote in a third paper (by Pearl) in 1922. They cited him in 1923, but didn‚Äôt use his terminology and called his papers ‚Äúlong since forgotten.‚Äù The name logistic was revived by Yule, in a presidential address to the Royal Statistical Society in 1925.\n\nThere was a lot of debate over whether the logistic function could replace the cdf of

test progress: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [19:06<00:00, 38.22s/it]


 Sucess! Results saved in file: results_chunk_512_overlap_100_voyage3lite_gemini20_flash_topk10_dist045.xlsx

≈öredni czas odpowiedzi: 33.21s


Unnamed: 0,question,answer,latency,contexts
0,What is the logistic function and how did it o...,"Based on the course materials, the logistic fu...",36.53,[Logistic Regression\n\nMIT 15.097 Course Note...
1,"In logistic regression, why do we model the lo...","Okay, that's a great question! Let's break dow...",18.29,[Logistic Regression\n\nMIT 15.097 Course Note...
2,How does maximum likelihood estimation work fo...,"Okay, let's break down how maximum likelihood ...",49.39,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
3,What are the key advantages of decision trees ...,"Based on the course notes, here are the key ad...",33.23,[Decision Trees\n\nMIT 15.097 Course Notes Cyn...
4,Explain how Information Gain is calculated for...,"Okay, I can explain how Information Gain is ca...",30.87,"[-section risk, etc.\n\nHow to build a decisio..."


# KONFIGURACJA
- MODEL DO EMBEDDING√ìW: voyage-3-lite
- chunk overlap: 100
- chunk size: 512
- LLM: deepseek/deepseek-v3.2
- TEMPERTAURE 0.1
- top_k = 10
- rag distance threshold = 0.45

In [12]:
RUN_LABEL = "chunk_512_overlap_100_voyage3lite_deepseek_v3_2_topk10_dist045" 

df_results = run_notebook_test(
    run_label=RUN_LABEL,
    api_url=API_URL,
    course_id=TEST_COURSE_ID,
    input_file=INPUT_FILE,
    moodle_token=MOODLE_TOKEN
)

if df_results is not None:
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())

Test Start 'chunk_512_overlap_100_voyage3lite_deepseek_v3_2_topk10_dist045'
Results will be saved in file: results_chunk_512_overlap_100_voyage3lite_deepseek_v3_2_topk10_dist045.json


test progress:   0%|          | 0/30 [00:00<?, ?it/s]

['Logistic Regression\n\nMIT 15.097 Course Notes Cynthia Rudin\n\nThanks to Ashia Wilson Credit: J.S. Cramer‚Äôs ‚ÄúThe Origin of Logistic Regression‚Äù\n\nOrigins: 19th Century.\n\n‚Ä¢ Studying growth of populations and the course of chemical reactions using\n\nd W(t) = Œ≤W(t)\ndt ‚áí\nW(t) = AeŒ≤t\n\nwhich is a good model for unopposed growth, like the US population‚Äôs growth at the time.\n\n‚Ä¢ Adolphe Quetelet (1796 - 1874), Belgian astronomer turned statistician,\nknew it produced impossible values and asked his pupil Pierr...', 'earl and Reed‚Äôs first paper in 1920, but they didn‚Äôt acknowledge him in their second paper, and only in a footnote in a third paper (by Pearl) in 1922. They cited him in 1923, but didn‚Äôt use his terminology and called his papers ‚Äúlong since forgotten.‚Äù The name logistic was revived by Yule, in a presidential address to the Royal Statistical Society in 1925.\n\nThere was a lot of debate over whether the logistic function could replace the cdf of

test progress:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 22/30 [21:52<09:02, 67.87s/it]


 Timeout...


test progress:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 24/30 [27:47<12:20, 123.44s/it]

SEREVER ERROR: 502 for question: How do the origins of logistic regression relate to population modeling, and how was this later applied to classification problems?


test progress: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [31:39<00:00, 63.33s/it] 


 Sucess! Results saved in file: results_chunk_512_overlap_100_voyage3lite_deepseek_v3_2_topk10_dist045.xlsx

≈öredni czas odpowiedzi: 61.29s


Unnamed: 0,question,answer,latency,contexts
0,What is the logistic function and how did it o...,"Based on the provided course materials, here's...",28.23,[Logistic Regression\n\nMIT 15.097 Course Note...
1,"In logistic regression, why do we model the lo...",Excellent question! This gets to the very hear...,41.03,[Logistic Regression\n\nMIT 15.097 Course Note...
2,How does maximum likelihood estimation work fo...,"Based on the provided course materials, here's...",35.14,[http://ocw.mit.edu\n\n15.097 Prediction: Mach...
3,What are the key advantages of decision trees ...,"Based on the provided course materials, the ke...",35.8,[Decision Trees\n\nMIT 15.097 Course Notes Cyn...
4,Explain how Information Gain is calculated for...,Excellent questions! Let's break these down cl...,61.95,"[-section risk, etc.\n\nHow to build a decisio..."


#### 1.w pliku chroma-clean up, funkcjƒÖ print_chroma sprawdzamy czy dana konfigracja: rozmiar chunku, chunk overlap i model do embeddingow jest juz w chromie, jesli nie to ustwiamy odpowienie zmienne na railway i wywolujemy ingest

#### 2.jesli mamy dana kolekcje juz na chromie to ustaiwamy intresujace nas parametry na railway do testowania(sprawdz czy takiego pliku juz nie ma zeby nie powtarzaƒá) i zgodnie z ustawionymi parametrami nadajmy label: 

## na przyk≈Çad : RUN_LABEL = "chunk_512_overlap_100_voyage3lite_deepseek_v3_2_topk10_dist045" 

RUN_LABEL = "chunk_512_overlap_100_voyage3lite_deepseek_v3_2_topk10_dist045"   

df_results = run_notebook_test(  
    run_label=RUN_LABEL,  
    api_url=API_URL,  
    course_id=TEST_COURSE_ID,  
    input_file=INPUT_FILE,  
    moodle_token=MOODLE_TOKEN  
)

if df_results is not None:  
    print(f"\n≈öredni czas odpowiedzi: {df_results['latency'].mean():.2f}s")  
    display(df_results[['question', 'answer', 'latency', 'contexts']].head())  