# Introduction
This notebook is used to experiment with different prompts for the question generation model that uses the chatGPT API from open.ai.
First a helper function is created to call the API with the provided prompt. For this prompt different techniques are tried out and evaluated to find the best performing prompt template.

In [1]:
import os
from dotenv import load_dotenv
import openai
from src.datageneration.extractor import extract_text_without_image
from pypdfium2 import PdfDocument
import pandas as pd
from sklearn.model_selection import train_test_split
from src.evaluation.eval_main import Metrics
import nltk
import time

nltk.download('wordnet')

load_dotenv()
openai.api_key = os.getenv("OPENAI-API-KEY")

def chat_gpt(prompt, temperature=0):
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=temperature
    )
    return completion.choices[0].message.content

[nltk_data] Downloading package wordnet to /Users/I516258/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


First we prepare the data for the evaluation.

In [27]:
data_df = pd.read_csv("../../../datasets/master/Master_Labeling_cleaned.csv", delimiter=';') # Read in csv
data_df

Unnamed: 0,PDF-name,Topic,Page number,Marked for processing,Includes image data,Includes formula,Question 1,Question 2,Question 3,Title of the slide,Type of question,Comment
0,ase_combined.pdf,Agile Software Engineering,1,No,No,No,,,,,,
1,ase_combined.pdf,Agile Software Engineering,2,No,No,No,,,,,,
2,ase_combined.pdf,Agile Software Engineering,3,No,No,No,,,,,,
3,ase_combined.pdf,Agile Software Engineering,4,No,No,No,,,,,,
4,ase_combined.pdf,Agile Software Engineering,5,No,No,No,,,,Cost of Software Failures,,
...,...,...,...,...,...,...,...,...,...,...,...,...
3942,it-security-all-slides_no_duplicates.pdf,IT-Security,592,Yes,Yes,No,What is the intuition of Differential Privacy?,,,Differential Privacy Intuition,,
3943,it-security-all-slides_no_duplicates.pdf,IT-Security,593,Yes,Yes,Yes,What is the definition of Differential Privacy?,,,Differential Privacy Definition (Simplified Ve...,,
3944,it-security-all-slides_no_duplicates.pdf,IT-Security,594,Yes,Yes,Yes,What is the parameter 𝜖 and the composition t...,,,On the Parameter 𝜺,,
3945,it-security-all-slides_no_duplicates.pdf,IT-Security,595,Yes,Yes,Yes,What is the Privacy Budget? What are challenges?,,,Privacy Budget,,


In [3]:
len(data_df)

3947

In [4]:
data_df["PDF-name"].unique() # check which pdfs are needed

array(['ase_combined.pdf', 'corporate_knowledge_management_combined.pdf',
       'decision_support_combined.pdf',
       'information_retrieval_combined.pdf', 'large_scale_combined.pdf',
       'leadership_combined.pdf', 'machine_learning_combined.pdf',
       'network_science_combined.pdf',
       'it-security-all-slides_no_duplicates.pdf'], dtype=object)

In [28]:
data_df = data_df.dropna(subset="Question 1").drop(columns=["Marked for processing", "Includes image data", "Includes formula", "Type of question", "Title of the slide", "Comment"]) # filter to include only the samples that have at least one questino, drop unnecessary slides
data_df

Unnamed: 0,PDF-name,Topic,Page number,Question 1,Question 2,Question 3
5,ase_combined.pdf,Agile Software Engineering,6,What is the definition of Verification and Val...,What are the objectives of Verification and Va...,
6,ase_combined.pdf,Agile Software Engineering,7,What are the goals of verification and validat...,On what does verification and validation depend?,
7,ase_combined.pdf,Agile Software Engineering,8,What is static and what is dynamic V&V?,How does static V&V differentiate from dynamic...,
8,ase_combined.pdf,Agile Software Engineering,9,How the relative cost of fixing defects behave...,,
9,ase_combined.pdf,Agile Software Engineering,10,What is model-driven development?,,
...,...,...,...,...,...,...
3942,it-security-all-slides_no_duplicates.pdf,IT-Security,592,What is the intuition of Differential Privacy?,,
3943,it-security-all-slides_no_duplicates.pdf,IT-Security,593,What is the definition of Differential Privacy?,,
3944,it-security-all-slides_no_duplicates.pdf,IT-Security,594,What is the parameter 𝜖 and the composition t...,,
3945,it-security-all-slides_no_duplicates.pdf,IT-Security,595,What is the Privacy Budget? What are challenges?,,


In [11]:
from src.image_to_text.data_preprocessing.util import extract_text
import os

# Extract the text from the pdf slides and associate them with a unique id, which consists of the pdf name and the page number of the text
extracted_content = pd.DataFrame(columns=['PDF-Name', 'Pagenumber', 'Page-Text', 'OCR-text'])
pdf_paths = [os.path.join("../../../datasets/master/slides/", pdf_slides) for pdf_slides in data_df["PDF-name"].unique()]
for pdf_path in pdf_paths:

    pdf_name = os.path.split(pdf_path)[-1]
    #allowed_page_numbers = data_df.loc[data_df["PDF-Name"] == pdf_name]["Page Number"].to_list() # page numbers of the current pdf in question which should be kept

    # extract text
    pdf = PdfDocument(pdf_path)
    text = extract_text_without_image(pdf.raw)

    for i in text:
        extracted_content = extracted_content.append({'PDF-Name': pdf_name,'Pagenumber': i[0], 'Page-Text': i[1], 'OCR-text': i[2]}, ignore_index=True)

    # free memory to prevent kernel dying
    del text
    del pdf

100%|██████████| 244/244 [01:58<00:00,  2.07it/s]
  extracted_content = extracted_content.append({'PDF-Name': pdf_name,'Pagenumber': i[0], 'Page-Text': i[1], 'OCR-text': i[2]}, ignore_index=True)
  extracted_content = extracted_content.append({'PDF-Name': pdf_name,'Pagenumber': i[0], 'Page-Text': i[1], 'OCR-text': i[2]}, ignore_index=True)
  extracted_content = extracted_content.append({'PDF-Name': pdf_name,'Pagenumber': i[0], 'Page-Text': i[1], 'OCR-text': i[2]}, ignore_index=True)
  extracted_content = extracted_content.append({'PDF-Name': pdf_name,'Pagenumber': i[0], 'Page-Text': i[1], 'OCR-text': i[2]}, ignore_index=True)
  extracted_content = extracted_content.append({'PDF-Name': pdf_name,'Pagenumber': i[0], 'Page-Text': i[1], 'OCR-text': i[2]}, ignore_index=True)
  extracted_content = extracted_content.append({'PDF-Name': pdf_name,'Pagenumber': i[0], 'Page-Text': i[1], 'OCR-text': i[2]}, ignore_index=True)
  extracted_content = extracted_content.append({'PDF-Name': pdf_name,'Page

FileNotFoundError: /Users/I516258/Documents/GitHub/ankinator-flashcard-models/datasets/master/slides/it-security-all-slides_no_duplicates.pdf

In [29]:
file_path = "../../../datasets/master/extracted_text_content.csv"

# Save the DataFrame to the specified folder
#extracted_content.to_csv(file_path, index=False)

#Load the content from the folder
extracted_content = pd.read_csv(file_path)

extracted_content

Unnamed: 0,PDF-Name,Pagenumber,Page-Text,OCR-text
0,ase_combined.pdf,0,1\r\nSoftware Engineering Group 1\r\nAdvanced ...,Advanced Software\nEngineering\n\n1. Introduct...
1,ase_combined.pdf,1,2\r\nSoftware Engineering Group 2\r\nSoftware ...,Software Drives our Modern Civilisation\n\nwil...
2,ase_combined.pdf,2,3\r\nSoftware Engineering Group 3\r\nSpectacul...,Spectacular Software Failures (1/2)\n\nTHERAC-...
3,ase_combined.pdf,3,4\r\nSoftware Engineering Group 4\r\nSpectacul...,Spectacular Software Failures (2/2)\n\nNorthea...
4,ase_combined.pdf,4,5\r\nSoftware Engineering Group 5\r\nCost of S...,Cost of Software Failures\n\n= 2002: NIST repo...
...,...,...,...,...
3942,it-security-all-slides_no_duplicates.pdf,591,Differential Privacy\r\nIntuition\r\n• Assume ...,te\nGee 5 UNIVERSITY\n\nDifferential Privacy 8...
3943,it-security-all-slides_no_duplicates.pdf,592,Differential Privacy\r\nDefinition (Simplified...,ol\nDifferential Privacy Be) OF MANNHEIM\n\n— ...
3944,it-security-all-slides_no_duplicates.pdf,593,On the Parameter \r\nPr ଵ = ≤ ఢ ⋅ Pr ଶ = \r...,te\n\nSeta UNIVERSITY\nOn the Parameter €-2o B...
3945,it-security-all-slides_no_duplicates.pdf,594,Privacy Budget\r\n• Defines an upper bound on ...,Privacy Budget\n\n* Defines an upper bound on ...


In [30]:
# merge both dataframes to delete unwanted rows
merged_df = data_df.merge(extracted_content, left_index=True, right_index=True, how='left')
merged_df = merged_df.drop(columns=["PDF-Name", "Pagenumber"])
merged_df.reset_index(inplace=True, drop=True)
merged_df

Unnamed: 0,PDF-name,Topic,Page number,Question 1,Question 2,Question 3,Page-Text,OCR-text
0,ase_combined.pdf,Agile Software Engineering,6,What is the definition of Verification and Val...,What are the objectives of Verification and Va...,,6\r\nSoftware Engineering Group 6\r\n■ Verific...,Verification and Validation (V&V)\n\n= Verific...
1,ase_combined.pdf,Agile Software Engineering,7,What are the goals of verification and validat...,On what does verification and validation depend?,,7\r\nSoftware Engineering Group 7\r\nV&V Goals...,V&V Goals\n\n= Verification and validation sho...
2,ase_combined.pdf,Agile Software Engineering,8,What is static and what is dynamic V&V?,How does static V&V differentiate from dynamic...,,8\r\nSoftware Engineering Group 8\r\nStatic ve...,Static versus Dynamic V&V\n\n= Software inspec...
3,ase_combined.pdf,Agile Software Engineering,9,How the relative cost of fixing defects behave...,,,9\r\nSoftware Engineering Group 9\r\nRelative ...,Relative Cost of Fixing Defects\n\n=u The rela...
4,ase_combined.pdf,Agile Software Engineering,10,What is model-driven development?,,,10\r\nSoftware Engineering Group 10\r\nModel-D...,Model-Driven Development\n\nPlaces models at t...
...,...,...,...,...,...,...,...,...
2177,it-security-all-slides_no_duplicates.pdf,IT-Security,592,What is the intuition of Differential Privacy?,,,Differential Privacy\r\nIntuition\r\n• Assume ...,te\nGee 5 UNIVERSITY\n\nDifferential Privacy 8...
2178,it-security-all-slides_no_duplicates.pdf,IT-Security,593,What is the definition of Differential Privacy?,,,Differential Privacy\r\nDefinition (Simplified...,ol\nDifferential Privacy Be) OF MANNHEIM\n\n— ...
2179,it-security-all-slides_no_duplicates.pdf,IT-Security,594,What is the parameter 𝜖 and the composition t...,,,On the Parameter \r\nPr ଵ = ≤ ఢ ⋅ Pr ଶ = \r...,te\n\nSeta UNIVERSITY\nOn the Parameter €-2o B...
2180,it-security-all-slides_no_duplicates.pdf,IT-Security,595,What is the Privacy Budget? What are challenges?,,,Privacy Budget\r\n• Defines an upper bound on ...,Privacy Budget\n\n* Defines an upper bound on ...


In [31]:
# Split the DataFrame into train, validation, and test sets
master_train_val, master_test = train_test_split(merged_df, test_size=0.2, random_state=42)

print("Lenght of test set: ", len(master_test))
master_test

Lenght of test set:  437


Unnamed: 0,PDF-name,Topic,Page number,Question 1,Question 2,Question 3,Page-Text,OCR-text
282,corporate_knowledge_management_combined.pdf,Corporate Knowledge Management,253,Which views of a data cube can be used for dif...,,,Multidimensional Data Structure -\r\nCube\r\nL...,7. Knowledge retrieval in DW through Business ...
479,decision_support_combined.pdf,Decision Support,198,Explain Structural Properties.,,,Structural Properties\r\nLet S be an SPN and l...,Structural Properties\n\nLet S be an SPN and l...
1317,leadership_combined.pdf,Leadership and Motivation,132,What are the four component that form authenti...,,,Four components that form AL\r\n➢ Self-awarene...,ee UNIVERSITY\n\nFour components that form AL ...
1455,machine_learning_combined.pdf,Machine Learning,170,Explain empirical risk minimization.,,,Empirical risk minimization with log loss\r\n•...,Empirical risk minimization with log loss\n\ne...
507,decision_support_combined.pdf,Decision Support,308,What is the Framing Effect?,What is the Anchoring Effect?,,Universität Mannheim – Prof. Dr. Heiner Stucke...,More Effects\nee\n\n— framing effect:\n\n¢ Peo...
...,...,...,...,...,...,...,...,...
1859,it-security-all-slides_no_duplicates.pdf,IT-Security,164,What are general advices regarding potentially...,,,General Advices\r\n• Companies etc. usually do...,na\n—— Scho\nnm\n\nte\nGeneral Advices Be OP M...
2039,it-security-all-slides_no_duplicates.pdf,IT-Security,403,How is the ICV calculated in the AH Protocol i...,,,Authentication Header (AH) Protocol\r\nTranspo...,ol\nAuthentication Header (AH) Protocol SB OF ...
548,information_retrieval_combined.pdf,Information Retrieval,67,Why should posting sites be sorted?,,,"21\r\nIR & WS, Lecture 2: Boolean Retrieval an...",The merge\neS\n\n= If posting lists are sorted...
634,information_retrieval_combined.pdf,Information Retrieval,172,How does the ranking work when relevance judge...,,,"30\r\nIR & WS, Lecture 5: Probabilistic Inform...","Binary independence model\nEe\n"" Scenario #2: ..."


In [33]:
# Reset the index of the DataFrame
master_test.reset_index(inplace=True, drop=True)

# this stores now the possible input for the chatGPT model
content = master_test[["Topic", "Page-Text", "OCR-text"]]

# this stores the reference
references = master_test[["Question 1", "Question 2", "Question 3"]]

references.to_csv("./refs.csv")

In [34]:
len(references)

437

In [35]:
content

Unnamed: 0,Topic,Page-Text,OCR-text
0,Corporate Knowledge Management,Multidimensional Data Structure -\r\nCube\r\nL...,7. Knowledge retrieval in DW through Business ...
1,Decision Support,Structural Properties\r\nLet S be an SPN and l...,Structural Properties\n\nLet S be an SPN and l...
2,Leadership and Motivation,Four components that form AL\r\n➢ Self-awarene...,ee UNIVERSITY\n\nFour components that form AL ...
3,Machine Learning,Empirical risk minimization with log loss\r\n•...,Empirical risk minimization with log loss\n\ne...
4,Decision Support,Universität Mannheim – Prof. Dr. Heiner Stucke...,More Effects\nee\n\n— framing effect:\n\n¢ Peo...
...,...,...,...
432,IT-Security,General Advices\r\n• Companies etc. usually do...,na\n—— Scho\nnm\n\nte\nGeneral Advices Be OP M...
433,IT-Security,Authentication Header (AH) Protocol\r\nTranspo...,ol\nAuthentication Header (AH) Protocol SB OF ...
434,Information Retrieval,"21\r\nIR & WS, Lecture 2: Boolean Retrieval an...",The merge\neS\n\n= If posting lists are sorted...
435,Information Retrieval,"30\r\nIR & WS, Lecture 5: Probabilistic Inform...","Binary independence model\nEe\n"" Scenario #2: ..."


# Prompt Engineering
Having prepared everything it is possible to start with prompt engineering. It is started with simple prompts and continued with more complex prompts.

| **#** | **Prompt**                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             | **Techniques** |
|-------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------|
| 1     | Generate a question in a flashcard style for the content delimited by triple backticks. ```{row['Page-Text']}```                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |                |
| 2     | Generate a question in a flashcard style for the content delimited by triple backticks. Take into account how exam questions are normally formulated and formulate the question accordingly. ```{row['Page-Text']}```                                                                                                                                                                                                                                                                                                                                                                                                  |                |
| 3     | Generate a question in a flashcard style for the content delimited by triple backticks. When there are examples do not focus on their specifics but try to cover the overarching concept or idea. ```{row['Page-Text']}```                                                                                                                                                                                                                                                                                                                                                                                             |                |
| 4     | Generate a question in a flashcard style for the content delimited by triple backticks. Focus on concepts, definitions and key-words. Take into account how exam questions are normally formulated and formulate the question accordingly. When there are examples do not focus on their specifics but try to cover the overarching concept or idea. ```{row['Page-Text']}```                                                                                                                                                                                                                                          |                |
| 5     | You are a bot to support in the generation of flashcards from lecture slides. You are provided with two inputs. The first input delimited by triple backticks is the text that is copied from the slides. The second input delimited by triple quotation marks is retrieved with an OCR tool to extract all text from a slide. Follow the below process: 1. Step: Compare the first input with the second input to retrieve the relevant information 2. Step: Generate a question for this information in a flashcard style Only return the generated question. ```{row['Page-Text']}``` \"\"\"{row['OCR-text']}\"\"\" |                |
| 6     | Generate a question in a flashcard style for the content delimited by triple backticks. ```{row['Page-Text']}``` Follow a similar style for generating the question as in this two examples: 1) Input: {goldstandard_train_val.loc[0, 'Page-Text']}, question: {goldstandard_train_val.loc[0, 'Question']} 2) Input: {goldstandard_train_val.loc[1, 'Page-Text']}, question: {goldstandard_train_val.loc[1, 'Question']}                                                                                                                                                                                               |                |
| 7     | Generate a question in a flashcard style for the content delimited by triple backticks. Take into account how exam questions are normally formulated and formulate the question accordingly. ```{row['Page-Text']}``` Follow a similar style for generating the question as in this two examples: 1) Input: {goldstandard_train_val.loc[0, 'Page-Text']}, question: {goldstandard_train_val.loc[0, 'Question']} 2) Input: {goldstandard_train_val.loc[1, 'Page-Text']}, question: {goldstandard_train_val.loc[1, 'Question']}                                                                                          |                |

## Zero-Shot Prompting

In [19]:
refs = []
for i, row in master_test.iterrows():
    refs.append((i, [row['Question 1'], '' if pd.isna(row['Question 2']) else row['Question 2'], '' if pd.isna(row['Question 3']) else row['Question 3']]))
refs

[(0,
  ['Which views of a data cube can be used for different organizational roles? ',
   '',
   '']),
 (1, ['Explain Structural Properties.', '', '']),
 (2,
  ['What are the four component that form authentic leadership (AL)?',
   '',
   '']),
 (3, ['Explain empirical risk minimization.', '', '']),
 (4, ['What is the Framing Effect?', 'What is the Anchoring Effect?', '']),
 (5, ['What is the terminology to represent preferences?', '', '']),
 (6,
  ['Which conditions does a probability Distribution satisfy?',
   'What is a probability distribution?',
   '']),
 (7, ['How does WEP aim to provide integrity?', '', '']),
 (8, ['What is pretexting?', '', '']),
 (9, ['What are the three main Challenges of the Bitcoin Network?', '', '']),
 (10, ['What is the attack strategy of attackers?', '', '']),
 (11, ['What are generic component models?', '', '']),
 (12, ['What is the collection frequency?', '', '']),
 (13, ['How is a Virus defined?', '', '']),
 (14, ['What is the cosine similarity?', '',

In [38]:
model_results = []
# the chatGPT API is called and results are stored
for index, row in content.iterrows():
    if index >=0:
        prompt = f"""
        Generate a question in a flashcard style for the content delimited by triple backticks.
        ```{row['Page-Text']}```
        """
        question = chat_gpt(prompt)
        model_results.append((index, [question]))
        print("Generated question for index ", index, ": ", question)
        time.sleep(1)


print(model_results)

Generated question for index  0 :  What are the different views that can be obtained from a multidimensional data structure?
Generated question for index  1 :  What are the structural properties of a Sum-Product Network (SPN)?
Generated question for index  2 :  What are the four components that form Authentic Leadership (AL)?
Generated question for index  3 :  What is the objective function used in empirical risk minimization with log loss?
Generated question for index  4 :  What is the framing effect and how does it influence people's preferences?
Generated question for index  5 :  What is the topic of slide 17 in Prof. Dr. Heiner Stuckenschmidt's lecture at Universität Mannheim?
Generated question for index  6 :  What is the topic of slide 16 in Prof. Dr. Heiner Stuckenschmidt's lecture at Universität Mannheim?
Generated question for index  7 :  What is the purpose of using a Cyclic Redundancy Code (CRC) in WEP for security?
Generated question for index  8 :  What is pretexting and h

In [15]:
# Save model_results to disk
#df_model_results = pd.DataFrame(model_results, columns=["Index", "Question"])
#df_model_results.to_csv("./model_results/master/prompt1.csv", index=False)

# restore model_results
df_model_results = pd.read_csv("./model_results/master/prompt1.csv")
model_results = [(row['Index'], [row['Question']]) for _, row in df_model_results.iterrows()]

In [18]:
# Performance is evaluated
metrics = Metrics(save_to_file=True)
result = pd.DataFrame(
    metrics.evaluate(model_output=model_results, references=refs),
    index=["ChatGPT-Prompt1"]
)
print(result)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/I516258/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


                 PUNCTUATION  TYPOGRAPHY  CASING  TYPOS  MISC  REDUNDANCY  \
ChatGPT-Prompt1          879         103       1      1     1           1   

                 COLLOCATIONS  rouge1_fmeasure  rouge1_precision  \
ChatGPT-Prompt1             1         0.400426           0.34423   

                 rouge1_recall  ...  min_r    avg_f1    max_f1  min_f1  \
ChatGPT-Prompt1       0.538302  ...    0.0  0.325862  0.823529     0.0   

                 avg_cos_sim  max_cos_sim  min_cos_sim  avg_sem_meteor  \
ChatGPT-Prompt1     0.537373     0.870126    -0.036201        0.372972   

                 max_sem_meteor  min_sem_meteor  
ChatGPT-Prompt1        0.876779             0.0  

[1 rows x 34 columns]


In [19]:
model_results = []
# the chatGPT API is called and results are stored
for index, row in content.iterrows():
    if index >=0:
        prompt = f"""
        Generate a question in a flashcard style for the content delimited by triple backticks.
        Take into account how exam questions are normally formulated and formulate the question accordingly.
        ```{row['Page-Text']}```
        """
        question = chat_gpt(prompt)
        model_results.append((index, [question]))
        print("Generated question for index ", index, ": ", question)
        time.sleep(1)

print(model_results)

Generated question for index  0 :  What is the Controller's view in the context of multidimensional data structure?
Generated question for index  1 :  What are the structural properties of a complete and decomposable Sum-Product Network (SPN)?
Generated question for index  2 :  What are the four components that form Authentic Leadership (AL)?
Generated question for index  3 :  What is the objective function used in empirical risk minimization with log loss for discriminative classifiers?
Generated question for index  4 :  What are the framing effect and the anchoring effect?
Generated question for index  5 :  What are the constraints on preferences according to Prof. Dr. Heiner Stuckenschmidt's lecture at Universität Mannheim?
Generated question for index  6 :  What is the topic discussed on slide 16 of Prof. Dr. Heiner Stuckenschmidt's lecture at Universität Mannheim?
Generated question for index  7 :  What is the purpose of using a Cyclic Redundancy Code (CRC) in the context of secur

In [20]:
# Save model_results to disk
df_model_results = pd.DataFrame(model_results, columns=["Index", "Question"])
df_model_results.to_csv("./model_results/master/prompt2.csv", index=False)

# # restore model_results
# df_model_results = pd.read_csv("./model_results/prompt2.csv")
# model_results = [(row["Index"], [row["Question"]]) for _, row in df_model_results.iterrows()]

In [21]:
model_results

[(0,
  ["What is the Controller's view in the context of multidimensional data structure?"]),
 (1,
  ['What are the structural properties of a complete and decomposable Sum-Product Network (SPN)?']),
 (2, ['What are the four components that form Authentic Leadership (AL)?']),
 (3,
  ['What is the objective function used in empirical risk minimization with log loss for discriminative classifiers?']),
 (4, ['What are the framing effect and the anchoring effect?']),
 (5,
  ["What are the constraints on preferences according to Prof. Dr. Heiner Stuckenschmidt's lecture at Universität Mannheim?"]),
 (6,
  ["What is the topic discussed on slide 16 of Prof. Dr. Heiner Stuckenschmidt's lecture at Universität Mannheim?"]),
 (7,
  ['What is the purpose of using a Cyclic Redundancy Code (CRC) in the context of security?']),
 (8,
  ['What is pretexting and how can attackers use it to deceive their victims?']),
 (9,
  ['What are some challenges in IT-Security related to representing wallets and tra

In [22]:
# Performance is evaluated
metrics = Metrics(save_to_file=True)
result = pd.DataFrame(
    metrics.evaluate(model_output=model_results, references=refs),
    index=["ChatGPT-Prompt2"]
)
print(result)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/I516258/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


                 PUNCTUATION  REDUNDANCY  TYPOGRAPHY  GRAMMAR  MISC  \
ChatGPT-Prompt2           66           4          31        1     1   

                 COLLOCATIONS  rouge1_fmeasure  rouge1_precision  \
ChatGPT-Prompt2             1         0.380047          0.314394   

                 rouge1_recall  rouge2_fmeasure  ...  min_r   avg_f1  max_f1  \
ChatGPT-Prompt2       0.552623         0.205541  ...    0.0  0.40836     1.0   

                 min_f1  avg_cos_sim  max_cos_sim  min_cos_sim  \
ChatGPT-Prompt2     0.0     0.653568     0.997026    -0.016295   

                 avg_sem_meteor  max_sem_meteor  min_sem_meteor  
ChatGPT-Prompt2        0.447554        0.999772             0.0  

[1 rows x 33 columns]


In [None]:
model_results = []
# the chatGPT API is called and results are stored
for index, row in content.iterrows():
    if (index == 3):
        break
    prompt = f"""
    Generate a question in a flashcard style for the content delimited by triple backticks.
    When there are examples do not focus on their specifics but try to cover the overarching concept or idea.
    ```{row['Page-Text']}```
    """
    model_results.append((index, [chat_gpt(prompt)]))

print(model_results)
# Performance is evaluated
metrics = Metrics(save_to_file=True)
result = pd.DataFrame(
    metrics.evaluate(model_output=model_results, references=refs[:3]),
    index=["ChatGPT-Prompt3"]
)
result

In [None]:
model_results = []
# the chatGPT API is called and results are stored
for index, row in content.iterrows():
    if (index == 3):
        break
    prompt = f"""
    Generate a question in a flashcard style for the content delimited by triple backticks.
    Focus on concepts, definitions and key-words.
    Take into account how exam questions are normally formulated and formulate the question accordingly.
    When there are examples do not focus on their specifics but try to cover the overarching concept or idea.
    ```{row['Page-Text']}```
    """
    model_results.append((index, [chat_gpt(prompt)]))

print(model_results)
# Performance is evaluated
metrics = Metrics(save_to_file=True)
result = pd.DataFrame(
    metrics.evaluate(model_output=model_results, references=refs[:3]),
    index=["ChatGPT-Prompt4"]
)
result

In [None]:
model_results = []
# the chatGPT API is called and results are stored
for index, row in content.iterrows():
    if (index == 3):
        break
    prompt = f"""
    You are a bot to support in the generation of flashcards from lecture slides.
    You are provided with two inputs. The first input delimited by triple backticks is the text that is copied from the slides.
    The second input delimited by triple quotation marks is retrieved with an OCR tool to extract all text from a slide.
    Follow the below process:
    1. Step: Compare the first input with the second input to retrieve the relevant information
    2. Step: Generate a question for this information in a flashcard style
    Only return the generated question.
    ```{row['Page-Text']}```
    \"\"\"{row['OCR-text']}\"\"\"
    """
    model_results.append((index, [chat_gpt(prompt)]))


print(model_results)
# Performance is evaluated
metrics = Metrics(save_to_file=True)
result = pd.DataFrame(
    metrics.evaluate(model_output=model_results, references=refs[:3]),
    index=["ChatGPT-Prompt5"]
)
result

## Few-Shot

In [16]:
#model_results = []
# the chatGPT API is called and results are stored
for index, row in content.iterrows():
    if index >=348:
        prompt = f"""
        Generate a question in a flashcard style for the content delimited by triple backticks.
        Follow a similar style for generating the question as in this three examples:
        Example 1) Input: {master_train_val.loc[0, 'Page-Text']}, question: {master_train_val.loc[0, 'Question 1']}
        Example 2) Input: {master_train_val.loc[1, 'Page-Text']}, question: {master_train_val.loc[1, 'Question 1']}
        Example 3) Input: {master_train_val.loc[2, 'Page-Text']}, question: {master_train_val.loc[2, 'Question 1']}

        ```{row['Page-Text']}```
        """
        question = chat_gpt(prompt)
        model_results.append((index, [question]))
        print("Generated question for index ", index, ": ", question)
        time.sleep(1)

print(model_results)

Generated question for index  348 :  Integer Under-/Overflow, question: What is Integer Under-/Overflow and what are the potential side effects if no precautions are taken?
Generated question for index  349 :  Problem with RSA Signature Scheme, question: What are the two variants of RSA signature scheme used in TLS and how do they differ from the plain RSA signature scheme?
Generated question for index  350 :  Type 2: Trojan Threat of Destruction, question: What is the characteristic of Trojan Threat of Destruction (scareware) and what is an example of it?
Generated question for index  351 :  Meltdown Attack, question: What is the goal of the Meltdown Attack and how does it exploit modern CPUs?
Generated question for index  352 :  Boolean Query Optimization, question: What is the algorithm for optimizing a Boolean query and obtaining conjunction postings?
Generated question for index  353 :  Generative Models, question: What are some characteristics and applications of generative model

In [17]:
# Save model_results to disk
df_model_results = pd.DataFrame(model_results, columns=["Index", "Question"])
df_model_results.to_csv("./model_results/master/prompt6.csv", index=False)

# # restore model_results
# df_model_results = pd.read_csv("./model_results/prompt6.csv")
# model_results = [(row["Index"], [row["Question"]]) for _, row in df_model_results.iterrows()]

In [20]:
# Performance is evaluated
metrics = Metrics(save_to_file=True)
result = pd.DataFrame(
    metrics.evaluate(model_output=model_results, references=refs),
    index=["ChatGPT-Prompt6"]
)
print(result)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/I516258/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


                 PUNCTUATION  REDUNDANCY  GRAMMAR  MISC  rouge1_fmeasure  \
ChatGPT-Prompt6          154           2        2     2         0.357437   

                 rouge1_precision  rouge1_recall  rouge2_fmeasure  \
ChatGPT-Prompt6          0.274812        0.58597         0.198176   

                 rouge2_precision  rouge2_recall  ...  min_r    avg_f1  \
ChatGPT-Prompt6          0.149621       0.350631  ...    0.0  0.412628   

                   max_f1  min_f1  avg_cos_sim  max_cos_sim  min_cos_sim  \
ChatGPT-Prompt6  0.941176     0.0      0.68714     0.970361     0.019658   

                 avg_sem_meteor  max_sem_meteor  min_sem_meteor  
ChatGPT-Prompt6        0.451798        0.983584             0.0  

[1 rows x 31 columns]


In [23]:
#model_results = []
# the chatGPT API is called and results are stored
for index, row in content.iterrows():
    if index >=115:
        prompt = f"""
        Generate a question in a flashcard style for the content delimited by triple backticks.
        Take into account how exam questions are normally formulated and formulate the question accordingly.
        Only output the question.
        Follow a similar style for generating the question as in this two examples:
        Example 1) Input: {master_train_val.loc[0, 'Page-Text']}, question: {master_train_val.loc[0, 'Question 1']}
        Example 2) Input: {master_train_val.loc[1, 'Page-Text']}, question: {master_train_val.loc[1, 'Question 1']}
        Example 3) Input: {master_train_val.loc[2, 'Page-Text']}, question: {master_train_val.loc[2, 'Question 1']}
        ```{row['Page-Text']}```
        """
        question = chat_gpt(prompt)
        model_results.append((index, [question]))
        print("Generated question for index ", index, ": ", question)
        #time.sleep(1)

print(model_results)

Generated question for index  115 :  Stage 3: Exploitation, question: What are the six Principles of Influence according to Robert Cialdini?
Generated question for index  116 :  Logistic regression, question: How does logistic regression assume the relationship between the input and the log odds on y?
Generated question for index  117 :  What is the type of attack that involves encrypting data and demanding ransom from the victim?
Generated question for index  118 :  Beta-binomial model, question: What is the Beta-binomial model and why is it important to study?
Generated question for index  119 :  Bayes' rule, question: What is Bayes' rule and what is its significance in Bayesian inference?
Generated question for index  120 :  Origin 2: Theories of organizational learning (1/2), question: What are the aspects in which organizational learning can alter the knowledge base?
Generated question for index  121 :  What is the definition of empirical risk minimization and what is the risk ass

In [41]:
# Save model_results to disk
#df_model_results = pd.DataFrame(model_results, columns=["Index", "Question"])
#df_model_results.to_csv("./model_results/master/prompt7.csv", index=False)

# # restore model_results
df_model_results = pd.read_csv("./model_results/master/prompt7.csv")
model_results = [(row["Index"], [row["Question"]]) for _, row in df_model_results.iterrows()]

In [42]:
# Performance is evaluated
metrics = Metrics(save_to_file=True)
result = pd.DataFrame(
    metrics.evaluate(model_output=model_results, references=refs),
    index=["ChatGPT-Prompt7"]
)
print(result)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/I516258/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


                 PUNCTUATION  REDUNDANCY  GRAMMAR  TYPOS  MISC  COLLOCATIONS  \
ChatGPT-Prompt7          432           1        4      1     1             1   

                 rouge1_fmeasure  rouge1_precision  rouge1_recall  \
ChatGPT-Prompt7          0.36601          0.288644       0.577414   

                 rouge2_fmeasure  ...  min_r    avg_f1  max_f1  min_f1  \
ChatGPT-Prompt7         0.201854  ...    0.0  0.366995   0.875     0.0   

                 avg_cos_sim  max_cos_sim  min_cos_sim  avg_sem_meteor  \
ChatGPT-Prompt7     0.619565     0.947759      0.02409         0.41395   

                 max_sem_meteor  min_sem_meteor  
ChatGPT-Prompt7        0.972045             0.0  

[1 rows x 33 columns]


In [37]:
model_results = []
# the chatGPT API is called and results are stored
for index, row in content.iterrows():
    if index >=0:
        prompt = f"""
        Generate a question in a flashcard style for the content delimited by triple backticks.
        The content is from the lecture {row['Topic']}.
        Follow a similar style for generating the question as in this two examples:
        Example 1) Input: {master_train_val.loc[0, 'Page-Text']}, question: {master_train_val.loc[0, 'Question 1']}
        Example 2) Input: {master_train_val.loc[1, 'Page-Text']}, question: {master_train_val.loc[1, 'Question 1']}
        Example 3) Input: {master_train_val.loc[2, 'Page-Text']}, question: {master_train_val.loc[2, 'Question 1']}
        Only output the plain question.
        ```{row['Page-Text']}```
        """
        question = chat_gpt(prompt)
        model_results.append((index, [question]))
        print("Generated question for index ", index, ": ", question)
        #time.sleep(1)

print(model_results)

Generated question for index  0 :  Multidimensional Data Structure - Cube, question: What is a multidimensional data structure and what is an example of it in the given content?
Generated question for index  1 :  Structural Properties, question: What are the three properties of a complete and decomposable SPN?
Generated question for index  2 :  What are the four components that form AL (Authentic Leadership)?
Generated question for index  3 :  What is the objective of empirical risk minimization with log loss and how is it related to MLE?
Generated question for index  4 :  What are the framing effect and the anchoring effect?
Generated question for index  5 :  What is the topic of slide 17 in the lecture on Decision Support?
Generated question for index  6 :  Probability Distribution, question: What is the topic of slide 16?
Generated question for index  7 :  What is the importance of integrity in IT-Security and how does WEP aim to provide security in terms of integrity?
Generated que

In [38]:
# Save model_results to disk
df_model_results = pd.DataFrame(model_results, columns=["Index", "Question"])
df_model_results.to_csv("./model_results/master/prompt8.csv", index=False)

# # restore model_results
# df_model_results = pd.read_csv("./model_results/prompt8.csv")
# model_results = [(row["Index"], [row["Question"]]) for _, row in df_model_results.iterrows()]

In [39]:
# Performance is evaluated
metrics = Metrics(save_to_file=True)
result = pd.DataFrame(
    metrics.evaluate(model_output=model_results, references=refs),
    index=["ChatGPT-Prompt8"]
)
print(result)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/I516258/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


                 PUNCTUATION  TYPOS  GRAMMAR  MISC  CASING  COLLOCATIONS  \
ChatGPT-Prompt8          106      1        1     2       1             1   

                 rouge1_fmeasure  rouge1_precision  rouge1_recall  \
ChatGPT-Prompt8         0.386787          0.320903       0.565459   

                 rouge2_fmeasure  ...  min_r    avg_f1  max_f1  min_f1  \
ChatGPT-Prompt8         0.217809  ...    0.0  0.426167     1.0     0.0   

                 avg_cos_sim  max_cos_sim  min_cos_sim  avg_sem_meteor  \
ChatGPT-Prompt8     0.672402          1.0     -0.04231        0.458501   

                 max_sem_meteor  min_sem_meteor  
ChatGPT-Prompt8        0.999898             0.0  

[1 rows x 33 columns]
