# baseline creation
This notebook is intended to create a baseline for the self-labeled dataset.

In [2]:
import pandas as pd
import random
from IPython.display import display

In [3]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [4]:
# load data set
df = pd.read_csv('./../../datasets/baseline_creation.csv', delimiter = ";")

display(df)

Unnamed: 0,PDF-Name,Topic,Page Number,Marked for processing,Includes Image Data,Includes formula,Question 1,Question 2,Question 3,Title of the slide,Type of Question,Comment
0,ase_combined.pdf,Agile Software Engineering,1,No,No,No,,,,,,
1,ase_combined.pdf,Agile Software Engineering,2,No,No,No,,,,,,
2,ase_combined.pdf,Agile Software Engineering,3,No,No,No,,,,,,
3,ase_combined.pdf,Agile Software Engineering,4,No,No,No,,,,,,
4,ase_combined.pdf,Agile Software Engineering,5,No,No,No,,,,Cost of Software Failures,,
5,ase_combined.pdf,Agile Software Engineering,6,Yes,No,No,What is the definition of Verification and Val...,What are the objectives of Verification and Va...,,Verification and Validation (V&V),Knowledge Question,
6,ase_combined.pdf,Agile Software Engineering,7,Yes,No,No,What are the goals of verification and validat...,On what does verification and validation depend?,,V&V Goals,Knowledge Question,
7,ase_combined.pdf,Agile Software Engineering,8,Yes,Yes,No,What is static and what is dynamic V&V?,How does static V&V differentiate from dynamic...,,Static versus Dynamic V&V,Knowledge Question,
8,ase_combined.pdf,Agile Software Engineering,9,Yes,Yes,No,How the relative cost of fixing defects behave...,,,Relative Cost of Fixing Defects,Knowledge Question,
9,ase_combined.pdf,Agile Software Engineering,10,Yes,Yes,No,What is model-driven development?,,,Model-Driven Development,Knowledge Question,


In [5]:
# Boolean indexing to filter out specific values from 'feature_column'
filtered_df = df[(df['Marked for processing'] != 'No') & (~df['Marked for processing'].isna())]


# Display the filtered DataFrame
display(filtered_df)

Unnamed: 0,PDF-Name,Topic,Page Number,Marked for processing,Includes Image Data,Includes formula,Question 1,Question 2,Question 3,Title of the slide,Type of Question,Comment
5,ase_combined.pdf,Agile Software Engineering,6,Yes,No,No,What is the definition of Verification and Val...,What are the objectives of Verification and Va...,,Verification and Validation (V&V),Knowledge Question,
6,ase_combined.pdf,Agile Software Engineering,7,Yes,No,No,What are the goals of verification and validat...,On what does verification and validation depend?,,V&V Goals,Knowledge Question,
7,ase_combined.pdf,Agile Software Engineering,8,Yes,Yes,No,What is static and what is dynamic V&V?,How does static V&V differentiate from dynamic...,,Static versus Dynamic V&V,Knowledge Question,
8,ase_combined.pdf,Agile Software Engineering,9,Yes,Yes,No,How the relative cost of fixing defects behave...,,,Relative Cost of Fixing Defects,Knowledge Question,
9,ase_combined.pdf,Agile Software Engineering,10,Yes,Yes,No,What is model-driven development?,,,Model-Driven Development,Knowledge Question,
10,ase_combined.pdf,Agile Software Engineering,11,Yes,Yes,No,What is the systematic view-based approach?,,,Systematic View-based Approach,Knowledge Question,
14,ase_combined.pdf,Agile Software Engineering,15,Yes,No,No,What is the definition of software testing?,What is the definition of software testing and...,What types of software testing exist?,Types of testing,Knowledge Question,
15,ase_combined.pdf,Agile Software Engineering,16,Yes,No,No,What additional types of testing exist?,,,Types of Testing (cont.),Knowledge Question,
16,ase_combined.pdf,Agile Software Engineering,17,Yes,No,No,What are the levels of testing?,,,Testing Levels,Knowledge Question,
17,ase_combined.pdf,Agile Software Engineering,18,Yes,No,No,What is defect testing and how is it basically...,What is the definition of defect testing?,,Defect Testing,Knowledge Question,


In [6]:
# create set of possible question types

question_types = ["What are {}?", "What is {}?", "How is the definition of {}?", "How does {} work?", "How is {} defined?", "What is the idea of {}?", "What do you know about {}?"]

In [7]:
def create_baseline_question(array_of_strings, dataframe, loop_column_name):
    """
    Function to add the baseline question based on the title of the slide.

    Parameters:
        - array_of_strings (list): A list of strings containing placeholders.
        - dataframe (pd.DataFrame): The DataFrame to which the placeholder column will be added.
        - loop_column_name (str): The name of the column in the DataFrame used for the loop.

    Returns:
        - pd.DataFrame: The DataFrame with an additional Baseline Question column.
    """
    # Create an empty list to store the values for the baseline questions
    baseline_questions = []

    # Iterate through each row of the DataFrame
    for index, row in dataframe.iterrows():
        # Select a random placeholder string from the array
        random_placeholder = random.choice(array_of_strings)

        # Get the current value of the loop_column_name for this row
        loop_value = row[loop_column_name]

        # Combine the random placeholder string with the loop_value
        placeholder_value = random_placeholder.format(loop_value)

        # Append the placeholder_value to the list
        baseline_questions.append(placeholder_value)

    # Add the new placeholder column to the DataFrame
    dataframe['Baseline Question'] = baseline_questions

    return dataframe

In [8]:
df_baseline = create_baseline_question(question_types, filtered_df, "Title of the slide")

display(df_baseline)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe['Baseline Question'] = baseline_questions


Unnamed: 0,PDF-Name,Topic,Page Number,Marked for processing,Includes Image Data,Includes formula,Question 1,Question 2,Question 3,Title of the slide,Type of Question,Comment,Baseline Question
5,ase_combined.pdf,Agile Software Engineering,6,Yes,No,No,What is the definition of Verification and Val...,What are the objectives of Verification and Va...,,Verification and Validation (V&V),Knowledge Question,,How does Verification and Validation (V&V) work?
6,ase_combined.pdf,Agile Software Engineering,7,Yes,No,No,What are the goals of verification and validat...,On what does verification and validation depend?,,V&V Goals,Knowledge Question,,How is the definition of V&V Goals?
7,ase_combined.pdf,Agile Software Engineering,8,Yes,Yes,No,What is static and what is dynamic V&V?,How does static V&V differentiate from dynamic...,,Static versus Dynamic V&V,Knowledge Question,,What is Static versus Dynamic V&V?
8,ase_combined.pdf,Agile Software Engineering,9,Yes,Yes,No,How the relative cost of fixing defects behave...,,,Relative Cost of Fixing Defects,Knowledge Question,,How does Relative Cost of Fixing Defects work?
9,ase_combined.pdf,Agile Software Engineering,10,Yes,Yes,No,What is model-driven development?,,,Model-Driven Development,Knowledge Question,,What is the idea of Model-Driven Development?
10,ase_combined.pdf,Agile Software Engineering,11,Yes,Yes,No,What is the systematic view-based approach?,,,Systematic View-based Approach,Knowledge Question,,What is Systematic View-based Approach?
14,ase_combined.pdf,Agile Software Engineering,15,Yes,No,No,What is the definition of software testing?,What is the definition of software testing and...,What types of software testing exist?,Types of testing,Knowledge Question,,What do you know about Types of testing?
15,ase_combined.pdf,Agile Software Engineering,16,Yes,No,No,What additional types of testing exist?,,,Types of Testing (cont.),Knowledge Question,,What is Types of Testing (cont.)?
16,ase_combined.pdf,Agile Software Engineering,17,Yes,No,No,What are the levels of testing?,,,Testing Levels,Knowledge Question,,What is Testing Levels?
17,ase_combined.pdf,Agile Software Engineering,18,Yes,No,No,What is defect testing and how is it basically...,What is the definition of defect testing?,,Defect Testing,Knowledge Question,,How is the definition of Defect Testing?


In [9]:
# store the dataframe to a csv file
df_baseline.to_csv("./../../datasets/baseline.csv", index=False)