In [13]:
import pandas as pd
import re

def extract_links_from_text(column_value, is_chatbot=False):
    """Extract links from a text cell, handling the human and chatbot formats.

    Args:
        column_value: Cell content (a string) or a missing value (NaN/None).
        is_chatbot (bool): When True, only URLs that directly follow the marker
            "Information taken from: " are returned. When False, the cell is
            parsed as markdown links ``[text](url)`` and/or bare URLs
            separated by whitespace or semicolons.

    Returns:
        list[str]: Extracted links in order of appearance (duplicates kept);
        an empty list for missing values.
    """
    if pd.isna(column_value):
        return []
    if is_chatbot:
        return re.findall(r'Information taken from: (https?://[^\s]+)', column_value)
    # Group 1: target of a markdown link. Group 2: a bare URL. The bare
    # alternative must look like a URL so that filler words or punctuation
    # between links are not counted as links.
    matches = re.findall(r'\[.*?\]\((.*?)\)|((?:https?://|www\.)[^;\s]+)', column_value)
    links = []
    for markdown_target, bare_url in matches:
        # A trailing "," or "." on a bare URL is list punctuation, not part of the link.
        link = markdown_target.strip() or bare_url.rstrip('.,')
        if link:  # drops empty markdown targets such as "[text]()"
            links.append(link)
    return links

# True Positives (TP): Links that are present in both human_links and chatbot_links. - Intersection count
# False Positives (FP): Links that are present in chatbot_links but not in human_links. - Extra links used by chatbot
# False Negatives (FN): Links that are present in human_links but not in chatbot_links. - Missing links used by human but not chatbot

def true_positives(human_links, chatbot_links):
    """Count the true positives: distinct links present in both lists.

    Args:
        human_links (list): Links provided by the human.
        chatbot_links (list): Links extracted from the chatbot context.

    Returns:
        int: Size of the intersection of the two link sets.
    """
    return len(set(human_links) & set(chatbot_links))


# Backward-compatible alias: existing callers use the original, misspelled name.
ture_positives = true_positives

def false_positives(human_links, chatbot_links):
    """Count the false positives: distinct chatbot links the human did not use."""
    human_set = set(human_links)
    extra_links = {link for link in chatbot_links if link not in human_set}
    return len(extra_links)

def false_negatives(human_links, chatbot_links):
    """Count the false negatives: distinct human links the chatbot did not use."""
    missing_links = set(human_links).difference(chatbot_links)
    return len(missing_links)

def recall(human_links, chatbot_links):
    """Recall of the chatbot links measured against the human links.

    Computed as TP / (TP + FN). Returns 0.0 when TP + FN is zero, which
    happens when there are no human links.
    """
    hits = ture_positives(human_links, chatbot_links)
    relevant = hits + false_negatives(human_links, chatbot_links)
    if relevant == 0:
        return 0.0
    return hits / relevant

def precision(human_links, chatbot_links):
    """Precision of the chatbot links measured against the human links.

    Computed as TP / (TP + FP). Returns 0.0 when TP + FP is zero, which
    happens when there are no chatbot links.
    """
    hits = ture_positives(human_links, chatbot_links)
    retrieved = hits + false_positives(human_links, chatbot_links)
    if retrieved == 0:
        return 0.0
    return hits / retrieved


def f1_score(human_links, chatbot_links):
    """F1-score of the chatbot links against the human links.

    Special case: when the human provided no links, the score is 1.0 if the
    chatbot also used none and 0.0 otherwise. In every other case the result
    is the harmonic mean of precision and recall, or 0.0 when both are zero.
    """
    if human_links:
        rec = recall(human_links, chatbot_links)
        prec = precision(human_links, chatbot_links)
        if rec + prec == 0:
            return 0.0
        return 2 * (prec * rec) / (prec + rec)
    # No human links: perfect only if the chatbot used none either.
    return 0.0 if chatbot_links else 1.0

# Load datasets
csv_english, csv_german = (
    "../../data/short_dataset_en.csv",
    "../../data/short_dataset_de.csv",
)

# Read the English file first, then the German one.
df_en, df_de = (pd.read_csv(csv_path) for csv_path in (csv_english, csv_german))

# Process datasets
def process_dataset(df, human_col, chatbot_col):
    """Add extracted link lists and per-row link metrics to ``df`` in place.

    Args:
        df (pd.DataFrame): Dataset to annotate; it is modified in place.
        human_col (str): Column holding the human-provided links text.
        chatbot_col (str): Column holding the chatbot context text.

    Adds the columns ``human_links``, ``chatbot_links``, ``true_positive``,
    ``false_positive``, ``false_negative``, ``recall``, ``precision`` and
    ``f1_score``. Nothing is returned.
    """
    df['human_links'] = df[human_col].apply(extract_links_from_text)
    df['chatbot_links'] = df[chatbot_col].apply(
        lambda value: extract_links_from_text(value, is_chatbot=True)
    )

    # Output column -> metric computed from the two link lists of a row.
    row_metrics = {
        'true_positive': ture_positives,
        'false_positive': false_positives,
        'false_negative': false_negatives,
        'recall': recall,
        'precision': precision,
        'f1_score': f1_score,
    }
    for column_name, metric in row_metrics.items():
        df[column_name] = df.apply(
            lambda row: metric(row['human_links'], row['chatbot_links']),
            axis=1,
        )

# Annotate both datasets with link lists and metrics (in place).
for frame, human_col, chatbot_col in (
    (df_en, 'human_answer_links_en', 'chatbot_context_en'),
    (df_de, 'human_answer_links_de', 'chatbot_context_de'),
):
    process_dataset(frame, human_col, chatbot_col)

# Save results
output_english = "../../testing/english_comparison_results_1.csv"
output_german = "../../testing/german_comparison_results_1.csv"

for frame, destination in ((df_en, output_english), (df_de, output_german)):
    frame.to_csv(destination, index=False)

# Print summary
def _print_dataset_summary(lang, df):
    """Print the averaged link metrics of one processed dataset."""
    print(f"\n{lang} Dataset Metrics:")
    print(f"Average Recall: {df['recall'].mean():.3f}")
    print(f"Average Precision: {df['precision'].mean():.3f}")
    print(f"Macro level F1-Score: {df['f1_score'].mean():.3f}")
    # Micro level F1-Score is potentially bad, since it cannot take into account
    # the case where both use no links, which is a perfect case.
    #print(f"Micro level F1-Score: {df['true_positive'].sum() / (df['true_positive'].sum() + 1/2 * (df['false_positive'].sum() + df['false_negative'].sum())):.3f}")
    # Chatbot 4-link limit:
    # share of rows in which the human uses more than 4 links.
    rows_over_limit = df[df['human_links'].apply(len) > 4]
    share_over_limit = len(rows_over_limit) / len(df) * 100
    print(f"Percentage of times human uses more than 4 links: {share_over_limit:.2f}%")


for lang, df in [('English', df_en), ('German', df_de)]:
    _print_dataset_summary(lang, df)
print(f"Results saved to {output_english} and {output_german}")



English Dataset Metrics:
Average Recall: 0.202
Average Precision: 0.098
Macro level F1-Score: 0.186
Percentage of times human uses more than 4 links: 6.06%

German Dataset Metrics:
Average Recall: 0.169
Average Precision: 0.083
Macro level F1-Score: 0.195
Percentage of times human uses more than 4 links: 6.06%
Results saved to ../../testing/english_comparison_results_1.csv and ../../testing/german_comparison_results_1.csv


In [8]:
# Display the English frame; this cell does not define df_en, so a cell that does must have run first.
df_en

Unnamed: 0,english_question_text_q,human_answer_en,human_answer_links_en,chatbot_answer_en,chatbot_context_en,chatbot_links_function_en,chatbot_query_en,latency_en,tool_run_used_en,original_question_language,...,participant_id_a,participant_id_q,human_links,chatbot_links,true_positive,false_positive,false_negative,recall,precision,f1_score
0,What jobs can you take up with which fields of...,This is a very general question. Please provid...,,I'm unable to provide information on job oppor...,,,,2.002039,no search tool used,de,...,2.0,119,[],[],0,0,0,0.0,0.0,0.0
1,Can you provide details about scholarships or ...,The University of Osnabrück offers a range of ...,"[Costs, Financing, and Scholarships](https://...",The University of Osnabrück offers various sch...,Information taken from: https://www.uni-osnabr...,https://www.uni-osnabrueck.de/studieninteressi...,Stipendien oder finanzielle Unterstützung für ...,41.063794,search tool used,en,...,4.0,120,[https://www.uni-osnabrueck.de/studieninteress...,[https://www.uni-osnabrueck.de/studieninteress...,1,3,0,1.0,0.25,0.4
2,What are the semester dates?,Semester data refers to the dates and deadline...,[University of Osnabrück - Dates and Deadlines...,Here are the semester dates for the University...,Information taken from: https://www.uni-osnabr...,https://www.uni-osnabrueck.de/studieninteressi...,Semestertermine Universität Osnabrück,7.789676,search tool used,en,...,11.0,103,[https://www.uni-osnabrueck.de/studieninteress...,[https://www.uni-osnabrueck.de/studieninteress...,1,3,0,1.0,0.25,0.4
3,Is the university easily accessible?,The University of Osnabrück is easily accessib...,[Site plan University of Osnabrück Arrival](ht...,The accessibility of the University of Osnabrü...,Information taken from: https://www.uni-osnabr...,https://www.uni-osnabrueck.de/studieninteressi...,Zugänglichkeit der Universität Osnabrück,11.616595,search tool used,de,...,10.0,99,[https://www.uni-osnabrueck.de/universitaet/la...,[https://www.uni-osnabrueck.de/studieninteress...,0,4,2,0.0,0.0,0.0
4,Which master's degree programs can I do with a...,"With a Bachelor's in Cognitive Science, the Un...",https://www.uni-osnabrueck.de/fileadmin/docume...,I can only provide information related to the ...,,,,1.66775,no search tool used,de,...,1.0,24,[https://www.uni-osnabrueck.de/fileadmin/docum...,[],0,0,4,0.0,0.0,0.0
5,Are there any special workshops or networking ...,I couldn't find any workshops or specific netw...,https://www.cogsci-network.org/de,I couldn't find specific information about spe...,Content not found,,Sonderworkshops oder Networking-Events für Mas...,4.212218,search tool used,en,...,5.0,53,[https://www.cogsci-network.org/de],[],0,0,1,0.0,0.0,0.0
6,Does the University of Osnabrück offer music a...,The University of Osnabrück offers a diverse r...,[University Sports Offerings](https://buchung....,"Yes, the University of Osnabrück offers music ...",Information taken from: https://www.uni-osnabr...,https://www.uni-osnabrueck.de/kommunikation/ko...,Bietet die Universität Osnabrück Musik- und Sp...,8.432385,search tool used,de,...,8.0,24,[https://buchung.zfh.uni-osnabrueck.de/angebot...,[https://www.uni-osnabrueck.de/kommunikation/k...,0,4,2,0.0,0.0,0.0
7,How can I optimize my learning.,"You can, for example, take advantage of the ""S...",[Studying PLuS](https://www.uni-osnabrueck.de/...,I'm unable to assist with questions about opti...,,,,1.303267,no search tool used,de,...,6.0,24,[https://www.uni-osnabrueck.de/studium/im-stud...,[],0,0,1,0.0,0.0,0.0
8,Do you need a NC for the German Studies teachi...,That depends on which degree program in the fi...,[Admission restrictions and application deadli...,"Yes, the German Studies teaching degree progra...",Information taken from: https://www.uni-osnabr...,https://www.uni-osnabrueck.de/studieninteressi...,NC für das Lehramtsstudium Germanistik,45.092591,search tool with summarization used,de,...,4.0,99,[https://www.uni-osnabrueck.de/studieninteress...,[https://www.uni-osnabrueck.de/studieninteress...,1,3,3,0.25,0.25,0.25
9,How can I sign up for sports?,The University of Osnabrück offers a wide rang...,[Website registration process](https://zfh.uni...,To sign up for sports at the University of Osn...,Information taken from: https://www.sport.uni-...,https://www.sport.uni-osnabrueck.de/studium/ha...,Wie kann ich mich für Sport anmelden?,14.397051,search tool used,en,...,10.0,83,[https://zfh.uni-osnabrueck.de/service/anmelde...,[https://www.sport.uni-osnabrueck.de/studium/h...,1,3,1,0.5,0.25,0.333333


In [43]:
import pandas as pd
import re

def extract_links_from_human(column_value):
    """Extract links from the human_answer_links_xx column.

    Args:
        column_value: Cell content (a string) or a missing value (NaN/None).
            Links may be markdown ``[text](url)`` or bare URLs separated by
            whitespace or semicolons.

    Returns:
        list[str]: Extracted links in order of appearance (duplicates kept);
        an empty list for missing values.
    """
    if pd.isna(column_value):
        return []
    # Group 1: target of a markdown link. Group 2: a bare URL. The bare
    # alternative must look like a URL so that filler words or punctuation
    # between links are not counted as links.
    matches = re.findall(r'\[.*?\]\((.*?)\)|((?:https?://|www\.)[^;\s]+)', column_value)
    links = []
    for markdown_target, bare_url in matches:
        # A trailing "," or "." on a bare URL is list punctuation, not part of the link.
        link = markdown_target.strip() or bare_url.rstrip('.,')
        if link:  # drops empty markdown targets such as "[text]()"
            links.append(link)
    return links

def extract_links_from_chatbot(column_value):
    """Extract links from the chatbot_context_xx column.

    Only URLs that directly follow the marker "Information taken from: " are
    returned, in order of appearance. Missing values yield an empty list.
    """
    source_pattern = r'Information taken from: (https?://[^\s]+)'
    return [] if pd.isna(column_value) else re.findall(source_pattern, column_value)

def compare_links(human_links, chatbot_links):
    """Tell whether the chatbot used every link the human provided.

    Args:
        human_links (list): Links provided by the human.
        chatbot_links (list): Links extracted from the chatbot's context.

    Returns:
        bool: True when every human link also appears among the chatbot
              links. When there are no human links, True only if there are
              no chatbot links either.
    """
    if human_links:
        return set(human_links) <= set(chatbot_links)
    return not chatbot_links

def partial_overlap_links(human_links, chatbot_links):
    """Tell whether the chatbot used at least one of the human links.

    Args:
        human_links (list): Links provided by the human.
        chatbot_links (list): Links extracted from the chatbot's context.

    Returns:
        bool: True when the two lists share at least one link. When there
              are no human links, True only if there are no chatbot links
              either.
    """
    if human_links:
        return not set(human_links).isdisjoint(chatbot_links)
    return not chatbot_links

# Load datasets
csv_english, csv_german = (
    "../../data/short_dataset_en.csv",
    "../../data/short_dataset_de.csv",
)

# Read the English file first, then the German one.
df_en, df_de = (pd.read_csv(csv_path) for csv_path in (csv_english, csv_german))

# Process datasets for English and German.
# Each entry: (frame, raw human column, raw chatbot column,
#              output human link-list column, output chatbot link-list column)
for frame, human_raw, chatbot_raw, human_list, chatbot_list in (
    (df_en, 'human_answer_links_en', 'chatbot_context_en',
     'human_answer_links_list_en', 'chatbot_links_context_en'),
    (df_de, 'human_answer_links_de', 'chatbot_context_de',
     'human_answer_links_list_de', 'chatbot_links_context_de'),
):
    frame[human_list] = frame[human_raw].apply(extract_links_from_human)
    frame[chatbot_list] = frame[chatbot_raw].apply(extract_links_from_chatbot)
    # True when the chatbot used all human links of the row.
    frame['human_links_used'] = frame.apply(
        lambda row: compare_links(row[human_list], row[chatbot_list]),
        axis=1,
    )
    # True when the chatbot used at least one human link of the row.
    frame['partial_overlap_links'] = frame.apply(
        lambda row: partial_overlap_links(row[human_list], row[chatbot_list]),
        axis=1,
    )

def _mean_as_percent(frame, column):
    """Mean of ``column`` times 100, or 0 for an empty frame."""
    return (frame[column].mean() * 100) if not frame.empty else 0


def _mean_list_length(frame, column):
    """Average length of the lists stored in ``column``, or 0 for an empty frame."""
    return frame[column].apply(len).mean() if not frame.empty else 0


def _percent_of_empty_lists(frame, column):
    """Percentage of rows whose list in ``column`` is empty."""
    return (frame[column].apply(len) == 0).mean() * 100


# Calculate and print the percentage of all human links used per dataset
percentage_all_en = _mean_as_percent(df_en, 'human_links_used')
percentage_all_de = _mean_as_percent(df_de, 'human_links_used')

print(f"\nPercentage of all human links used in the English dataset: {percentage_all_en:.2f}%")
print(f"Percentage of all human links used in the German dataset: {percentage_all_de:.2f}%")

# Calculate and print the percentage of min one human link used per dataset
percentage_one_en = _mean_as_percent(df_en, 'partial_overlap_links')
percentage_one_de = _mean_as_percent(df_de, 'partial_overlap_links')

print(f"\nPercentage of min one human link used in the English dataset: {percentage_one_en:.2f}%")
print(f"Percentage of min one human link used in the German dataset: {percentage_one_de:.2f}%")


# Calculate and print the average number of links used
avg_human_links_en = _mean_list_length(df_en, 'human_answer_links_list_en')
avg_chatbot_links_en = _mean_list_length(df_en, 'chatbot_links_context_en')
avg_human_links_de = _mean_list_length(df_de, 'human_answer_links_list_de')
avg_chatbot_links_de = _mean_list_length(df_de, 'chatbot_links_context_de')

print(f"\nAverage number of links used in the English dataset:\n  Human: {avg_human_links_en:.2f}\n  Chatbot: {avg_chatbot_links_en:.2f}")
print(f"Average number of links used in the German dataset:\n  Human: {avg_human_links_de:.2f}\n  Chatbot: {avg_chatbot_links_de:.2f}")

# Percentage of times the chatbot used no context links
no_links_chatbot_en_percentage = _percent_of_empty_lists(df_en, 'chatbot_links_context_en')
no_links_chatbot_de_percentage = _percent_of_empty_lists(df_de, 'chatbot_links_context_de')
print(f"\nPercentage of times the chatbot used no context links:\n  English: {no_links_chatbot_en_percentage:.2f}%\n  German: {no_links_chatbot_de_percentage:.2f}%")

# Percentage of times the human used no context links
no_links_human_en_percentage = _percent_of_empty_lists(df_en, 'human_answer_links_list_en')
no_links_human_de_percentage = _percent_of_empty_lists(df_de, 'human_answer_links_list_de')
print(f"\nPercentage of times the human used no context links:\n  English: {no_links_human_en_percentage:.2f}%\n  German: {no_links_human_de_percentage:.2f}%")

# # Print results in a readable format
# print("\nEnglish Dataset Results:")
# for index, row in df_en.iterrows():
#     print(f"Row {index + 1} - Human Links: {row['human_answer_links_list_en']}\nChatbot Links: {row['chatbot_links_context_en']}\nHuman Links Used: {row['human_links_used']}\nPartial Overlap Links: {row['partial_overlap_links']}\n")

# print("\nGerman Dataset Results:")
# for index, row in df_de.iterrows():
#     print(f"Row {index + 1} - Human Links: {row['human_answer_links_list_de']}\nChatbot Links: {row['chatbot_links_context_de']}\nHuman Links Used: {row['human_links_used']}\nPartial Overlap Links: {row['partial_overlap_links']}\n")

# Save results
output_english = "../../testing/english_comparison_results.csv"
output_german = "../../testing/german_comparison_results.csv"

# Write each annotated frame to its CSV file, without the index column.
for frame, destination in ((df_en, output_english), (df_de, output_german)):
    frame.to_csv(destination, index=False)

print(f"Results saved to {output_english} and {output_german}")



Percentage of all human links used in the English dataset: 15.15%
Percentage of all human links used in the German dataset: 18.18%

Percentage of min one human link used in the English dataset: 39.39%
Percentage of min one human link used in the German dataset: 39.39%

Average number of links used in the English dataset:
  Human: 1.97
  Chatbot: 2.64
Average number of links used in the German dataset:
  Human: 1.97
  Chatbot: 2.30

Percentage of times the chatbot used no context links:
  English: 33.33%
  German: 42.42%

Percentage of times the human used no context links:
  English: 15.15%
  German: 15.15%
Results saved to ../../testing/english_comparison_results.csv and ../../testing/german_comparison_results.csv


In [2]:
import pandas as pd
import re

def extract_links_from_human(column_value):
    """Extract links from the human_answer_links_xx column.

    Args:
        column_value: Cell content (a string) or a missing value (NaN/None).
            Links may be markdown ``[text](url)`` or bare URLs separated by
            whitespace or semicolons.

    Returns:
        list[str]: Extracted links in order of appearance (duplicates kept);
        an empty list for missing values.
    """
    if pd.isna(column_value):
        return []
    # Group 1: target of a markdown link. Group 2: a bare URL. The bare
    # alternative must look like a URL so that filler words or punctuation
    # between links are not counted as links.
    matches = re.findall(r'\[.*?\]\((.*?)\)|((?:https?://|www\.)[^;\s]+)', column_value)
    links = []
    for markdown_target, bare_url in matches:
        # A trailing "," or "." on a bare URL is list punctuation, not part of the link.
        link = markdown_target.strip() or bare_url.rstrip('.,')
        if link:  # drops empty markdown targets such as "[text]()"
            links.append(link)
    return links

def extract_links_from_chatbot(column_value):
    """Extract links from the chatbot_context_xx column.

    Only URLs that directly follow the marker "Information taken from: " are
    returned, in order of appearance. Missing values yield an empty list.
    """
    source_pattern = r'Information taken from: (https?://[^\s]+)'
    return [] if pd.isna(column_value) else re.findall(source_pattern, column_value)

def compare_links(human_links, chatbot_links):
    """Tell whether the chatbot used every link the human provided.

    Args:
        human_links (list): Links provided by the human.
        chatbot_links (list): Links extracted from the chatbot's context.

    Returns:
        bool: True when every human link also appears among the chatbot
              links. When there are no human links, True only if there are
              no chatbot links either.
    """
    if human_links:
        return set(human_links) <= set(chatbot_links)
    return not chatbot_links

def partial_overlap_links(human_links, chatbot_links):
    """Calculate the ratio of human links used by the chatbot.

    Args:
        human_links (list): A list of links provided by humans.
        chatbot_links (list): A list of links extracted from the chatbot's context.

    Returns:
        float: Fraction (0.0-1.0) of the distinct human links that also
            appear among the chatbot links. When there are no human links,
            1.0 if there are no chatbot links either, otherwise 0.0.
    """
    if not human_links:
        return 1.0 if not chatbot_links else 0.0
    # Deduplicate before counting: the overlap is counted over distinct links,
    # so the denominator must be too, or a repeated human link would keep the
    # ratio below 1.0 even when every link was used.
    unique_human_links = set(human_links)
    overlap_count = len(unique_human_links & set(chatbot_links))
    return overlap_count / len(unique_human_links)

# Load datasets
csv_english, csv_german = (
    "../../data/short_dataset_en.csv",
    "../../data/short_dataset_de.csv",
)

# Read the English file first, then the German one.
df_en, df_de = (pd.read_csv(csv_path) for csv_path in (csv_english, csv_german))

# Process datasets for English and German.
# Each entry: (frame, raw human column, raw chatbot column,
#              output human link-list column, output chatbot link-list column)
for frame, human_raw, chatbot_raw, human_list, chatbot_list in (
    (df_en, 'human_answer_links_en', 'chatbot_context_en',
     'human_answer_links_list_en', 'chatbot_links_context_en'),
    (df_de, 'human_answer_links_de', 'chatbot_context_de',
     'human_answer_links_list_de', 'chatbot_links_context_de'),
):
    frame[human_list] = frame[human_raw].apply(extract_links_from_human)
    frame[chatbot_list] = frame[chatbot_raw].apply(extract_links_from_chatbot)
    # True when the chatbot used all human links of the row.
    frame['human_links_used'] = frame.apply(
        lambda row: compare_links(row[human_list], row[chatbot_list]),
        axis=1,
    )
    # Per-row value returned by partial_overlap_links for the two link lists.
    frame['partial_overlap_percentage'] = frame.apply(
        lambda row: partial_overlap_links(row[human_list], row[chatbot_list]),
        axis=1,
    )

def _mean_as_percent(frame, column):
    """Mean of ``column`` times 100, or 0 for an empty frame."""
    return frame[column].mean() * 100 if not frame.empty else 0


# Calculate and print the percentage of all human links used per dataset
percentage_all_en = _mean_as_percent(df_en, 'human_links_used')
percentage_all_de = _mean_as_percent(df_de, 'human_links_used')

print(f"\nPercentage of all human links used in the English dataset: {percentage_all_en:.2f}%")
print(f"Percentage of all human links used in the German dataset: {percentage_all_de:.2f}%")

# Calculate and print the average percentage of partial overlaps
avg_partial_overlap_en = _mean_as_percent(df_en, 'partial_overlap_percentage')
avg_partial_overlap_de = _mean_as_percent(df_de, 'partial_overlap_percentage')

print(f"\nAverage percentage of human links partially used by the chatbot in the English dataset: {avg_partial_overlap_en:.2f}%")
print(f"Average percentage of human links partially used by the chatbot in the German dataset: {avg_partial_overlap_de:.2f}%")

# Calculate and print the average percentage excluding no-link rows
def avg_partial_overlap_excluding_no_links(df, human_col, chatbot_col):
    """Mean of ``partial_overlap_percentage`` times 100 over rows that have links.

    Rows in which both the ``human_col`` list and the ``chatbot_col`` list are
    empty are left out. Returns 0 when no rows remain.
    """
    human_has_links = df[human_col].apply(len) > 0
    chatbot_has_links = df[chatbot_col].apply(len) > 0
    rows_with_links = df[human_has_links | chatbot_has_links]
    if rows_with_links.empty:
        return 0
    return rows_with_links['partial_overlap_percentage'].mean() * 100

def _mean_list_length(frame, column):
    """Average length of the lists stored in ``column``, or 0 for an empty frame."""
    return frame[column].apply(len).mean() if not frame.empty else 0


def _percent_of_empty_lists(frame, column):
    """Percentage of rows whose list in ``column`` is empty."""
    return (frame[column].apply(len) == 0).mean() * 100


avg_partial_overlap_excl_no_links_en = avg_partial_overlap_excluding_no_links(
    df_en, 'human_answer_links_list_en', 'chatbot_links_context_en'
)
avg_partial_overlap_excl_no_links_de = avg_partial_overlap_excluding_no_links(
    df_de, 'human_answer_links_list_de', 'chatbot_links_context_de'
)

print(f"\nAverage percentage of human links partially used by the chatbot (excluding no-link rows) in the English dataset: {avg_partial_overlap_excl_no_links_en:.2f}%")
print(f"Average percentage of human links partially used by the chatbot (excluding no-link rows) in the German dataset: {avg_partial_overlap_excl_no_links_de:.2f}%")

# Calculate and print the average number of links used
avg_human_links_en = _mean_list_length(df_en, 'human_answer_links_list_en')
avg_chatbot_links_en = _mean_list_length(df_en, 'chatbot_links_context_en')
avg_human_links_de = _mean_list_length(df_de, 'human_answer_links_list_de')
avg_chatbot_links_de = _mean_list_length(df_de, 'chatbot_links_context_de')

print(f"\nAverage number of links used in the English dataset:\n  Human: {avg_human_links_en:.2f}\n  Chatbot: {avg_chatbot_links_en:.2f}")
print(f"Average number of links used in the German dataset:\n  Human: {avg_human_links_de:.2f}\n  Chatbot: {avg_chatbot_links_de:.2f}")

# Percentage of times the chatbot used no context links
no_links_chatbot_en_percentage = _percent_of_empty_lists(df_en, 'chatbot_links_context_en')
no_links_chatbot_de_percentage = _percent_of_empty_lists(df_de, 'chatbot_links_context_de')
print(f"\nPercentage of times the chatbot used no context links:\n  English: {no_links_chatbot_en_percentage:.2f}%\n  German: {no_links_chatbot_de_percentage:.2f}%")

# Percentage of times the human used no context links
no_links_human_en_percentage = _percent_of_empty_lists(df_en, 'human_answer_links_list_en')
no_links_human_de_percentage = _percent_of_empty_lists(df_de, 'human_answer_links_list_de')
print(f"\nPercentage of times the human used no context links:\n  English: {no_links_human_en_percentage:.2f}%\n  German: {no_links_human_de_percentage:.2f}%")

# Print results in a readable format.
# The partial_overlap_percentage column holds a 0-1 ratio, so it is printed
# with the "%" format type, which multiplies by 100 and appends the percent
# sign (a ratio of 1.0 prints as "100.00%" instead of "1.00%").
print("\nEnglish Dataset Results:")
for index, row in df_en.iterrows():
    print(f"Row {index + 1} - Human Links: {row['human_answer_links_list_en']}\nChatbot Links: {row['chatbot_links_context_en']}\nHuman Links Used: {row['human_links_used']}\nPartial Overlap Percentage: {row['partial_overlap_percentage']:.2%}\n")

print("\nGerman Dataset Results:")
for index, row in df_de.iterrows():
    print(f"Row {index + 1} - Human Links: {row['human_answer_links_list_de']}\nChatbot Links: {row['chatbot_links_context_de']}\nHuman Links Used: {row['human_links_used']}\nPartial Overlap Percentage: {row['partial_overlap_percentage']:.2%}\n")

# Save results
output_english = "../../testing/english_comparison_results.csv"
output_german = "../../testing/german_comparison_results.csv"

# Write each annotated frame to its CSV file, without the index column.
for frame, destination in ((df_en, output_english), (df_de, output_german)):
    frame.to_csv(destination, index=False)

print(f"Results saved to {output_english} and {output_german}")



Percentage of all human links used in the English dataset: 15.15%
Percentage of all human links used in the German dataset: 18.18%

Average percentage of human links partially used by the chatbot in the English dataset: 26.11%
Average percentage of human links partially used by the chatbot in the German dataset: 25.86%

Average percentage of human links partially used by the chatbot (excluding no-link rows) in the English dataset: 21.34%
Average percentage of human links partially used by the chatbot (excluding no-link rows) in the German dataset: 18.44%

Average number of links used in the English dataset:
  Human: 1.97
  Chatbot: 2.64
Average number of links used in the German dataset:
  Human: 1.97
  Chatbot: 2.30

Percentage of times the chatbot used no context links:
  English: 33.33%
  German: 42.42%

Percentage of times the human used no context links:
  English: 15.15%
  German: 15.15%

English Dataset Results:
Row 1 - Human Links: []
Chatbot Links: []
Human Links Used: True
P

In [32]:
# Inspect the chatbot link list of the row labelled 28 in the English frame.
df_en.loc[28, 'chatbot_links_context_en']

['https://www.uni-osnabrueck.de/fileadmin/documents/public/4_forschung/4.3_nachwuchsfoerderung/zepros/Infothek/buwin-2017.pdf',
 'https://www.psychologie-cms.uni-osnabrueck.de/fileadmin/doc-lehreval/TB9_TextGest16.pdf',
 'https://osnadocs.ub.uni-osnabrueck.de/bitstream/urn:nbn:de:gbv:700-202002192601/6/thesis_mueller.pdf']