In [6]:
from collections import Counter
import numpy as np
import re 

def normalize_answer(s):
    # Lower case, remove punctuation, and whitespaces ### PLEASE CHECK If logic correct / desired ######## 
    return ' '.join(re.sub(r'[^A-Za-z0-9]', ' ', re.sub(r'\b(a|an|the)\b', ' ', s.lower())).split())

def metrics(annotation_answers, rag_answers):
    f1_scores, exact_matches, recall_scores = [], [], []
    
    for pred, truth in zip(annotation_answers, rag_answers):
        pred_tokens, truth_tokens = normalize_answer(pred).split(), normalize_answer(truth).split()
        common_tokens = Counter(pred_tokens) & Counter(truth_tokens)
        num_common = sum(common_tokens.values())
        
        prec = (num_common / len(pred_tokens))
        rec = (num_common / len(truth_tokens)) 

        f1_scores.append( 0 if prec+rec == 0 else (2*(prec*rec)/(prec + rec)))
        exact_matches.append(int(pred_tokens == truth_tokens))
        recall_scores.append(rec if truth_tokens else 0)
    
    return np.mean(f1_scores), np.mean(exact_matches), np.mean(recall_scores)

In [9]:
#MODIFIED
import numpy as np

def metrics(annotation_answers, rag_answers):
    f1_scores, exact_matches, recall_scores = [], [], []
    
    for pred, truth in zip(annotation_answers, rag_answers):
        pred_tokens, truth_tokens = normalize_answer(pred).split(), normalize_answer(truth).split()

        # Avoid division by zero by checking if pred_tokens or truth_tokens are empty
        if len(pred_tokens) == 0 or len(truth_tokens) == 0:
            prec = 0
            rec = 0
        else:
            common_tokens = Counter(pred_tokens) & Counter(truth_tokens)
            num_common = sum(common_tokens.values())
            prec = (num_common / len(pred_tokens))
            rec = (num_common / len(truth_tokens))

        f1_scores.append(0 if prec + rec == 0 else (2 * (prec * rec) / (prec + rec)))
        exact_matches.append(int(pred_tokens == truth_tokens))
        recall_scores.append(rec if truth_tokens else 0)

    return np.mean(f1_scores), np.mean(exact_matches), np.mean(recall_scores)


In [2]:
"""
Citation: 
    The use of chatgpt for the normalize answer and for using the counter function only
"""

'\nCitation: \n    The use of chatgpt for the normalize answer and for using the counter function only\n'

In [4]:
def getFileData(filename):
    f = open(filename, "r")
    data = f.readlines()
    f.close()
    data = [i.strip() for i in data]
    return data

In [28]:
# GPT4All Embeddings + Llama2

refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/system_output_1.txt")

metrics(refrence_answers, system_generated_answers)

(0.23105094889028244, 0.0, 0.17675869722203136)

In [27]:
# all-miniLM-l6-v2 embeddings + LLama2

refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/system_output_2.txt")

metrics(refrence_answers, system_generated_answers)

(0.3569133369324437, 0.0379746835443038, 0.33798993223464463)

In [26]:
# mistral embeddings + LLama2

refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/system_output_3.txt")

metrics(refrence_answers, system_generated_answers)

(0.37367435697686446, 0.02531645569620253, 0.3185538110816663)

In [25]:
# all-miniLM-l6-v2 embeddings + Dragon Mistral

refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/system_output_5.txt")

system_generated_answers = [i for i in system_generated_answers if i != ""]
metrics(refrence_answers, system_generated_answers)

(0.04237111803781401, 0.0, 0.04669124425295719)

In [24]:
refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/miniLM_mistral.txt")

system_generated_answers = [i for i in system_generated_answers if i != ""]
metrics(refrence_answers, system_generated_answers)

(0.03734369574591945, 0.0, 0.04852857655033772)

In [23]:
refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/colbert.txt")

system_generated_answers = [i for i in system_generated_answers if i != ""]
metrics(refrence_answers, system_generated_answers)

(0.4142937759054067, 0.05063291139240506, 0.3685443983028275)

In [32]:
refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/hybrid.txt")

system_generated_answers = [i for i in system_generated_answers if i != ""]
metrics(refrence_answers, system_generated_answers)

(0.3522762395816397, 0.06329113924050633, 0.3310342456998603)

In [37]:
refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/rerank.txt")

system_generated_answers = [i for i in system_generated_answers if i != ""]
metrics(refrence_answers, system_generated_answers)

(0.2881011370509792, 0.012658227848101266, 0.2710761187019919)

In [15]:
# Mistral embeddings

refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/MistralEmbed.txt")

system_generated_answers = [i for i in system_generated_answers if i != ""]
metrics(refrence_answers, system_generated_answers)

(0.3097838381975181, 0.02531645569620253, 0.2442174549551543)

In [16]:
# Reranking embeddings
refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/rerank_system_out.txt")

system_generated_answers = [i for i in system_generated_answers if i != ""]
metrics(refrence_answers, system_generated_answers)

(0.2833292586347966, 0.02531645569620253, 0.25814614882449805)

In [19]:
# Mistral embeddings

refrence_answers = getFileData("SubmissionData/test/reference_answers.txt")
system_generated_answers = getFileData("SubmissionData/system_outputs/MistralInstruct8x7.txt")

system_generated_answers = [i for i in system_generated_answers if i != ""]
metrics(refrence_answers, system_generated_answers)

(0.15575757065770926, 0.0, 0.13497957757571014)

In [33]:
f = open("FINALQUESTIONS.txt", "r")
lines = f.readlines()
f.close()

In [36]:
for i in range(len(lines)//20):
    count = 1
    for j in lines[i*20:i*20 + 20]:
        print("{}. {}".format(count, j))
        count += 1

1. What is another name for the vehicle being raced in sweepstakes?

2. What's the course number for large language models methods and application?

3. When will the classes begin in the Fall 2024 semester?

4. In spring 2024, How many units is course 10315?

5. In the TAPLoss paper, what does TAP stand for?

6. What is the purpose of the ACL 60/60 evaluation sets?

7. In summer 2024, What is the last day of Mini-5 classes?

8. What number do all of the Drama classes start with?

9. Carnegie Mellon University is home to how many members of the National Academy of Medicine (NAM)? 

10. What class room was advanced NLP taught last semester?

11. Which LTI faculty member is an author on "Accelerating Diffusion-based Combinatorial Optimization Solvers by Progressive Distillation"?

12. At what conference was "Fully Unsupervised Topic Clustering of Unlabelled Spoken Audio Using Self-Supervised Representation Learning and Topic Model" published?

13. What is the full name of the conference w