### ANALYZE GENERATION
Metrics:
- Time taken to generate index
- Embedded token count

In [None]:
from src.rule_based_model import RuleBasedModel
from src.slide_model import SlideModel
from src.semantic_model import SemanticModel
from src.nner_model import NNERModel

# INITIALIZE MODEL
# The model instance and its name are derived from ONE setting so they cannot
# drift apart. Previously `model` and `model_name` were toggled by two separate
# groups of commented-out lines, and a mismatch would silently save results
# under another model's file name.
MODEL_CLASSES = {
    "rule": RuleBasedModel,
    "semantic": SemanticModel,
    "slide": SlideModel,
    "nner": NNERModel,
}

# Change only this line to switch models; it must be a key of MODEL_CLASSES.
model_name = "nner"

# Only the selected class is instantiated (no-argument constructor, as before).
model = MODEL_CLASSES[model_name]()

In [None]:
from src.model_analysis import analyze_generation
import pandas as pd

from pathlib import Path

# Folder of source documents handed to analyze_generation.
DOC_PATH = "data/docs/Wikipedia/"
# Folder that receives the per-model result CSVs.
RESULTS_DIR = Path("results")

# Create the output folder BEFORE the run: DataFrame.to_csv does not create
# missing parent directories, and index generation can be slow, so a missing
# folder should not be discovered only when saving.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Run the generation analysis for the currently selected model.
print(f"Testing {model_name}")
results = analyze_generation(
    model=model,
    doc_path=DOC_PATH,
    model_name=model_name
)

# SAVE RESULTS
# One CSV per model: results/<model_name>_generation_results.csv
results_df = pd.DataFrame(results)
results_df.to_csv(RESULTS_DIR / (model_name + "_generation_results.csv"), index=False)
results_df

### ANALYZE RETRIEVAL
Metrics:
- Time taken to retrieve context (per query, avg over all queries, etc.)
- Quality of Retrieved Context (scored per query, avg over all queries, etc.)

In [1]:
from src.rule_based_model import RuleBasedModel
from src.slide_model import SlideModel
from src.semantic_model import SemanticModel
from src.nner_model import NNERModel

# INITIALIZE MODEL
# The model instance and its name are derived from ONE setting so they cannot
# drift apart. Previously `model` and `model_name` were toggled by two separate
# groups of commented-out lines; a mismatch here would make the retrieval cell
# call model.load_index() with another model's name.
MODEL_CLASSES = {
    "rule": RuleBasedModel,
    "semantic": SemanticModel,
    "slide": SlideModel,
    "nner": NNERModel,
}

# Change only this line to switch models; it must be a key of MODEL_CLASSES.
model_name = "nner"

# Only the selected class is instantiated (no-argument constructor, as before).
model = MODEL_CLASSES[model_name]()

In [2]:
from src.model_analysis import analyze_retrieval
import pandas as pd

from pathlib import Path

# CSV of evaluation queries handed to analyze_retrieval.
QUERY_PATH = "data/Wikipedia_queries.csv"
# Folder that receives the per-model result CSVs.
RESULTS_DIR = Path("results")

# Create the output folder BEFORE the run: DataFrame.to_csv does not create
# missing parent directories, so a missing folder should fail (or be fixed)
# here rather than after every query has already been executed.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# LOAD INDEX FROM FILE
# The index is looked up by model name, so model_name must match `model`.
model.load_index(model_name)

# Run the retrieval analysis for the currently selected model.
print(f"Testing {model_name}")
results = analyze_retrieval(
    model=model,
    model_name=model_name,
    queries_path=QUERY_PATH
)

# SAVE RESULTS
# One CSV per model: results/<model_name>_retrieval_results.csv
results_df = pd.DataFrame(results)
results_df.to_csv(RESULTS_DIR / (model_name + "_retrieval_results.csv"), index=False)
results_df

Testing nner
Model Output: 0.0962972342967987
Predicted Location: 8358
Model Output: 0.08347170054912567
Predicted Location: 7245
Model Output: 0.04662247747182846
Predicted Location: 4047
Model Output: 0.2554393410682678
Predicted Location: 22171
Model Output: 0.03373712673783302
Predicted Location: 2928
Model Output: 0.10984930396080017
Predicted Location: 9534
Model Output: 0.02521708235144615
Predicted Location: 2189
Model Output: 0.053985077887773514
Predicted Location: 4686
Model Output: 0.030847670510411263
Predicted Location: 2677
Model Output: 0.06172872707247734
Predicted Location: 5358
Model Output: 0.42061594128608704
Predicted Location: 36507
Model Output: 0.1892017275094986
Predicted Location: 16422
Model Output: 0.1275513619184494
Predicted Location: 11071
Model Output: 0.15507470071315765
Predicted Location: 13460
Model Output: 0.18617695569992065
Predicted Location: 16159
Model Output: 0.046706780791282654
Predicted Location: 4054
Model Output: 0.15298400819301605
Pred

Unnamed: 0,query_time,query,answer,response_0
0,0.520981,What is an AI accelerator?,"An AI accelerator, also referred to as deep le...",hyperparameter called the receptive field of t...
1,0.445098,Where are AI accelerators often used?,"AI accelerators are used in various devices, i...",many layers can be quickly trained on GPU by s...
2,1.138172,How have GPUs been utilized for AI acceleration?,"Graphics processing units or GPUs, that are sp...",of large language models developed by Anthropi...
3,0.609225,What has been the progress of AI accelerator t...,"In the 1990s, attempts were made to create hig...",data. They are an efficient way of performing ...
4,0.586924,What is the difference between CPUs and AI acc...,While CPUs are used for running AI workloads a...,BERT's state-of-the-art performance on these n...
...,...,...,...,...
105,0.392378,What is the Unified Modeling Language (UML)?,The Unified Modeling Language (UML) is a gener...,simulate artificial synapses. Examples include...
106,0.379270,What was the motivation behind the creation of...,The creation of UML was primarily motivated by...,the maximum number of training generations has...
107,0.430935,When and by whom was UML developed?,UML was developed at Rational Software in 1994...,many of them together in a network can perform...
108,0.404046,Who manages UML and when was it adopted as a s...,UML was adopted as a standard by the Object Ma...,(activation 1.0) through strong positive bias ...


In [10]:
import numpy as np
import pandas as pd

# Which model's saved generation metrics to inspect.
# Other names used in this notebook: "rule", "semantic", "slide".
model_name = "nner"

# Read back the CSV written by the generation analysis cell.
generation_csv = "results/" + model_name + "_generation_results.csv"
results_df = pd.read_csv(generation_csv)

# Print the two generation metrics, one Series after the other.
for metric in ("generation_time", "emb_token_count"):
    print(results_df[metric])

0    3610.46143
Name: generation_time, dtype: float64
0    60427
Name: emb_token_count, dtype: int64


In [ ]:
# AVG QUERY TIMES
# rule - 0.17476807363643523
# semantic - 0.6432608154536203
# slide - 0.4724323227273999
# nner - 0.4823404518167742

# TOKENS EMBEDDED
# rule - 155331
# semantic - 124026
# slide - 122817
# nner - 60427

# TIME TO GENERATE
# rule - 5.029715
# semantic - 3347.915188
# slide - 19.734072
# nner - 3610.46143

# QUALITY RESPONSE COUNT
# rule -
# semantic - 
# slide - 
# nner - A: 4, B: 3, C: 8, D: 94, E: 1 (TOTAL: 110)

In [2]:
from src.model_analysis import response_quality_analysis
import pandas as pd

# Which model's retrieval results to grade.
# Other names used in this notebook: "rule", "semantic", "slide".
model_name = "nner"

# Input: the retrieval results saved earlier; output: the quality gradings.
retrieval_csv = "results/" + model_name + "_retrieval_results.csv"
quality_csv = "results/" + model_name + "_resp_quality_results.csv"

# Grade the retrieved contexts listed in the retrieval results file.
results = response_quality_analysis(retrieval_csv)

# SAVE RESULTS
results_df = pd.DataFrame(results)
results_df.to_csv(quality_csv, index=False)
results_df

Responses: 110it [02:44,  1.49s/it]


Unnamed: 0,0
0,(D) The Retrieved Context contains none of the...
1,(C) The Retrieved Context contains almost none...
2,(D) The Retrieved Context contains none of the...
3,(D) The Retrieved Context contains none of the...
4,(D) The Retrieved Context contains none of the...
...,...
105,(D) The Retrieved Context contains none of the...
106,(D) The Retrieved Context contains none of the...
107,(D) The Retrieved Context contains none of the...
108,(D) The Retrieved Context contains none of the...


In [4]:
import pandas as pd

# Which model's response-quality gradings to summarise.
# Other names used in this notebook: "rule", "semantic", "slide".
model_name = "nner"

results_df = pd.read_csv("results/" + model_name + "_resp_quality_results.csv")

# Select the grading column by POSITION with .iloc. The previous `row[0]`
# looked up an integer key on a Series whose label is presumably the string
# "0" (the CSV header), which only works through pandas' deprecated
# positional fallback and emits a FutureWarning per access.
# astype(str) keeps a missing/NaN grading from raising AttributeError; such a
# row matches no class and is counted in TOTAL only.
responses = results_df.iloc[:, 0].astype(str)

# Each grading is expected to start with its class label in parentheses,
# e.g. "(D) The Retrieved Context contains ...". Count rows per label.
# int(...) keeps plain Python ints so the displayed dict stays {'A': 4, ...}.
class_counts = {
    label: int(responses.str.startswith(f"({label})").sum())
    for label in ("A", "B", "C", "D", "E")
}
# TOTAL is the number of graded rows, whether or not a label was recognised.
class_counts["TOTAL"] = len(responses)

class_counts

  if row[0].startswith("(A)"):
  if row[0].startswith("(B)"):
  if row[0].startswith("(C)"):
  if row[0].startswith("(D)"):
  if row[0].startswith("(E)"):


{'A': 4, 'B': 3, 'C': 8, 'D': 94, 'E': 1, 'TOTAL': 110}