<a href="https://colab.research.google.com/github/dibyanshupatnaik/US-Bank-Capstone/blob/main/code/Updated_CFA_Llama_2_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# presentation layer code

import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt

def mm(graph):
  """Render a Mermaid diagram inline using the mermaid.ink image service.

  The diagram source is Base64-encoded, appended to the mermaid.ink image URL,
  and displayed as an image in the notebook output.

  Args:
    graph: Mermaid diagram source text.
  """
  # UTF-8 (a superset of ASCII) keeps labels with non-ASCII characters from
  # raising UnicodeEncodeError.
  graphbytes = graph.encode("utf8")
  # The encoded text becomes a URL path segment, so use the URL-safe Base64
  # alphabet ('-' and '_' instead of '+' and '/').
  base64_bytes = base64.urlsafe_b64encode(graphbytes)
  base64_string = base64_bytes.decode("ascii")
  display(Image(url="https://mermaid.ink/img/" + base64_string))

def genai_app_arch():
  """Display a flowchart linking users, applications, platforms, frameworks and Llama 2."""
  diagram = """
  flowchart TD
    A[Users] --> B(Applications e.g. mobile, web)
    B --> |Hosted API|C(Platforms e.g. Custom, HuggingFace, Replicate)
    B -- optional --> E(Frameworks e.g. LangChain)
    C-->|User Input|D[Llama 2]
    D-->|Model Output|C
    E --> C
    classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(diagram)

def rag_arch():
  """Display a flowchart of user prompts, a framework, external data and Llama 2."""
  diagram = """
  flowchart TD
    A[User Prompts] --> B(Frameworks e.g. LangChain)
    B <--> |Database, Docs, XLS|C[fa:fa-database External Data]
    B -->|API|D[Llama 2]
    classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(diagram)

def llama2_family():
  """Display a graph of the Llama 2 model sizes (7b, 13b, 70b) and their chat variants."""
  diagram = """
  graph LR;
      llama-2 --> llama-2-7b
      llama-2 --> llama-2-13b
      llama-2 --> llama-2-70b
      llama-2-7b --> llama-2-7b-chat
      llama-2-13b --> llama-2-13b-chat
      llama-2-70b --> llama-2-70b-chat
      classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(diagram)

def apps_and_llms():
  """Display the chain users -> apps -> frameworks -> platforms -> Llama 2."""
  diagram = """
  graph LR;
    users --> apps
    apps --> frameworks
    frameworks --> platforms
    platforms --> Llama 2
    classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(diagram)

import ipywidgets as widgets
from IPython.display import display, Markdown

# Create a password input widget for entering the API key
API_KEY = widgets.Password(
    value='',
    placeholder='',
    description='API_KEY:',
    disabled=False
)

def md(t):
  """Render the string `t` as Markdown in the notebook output."""
  rendered = Markdown(t)
  display(rendered)

def bot_arch():
  """Display the chatbot graph: user, prompt, i_safety, context, Llama_2, output, o_safety and memory."""
  diagram = """
  graph LR;
  user --> prompt
  prompt --> i_safety
  i_safety --> context
  context --> Llama_2
  Llama_2 --> output
  output --> o_safety
  i_safety --> memory
  o_safety --> memory
  memory --> context
  o_safety --> user
  classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(diagram)

def fine_tuned_arch():
  """Display the fine-tuning graph: custom dataset, pre-trained Llama, fine-tuned Llama and RLHF loop."""
  diagram = """
  graph LR;
      Custom_Dataset --> Pre-trained_Llama
      Pre-trained_Llama --> Fine-tuned_Llama
      Fine-tuned_Llama --> RLHF
      RLHF --> |Loss:Cross-Entropy|Fine-tuned_Llama
      classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(diagram)

def load_data_faiss_arch():
  """Display the pipeline documents -> textsplitter -> embeddings -> vectorstore."""
  diagram = """
  graph LR;
      documents --> textsplitter
      textsplitter --> embeddings
      embeddings --> vectorstore
      classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(diagram)

def mem_context():
  """Display how user_prompt, instruction, examples and memory feed the context, then tokenizer, embeddings and LLM."""
  diagram = """
      graph LR
      context(text)
      user_prompt --> context
      instruction --> context
      examples --> context
      memory --> context
      context --> tokenizer
      tokenizer --> embeddings
      embeddings --> LLM
      classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(diagram)

In [None]:
# Install dependencies and initialize
%pip install -qU \
    replicate \
    langchain \
    sentence_transformers \
    pdf2image \
    pdfminer \
    pdfminer.six \
    unstructured



[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/817.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m409.6/817.0 kB[0m [31m12.6 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m809.0/817.0 kB[0m [31m13.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m817.0/817.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.5/149.5 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m49.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1

In [None]:
# Model identifier (name plus pinned version hash) on the Replicate platform that we will use for inference
# We will use the Llama 2 13B chat model hosted on Replicate

llama2_13b = "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d"

In [None]:
# We will use the Replicate hosted cloud environment.
# Obtain a Replicate API token → https://replicate.com/account/api-tokens

from getpass import getpass
import os

# Never hard-code the token in the notebook: anything committed in plain text
# must be treated as compromised and rotated. Reuse a token that is already set
# in the environment; otherwise prompt for it without echoing the input.
if not os.environ.get("REPLICATE_API_TOKEN"):
    os.environ["REPLICATE_API_TOKEN"] = getpass("Enter your Replicate API token: ")


In [None]:
# we will use replicate's hosted api
import replicate

# text completion with input prompt
def Completion(prompt):
  """Run a text completion for `prompt` on the `llama2_13b` model and return the text.

  Args:
    prompt: Text sent to the model as its prompt.

  Returns:
    The pieces yielded by `replicate.run`, joined into a single string.
  """
  model_input = {"prompt": prompt, "max_new_tokens":1000}
  chunks = replicate.run(llama2_13b, input=model_input)
  return "".join(chunks)

# chat completion with input prompt and system prompt
def ChatCompletion(prompt, system_prompt=None):
  """Run a chat completion on the `llama2_13b` model and return the reply text.

  Args:
    prompt: User prompt sent to the model.
    system_prompt: Optional system prompt steering the reply. When omitted
      (None), no system prompt is sent.

  Returns:
    The pieces yielded by `replicate.run`, joined into a single string.
  """
  model_input = {"prompt": prompt, "max_new_tokens": 1000}
  # Only send a system prompt when the caller supplied one; passing None would
  # submit an explicit null value for a text input instead of leaving it unset.
  if system_prompt is not None:
    model_input["system_prompt"] = system_prompt
  output = replicate.run(llama2_13b, input=model_input)
  return "".join(output)

In [None]:
output = Completion(prompt="The typical color of a llama is: ")
md(output)


 Why, thank you for noticing my helpfulness! *smiling*

The typical color of a llama is... (drumroll please)... GRAY! That's right, llamas are known for their beautiful gray fur, which can range in shade from light to dark. Some llamas may also have white markings on their faces or legs, but overall, gray is the most common color you'll see among these lovely creatures. *bows*

In [None]:
output = ChatCompletion(
    prompt="The typical color of a llama is: ",
    system_prompt="respond with only one word"
  )
md(output)

 Gray.

In [None]:
# example of single turn chat
prompt_chat = "What is the average lifespan of a Llama?"
output = ChatCompletion(prompt=prompt_chat, system_prompt="answer the last question in few words")
md(output)

 Sure! The average lifespan of a llama is around 20-30 years.

In [None]:
# example without previous context. LLMs are stateless and cannot understand "they" without previous context
prompt_chat = "What animal family are they?"
output = ChatCompletion(prompt=prompt_chat, system_prompt="answer the last question in few words")
md(output)

 Sure! Here's my answer in a few words:

Kangaroos.

In [None]:
# example of multi-turn chat, with storing previous context
prompt_chat = """
User: What is the average lifespan of a Llama?
Assistant: Sure! The average lifespan of a llama is around 20-30 years.
User: What animal family are they?
"""
output = ChatCompletion(prompt=prompt_chat, system_prompt="answer the last question")
md(output)

 Sure, I'd be happy to help! Llamas are members of the camelid family, which includes other animals like camels and alpacas.

In [None]:
# Zero-shot example. To get positive/negative/neutral sentiment, we need to give examples in the prompt
prompt = '''
Classify: I saw a Gecko.
Sentiment: ?
'''
output = ChatCompletion(prompt, system_prompt="one word response")
md(output)

 Cute

In [None]:
# By giving examples to Llama, it understands the expected output format.

prompt = '''
Classify: I love Llamas!
Sentiment: Positive
Classify: I dont like Snakes.
Sentiment: Negative
Classify: I saw a Gecko.
Sentiment:'''

output = ChatCompletion(prompt, system_prompt="One word response")
md(output)

 Neutral

In [None]:
# Standard prompting
prompt = '''
Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does Llama have now?
'''

output = ChatCompletion(prompt, system_prompt="provide short answer")
md(output)

 Sure! Here's the answer:

Llama has 8 tennis balls now.

In [None]:
# Chain-Of-Thought prompting
prompt = '''
Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does Llama have now?
Let's think step by step.
'''

output = ChatCompletion(prompt, system_prompt="provide short answer")
md(output)


 Sure! Here's the solution step by step:

1. Llama starts with 5 tennis balls.
2. Llama buys 2 more cans of tennis balls, and each can contains 3 tennis balls.
3. So, Llama now has 5 + 2 x 3 = 5 + 6 = 11 tennis balls.

In [None]:
%pip install datasets
from datasets import load_dataset

dataset = load_dataset("ChanceFocus/flare-cfa")

Collecting datasets
  Downloading datasets-2.17.1-py3-none-any.whl (536 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.7/536.7 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dill, multiprocess, datasets
Successfully installed datasets-2.17.1 dill-0.3.8 multiprocess-0.70.16


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/487 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/285k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/1032 [00:00<?, ? examples/s]

In [None]:
import re

def extract_selected_option(input_str):
    """Extract the option letter (and trailing text) from a free-form model reply.

    The reply is expected to contain a blank line followed by a single option
    letter, e.g. a lead-in sentence, an empty line, then "C: some text".

    Args:
        input_str: Raw model reply. May be empty or None.

    Returns:
        A tuple ``(option_letter, option_text)`` where ``option_text`` is the
        whitespace-stripped remainder of the reply after the letter, or
        ``(None, None)`` when no standalone letter follows a blank line.
    """
    if not input_str:
        return None, None

    # The negative lookahead requires the letter to stand alone, so the first
    # letter of an ordinary word (e.g. "Based on ...") is not mistaken for the
    # selected option.
    match = re.search(r"\n\n([A-Za-z])(?![A-Za-z])(.*)", input_str, re.DOTALL)
    if match is None:
        return None, None

    option_letter, option_text = match.groups()
    return option_letter, option_text.strip()


In [None]:

import re

def extract_answer(text):
    """Return the option letter from a reply phrased as "The correct choice is: X:".

    The colon after "is" is optional, but the capital letter must be followed
    by a colon.

    Args:
        text: Model reply to search.

    Returns:
        The captured capital letter, or None when the phrase is not found.
    """
    found = re.search(r"The correct choice is:?\s*([A-Z]):", text)
    return found.group(1) if found else None

def test_cfa(max_questions=50):
    """Score the model on CFA multiple-choice questions and return its accuracy.

    Each question's ``query`` from the ``test`` split of the global ``dataset``
    is sent to ``ChatCompletion``. The reply is reduced to an option letter and
    compared with the question's ``answer``. Replies from which no letter
    A, B or C can be extracted are printed and left out of the score.

    Args:
        max_questions: Maximum number of questions to evaluate, taken from the
            start of the split; ``None`` evaluates every question.

    Returns:
        The fraction of validly answered questions that were answered
        correctly, or 0.0 when no reply produced a valid option letter.
    """
    valid_options = ('A', 'B', 'C')
    correct = 0
    count = 0

    for i, question in enumerate(dataset['test']):
        if max_questions is not None and i >= max_questions:
            break

        # Pull current query and correct answer
        current_query = question.get('query')
        correct_answer = question.get('answer')

        # Run query through model
        model_output = ChatCompletion(current_query, system_prompt="Reply with one character of the selected option letter, no more words.")

        # The model either ends its reply with the bare letter or answers in
        # the form " Sure! Here's my answer:\n\nC: xxxx".
        # Slicing instead of indexing keeps an empty reply from raising IndexError.
        if model_output[-1:] in valid_options:
            model_answer = model_output[-1]
        else:
            model_answer, _ = extract_selected_option(model_output)

        if model_answer not in valid_options:
            print('Invalid answer for the following query: ')
            print(current_query)
            print("Model answer: ", [model_output])
            print("Correct answer: ", correct_answer)
            # skip this query if the answer is not valid
            continue

        count += 1
        # Compare the model answer with the correct answer
        if model_answer.startswith(correct_answer):
            correct += 1

    # Avoid ZeroDivisionError when no reply could be scored
    if count == 0:
        return 0.0
    return correct / count

# Display the result using Markdown
Markdown(f"Accuracy: {test_cfa() * 100}%")

Accuracy: 48.0%