In [1]:
# !uvicorn causal.text_generation_api:app --reload --host 127.0.0.1 --port 8889

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
import pandas as pd
from datasets import load_from_disk
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np
import requests
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from langchain import PromptTemplate
from utils.evaluate import calculate_cosine_similarity
from tqdm import tqdm
import psutil
import subprocess
import signal
import re

  from .autonotebook import tqdm as notebook_tqdm


Check if the api is running

In [2]:
def find_uvicorn_processes():
    uvicorn_processes = []
    for process in psutil.process_iter(['pid', 'name', 'cmdline']):
        if 'uvicorn' in process.info['name'].lower() or \
                ('uvicorn' in process.info['cmdline'] if process.info['cmdline'] else False):
            uvicorn_processes.append(process.info)
 
    return uvicorn_processes
 
def kill_process_by_pid(pid):
    try:
        process = psutil.Process(pid)
        process.terminate()  # Alternatively, use process.kill() for a forceful termination
        print(f"Process {pid} has been terminated.")
    except psutil.NoSuchProcess:
        print(f"No process found with PID {pid}.")

uvicorn_processes = find_uvicorn_processes()

if uvicorn_processes:
    print("The follwoing is ruuning: ")
    for process in uvicorn_processes:
        print(f"PID: {process['pid']}, Name: {process['name']}, Command: {' '.join(process['cmdline'])}")
else:
    print("Uvicore unfound")

The follwoing is ruuning: 
PID: 24290, Name: uvicorn, Command: /home/llama/Sandbox_IA/env/bin/python /home/llama/Sandbox_IA/env/bin/uvicorn app.main:app --reload --port 8001
PID: 1261240, Name: python, Command: /home/llama/Personal_Directories/srb/causalEnv/bin/python -m uvicorn causal.text_generation_api_llama2_7B:app --reload --host 127.0.0.1 --port 8899


Open the server using uvicorn

In [5]:
directory = "causal"
module = "text_generation_api_llama2_13B"
model = "LLama_13B"
host = "127.0.0.1"
port = 8890
quantization = "4bit"
python_executable = "~/Personal_Directories/srb/causalEnv/bin/python"
test_num = 300

In [6]:
import subprocess
from concurrent.futures import ThreadPoolExecutor
 
# %nohup run_uvicorn_command(directory, your_module, host,port)
uvicorn_command = f"{python_executable} -m uvicorn {directory}.{module}:app --reload --host {host} --port {port}"
subprocess.Popen(uvicorn_command, shell=True)

<Popen: returncode: None args: '~/Personal_Directories/srb/causalEnv/bin/pyt...>

INFO:     Will watch for changes in these directories: ['/home/llama/Personal_Directories/srb/causalllm-main']
INFO:     Uvicorn running on http://127.0.0.1:8890 (Press CTRL+C to quit)
INFO:     Started reloader process [1262030] using StatReload
Process SpawnProcess-1:
Traceback (most recent call last):
  File "/usr/lib64/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib64/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/llama/Personal_Directories/srb/causalEnv/lib64/python3.9/site-packages/uvicorn/_subprocess.py", line 78, in subprocess_started
    target(sockets=sockets)
  File "/home/llama/Personal_Directories/srb/causalEnv/lib64/python3.9/site-packages/uvicorn/server.py", line 65, in run
    return asyncio.run(self.serve(sockets=sockets))
  File "/usr/lib64/python3.9/asyncio/runners.py", line 44, in run
    return loop.run_until_complete(main)
  File "/usr/lib64/python3.

In [6]:
def extract_answer(input_string):
    match = re.search(r'<Answer>(.*?)<\/Answer>', input_string)
    if match:
        answer = match.group(1)
        return answer, True
    else:
        return input_string, False  

Prepare dataset

In [7]:
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 20)
pd.set_option("max_colwidth", 1000)

local_dataset_path = "resource/data/databricks-dolly-15k"
dataset = load_from_disk(local_dataset_path)
df = dataset.to_pandas()
test_df = df[df["category"] == "closed_qa"]
test = test_df.head(test_num)

In [8]:
url = "http://127.0.0.1:8899/generate-text"
system_prompt = """
Please strictly answer my question, but format it using custom tags like <Answer>Your answer here<Answer>. If the question cannot be answered, just answer with "I don't know". """
template = """
<s>[INST] <<SYS>>
{system_prompt}
<</SYS>>

Context: {context_message}
Question: {question_message}

[/INST]

"""

# template = """
# Instruction:{system_prompt}
# Context: {context_message}
# Question: {question_message}
# """
prompt_template = PromptTemplate.from_template(template)

columns = [
    "temperature",
    "model",
    "ground_truth_response",
    "prompt"
]
result_df = pd.DataFrame(columns=columns)

# build the preprocessed csv files
for index, row in test.iterrows():
    question = row["instruction"]
    context = row["context"]
    response = row["response"]
    prompt = prompt_template.format(
        system_prompt=system_prompt, context_message=context, question_message = question
    )

    for temperature in np.arange(0.1, 2.0, 0.3):
        data = {
            "temperature": temperature,
            "model": model,
            "ground_truth_response": response,
            "prompt": str(prompt),
        }
        result_df = pd.concat([result_df, pd.DataFrame([data])], ignore_index=True)


result_df.to_excel(f"{module}_{quantization}_{test_num}.xlsx")

  result_df = pd.concat([result_df, pd.DataFrame([data])], ignore_index=True)


In [10]:
result_df = pd.read_excel(f"{module}_{quantization}_{test_num}.xlsx")
result_df["temperature"] = result_df["temperature"].round(2)

In [11]:
from utils.huggingfaceUtil import generate_response_hf

In [12]:
from utils.huggingfaceUtil import generate_response_hf
from datetime import datetime
from tqdm import tqdm
import pandas as pd
import time  # Import the time module
 
csv_filename = f"{module}_{quantization}_{test_num}.csv"

pbar = tqdm(total=len(result_df))

responses = []

similarities = []
 
for index, row in result_df.iterrows():
    start_time = time.time()  # Record the start time
    response = generate_response_hf(row)
    end_time = time.time()  # Record the end time
    elapsed_time = end_time - start_time  # Calculate the elapsed time
    updated_row = row.copy()
    response_str = response[0]["generated_text"]
    updated_row["full answer"] = response_str
    answer, label = extract_answer(response_str)
    updated_row["generated_response"] = answer
    updated_row["label"] = label
    updated_row["elapsed_time"] = elapsed_time  # Add elapsed time to the row
    # Add the timestamp to the updated_row
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    updated_row["timestamp"] = timestamp
    updated_dataframe = pd.DataFrame([updated_row])
    updated_dataframe.to_csv(csv_filename, index=False, mode="a", header=not index)
    pbar.update(1)
    
pbar.close()




0.1


  0%|          | 1/2100 [00:14<8:35:57, 14.75s/it]

INFO:     127.0.0.1:52494 - "POST /generate-text HTTP/1.1" 200 OK
0.4


  0%|          | 2/2100 [00:16<3:59:36,  6.85s/it]

INFO:     127.0.0.1:32872 - "POST /generate-text HTTP/1.1" 200 OK
0.7


  0%|          | 3/2100 [00:17<2:27:22,  4.22s/it]

INFO:     127.0.0.1:32878 - "POST /generate-text HTTP/1.1" 200 OK
1.0


  0%|          | 4/2100 [00:18<1:43:56,  2.98s/it]

INFO:     127.0.0.1:32892 - "POST /generate-text HTTP/1.1" 200 OK
1.3


  0%|          | 5/2100 [00:19<1:19:56,  2.29s/it]

INFO:     127.0.0.1:32894 - "POST /generate-text HTTP/1.1" 200 OK
1.6


  0%|          | 6/2100 [00:20<1:08:31,  1.96s/it]

INFO:     127.0.0.1:32910 - "POST /generate-text HTTP/1.1" 200 OK
1.9


  0%|          | 7/2100 [00:21<1:00:45,  1.74s/it]

INFO:     127.0.0.1:32912 - "POST /generate-text HTTP/1.1" 200 OK
0.1


  0%|          | 8/2100 [00:23<53:34,  1.54s/it]  

INFO:     127.0.0.1:37186 - "POST /generate-text HTTP/1.1" 200 OK
0.4


  0%|          | 9/2100 [00:24<48:49,  1.40s/it]

INFO:     127.0.0.1:37192 - "POST /generate-text HTTP/1.1" 200 OK
0.7


  0%|          | 10/2100 [00:25<45:26,  1.30s/it]

INFO:     127.0.0.1:37206 - "POST /generate-text HTTP/1.1" 200 OK
1.0


  1%|          | 11/2100 [00:26<43:11,  1.24s/it]

INFO:     127.0.0.1:37214 - "POST /generate-text HTTP/1.1" 200 OK
1.3


  1%|          | 12/2100 [00:27<42:23,  1.22s/it]

INFO:     127.0.0.1:37228 - "POST /generate-text HTTP/1.1" 200 OK
1.6


  1%|          | 13/2100 [00:28<41:01,  1.18s/it]

INFO:     127.0.0.1:37234 - "POST /generate-text HTTP/1.1" 200 OK
1.9


  1%|          | 14/2100 [00:30<46:02,  1.32s/it]

INFO:     127.0.0.1:37240 - "POST /generate-text HTTP/1.1" 200 OK
0.1


  1%|          | 15/2100 [00:36<1:40:06,  2.88s/it]

INFO:     127.0.0.1:37250 - "POST /generate-text HTTP/1.1" 200 OK
0.4


  1%|          | 16/2100 [00:43<2:17:47,  3.97s/it]

INFO:     127.0.0.1:59130 - "POST /generate-text HTTP/1.1" 200 OK
0.7


  1%|          | 17/2100 [00:51<3:01:56,  5.24s/it]

INFO:     127.0.0.1:37616 - "POST /generate-text HTTP/1.1" 200 OK
1.0


  1%|          | 18/2100 [00:58<3:24:50,  5.90s/it]

INFO:     127.0.0.1:37974 - "POST /generate-text HTTP/1.1" 200 OK
1.3
