In [1]:
# ## Evaluation of Deployed Model Performance

import time
import pandas as pd
from transformers import BartTokenizer
import wandb
import os
import sys
from dotenv import load_dotenv

# Make the project-local `inference` module importable. The relative entry is
# resolved against the current working directory, so the notebook has to be
# run from the directory it lives in.
sys.path.append("../src")
from inference import HuggingFaceEndpoint

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

ENDPOINT_URL = os.getenv("MODEL_URL")
HF_TOKEN = os.getenv("HF_TOKEN")

# Fail fast on a missing endpoint URL. Without this check the value would be
# None and the misconfiguration would only surface later as per-request
# "Error: ..." rows in the evaluation results.
if not ENDPOINT_URL:
    raise RuntimeError(
        "MODEL_URL is not set; define it in the environment or in a .env file "
        "before running this notebook."
    )
# NOTE(review): HF_TOKEN is passed through as-is (possibly None); whether the
# endpoint requires a token is not visible from here — confirm.

client = HuggingFaceEndpoint(endpoint_url=ENDPOINT_URL, hf_token=HF_TOKEN)
# Tokenizer is only used locally to count tokens for the cost estimate.
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")


  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:

# Fixed evaluation set: ten chat-style dialogues, one string per dialogue, with
# one "Speaker: utterance" turn per line. Each string is sent to the endpoint
# verbatim, so typos, slang and placeholder tags inside the strings are part of
# the test data and must not be "corrected".
# NOTE(review): these look like samples from a public dialogue-summarization
# dataset — confirm the source and record it here.
test_inputs = [
    "Amanda: I baked cookies. Do you want some?\nJerry: Sure!\nAmanda: I'll bring you tomorrow :-)",
    "Olivia: Who are you voting for in this election?\nOliver: Liberals as always.\nOlivia: Me too!!\nOliver: Great",
    """Tim: Hi, what's up?
Kim: Bad mood tbh, I was going to do lots of stuff but ended up procrastinating
Tim: What did you plan on doing?
Kim: Oh you know, uni stuff and unfucking my room
Kim: Maybe tomorrow I'll move my ass and do everything
Kim: We were going to defrost a fridge so instead of shopping I'll eat some defrosted veggies
Tim: For doing stuff I recommend Pomodoro technique where u use breaks for doing chores
Tim: It really helps
Kim: thanks, maybe I'll do that
Tim: I also like using post-its in kaban style""",
    """Edward: Rachel, I think I'm in ove with Bella..
rachel: Dont say anything else..
Edward: What do you mean??
rachel: Open your fu**ing door.. I'm outside""",
    """Sam: hey overheard rick say something
Sam: i don't know what to do :-/
Naomi: what did he say??
Sam: he was talking on the phone with someone
Sam: i don't know who
Sam: and he was telling them that he wasn't very happy here
Naomi: damn!!!
Sam: he was saying he doesn't like being my roommate
Naomi: wow, how do you feel about it?
Sam: i thought i was a good rommate
Sam: and that we have a nice place
Naomi: that's true man!!!
Naomi: i used to love living with you before i moved in with me boyfriend
Naomi: i don't know why he's saying that
Sam: what should i do???
Naomi: honestly if it's bothering you that much you should talk to him
Naomi: see what's going on
Sam: i don't want to get in any kind of confrontation though
Sam: maybe i'll just let it go
Sam: and see how it goes in the future
Naomi: it's your choice sam
Naomi: if i were you i would just talk to him and clear the air""",
    """Neville: Hi there, does anyone remember what date I got married on?
Don: Are you serious?
Neville: Dead serious. We're on vacation, and Tina's mad at me about something. I have a strange suspicion that this might have something to do with our wedding anniversary, but I have nowhere to check.
Wyatt: Hang on, I'll ask my wife.
Don: Haha, someone's in a lot of trouble :D
Wyatt: September 17. I hope you remember the year ;)""",
    """John: Ave. Was there any homework for tomorrow?
Cassandra: hello :D Of course, as always :D
John: What exactly?
Cassandra: I'm not sure so I'll check it for you in 20minutes.
John: Cool, thanks. Sorry I couldn't be there, but I was busy as fuck...my stupid boss as always was trying to piss me off
Cassandra: No problem, what did he do this time?
John: Nothing special, just the same as always, treating us like children, commanding to do this and that...
Cassandra: sorry to hear that. but why don't you just go to your chief and tell him everything?
John: I would, but I don't have any support from others, they are like goddamn pupets and pretend that everything's fine...I'm not gonna fix everything for everyone
Cassandra: I understand...Nevertheless, just try to ignore him. I know it might sound ridiculous as fuck, but sometimes there's nothing more you can do.
John: yeah I know...maybe some beer this week?
Cassandra: Sure, but I got some time after classes only...this week is gonna be busy
John: no problem, I can drive you home and we can go to some bar or whatever.
Cassandra: cool. ok, I got this homework. it's page 15 ex. 2 and 3, I also asked the others to study another chapter, especially the vocabulary from the very first pages. Just read it.
John: gosh...I don't know if I'm smart enough to do it :'D
Cassandra: you are, don't worry :P Just circle all the words you don't know and we'll continue on Monday.
John: ok...then I'll try my best :D
Cassandra: sure, if you will have any questions just either text or call me and I'll help you.
John: I hope I won't have to waste your time xD
Cassandra: you're not wasting my time, I'm your teacher, I'm here to help. This is what I get money for, also :P
John: just kidding :D ok, so i guess we'll stay in touch then
Cassandra: sure, have a nice evening :D
John: you too, se ya
Cassandra: Byeeeee""",
    """Sarah: I found a song on youtube and I think you'll like it
James: What song?
Sarah: <file_other>
James: Oh. I know it!
James: I heard it before in some compilation
Sarah: I can't stop playing it over and over
James: That's exactly how I know lyrics to all of the songs on my playlist :D
Sarah: Haha. No lyrics here though. Instrumental ;D
James: Instrumental songs are different kind of music.
James: But you have to remember that the activity you do when you listen to this song
James: Is the actvity your brain will connect to the song
And everytime you play this song at home
You'll be thinking of your work
Sarah: Yeah, I know that. That's why we sometimes say - I used to like that song, but now it just reminds me of bad memories
James: Yup. Everytime you change your partner, you have to get rid of your favorite music :D
Sarah: Hahaha. True, true.""",
    """Noah: When and where are we meeting? :)
Madison: I thought you were busy...?
Noah: Yeah, I WAS. I quit my job.
Madison: No way! :o :o :o Why? I thought you liked it...?
Noah: Well, I used to, until my boss turned into a complete cock... Long story.""",
    """Matt: Do you want to go for date?
Agnes: Wow! You caught me out with this question Matt.
Matt: Why?
Agnes: I simply didn't expect this from you.
Matt: Well, expect the unexpected.
Agnes: Can I think about it?
Matt: What is there to think about?
Agnes: Well, I don't really know you.
Matt: This is the perfect time to get to know eachother
Agnes: Well that's true.
Matt: So let's go to the Georgian restaurant in Kazimierz.
Agnes: Now your convincing me.
Matt: Cool, saturday at 6pm?
Agnes: That's fine.
Agnes: I can pick you up on the way to the restaurant.
Matt: That's really kind of you.
Agnes: See you on saturday.
Matt: Yes, looking forward to it.
Agnes: Me too."""
]



In [3]:

# Open the Weights & Biases run that the evaluation loop logs into. The config
# records the hourly endpoint price, how many requests this run will send, and
# the name of the metric being tracked.
wandb.init(
    project="llm-inference-monitoring",
    name="hf-endpoint-latency-test",
    config=dict(
        endpoint_cost_per_hour=0.27,
        num_requests=len(test_inputs),
        metric="latency_ms",
    ),
)


[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from C:\Users\PROGRESSIVE\_netrc.


[34m[1mwandb[0m: Currently logged in as: [33mchibu[0m ([33mchibu-babcock-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:

# Evaluate the deployed endpoint: send every dialogue once, record latency and
# success/failure per request, then derive average latency, reliability and a
# latency-based cost estimate.

# Per-request measurements, appended in the same order as test_inputs.
latencies = []
summaries = []
success_count = 0
total_tokens = 0

# Endpoint billing info
endpoint_cost_per_hour = 0.27
seconds_per_hour = 3600
endpoint_cost_per_sec = endpoint_cost_per_hour / seconds_per_hour  # USD per second

try:
    for dialogue in test_inputs:
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments during the request.
        start_time = time.perf_counter()
        try:
            summary_text = client.summarize(dialogue)
            error = 0
        except Exception as e:
            # Best effort: a failed request is recorded, not fatal.
            summary_text = f"Error: {str(e)}"
            error = 1
        # Stop the clock right after the endpoint call so that local
        # tokenization below is not counted as inference latency.
        latency = (time.perf_counter() - start_time) * 1000  # ms

        latencies.append(latency)
        summaries.append(summary_text)

        wandb.log({
            "latency_ms": latency,
            "error": error
        })

        if not error:
            # Token counts feed the cost-per-1K-tokens estimate only. A request
            # is counted as successful only once its tokens are counted, so
            # success_count and total_tokens always describe the same requests.
            input_tokens = len(tokenizer(dialogue)["input_ids"])
            output_tokens = len(tokenizer(summary_text)["input_ids"])
            total_tokens += input_tokens + output_tokens
            success_count += 1
finally:
    # Close the run exactly once, after all requests have been logged (and
    # also if the loop is interrupted). Finishing inside the loop would end
    # the run after the first request while later iterations still log to it.
    wandb.finish()


average_latency = sum(latencies) / len(latencies) if latencies else 0
reliability = (success_count / len(test_inputs) * 100) if test_inputs else 0

# Total cost in USD (despite the variable name): summed request time in
# seconds multiplied by the per-second endpoint price.
total_cost_seconds = sum(latencies) / 1000 * endpoint_cost_per_sec
# Cost per 1K tokens (latency-based estimate)
cost_per_1k_tokens = (total_cost_seconds / total_tokens * 1000) if total_tokens > 0 else 0


results_df = pd.DataFrame({
    "Dialogue #": range(1, len(test_inputs)+1),
    "Latency (ms)": [round(ms, 2) for ms in latencies],
    "Summary": summaries
})

print("=== Inference Results ===")
display(results_df)

print("\n=== Performance Summary ===")
print(f"Average Latency (ms): {average_latency:.2f}")
print(f"Response Reliability (%): {reliability:.2f}")
# Request count and hourly price are taken from the values actually used, so
# the report stays correct if the test set or the pricing changes.
print(f"Total Estimated Cost for {len(test_inputs)} Requests ($): {total_cost_seconds:.6f}  Based on ${endpoint_cost_per_hour}/hr endpoint")
print(f"Estimated Cost per 1K Tokens ($): {cost_per_1k_tokens:.6f}  Latency-based, token-aware")



=== Inference Results ===


Unnamed: 0,Dialogue #,Latency (ms),Summary
0,1,2978.52,Jerry and Amanda baked cookies. Amanda will br...
1,2,2914.32,Oliver is voting for Liberals this election.
2,3,4286.81,Kim is in a bad mood. She was going to do lots...
3,4,3999.49,Edward thinks he's in ove with Bella. Rachel i...
4,5,3624.94,Sam overheard rick saying he doesn't like bein...
5,6,3000.6,Neville and his wife Tina are on vacation and ...
6,7,5368.77,Cassandra will check John's homework for tomor...
7,8,3941.37,Sarah found a song on youtube and James can't ...
8,9,2761.81,Noah quit his job because his boss turned into...
9,10,3227.35,in Kazimierz on Saturday at 6pm. Agnes picks ...



=== Performance Summary ===
Average Latency (ms): 3610.40
Response Reliability (%): 100.00
Total Estimated Cost for 10 Requests ($): 0.002708  Based on $0.27/hr endpoint
Estimated Cost per 1K Tokens ($): 0.001399  Latency-based, token-aware


### Brief Findings
* Latency: ~3.6 s per request on average (3610 ms in the run shown above), reasonable for BART-large inference.
* Reliability: 100%, all requests succeeded.
* Total cost for 10 requests: very low (~$0.0027).
* Token-based estimate: ~$0.0014 per 1K tokens.
* Test setup is small, reproducible, and sufficient for performance demonstration.