In [1]:
%pip install -qU python-dotenv langchain langchain-openai loguru


Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd

In [3]:
# Load the preprocessed measurements, then show the frame as the cell output.
df = pd.read_csv('preprocessed_data.csv')
df

Unnamed: 0.1,Unnamed: 0,timestamp,edge,bitrate,rtt
0,0,2024-06-07 00:02:35,ba-df,323058.250000,18.34
1,1,2024-06-07 00:07:36,ba-df,385048.800000,17.84
2,2,2024-06-07 00:12:36,ba-df,366947.733333,17.83
3,3,2024-06-07 00:17:36,ba-df,360345.733333,17.86
4,4,2024-06-07 00:22:36,ba-df,331049.733333,17.82
...,...,...,...,...,...
10318,10318,2024-06-11 15:03:36,ba-ce,329547.933333,12.46
10319,10319,2024-06-11 15:08:36,ba-ce,361486.000000,12.49
10320,10320,2024-06-11 15:13:36,ba-ce,365202.066667,12.51
10321,10321,2024-06-11 15:18:35,ba-ce,329374.666667,12.56


In [4]:
# Remove the 'Unnamed: 0' column (it mirrors the row index in the preview above;
# presumably an index saved by an earlier to_csv - confirm).
# Rebinding instead of inplace=True, together with errors='ignore', keeps this
# cell idempotent: re-running it after the column is gone no longer raises KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [5]:
# QoE metric used throughout the notebook: bitrate divided by round-trip time.
df['qoe'] = df['bitrate'].div(df['rtt'])

In [6]:
# 'edge' values look like '<client>-<server>' (e.g. 'ba-df').
# Split each value once with the vectorized .str accessor instead of running two
# Python lambdas per row; missing values propagate as NaN rather than raising.
edge_parts = df['edge'].str.split('-', expand=True)
df['client'] = edge_parts[0]
df['server'] = edge_parts[1]

In [7]:
# Preview the first rows to check the qoe, client and server columns.
df.head()

Unnamed: 0,timestamp,edge,bitrate,rtt,qoe,client,server
0,2024-06-07 00:02:35,ba-df,323058.25,18.34,17614.953653,ba,df
1,2024-06-07 00:07:36,ba-df,385048.8,17.84,21583.452915,ba,df
2,2024-06-07 00:12:36,ba-df,366947.733333,17.83,20580.355207,ba,df
3,2024-06-07 00:17:36,ba-df,360345.733333,17.86,20176.132885,ba,df
4,2024-06-07 00:22:36,ba-df,331049.733333,17.82,18577.426113,ba,df


In [8]:
# Target questions (in Portuguese) the assistant is expected to answer:
#   1. which client has the worst quality in the video-streaming application?
#   2. what is the best server-switching strategy to maximise client X's QoE?
#   3. which server has the most consistent quality of experience?
# NOTE(review): 'ed' and 'experencia' look like typos for 'de' / 'experiência';
# the list is not referenced by any other cell visible here - confirm before editing.
queries = [
    'qual o cliente com a pior qualidade na aplicacao de vídeo streaming?',
    'qual é a melhor estratégia ed troca de servidor para maximizar a qualidade de experiência do cliente X?',
    'qual servidor tem a qualidade de experencia mais consistente?'
]

chain of thought

Qual o cliente com a pior qualidade na aplicação de vídeo streaming?

qual cliente - então temos que comparar os clientes entre si
pior qualidade - buscar a métrica de qualidade para cada cliente e responder indicando o pior.

para cada cliente






In [9]:
# Load environment variables from a local .env file.
# The commented-out alternative below shows which variables are expected.

from dotenv import load_dotenv
load_dotenv()

# Alternative: set the variables by hand (avoid committing real keys to the notebook)
# import os
# os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGCHAIN_API_KEY"] = ''
# os.environ["OPENAI_API_KEY"] = ''


True

In [10]:
# Summary statistics of QoE for client 'rj' over all of its rows,
# rendered as plain text (the form a tool could hand back to the model).
is_rj = df['client'] == 'rj'
resp = df.loc[is_rj, 'qoe'].describe()
str(resp)

'count     5125.000000\nmean     19309.673939\nstd      16241.934837\nmin        252.190247\n25%       7516.051966\n50%      11234.509930\n75%      19817.277380\nmax      65011.750260\nName: qoe, dtype: float64'

## Tools

In [11]:
# Prototype of the consistency metric: per-client QoE variance on server 'pi'.
qoe = df.loc[df['server'] == 'pi', ['qoe', 'client']]
consistency = qoe.groupby('client').agg('var')
consistency

Unnamed: 0_level_0,qoe
client,Unnamed: 1_level_1
ba,1094504.0
rj,1055988.0


In [53]:
from langchain_core.tools import tool

@tool
def list_clients()-> str:
    '''return a list of client names'''
    # The docstring above doubles as the tool description sent to the model.
    # Distinct client labels are returned as one comma-separated string.
    return ", ".join(df['client'].unique())

@tool
def list_servers()-> str:
    '''return a list of server names'''
    # The docstring is the description the model uses to choose a tool; it
    # previously said "client names" (copied from list_clients), which made the
    # two tools indistinguishable to the model.

    # Distinct server labels, returned as one comma-separated string.
    servers = list(df['server'].unique())
    return ", ".join(servers)

@tool
def get_quality_by_client(client: str) -> dict:
    '''
    get quality of experience (QoE), measured in kbps/ms, for a given client

    args:
        client(str) - client name

    response:
        qoe(dict) - give the QoE for a client by each server available
    '''
    # Normalise the name so inputs such as 'RJ' or ' rj ' match the stored labels.
    client = client.lower().strip()
    client_rows = df.loc[df['client'] == client, ['qoe', 'server']]
    # Mean QoE per server, returned as {server: mean_qoe}.
    mean_by_server = client_rows.groupby('server').mean()
    return mean_by_server['qoe'].to_dict()

@tool
def get_consistency_by_server(server: str) -> str:
    '''
    get the consistency of a server. the value is a variance measure
    lower values means better consistency

    args:
        server(str) - server name
    response:
        consistency(str) - consistency for a server by each client connected
    '''
    # Normalise the name so inputs such as 'PI' match the stored labels.
    server = server.lower().strip()
    server_rows = df.loc[df['server'] == server, ['qoe', 'client']]
    # QoE variance per client on this server, as {client: variance}.
    variance_by_client = server_rows.groupby('client').var()
    consistency = variance_by_client.to_dict()['qoe']
    return f'''
    server: {server}
    clients conected:{consistency}
    '''

# Smoke test: call a tool directly (outside the LLM loop) with a dict of its arguments.
# get_quality_by_client.invoke({'client':'rj'})
get_consistency_by_server.invoke({'server': 'pi'})


"\n    server: pi\n    clients conected:{'ba': 1094503.504389175, 'rj': 1055988.485413321}\n    "

In [54]:
# Tools exposed to the model.
tools = [
    list_clients,
    list_servers,
    get_quality_by_client,
    get_consistency_by_server,
]

# Name -> tool lookup used to dispatch the model's tool calls.
# Derived from `tools` so the two cannot drift apart when a tool is added or
# renamed; each key is the tool's `.name`, which @tool sets to the name of the
# decorated function (the same keys that were previously typed by hand).
tools_by_name = {t.name: t for t in tools}

In [55]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage

# Chat model used for every query in this notebook.
# NOTE(review): presumably authenticates via OPENAI_API_KEY loaded from .env - confirm.
llm = ChatOpenAI(model="gpt-4o-mini")

In [56]:
# Attach the tool list so the model can reply with tool calls.
llm_with_tools = llm.bind_tools(tools)

In [57]:
from loguru import logger


def query(query: str, context: "list[str] | None" = None, max_rounds: int = 10):
    '''
    Ask the tool-enabled model a question and print its final answer.

    The model is called once; while its reply contains tool calls, each call is
    dispatched through `tools_by_name`, the tool results are appended to the
    conversation and the model is called again.

    args:
        query(str) - the user question
        context(list[str] | None) - optional extra text snippets, joined with
            newlines and placed in the prompt; None means no extra context
        max_rounds(int) - upper bound on tool-calling rounds, so a model that
            keeps requesting tools cannot loop forever

    response:
        None - the final answer is printed, not returned

    raises:
        KeyError - if the model asks for a tool that is not in `tools_by_name`
    '''
    # None instead of a mutable `[]` default; the behaviour for callers is the same.
    context_text = "\n".join(context or [])
    # Built under a new name so the `query` parameter is not overwritten.
    prompt = f'''
    reply using function calls and the context. 
    If you do not have data reply with: I was not trained to solve this problem.

    some context:
    {context_text}

    query: {query}
    '''
    messages = [HumanMessage(prompt)]

    response = llm_with_tools.invoke(messages)
    messages.append(response)
    logger.debug(messages)

    # Only go back to the model when it actually asked for tools. A direct answer
    # is final and does not trigger a second, redundant model call.
    rounds = 0
    while response.tool_calls:
        if rounds >= max_rounds:
            logger.warning(
                f'stopped after {max_rounds} tool-calling rounds; the answer may be incomplete'
            )
            break
        rounds += 1

        for tool_call in response.tool_calls:
            selected_tool = tools_by_name[tool_call['name'].lower()]
            # Invoking a tool with the tool-call dict yields the message that is
            # fed back to the model.
            tool_msg = selected_tool.invoke(tool_call)
            logger.debug(tool_call)
            messages.append(tool_msg)

        response = llm_with_tools.invoke(messages)
        messages.append(response)

    print(response.content)

In [43]:
# Demo: single-tool question; the client name is passed in upper case ('RJ').
query('what is the quality of experience for client RJ')

[32m2024-09-18 21:15:50.069[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mquery[0m:[36m19[0m - [34m[1m[HumanMessage(content='\n    reply using function calls and the context. If you do not have data reply with: I was not trained to solve this problem.\n\n    some context:\n    \n\n    query: what is the quality of experience for client RJ\n    ', additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_cePjjYFlHXz89jHJ0XwFkcGY', 'function': {'arguments': '{"client":"RJ"}', 'name': 'get_quality_by_client'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 229, 'total_tokens': 245, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_2d87079ca9', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-c992c4aa-699d-4453-8e3f-ab85e4a7dac6-0', tool_calls=[{'name': 'get_quality_by_clien

The quality of experience (QoE) for client RJ is as follows:

- Server CE: 9821.78 kbps/ms
- Server DF: 16570.90 kbps/ms
- Server ES: 44991.91 kbps/ms
- Server PI: 6804.17 kbps/ms


In [44]:
# Demo: question answerable with the list_clients tool alone.
query('who are the clients available?')

[32m2024-09-18 21:15:55.292[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mquery[0m:[36m19[0m - [34m[1m[HumanMessage(content='\n    reply using function calls and the context. If you do not have data reply with: I was not trained to solve this problem.\n\n    some context:\n    \n\n    query: who are the clients available?\n    ', additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_5zyAw1ag6YaD3JXfsyOdYiWg', 'function': {'arguments': '{}', 'name': 'list_clients'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 225, 'total_tokens': 235, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_2d87079ca9', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-9d45b3ad-06eb-4601-a284-7397df49ee98-0', tool_calls=[{'name': 'list_clients', 'args': {}, 'id': 'call_5zyAw1ag6YaD3JXfsyOd

The available clients are: ba, rj.


In [45]:
# Demo: comparison question; no client is named, so the model has to discover
# the clients before it can look up their quality.
query('who is the client with worst quality of experience?')

[32m2024-09-18 21:16:00.105[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mquery[0m:[36m19[0m - [34m[1m[HumanMessage(content='\n    reply using function calls and the context. If you do not have data reply with: I was not trained to solve this problem.\n\n    some context:\n    \n\n    query: who is the client with worst quality of experience?\n    ', additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_hoYAjliszHvkc21MOAdt91ck', 'function': {'arguments': '{}', 'name': 'list_clients'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 229, 'total_tokens': 239, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_2d87079ca9', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-9284220a-36b9-417f-9b3c-136ba802a1ca-0', tool_calls=[{'name': 'list_clients', 'args': {}, 'id': 'call

The client with the worst quality of experience is "rj," with the following QoE values:

- CE: 9821.78 kbps/ms
- DF: 16570.90 kbps/ms
- ES: 44991.91 kbps/ms
- PI: 6804.17 kbps/ms


In [46]:
# Demo (Portuguese): "what is the best server-switching strategy to maximise
# client RJ's quality of experience?"
# NOTE(review): 'ed' in the prompt looks like a typo for 'de'.
query('qual é a melhor estratégia ed troca de servidor para maximizar a qualidade de experiência do cliente RJ?')

[32m2024-09-18 21:16:12.718[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mquery[0m:[36m19[0m - [34m[1m[HumanMessage(content='\n    reply using function calls and the context. If you do not have data reply with: I was not trained to solve this problem.\n\n    some context:\n    \n\n    query: qual é a melhor estratégia ed troca de servidor para maximizar a qualidade de experiência do cliente RJ?\n    ', additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_qjUHR46zCSmlzfRjyMRIq08Q', 'function': {'arguments': '{}', 'name': 'list_clients'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 239, 'total_tokens': 249, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_2d87079ca9', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-3802975e-bc6c-45e7-afdc-1ca7c8031180-0', tool_ca

A melhor estratégia de troca de servidor para maximizar a qualidade de experiência do cliente RJ pode ser avaliada considerando a qualidade de experiência (QoE) e a consistência dos servidores disponíveis. 

### Qualidade de Experiência (QoE) para o cliente RJ:
- **df**: 16,570.90 kbps/ms
- **pi**: 6,804.17 kbps/ms
- **ce**: 9,821.78 kbps/ms
- **es**: 44,991.91 kbps/ms

### Consistência dos Servidores:
- **df**: Variância para RJ é 1,059,988.49
- **pi**: Variância para RJ é 3,329,997.40
- **ce**: Variância para RJ é 3,329,997.40
- **es**: Variância para RJ é 133,261,099.97

### Análise:
- O servidor **es** oferece a melhor QoE (44,991.91 kbps/ms) para o cliente RJ, mas apresenta uma variância muito alta, indicando menor consistência.
- O servidor **df** tem uma QoE razoável (16,570.90 kbps/ms) e uma variância relativamente baixa, o que sugere uma melhor consistência.
  
### Conclusão:
Uma estratégia eficaz poderia ser priorizar o uso do servidor **df** para garantir uma experiência mai

In [58]:
# Demo (Portuguese): "what is the consistency of server PI?"
query('qual a consistencia do servidor PI?')

[32m2024-09-18 21:19:02.788[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mquery[0m:[36m19[0m - [34m[1m[HumanMessage(content='\n    reply using function calls and the context. If you do not have data reply with: I was not trained to solve this problem.\n\n    some context:\n    \n\n    query: qual a consistencia do servidor PI?\n    ', additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_5aK8Ib4JlWB0JPoKEx4esaKz', 'function': {'arguments': '{"server":"PI"}', 'name': 'get_consistency_by_server'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 227, 'total_tokens': 244, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_e9627b5346', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-af7995be-88d0-444c-b97e-accda7419bc5-0', tool_calls=[{'name': 'get_consistency_by_server', 

A consistência do servidor PI é medida pela variância dos clientes conectados. Os valores são os seguintes:

- Cliente BA: 1094503.50
- Cliente RJ: 1055988.49

Valores mais baixos indicam melhor consistência.


In [59]:
# Demo (Portuguese): "which server is the most consistent?" - no server is named,
# so the model has to enumerate the servers first.
query('qual o servidor mais consistente?')

[32m2024-09-18 21:19:19.684[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mquery[0m:[36m19[0m - [34m[1m[HumanMessage(content='\n    reply using function calls and the context. If you do not have data reply with: I was not trained to solve this problem.\n\n    some context:\n    \n\n    query: qual o servidor mais consistente?\n    ', additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_8pRmZ9LRsnzsX77X48vGv4SK', 'function': {'arguments': '{}', 'name': 'list_servers'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 225, 'total_tokens': 235, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_2d87079ca9', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-af1a0ac3-7e07-4c6b-8436-4b983463cc08-0', tool_calls=[{'name': 'list_servers', 'args': {}, 'id': 'call_8pRmZ9LRsnzsX77X4

Para determinar qual servidor é o mais consistente, analisamos os dados de consistência de cada um:

- **Servidor df**: 
  - Clientes conectados: {'ba': 9593719.37, 'rj': 8532851.08}
  
- **Servidor pi**: 
  - Clientes conectados: {'ba': 1094503.50, 'rj': 1055988.49}
  
- **Servidor ce**: 
  - Clientes conectados: {'ba': 4397685.60, 'rj': 3329997.40}
  
- **Servidor es**: 
  - Clientes conectados: {'ba': 59095624.36, 'rj': 133261099.97}

Com base nas informações, o servidor **pi** parece ter os valores mais baixos de variação, indicando que é o mais consistente entre os servidores analisados.


In [60]:
# Extra guidance injected into the prompt: the QoE formula plus a worked example
# (10% latency increase) the model can adapt to the percentage asked below.
context = [
    '''
    QoE formula is: qoe = bitrate / latency

    how a 10% increase in latency affect qoe?
    is a simple division, so changes to the dividend or divisor part of the formula
    can be represented as multiplying the old value by ((1 + change to dividend) / (1 + change to divisor))

    a = 1 + 0 = 1
    b = 1 + 0.1 = 1.1
    new_qoe = qoe * 1 / (1.1)
    '''
]
# Portuguese: "if latency increases by 20%, how does that affect client RJ's QoE?"
# The prompt previously contained the garbled word 'latˆencia' (stray circumflex);
# it is now spelled 'latência'.
# query() prints its answer and returns None, so `resp` is None after this call.
resp = query("Se a latência aumentar 20%, como isso afeta a QoE do cliente RJ?", context)

[32m2024-09-18 21:19:43.748[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mquery[0m:[36m19[0m - [34m[1m[HumanMessage(content='\n    reply using function calls and the context. If you do not have data reply with: I was not trained to solve this problem.\n\n    some context:\n    \n    QoE formula is: qoe = bitrate / latency\n\n    how a 10% increase in latency affect qoe?\n    is a simple division, so changes to the dividend or divisor part of the formula\n    can be represented as multiplying the old value by ((1 + change to dividend) / (1 + change to divisor))\n\n    a = 1 + 0 = 1\n    b = 1 + 0.1 = 1.1\n    new_qoe = qoe * 1 / (1.1)\n    \n\n    query: Se a latˆencia aumentar 20%, como isso afeta a QoE do cliente RJ?\n    ', additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_ZrWolOEGh1h5cYAIQ4oZSxnb', 'function': {'arguments': '{"client":"RJ"}', 'name': 'get_quality_by_client'}, 'type': 'function'}], 'refusal'

Para calcular como um aumento de 20% na latência afeta a QoE do cliente RJ, precisamos aplicar a fórmula de QoE e considerar a mudança na latência.

A QoE original do cliente RJ com cada servidor é:

- **CE**: 9821.78 kbps / 1 (latência original) = 9821.78
- **DF**: 16570.90 kbps / 1 (latência original) = 16570.90
- **ES**: 44991.91 kbps / 1 (latência original) = 44991.91
- **PI**: 6804.17 kbps / 1 (latência original) = 6804.17

Agora, com um aumento de 20% na latência, o novo divisor será 1.2. A nova QoE será:

- **CE**: 9821.78 / 1.2 = 8184.82 kbps
- **DF**: 16570.90 / 1.2 = 13809.08 kbps
- **ES**: 44991.91 / 1.2 = 37492.42 kbps
- **PI**: 6804.17 / 1.2 = 5669.39 kbps

Portanto, a QoE do cliente RJ seria afetada da seguinte forma com um aumento de 20% na latência:

- **CE**: 8184.82 kbps
- **DF**: 13809.08 kbps
- **ES**: 37492.42 kbps
- **PI**: 5669.39 kbps
