In [None]:
import time

import pandas as pd
import dotenv
import os
import sys
import datetime
sys.path.append('./_01_Simulation/')

import _01_Simulation.LLM_Edge_Usability
import _01_Simulation.LLM_Dynamic_Weights
import _01_Simulation.LLM_MetaModel
import _01_Simulation.LLM_Function_Calling

In [None]:
# Load variables from the .env file into the process environment
dotenv.load_dotenv()

# Fail fast when a directory variable is missing: the cells below pass
# os.getenv('RESOURCES') / os.getenv('RESULTS') straight into os.path.join,
# which raises an opaque TypeError on None (for RESULTS only after the
# API calls have already been made).
missing_env_vars = [name for name in ("RESOURCES", "RESULTS") if os.getenv(name) is None]
if missing_env_vars:
    raise EnvironmentError(
        f"Missing required environment variable(s): {', '.join(missing_env_vars)}. "
        "Define them in the .env file or in the environment before running this notebook."
    )

In [None]:
# Silence every DeprecationWarning for the rest of the session
import warnings
warnings.simplefilter("ignore", DeprecationWarning)

# 1. Edge Usability

In [None]:
# Params: prompting approaches to evaluate (currently only OpenAI models are considered)
approaches = ["zeroshot", "fewshot"]

In [None]:
# Read the semicolon-separated edge-usability evaluation set from the RESOURCES directory
df_evaluation = pd.read_csv(
    os.path.join(os.getenv('RESOURCES'), 'EvaluationDataset.csv'),
    sep=";",
)

In [None]:
# Preview the first rows of the loaded evaluation data
df_evaluation.head()

In [None]:
# TODO Remove this statement to evaluate the whole dataset. This will induce extensive costs for API usage.
# An explicit copy is taken so that the result columns added further down are
# written to an independent frame rather than to a slice of the loaded one
# (which would trigger pandas' SettingWithCopyWarning).
df_evaluation = df_evaluation.iloc[:2].copy()

In [None]:
# Invoke GPT-3.5 to obtain the predictions for edge-usability
# (one model call per evaluation row and per prompting approach; the results of
# each approach are written to their own timestamped CSV in the RESULTS directory)

total_rows = len(df_evaluation)
for approach in approaches:
    print(f"Approach: {approach}")
    predictions = []
    # Progress is counted by position instead of by index label, so the message
    # stays correct even when the frame does not carry a default 0..n-1 index.
    for position, (_index, row) in enumerate(df_evaluation.iterrows(), start=1):
        model_output = _01_Simulation.LLM_Edge_Usability.invoke_llm(row["action"], model_type="openai", approach=approach)
        prediction = _01_Simulation.LLM_Edge_Usability.parse_response(model_output)
        predictions.append(prediction)
        print(f"Finished {position}/{total_rows}")

    # The "prediction" column is overwritten on every approach; the per-approach
    # values survive only in the CSV written right below.
    df_evaluation["prediction"] = predictions
    df_evaluation.to_csv(os.path.join(os.getenv('RESULTS'), f'eval-res-edge-usability-openai-{approach}-{datetime.datetime.now().isoformat()}.csv'), sep=";", index=False)

# 2. Dynamic Edge Weights

In [None]:
# Params: prompting approaches to evaluate (currently only OpenAI models are considered)
approaches = ["zeroshot", "fewshot"]

In [None]:
# Read the semicolon-separated dynamic-edge-weight evaluation set from the RESOURCES directory
df_evaluation = pd.read_csv(
    os.path.join(os.getenv('RESOURCES'), 'EvaluationDataset-dynamic-edge-weight.csv'),
    sep=";",
)

In [None]:
# Preview the first rows of the loaded evaluation data
df_evaluation.head()

In [None]:
# TODO Remove this statement to evaluate the whole dataset. This will induce extensive costs for API usage.
# An explicit copy is taken so that the result columns added further down are
# written to an independent frame rather than to a slice of the loaded one
# (which would trigger pandas' SettingWithCopyWarning).
df_evaluation = df_evaluation.iloc[:2].copy()

In [None]:
# Display the evaluation rows that the loop below iterates over
df_evaluation

In [None]:
# Invoke GPT-3.5 to obtain the predictions for dynamic-edge-weights
# (one chain call per evaluation row and per prompting approach; the results of
# each approach are written to their own timestamped CSV in the RESULTS directory)

total_rows = len(df_evaluation)
for approach in approaches:
    print(f"Approach: {approach}")
    predictions = []
    result_types = []
    # Progress is counted by position instead of by index label, so the message
    # stays correct even when the frame does not carry a default 0..n-1 index.
    for position, (_index, row) in enumerate(df_evaluation.iterrows(), start=1):
        # The chain returns three values; the middle one is not used here.
        model_output, _, result_type = _01_Simulation.LLM_Dynamic_Weights.invoke_llm_chain(row["action"], model_type="openai", approach=approach)
        prediction = _01_Simulation.LLM_Dynamic_Weights.parse_output_weights(model_output)
        predictions.append(prediction)
        result_types.append(result_type)
        print(f"Finished {position}/{total_rows}")

    # Both columns are overwritten on every approach; the per-approach values
    # survive only in the CSV written right below.
    df_evaluation["prediction"] = predictions
    df_evaluation["result_type"] = result_types
    df_evaluation.to_csv(os.path.join(os.getenv('RESULTS'), f'eval-res-dynamic-openai-{approach}-{datetime.datetime.now().isoformat()}.csv'), sep=";", index=False)

# 3. Function Calling

In [None]:
# Read the semicolon-separated node-incident evaluation set from the RESOURCES directory
df_evaluation = pd.read_csv(
    os.path.join(os.getenv('RESOURCES'), 'EvaluationDataset-node-incidents.csv'),
    sep=";",
)

In [None]:
# Preview the first rows of the loaded evaluation data
df_evaluation.head()

In [None]:
# TODO Remove this statement to evaluate the whole dataset. This will induce extensive costs for API usage.
# An explicit copy is taken so that the result column added further down is
# written to an independent frame rather than to a slice of the loaded one
# (which would trigger pandas' SettingWithCopyWarning).
df_evaluation = df_evaluation.iloc[:2].copy()

In [None]:
# Run the function-calling model once per evaluation row and store the returned
# node incidents as one comma-separated string per row.
node_incidents = []
total_rows = len(df_evaluation)
# Progress is counted by position instead of by index label, so the message
# stays correct even when the frame does not carry a default 0..n-1 index.
for position, (_index, row) in enumerate(df_evaluation.iterrows(), start=1):
    node_incident = _01_Simulation.LLM_Function_Calling.invoke_llm(row["examples"])
    node_incidents.append(', '.join(node_incident))
    print(f"Finished {position}/{total_rows}")

df_evaluation["node_incidents"] = node_incidents

# Persist the results to a timestamped CSV in the RESULTS directory
df_evaluation.to_csv(os.path.join(os.getenv('RESULTS'), f'eval-res-function-calling-{datetime.datetime.now().isoformat()}.csv'), sep=";", index=False)

# 4. Meta-Model

In [None]:
# Read the semicolon-separated meta-model evaluation set from the RESOURCES directory
df_evaluation = pd.read_csv(
    os.path.join(os.getenv('RESOURCES'), 'EvaluationDataset-metamodel.csv'),
    sep=";",
)

In [None]:
# Preview the first rows of the loaded evaluation data
df_evaluation.head()

In [None]:
# TODO Remove this statement to evaluate the whole dataset. This will induce extensive costs for API usage.
# An explicit copy is taken so that the result columns added further down are
# written to an independent frame rather than to a slice of the loaded one
# (which would trigger pandas' SettingWithCopyWarning).
df_evaluation = df_evaluation.iloc[:2].copy()

In [None]:
# Run the meta-model once per evaluation row and collect every returned output
# in its own list, so each can be attached to the frame as a column afterwards.
output_usabilities = []
output_dynamics = []
output_lengths = []
output_times = []
output_nodes = []
output_nodes_times = []
total_rows = len(df_evaluation)
# Progress is counted by position instead of by index label, so the message
# stays correct even when the frame does not carry a default 0..n-1 index.
for position, (_index, row) in enumerate(df_evaluation.iterrows(), start=1):
    # invoke_llm returns seven values; the last one is not used here.
    output_usability, output_dynamic, output_length, output_time, output_node, output_nodes_time, _ = _01_Simulation.LLM_MetaModel.invoke_llm(row["examples"])
    output_usabilities.append(output_usability)
    output_dynamics.append(output_dynamic)
    output_lengths.append(output_length)
    output_times.append(output_time)
    output_nodes.append(output_node)
    output_nodes_times.append(output_nodes_time)
    print(f"Finished {position}/{total_rows}")

df_evaluation["output_usability"] = output_usabilities
df_evaluation["output_dynamic"] = output_dynamics
df_evaluation["output_length"] = output_lengths
df_evaluation["output_time"] = output_times
df_evaluation["output_node"] = output_nodes
# This list was collected in the loop but never stored, so it was silently
# missing from the exported CSV.
df_evaluation["output_nodes_time"] = output_nodes_times

# Persist the results to a timestamped CSV in the RESULTS directory
df_evaluation.to_csv(os.path.join(os.getenv('RESULTS'), f'eval-res-metamodel-{datetime.datetime.now().isoformat()}.csv'), sep=";", index=False)