In [1]:
import os
import pandas as pd
from groq import Groq
from itertools import product
from time import sleep

In [2]:
client = Groq(
				# base_url="https://api.groq.com/openai/v1",
				api_key=os.getenv('GROQ_API_KEY')
				)

In [3]:
all_models = ['gemma2-9b-it', 'llama3-8b-8192', 'mixtral-8x7b-32768', 'llama-3.1-8b-instant', 'deepseek-r1-distill-llama-70b', 'llama3-70b-8192']

Tirando a média de todas as temperaturas dos modelos temos uma temperatura de 0.4975 que vamos aproximar para 5

In [4]:
def process(model_type, messages):
	response = client.chat.completions.create(
											model=model_type,
											messages=messages,
											temperature=0.5
										)
	return response.choices[0].message.content

In [5]:
base_path_babi = '/Users/guisalesfer/CTM_implementation/tasks/babi-tasks'
base_path_rocstories = '/Users/guisalesfer/CTM_implementation/tasks/RocStories'

def babi_eval():
    os.makedirs(os.path.join(base_path_babi, 'results_llm'), exist_ok=True)
    for model, task in list(product(all_models, os.listdir(os.path.join(base_path_babi, 'results_ctm')))):
        os.makedirs(os.path.join(base_path_babi, 'results_llm', model), exist_ok=True)
        test_df = pd.read_csv(os.path.join(base_path_babi, 'results_ctm', task)).dropna()[['question', 'y']]
        messages = []
        messages.append({
            "role": "system",
            "content": "You will receive a text that provides context for a logical problem. You must solve the problem and return the answer. The answer need to be most straight as possible and do not add text after o before the answer."
        })
        for i, row in test_df.iterrows():
            messages.append({
                "role": "user",
                "content": row['question']
            })
            response = process(model, messages)
            messages.append({
                "role": "system",
                "content": response
            })
            test_df.loc[i, 'y_hat'] = response
            sleep(2)
        sleep(15)
        test_df.to_csv(os.path.join(base_path_babi, 'results_llm', model, f'{task}.csv'), index=False)

In [30]:
babi_eval()

In [6]:
def rocstories_eval():
    os.makedirs(os.path.join(base_path_rocstories, 'results_llm'), exist_ok=True)
    tasks = pd.read_csv(os.path.join(base_path_rocstories, 'results_ctm', 'rocstories_result.csv'))
    tasks['input'] = tasks.apply(lambda x: f"{x['context']}\n{x['question']}", axis=1)
    for model in all_models:
        final_df = pd.DataFrame(columns = ['task', 'input', 'y', 'y_hat'])
        os.makedirs(os.path.join(base_path_rocstories, 'results_llm', model), exist_ok=True)
        for i, task in tasks.iterrows():
            messages = []
            messages.append({
                "role": "system",
                "content": "You will be provided with a sequence of story fragments that together form a coherent narrative. At certain point, you will be presented with a question offering two possible endings to the story. Your task is to analyze both options and determine which ending best aligns with the logical flow, thematic consistency, and causal coherence of the preceding story. Return only the number of the correct ending."
            })

            messages.append({
                "role": "user",
                "content": task['input']
            })
            response = process(model, messages)
            final_df.loc[len(final_df)] = {
                                            'task': task['input'], 
                                            'input': task['input'], 
                                            'y': task['y'], 
                                            'y_hat': response
                                        }
        final_df.to_csv(os.path.join(base_path_rocstories, 'results_llm', model, 'rocstories_result.csv'), index=False)

In [7]:
rocstories_eval()

### Quality check

In [2]:
import pandas as pd
df = pd.read_csv('/Users/guisalesfer/CTM_implementation/bibliography and literature review/Quality Assessment.csv')

In [7]:
df.replace({1:'Yes', 2:'Partially', 3:'Not Aplicable', 4:'No'}, inplace=True)

In [9]:
df.to_csv('/Users/guisalesfer/CTM_implementation/bibliography and literature review/Quality Assessment.csv', index=False)

In [12]:
df.columns

Index(['Artigo',
       'Os componentes específicos da arquitetura de Blum dedicados ao raciocínio lógico estão claramente identificados?',
       'Os resultados demonstram capacidade de resolver problemas de lógica de diferentes níveis de complexidade?',
       'Foi realizada alguma análise de custo computacional ou eficiência do modelo?',
       'Os resultados demonstram consistência em diferentes subconjuntos do dataset?',
       'A metodologia está claramente descrita e é reproduzível?',
       'Há descrição detalhada dos datasets utilizados, incluindo possíveis vieses?'],
      dtype='object')

In [22]:
df.index

RangeIndex(start=0, stop=17, step=1)

In [21]:
df.apply(lambda x: print(f"\n {x['Artigo']} & {x['Os componentes específicos da arquitetura de Blum dedicados ao raciocínio lógico estão claramente identificados?']} & {x['Os resultados demonstram capacidade de resolver problemas de lógica de diferentes níveis de complexidade?']} & {x['Foi realizada alguma análise de custo computacional ou eficiência do modelo?']} & {x['Os resultados demonstram consistência em diferentes subconjuntos do dataset?']} & {x['A metodologia está claramente descrita e é reproduzível?']} & {x['Há descrição detalhada dos datasets utilizados, incluindo possíveis vieses?']} \\\ \n\hline"), axis=1)


 A Theoretical Computer Science Perspective on Consciousness  & Yes & Partially & Not Aplicable & Not Aplicable & Yes & Not Aplicable \\ 
\hline

 A Theoretical Computer Science Perspective on Consciousness and Artificial General Intelligence  & Yes & Partially & Not Aplicable & Not Aplicable & Yes & Not Aplicable \\ 
\hline

 A Theoretical Computer Science Perspective on Free Will & Yes & Partially & Not Aplicable & Not Aplicable & Yes & Not Aplicable \\ 
\hline

 A blueprint for conscious machines & No & Not Aplicable & No & Not Aplicable & Partially & Not Aplicable \\ 
\hline

 A cognitive theory of consciousness  & Yes & Yes & Not Aplicable & Not Aplicable & Yes & Not Aplicable \\ 
\hline

 A theory of consciousness from a theoretical computer science perspective: Insights from the Conscious Turing Machine & Yes & Yes & Not Aplicable & Not Aplicable & Yes & Not Aplicable \\ 
\hline

 A universal knowledge model and cognitive architectures for prototyping AGI  & Not Aplicable & Par

0     None
1     None
2     None
3     None
4     None
5     None
6     None
7     None
8     None
9     None
10    None
11    None
12    None
13    None
14    None
15    None
16    None
dtype: object