# Compile responses and compute DAT


## dependencies

In [1]:
import sys
sys.path.append('..') 
from scripts import dat
import pandas as pd
import numpy as np
import json
import pandas as pd
import os
import re
import glob
import warnings
warnings.filterwarnings('ignore')


## Compile adherence

## 1. Load data and model

In [2]:
# GloVe model from https://nlp.stanford.edu/projects/glove/
model_dat = dat.Model("../model/glove.840B.300d.txt", "../model/words.txt")

## Load human data

In [3]:
final_human_data = pd.read_csv('../human_data_dat/ai-creativity.csv')
DAT_100k = final_human_data['score']

## Load language models data

In [73]:
# Define a dictionary to store the results of model.dat(words)
results_dict = {'Temperature': [], 'Strategy': [], 'Score': [], 'Model': [], 'Control': [], 'Words': []}

# define counters
counter_gemini = 0
counter_gpt4_turbo = 0
counter_gpt4_0613 = 0
counter_gpt4_1106 = 0
counter_gpt3 = 0
counter_gpt4 = 0
counter_claude = 0
counter_claude21 = 0
counter_claude3 = 0
counter_claude35_sonnet = 0
counter_stablelm = 0
counter_stablelmoasst = 0
counter_red = 0
counter_vicuna = 0
counter_llama3 = 0
counter_llama3_70b = 0
counter_capy = 0

# Loop through each file in the data path
for file in sorted(glob.glob('../machine_data_dat/*.json')):
    # Open the file and load the JSON data
    with open(file, 'r') as f:
        data = json.load(f)
    file = os.path.basename(file)
    # Loop through each key in the JSON data
    for i in data.keys():
        # Split the words into a list
        words = data[i].split()

        # Find the indices of '1.' to '10.'
        indices = [m.start() for m in re.finditer(r'\b(?:[1-9]|10)\.', data[i])]  

        # Extract the words after '1.' to '10.' and store them in a new list
        new_words = []
        for idx in range(len(indices)):
            start_idx = indices[idx] + len(re.match(r'\b(?:[1-9]|10)\.', data[i][indices[idx]:]).group())
            if idx < len(indices) - 1:
                new_words.append(data[i][start_idx:indices[idx + 1]].strip())
            else:
                new_words.append(data[i][start_idx:].strip())

        # if no numbers are found, split by newline or bullet points
        if new_words == []:
            pattern = r'\b(?:[1-9]|10)\.\s*|\n\s*[-*]\s*'
            parts = re.split(pattern, data[i])
            # Filter out empty strings and strip whitespace
            new_words = [part.strip() for part in parts if part.strip()]
        
        if new_words != []:
            words = new_words

        # Split words that are comma-separated, newline-separated, or space-separated in a single string
        if len(words) == 1:
            words = [word for sublist in words for word in re.split(r'[,\n\s]+', sublist) if word]
        
        # Some lists contain an introductory statement to be removed
        if len(words) > 15:
            # Find the index of the colon
            try:
                colon_word = next((word for word in words if ':' in word), None)
                colon_index = words.index(colon_word)
                # Extract the words after the colon
                words = words[colon_index + 1:]
            except:
                print(file, words)
                continue
        # Remove "<nb>)" and "*" items from the list
        if len(words) > 15:
            words = [word for word in words if not re.match(r'\d+\)', word) and not re.match(r'\*', word)]
        if len(words) > 15:
            continue

        # lowercase the words
        words = [word.lower().strip() for word in words]
        
        # Define the strategy based on the file name
        if 'thes' in file:
            strategy = 'Thesaurus'
        elif 'oppo' in file:
            strategy = 'Opposition'
        elif 'ety' in file:
            strategy = 'Etymology'
        elif 'rand' in file:
            strategy = 'Random'
        elif 'none' in file:
            strategy = 'Original instructions'
        elif 'nothing' in file:
            strategy = 'Control'
        else:
            strategy = 'Original instructions'
        # Define the temperature based on the file name
        if 'temp1.5' in file:
            condition = 'High'
        elif 'temp0.5' in file:
            condition = 'Low'
        elif 'temp1.0' in file:
            condition = 'Mid'
        elif 'temp0.7' in file or "temp0.8" in file:
            condition = 'Mid'
        elif 'temp0.2' in file:
            condition = 'Low'
        elif 'temp0.9' in file:
            condition = 'High'
            if 'gemini' in file:
                condition = 'Mid'
        elif 'temp1.2' and 'claude' in file:
            condition = 'High'
        else:
            condition = 'Mid'
        
        # Define the model based on the file name
        if 'sample_gpt4' in file:
            llm = 'GPT-4'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_gpt4 += 1
        elif 'gpt4-0613' in file:
            llm = 'GPT-4-0613'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_gpt4_0613 += 1
        elif 'gpt4-1106' in file:
            llm = 'GPT-4-1106'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_gpt4_1106 += 1
        elif 'turbo' in file:
            llm = 'GPT-4-turbo'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_gpt4_turbo += 1
        elif 'gemini' in file:
            llm = 'GeminiPro1'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_gemini += 1
        elif 'sample_claude_' in file:
            llm = 'Claude'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_claude += 1
        elif 'claude_2-1' in file:
            llm = 'Claude2.1'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_claude21 += 1
        elif 'claude_3' in file:
            llm = 'Claude3'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_claude3 += 1
        elif 'claude3-5-sonnet' in file:
            llm = 'Claude3.5-sonnet'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_claude35_sonnet += 1
        elif 'stablelm' in file:
            llm = 'StableLM'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_stablelm += 1
        elif 'redpajama7B' in file:
            llm='RedPajama'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_red += 1
        elif 'vicuna' in file:
            llm = 'Vicuna'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_vicuna += 1
        elif 'llama3_dat' in file:
            llm = 'Llama3'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_llama3 += 1
        elif 'llama3_70B' in file:
            llm = 'Llama3-70B'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_llama3_70b += 1
        elif 'capybarra' in file:
            llm = 'Capybara'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_capy += 1
        else:
            llm = 'GPT-3'
            if condition == 'Mid' and strategy == 'Original instructions':
                counter_gpt3 += 1
        
        # Loop through each word in the list
        score = model_dat.dat(words)
        # Append the results to the dictionary
        results_dict['Temperature'].append(condition)
        results_dict['Strategy'].append(strategy)
        results_dict['Score'].append(score)
        results_dict['Words'].append(words)
        results_dict['Model'].append(llm)
        # Add a columns with binary Control vs. experimental
        if strategy == 'Control':
            results_dict['Control'].append('Control')
        elif strategy == 'Original instructions':
            results_dict['Control'].append('Original instructions')
        else:
            results_dict['Control'].append('Strategy')

# Convert the results dictionary to a Pandas DataFrame
results_df = pd.DataFrame(results_dict)

Number of valid words 10
Number of valid words 10
Number of valid words 10
Number of valid words 10
Number of valid words 9
capybarra_dat_temp1.0_none02.json ['A', 'lake', 'a', 'sandwich', 'an', 'envelope', 'a', 'jet', 'a', 'design', 'a', 'rose', 'a', 'galaxy', 'a', 'joke', 'a', 'quartet', 'an', 'anchor.']
Number of valid words 4
Number of valid words 11
Number of valid words 10
Number of valid words 14
Number of valid words 9
Number of valid words 11
Number of valid words 9
Number of valid words 9
Number of valid words 10
Number of valid words 9
Number of valid words 10
capybarra_dat_temp1.0_none02.json ['A', 'tree', 'an', 'apple', 'a', 'star', 'a', 'pencil', 'a', 'keyboard', 'a', 'smile', 'a', 'book', 'a', 'piano', 'a', 'cloud', 'and', 'a', 'rainbow.']
Number of valid words 10
Number of valid words 10
Number of valid words 10
Number of valid words 7
Number of valid words 11
Number of valid words 8
Number of valid words 10
Number of valid words 10
Number of valid words 10
Number of va

## Compile adherence

In [74]:
counting = {
    "GPT-3": counter_gpt3,
    "GPT-4": counter_gpt4,
    "Claude": counter_claude,
    "Claude2.1": counter_claude21,
    "Claude3": counter_claude3,
    "Claude3.5-sonnet": counter_claude35_sonnet,
    "StableLM": counter_stablelm,
    "RedPajama": counter_red,
    "Vicuna": counter_vicuna,
    "GPT-4-turbo": counter_gpt4_turbo,
    "GeminiPro1": counter_gemini,
    "GPT-4-0613": counter_gpt4_0613,
    "GPT-4-1106": counter_gpt4_1106,
    "Llama3": counter_llama3,
    "Llama3-70B": counter_llama3_70b,
    "Capybara": counter_capy,
}
pd.DataFrame(counting.items(), columns=['Model', 'Count']).to_csv('total_responses_per_model.csv', index=False)

In [75]:
counting

{'GPT-3': 682,
 'GPT-4': 504,
 'Claude': 989,
 'Claude2.1': 756,
 'Claude3': 578,
 'Claude3.5-sonnet': 557,
 'StableLM': 1488,
 'RedPajama': 1277,
 'Vicuna': 1000,
 'GPT-4-turbo': 500,
 'GeminiPro1': 800,
 'GPT-4-0613': 799,
 'GPT-4-1106': 496,
 'Llama3': 489,
 'Llama3-70B': 457,
 'Capybara': 452}

## Concatenate all human and machine data 

In [76]:
# concatenate with final sample
results_df = pd.concat([results_df, pd.DataFrame({'Temperature': np.tile(None, len(DAT_100k)),
                                                  'Strategy': np.tile('Original instructions', len(DAT_100k)),
                                                  'Score': np.array(DAT_100k),
                                                  'Model': np.tile('Human (100k)', len(DAT_100k)),
                                                  'Control': np.tile('Original instructions', len(DAT_100k))})])

In [77]:
results_df.Model.value_counts()

Human (100k)        100000
GPT-3                 5556
GPT-4                 5298
StableLM              4979
GeminiPro1            3660
Claude                3212
RedPajama             2444
Vicuna                2000
Claude2.1             1556
GPT-4-turbo           1255
Claude3               1082
GPT-4-0613             799
Claude3.5-sonnet       557
GPT-4-1106             496
Llama3                 489
Llama3-70B             457
Capybara               452
Name: Model, dtype: int64

## Save all machine and human data

In [78]:
results_df.to_csv('concatenated_results_2024-11-08.csv', index=False)


In [79]:
results_df.head()

Unnamed: 0,Temperature,Strategy,Score,Model,Control,Words
0,Mid,Original instructions,88.121512,Capybara,Original instructions,"[asteroid, whisper, raspberry, cartography, cr..."
1,Mid,Original instructions,75.945662,Capybara,Original instructions,"[tree, smile, yarn, ocean, candle, truck, feat..."
2,Mid,Original instructions,79.783903,Capybara,Original instructions,"[ocean, pencil, cosmos, iron, coffee, book, sy..."
3,Mid,Original instructions,90.081696,Capybara,Original instructions,"[chameleon, oscilloscope, zeppelin, xylophone,..."
4,Mid,Original instructions,85.911356,Capybara,Original instructions,"[ocean, equation, rhinoceros, symphony, fragme..."


In [80]:
results_df.loc[results_df['Control']=='Original instructions'].groupby('Model')['Score'].mean().sort_values()


Model
StableLM            62.924910
RedPajama           71.512219
Claude              75.117860
Claude2.1           75.951824
GPT-4-turbo         75.961742
GPT-3               77.833634
Vicuna              78.931210
Claude3.5-sonnet    79.065690
Claude3             80.201013
GeminiPro1          80.532483
Human (100k)        81.043458
GPT-4-0613          81.521169
GPT-4-1106          82.766461
Capybara            83.158111
Llama3-70B          83.779676
GPT-4               84.194218
Llama3              84.628047
Name: Score, dtype: float64

In [81]:
# Count of NaN values per model and condition
nan_count_per_model_condition = results_df[results_df['Score'].isna()].groupby(['Model']).size().reset_index(name='NaN_Count')
print(nan_count_per_model_condition)
nan_indices = results_df[results_df['Score'].isna()].index
nan_words = results_df.loc[nan_indices, 'Words']
print(nan_words)
nan_words_per_model = results_df[results_df['Score'].isna()].groupby('Model')['Words'].apply(list).reset_index(name='NaN_Words')
print(nan_words_per_model)

         Model  NaN_Count
0     Capybara         23
1       Claude         69
2      Claude3          2
3        GPT-3        105
4        GPT-4        598
5  GPT-4-turbo         84
6   GeminiPro1         38
7       Llama3         12
8    RedPajama        957
9     StableLM       1755
5               [*laughs*, that's, quite, the, challenge!]
5                                                      NaN
57       [true/false: you are a young adult with a love...
57                                                     NaN
73       [that's, an, interesting, challenge!, let, me,...
                               ...                        
31991                                                  NaN
32172    [buttercup - a bright yellow wildflower, geode...
32172                                                  NaN
32247    ["desert" - a dry, sandy region with very litt...
32247                                                  NaN
Name: Words, Length: 7286, dtype: object
         Model          