In [None]:
# automated_program_similarity_analysis

import os
import importlib.util
import types
from transformers import AutoTokenizer
import numpy as np
import pandas as pd
from nltk import sent_tokenize

file = 'data/small_text.csv'
df = pd.read_csv(file)

# Split every paragraph of the 'text' column into sentences; keep the first ten.
sentences = []
for paragraph in df['text']:
    sentences.extend(sent_tokenize(paragraph))
sentences = sentences[:10]

# Load one callable per (layer, head) from
# automation_results_bert/llm_code/layer{L}_head{H}_code.py.
# Heads whose file is missing or fails to execute are skipped, so an index into
# `programs` is NOT guaranteed to equal layer * 12 + head.
folder = "automation_results_bert"
programs = []
for layer in range(12):
    print(f"Loading programs for Layer {layer}...")
    for head in range(12):
        code_path = os.path.join(folder, "llm_code", f"layer{layer}_head{head}_code.py")
        if not os.path.exists(code_path):
            continue

        spec = importlib.util.spec_from_file_location(f"layer{layer}_head{head}", code_path)
        module = importlib.util.module_from_spec(spec)
        # Pre-seed numpy so the file can use `np` without importing it itself.
        module.__dict__['np'] = np
        try:
            spec.loader.exec_module(module)
            # dir() is alphabetically sorted, so this keeps the alphabetically
            # first function visible in the module (including any function the
            # file itself imported).
            for attr_name in dir(module):
                attr = getattr(module, attr_name)
                if isinstance(attr, types.FunctionType):
                    programs.append(attr)
                    break
        except Exception as e:
            print(f"Error loading program for Layer {layer}, Head {head}: {e}")
print(f"Loaded {len(programs)} programs.")

In [None]:
# Sentences on which every pair of programs is compared in the loop below.
sentence_data = sentences[:10]
# Tokenizer handed to each program as its second argument.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def js_divergence(p: np.ndarray, q: np.ndarray) -> float:
    """Jensen-Shannon divergence (natural log) between two weight vectors.

    Both inputs are clipped to [1e-12, 1.0] and rescaled to sum to one before
    the divergence is evaluated, so zeros never reach the logarithm.

    Defined at notebook scope (not nested) because the regression-scoring cell
    further down calls ``js_divergence`` directly.

    Returns:
        The divergence, clamped at 0.0. Rounding can otherwise produce a tiny
        negative value for near-identical inputs, which becomes NaN under the
        square root taken by the caller. NaN inputs still propagate as NaN.
    """
    p = np.clip(p, 1e-12, 1.0)
    q = np.clip(q, 1e-12, 1.0)
    p = p / p.sum()
    q = q / q.sum()
    m = 0.5 * (p + q)
    divergence = 0.5 * (np.sum(p * np.log(p / m)) + np.sum(q * np.log(q / m)))
    return max(float(divergence), 0.0)


def program_similarity(att_one, att_two):
    """Mean row-wise Jensen-Shannon distance between two matrices.

    Rows are paired with ``zip``, so extra rows of the longer input are ignored.
    Despite the name, this is a distance: 0 means identical rows, larger means
    more different.

    Args:
        att_one: Iterable of rows (e.g. a 2-D array).
        att_two: Iterable of rows paired with ``att_one``.

    Returns:
        Mean of ``sqrt(js_divergence(row_one, row_two))`` over the row pairs.
    """
    row_distances = [
        np.sqrt(js_divergence(row_one, row_two))
        for row_one, row_two in zip(att_one, att_two)
    ]
    return np.mean(row_distances)

# Indices that are never evaluated; every matrix entry involving them gets a
# fixed placeholder distance instead.
# NOTE(review): presumably these two programs crash or hang - confirm.
SKIPPED_PROGRAMS = {106, 128}
PLACEHOLDER_DISTANCE = 0.8

x = len(programs)
S = np.zeros((x, x))
# Start at 0: starting at 1 left row 0 all zeros, which makes program 0 look
# identical (distance 0) to every other program in the grouping step.
for i in range(x):
    if i in SKIPPED_PROGRAMS:
        S[i, :] = PLACEHOLDER_DISTANCE
        continue
    print(f"calculating hypothesis similarities [{i}]: {programs[i].__name__}")
    program_one = programs[i]

    # Evaluate program i once per sentence instead of once per (j, sentence).
    # Assumes programs are deterministic. A sentence on which program i fails
    # is simply left out of every comparison in this row.
    activations_one = {}
    for sentence_idx, sentence in enumerate(sentence_data):
        try:
            _, activations_one[sentence_idx] = program_one(sentence, tokenizer)
        except Exception as e:
            print(f"Error processing sentence with program {i}: {e}")

    for j in range(x):
        if j % 36 == 0: print(f"\tinner loop {j/x:.0%}")
        if j in SKIPPED_PROGRAMS:
            S[i, j] = PLACEHOLDER_DISTANCE
            continue
        if i == j:
            continue  # diagonal stays 0

        program_two = programs[j]
        similarities = []
        for sentence_idx, sentence in enumerate(sentence_data):
            if sentence_idx not in activations_one:
                continue
            try:
                _, activations_two = program_two(sentence, tokenizer)
            except Exception as e:
                print(f"Error processing sentence with programs {i} and {j}: {e}")
                continue
            similarities.append(program_similarity(activations_one[sentence_idx], activations_two))

        # No usable sentence for this pair: record NaN explicitly.
        S[i, j] = np.mean(similarities) if similarities else np.nan

In [None]:
def group_similar_programs(programs, S, threshold=0.6):
    """Group programs by chaining pairwise distances below ``threshold``.

    A group starts from the lowest unassigned index and then absorbs any
    unassigned program whose distance to *any* current member is below the
    threshold, so membership is transitive.

    Args:
        programs: Sequence of objects exposing ``__name__``.
        S: Square matrix indexed as ``S[a, b]`` holding pairwise distances.
        threshold: Strict upper bound for two programs to be linked.

    Returns:
        List of groups, each a list of program names in discovery order.
    """
    n_programs = len(programs)
    assigned = set()
    groups = []

    for seed in range(n_programs):
        if seed in assigned:
            continue
        members = [seed]
        assigned.add(seed)

        # Walk the growing member list with a cursor; members appended while
        # scanning are themselves scanned later.
        cursor = 0
        while cursor < len(members):
            current = members[cursor]
            cursor += 1
            for candidate in range(n_programs):
                if candidate in assigned:
                    continue
                if S[current, candidate] < threshold:
                    members.append(candidate)
                    assigned.add(candidate)

        groups.append([programs[idx].__name__ for idx in members])

    return groups

# Group with a 0.2 cut-off and print each group's function names (1-based).
groups = group_similar_programs(programs, S, threshold=0.2)
for i, group in enumerate(groups):
    print(f"Group {i+1}: {group}\n")

In [None]:
# Make S symmetric by averaging every entry with its transposed counterpart.
S = (S + S.T) / 2

In [None]:
S

In [None]:
def group_similar_programs(programs, S, threshold=0.6):
    """Group programs around anchors in a single pass (no chaining).

    Each unassigned index in turn becomes an anchor and collects every still
    unassigned program whose distance *to the anchor* is below ``threshold``.
    Unlike a transitive grouping, members do not pull in further programs.

    Args:
        programs: Sequence of objects exposing ``__name__``.
        S: Square matrix indexed as ``S[a, b]`` holding pairwise distances.
        threshold: Strict upper bound on the distance to the anchor.

    Returns:
        List of groups, each a list of program names (anchor first).
    """
    n_programs = len(programs)
    taken = set()
    groups = []

    for anchor in range(n_programs):
        if anchor in taken:
            continue
        taken.add(anchor)

        close = [
            other
            for other in range(n_programs)
            if other not in taken and S[anchor, other] < threshold
        ]
        taken.update(close)

        groups.append([programs[idx].__name__ for idx in [anchor] + close])

    return groups

# Re-group with a 0.1 cut-off and print each group's function names (1-based).
groups = group_similar_programs(programs, S, threshold=0.1)
for i, group in enumerate(groups):
    print(f"Group {i+1}: {group}\n")

In [None]:
# For every (layer, head), read the score from both BERT runs and keep the
# best (lowest) one that exists; then reshape into a 12 x 12 layer/head grid.

MISSING_SCORE = -1      # no score file for this head in a given run
UNPARSEABLE_SCORE = 1   # file exists but does not contain a float


def read_score(path):
    """Return the float stored in ``path``.

    Returns ``MISSING_SCORE`` when the file does not exist and
    ``UNPARSEABLE_SCORE`` when its content is not a valid float.
    """
    if not os.path.exists(path):
        return MISSING_SCORE
    with open(path, "r") as f:
        try:
            return float(f.read().strip())
        except ValueError:
            return UNPARSEABLE_SCORE


def best_available_score(*candidates):
    """Return the lowest candidate, ignoring ``MISSING_SCORE`` sentinels.

    A plain ``min`` would return the -1 sentinel whenever either run lacks a
    file, discarding the score that does exist. ``MISSING_SCORE`` is returned
    only when every candidate is missing.
    Assumes real scores are never exactly -1 - TODO confirm.
    """
    available = [c for c in candidates if c != MISSING_SCORE]
    return min(available) if available else MISSING_SCORE


scores = np.array(
    [
        best_available_score(
            read_score(f"automation_bert/scores/layer{layer}_head{head}_score.txt"),
            read_score(f"automation_bert_2/scores/layer{layer}_head{head}_score.txt"),
        )
        for layer in range(12)
        for head in range(12)
    ],
    dtype=float,
)

sq_scores = scores.reshape((12, 12))

In [None]:
# Imported here because this cell runs before the notebook's later
# matplotlib import cells.
import matplotlib.pyplot as plt
from matplotlib.colors import PowerNorm

# NOTE(review): `model` (used in the title) is only created in a later cell;
# on a fresh kernel that cell has to run before this one.
sq_score = np.reshape(scores, (12, 12))

# "Greys" is the canonical colormap name; the "Grays" spelling is only an
# alias on newer Matplotlib releases.
colors = "Greys"
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
# Heads without a score carry the -1 sentinel; mask them so they are drawn in
# the colormap's "bad" colour.
masked_sq = np.ma.masked_where(sq_score == -1, sq_score)
norm = PowerNorm(gamma=1.8, vmin=sq_score.min(), vmax=sq_score.max())
# plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap still exists.
cmap = plt.get_cmap(colors).copy()
cmap.set_bad(color='gray')
im2 = ax.imshow(masked_sq, cmap=cmap, aspect='auto', norm=norm)
im2.set_clim(vmin=0, vmax=1)  # overrides the vmin/vmax given to the norm above
cbar = plt.colorbar(im2, ax=ax)
ax.set_xticks(range(12))
ax.set_yticks(range(12))
ax.set_xticklabels([i for i in range(12)], rotation=90)
ax.set_yticklabels([i for i in range(12)])
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
title = (
    r'$\mathbf{Automation \ Scores}$'  # \mathbf makes the text bold
    '\n\nMethod: Greedy Refinement'
    f'\nScores | {model.config.architectures[0]}'
)
plt.title(f"{title}\n")
# Save before show(): once show() returns, pyplot's current figure is a new
# empty one, so saving afterwards writes a blank image.
fig.savefig('automation_scores_greedy_refinement_k1.png', dpi=300)
plt.show()

In [None]:
# get the (layer, head) associated with the 10 highest scores from automation_results_bert, print the function name from the python file too

import os
folder = "automation_results_bert/scores"


def first_function_name(code_path):
    """Return the name from the first line that starts with ``def `` in a file.

    Returns "N/A" when the file does not exist or contains no such line, so
    every head always gets exactly one entry.
    """
    if not os.path.exists(code_path):
        return "N/A"
    with open(code_path, "r") as f:
        for line in f:
            stripped = line.strip()
            if stripped.startswith("def "):
                return stripped.split()[1].split('(')[0]
    return "N/A"


# (layer, head, score) for every head that has a score file.
scores = []
for layer in range(12):
    for head in range(12):
        score_path = os.path.join(folder, f"layer{layer}_head{head}_score.txt")
        if os.path.exists(score_path):
            with open(score_path, "r") as f:
                score = float(f.read().strip())
            scores.append((layer, head, score))

# Keyed by (layer, head) so a name stays attached to its head after sorting.
# Zipping the sorted rows against a list in file order paired each printed
# head with an unrelated function name.
names_by_head = {
    (layer, head): first_function_name(
        os.path.join("automation_results_bert", "llm_code", f"layer{layer}_head{head}_code.py")
    )
    for layer, head, _ in scores
}
names = [names_by_head[(layer, head)] for layer, head, _ in scores]  # same order as `scores`

# NOTE(review): the sort is ascending, so these are the ten numerically LOWEST
# scores although the heading says "highest" - confirm which was intended.
highest_scores = sorted(scores, key=lambda x: x[2])[:10]
print("Top 10 highest scores (layer, head, score):")
for layer, head, score in highest_scores:
    print(f"Layer {layer}, Head {head}: {score} | Function: {names_by_head[(layer, head)]}")

In [None]:
scores

In [None]:
# find "coreferent_entity_focus" function which (layer, head) is it

# Report every (layer, head) whose code file has a line starting with this prefix.
target_prefix = "def coreferent_entity_focus"
for layer in range(12):
    for head in range(12):
        code_path = os.path.join("automation_results_bert", "llm_code", f"layer{layer}_head{head}_code.py")
        if not os.path.exists(code_path):
            continue
        with open(code_path, "r") as f:
            # any() stops at the first matching line, so each file is reported at most once.
            if any(line.strip().startswith(target_prefix) for line in f):
                print(f"'coreferent_entity_focus' found in Layer {layer}, Head {head}")

In [None]:
# get the (layer, head) associated with the following python functions: ['punctuation_conjunction_attention', 'punctuation_conjunction_dependency', 'conjunction_coherence', 'coordination_alignment'], also print score

import os
folder = "automation_results_bert/scores"
functions = ['punctuation_conjunction_attention', 'punctuation_conjunction_dependency', 'conjunction_coherence', 'coordination_alignment']
folders = []

# Collect (layer, head, function) for every code file that defines one of the
# requested functions (matched on the literal text "def <name>(").
for layer in range(12):
    for head in range(12):
        code_path = os.path.join("automation_results_bert", "llm_code", f"layer{layer}_head{head}_code.py")
        if not os.path.exists(code_path):
            continue
        with open(code_path, "r") as f:
            code = f.read()
        folders.extend((layer, head, func) for func in functions if f"def {func}(" in code)

print("Functions found in (layer, head, function):")
for layer, head, func in folders:
    print(f"Layer {layer}, Head {head}: {func}")

# Print the stored score of each hit; hits without a score file are skipped.
print("Scores found in (layer, head, function):\n")
for layer, head, func in folders:
    score_path = os.path.join(folder, f"layer{layer}_head{head}_score.txt")
    if not os.path.exists(score_path):
        continue
    with open(score_path, "r") as f:
        score = f.read().strip()
    print(f"Layer {layer}, Head {head}: {func} - Score: {float(score):.2f}")

In [None]:
# !pip uninstall matplotlib
# !pip install matplotlib
import matplotlib.pyplot as plt

# Reorder S so that members of the same group sit next to each other.
# NOTE(review): if two programs share a __name__, name_to_idx keeps only the
# last index, so the earlier program is replaced by a duplicate of the later one.
name_to_idx = {fn.__name__: i for i, fn in enumerate(programs)}
new_order = [name_to_idx[name] for group in groups for name in group]
S_grouped = S[np.ix_(new_order, new_order)]
# Tick labels must follow the same permutation as the matrix; labelling with
# the original program order put the wrong name on every moved row/column.
ordered_names = [programs[idx].__name__ for idx in new_order]

colors = "Purples_r"
fig, ax = plt.subplots(1, 1, figsize=(8,8))
im2 = ax.imshow(S_grouped, cmap=colors, aspect='auto')
ax.set_xticks(range(len(ordered_names)))
ax.set_yticks(range(len(ordered_names)))
ax.set_xticklabels(ordered_names, rotation=90)
ax.set_yticklabels(ordered_names)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.title("Similarity Matrix\n", weight='bold')
plt.show()

In [None]:
# sum scores in roberta/scores txt

import os
folder = "automation_results_roberta/scores"
scores = []
# One float per .txt file in the folder; unparseable or NaN contents count as 1.0.
for filename in os.listdir(folder):
    if not filename.endswith(".txt"):
        continue
    with open(os.path.join(folder, filename), 'r') as f:
        content = f.read().strip()
    try:
        content = float(content)
    except ValueError:
        content = 1.0
    else:
        # NaN is the only float that is not equal to itself.
        if content != content:
            content = 1.0
    scores.append(content)

scores

In [None]:
import matplotlib.pyplot as plt

In [None]:
!pip install regex
import regex

In [None]:
# get all programs from automation_refinement/master_list and load these python functions as patterns = [executable functions]

import os
import importlib.util
import types
import numpy as np
from transformers import AutoTokenizer, AutoModel
import regex as re
import pandas as pd
import spacy
from transformers import PreTrainedTokenizerBase, PreTrainedModel
from typing import Optional, Tuple, Callable
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# NOTE(review): the folder name mentions gpt2 while the model loaded above is
# BERT - confirm this is the intended master list.
folder = "automation_refinement_gpt2/master_list"
patterns = []

# Names pre-seeded into every pattern module before it is executed, so the
# files can use them without importing them.
injected_globals = {
    'np': np,
    'PreTrainedTokenizerBase': PreTrainedTokenizerBase,
    'Optional': Optional,
    'Tuple': Tuple,
    'Callable': Callable,
    'spacy': spacy,
}

# os.listdir returns entries in arbitrary order; sorting keeps the pattern
# indices printed below (and any index-based bookkeeping) stable across runs.
for filename in sorted(os.listdir(folder)):
    if not filename.endswith(".py"):
        continue
    code_path = os.path.join(folder, filename)
    spec = importlib.util.spec_from_file_location(f"module_{filename[:-3]}", code_path)
    module = importlib.util.module_from_spec(spec)
    module.__dict__.update(injected_globals)

    try:
        spec.loader.exec_module(module)
        # dir() is alphabetically sorted: keep the first function found.
        for attr_name in dir(module):
            attr = getattr(module, attr_name)
            if isinstance(attr, types.FunctionType):
                patterns.append(attr)
                break
    except Exception as e:
        print(f"Error loading program from {filename}: {e}")

print(f"Loaded {len(patterns)} patterns.")
for i, prog in enumerate(patterns):
    print(f"{i}: {prog.__name__}")

In [None]:
!python -m spacy download en_core_web_sm
import spacy
nlp = spacy.load("en_core_web_sm")

In [None]:
# patterns_strong = every loaded pattern except the five named below.
excluded_pattern_names = {
    "adverbial_modulation",
    "conjunction_based_grouping",
    "dependencies",
    "pos_alignment",
    "semantics_comma_separation",
}
patterns_strong = [p for p in patterns if p.__name__ not in excluded_pattern_names]
len(patterns_strong)

In [None]:
# NOTE(review): depending on which data cell ran last, `sentences` holds 10 or
# 10,000 entries; with only 10 this slice is empty and every score becomes NaN.
test_sentences = sentences[135:137]
# Take the grid size from the model used below rather than from variables that
# are only defined in a later cell.
num_layers = model.config.num_hidden_layers
num_heads = model.config.num_attention_heads
scores = np.zeros((num_layers, num_heads))
top_k = 1

# NOTE(review): LinearRegression (scikit-learn) is not imported in any visible
# cell - make sure `from sklearn.linear_model import LinearRegression` has run.

# Nothing in this section depends on (layer, head), so compute it once per
# sentence instead of once per head: one model forward pass and one evaluation
# of every pattern. Assumes patterns are deterministic.
sentence_inputs = []
for sentence in test_sentences:
    inputs = tokenizer(sentence, return_tensors="pt")
    outputs = model(**inputs, output_attentions=True)
    # One array per layer, indexed [head] below (batch item 0 only).
    attentions = [layer_att[0].detach().numpy() for layer_att in outputs.attentions]
    # Raw pattern outputs (second element of each pattern's return value).
    pattern_mats = [pattern(sentence, tokenizer)[1] for pattern in patterns_strong]
    X = [mat.flatten() for mat in pattern_mats]
    # NaNs are zeroed only in the design matrix used for the fit.
    X_n = np.nan_to_num(np.array(X).T)
    sentence_inputs.append((attentions, pattern_mats, X, X_n))

for layer in range(num_layers):
    for head in range(num_heads):
        print(f"Analyzing Layer {layer}, Head {head}...")
        sentence_scores = []
        for attentions, pattern_mats, X, X_n in sentence_inputs:
            y = np.nan_to_num(attentions[layer][head].flatten())

            reg = LinearRegression().fit(X_n, y)
            side_length = int(np.sqrt(len(y)))
            y = y.reshape((side_length, side_length))

            if top_k == 1:
                # With a single pattern the prediction is that pattern's own
                # output, not a weighted sum.
                best_idx = int(np.argmax(np.abs(reg.coef_)))
                pred_att = pattern_mats[best_idx]
                print(f"Using pattern: {patterns_strong[best_idx].__name__}")
            else:
                # Intercept plus the top_k patterns by coefficient magnitude.
                top_indices = np.argsort(np.abs(reg.coef_))[-top_k:]
                pred_att = reg.intercept_ + sum(reg.coef_[i] * X[i] for i in top_indices)
                pred_att = pred_att.reshape((side_length, side_length))

            # program_similarity is the mean row-wise Jensen-Shannon distance,
            # i.e. exactly the score this loop needs; js_divergence itself is
            # nested inside program_similarity and not callable from here.
            sentence_scores.append(program_similarity(y, pred_att))

        scores[layer, head] = np.mean(sentence_scores)

# for each head: do linear interpolation on patterns, set pred_att to sum with top_k hypotheses based on parameter magnitude

In [None]:
# PLOT DIFFERENT SUMMARY SCORES FOR THE MODEL

max_score = model.config.num_hidden_layers * model.config.num_attention_heads
raw_scores = [111, 92, 62, 65, 56]
labels = ['Random \nToken Baseline', 'Automatic\nPrograms', 'Auto, K=1\nPrograms', 'Best Fit\nPrograms', 'Linear Weight\nPrograms']
colors = ['darkred', 'darkblue', '#6aa84f', '#800080', "darkorange"]

# Bar height is raw / max_score, so a lower raw score gives a lower bar.
# Kept under its own name: rebinding the notebook-wide `scores` here would
# replace the per-head score matrix that later cells save and plot.
normalized_scores = [(raw / max_score) for raw in raw_scores]
fig, ax = plt.subplots(figsize=(8, 5))
bars = plt.bar(labels, normalized_scores, color=colors, width=0.6)

# Add text labels on top of bars
for bar, raw, normalized in zip(bars, raw_scores, normalized_scores):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width() / 2, height + 0.02,
             f'{normalized:.2f}\n[ {int(raw)} / {max_score} ]', ha='center', va='bottom', fontsize=14)
ax.set_facecolor('#F5F5F5')

plt.ylim(0, 1.0)
plt.xticks(fontsize=14)
# Axis-end annotations: top of the scale = bad fit, bottom = good fit.
plt.text(plt.xlim()[0]-0.7, plt.ylim()[1]+0.05, '[bad hypotheses]', ha='left', va='bottom', fontsize=12, color='gray')
plt.text(plt.xlim()[0]-0.7, plt.ylim()[0]-0.13, '[well-fitting\nhypotheses]', ha='left', va='bottom', fontsize=12, color='gray')
plt.ylabel('Normalized Model Scores', fontsize=16, labelpad=20)
plt.grid(axis='y', linestyle='--', alpha=0.6)

plt.tight_layout()
plt.show()

In [None]:
np.save("bert_head_greedy_scores_2.npy", scores)

In [None]:
np.nanmean(scores)

In [None]:
# Replace every NaN entry of `scores` with the mean of the non-NaN entries.

mean_score = np.nanmean(scores)
scores = np.where(np.isnan(scores), mean_score, scores)

In [None]:
from matplotlib.colors import PowerNorm
sq_score = scores

# "Greys" is the canonical colormap name; the "Grays" spelling is only an
# alias on newer Matplotlib releases.
colors = "Greys"
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
# Mask NaN/inf entries so they are drawn in the colormap's "bad" colour
# (masking does not replace them with the mean).
masked_sq = np.ma.masked_invalid(sq_score)
norm = PowerNorm(gamma=1.8, vmin=sq_score.min(), vmax=sq_score.max())
# plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap still exists.
cmap = plt.get_cmap(colors).copy()
cmap.set_bad(color='gray')
im2 = ax.imshow(masked_sq, cmap=cmap, aspect='auto', norm=norm)
im2.set_clim(vmin=0, vmax=1)  # overrides the vmin/vmax given to the norm above
cbar = plt.colorbar(im2, ax=ax)
ax.set_xticks(range(12))
ax.set_yticks(range(12))
ax.set_xticklabels([i for i in range(12)], rotation=90)
ax.set_yticklabels([i for i in range(12)])
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
title = (
    r'$\mathbf{Automation \ Scores}$'  # \mathbf makes the text bold
    '\n\nMethod: Refinement w/ K=1'
    f'\nScores | {model.config.architectures[0]}'
)
plt.title(f"{title}\n")
plt.show()

In [None]:
print(scores)

In [None]:
np.save("data/bert_head_greedy_scores.npy", scores)

In [None]:
sq_score = np.reshape(scores, (12, 12))

# "Greys" is the canonical colormap name; the "Grays" spelling is only an
# alias on newer Matplotlib releases.
colors = "Greys"
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
# Entries equal to -1 are masked and drawn in the colormap's "bad" colour.
masked_sq = np.ma.masked_where(sq_score == -1, sq_score)
norm = PowerNorm(gamma=1.8, vmin=sq_score.min(), vmax=sq_score.max())
# plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap still exists.
cmap = plt.get_cmap(colors).copy()
cmap.set_bad(color='gray')
im2 = ax.imshow(masked_sq, cmap=cmap, aspect='auto', norm=norm)
im2.set_clim(vmin=0, vmax=1)  # overrides the vmin/vmax given to the norm above
cbar = plt.colorbar(im2, ax=ax)
ax.set_xticks(range(12))
ax.set_yticks(range(12))
ax.set_xticklabels([i for i in range(12)], rotation=90)
ax.set_yticklabels([i for i in range(12)])
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
# NOTE(review): the title says "Greedy Refinement" while the output file is
# named "..._refinement_k1.png" - confirm which method this figure shows.
title = (
    r'$\mathbf{Automation \ Scores}$'  # \mathbf makes the text bold
    '\n\nMethod: Greedy Refinement'
    f'\nScores | {model.config.architectures[0]}'
)
plt.title(f"{title}\n")
# Save before show(): once show() returns, pyplot's current figure is a new
# empty one, so saving afterwards writes a blank image.
fig.savefig('automation_scores_refinement_k1.png', dpi=300)
plt.show()

In [None]:
def find_outliers(data):
    """Flag values lying more than 1.5 * IQR outside the quartiles.

    Args:
        data: Array-like of numbers; converted with ``np.array``.

    Returns:
        Tuple ``(outliers, lower_bound, upper_bound, Q1, Q3, IQR)`` where
        ``outliers`` is the array of values strictly below ``lower_bound`` or
        strictly above ``upper_bound``.
    """
    values = np.array(data)
    q1, q3 = np.percentile(values, [25, 75])
    spread = q3 - q1

    fence_width = 1.5 * spread
    low = q1 - fence_width
    high = q3 + fence_width

    is_outlier = np.logical_or(values < low, values > high)
    return values[is_outlier], low, high, q1, q3, spread

# Outliers, 1.5 * IQR bounds and quartiles of the current scores.
outliers, lower_bound, upper_bound, Q1, Q3, IQR = find_outliers(scores)

def plot_scores_boxplot(scores):
    """Draw a vertical box plot of ``scores`` with the raw points beside it.

    The box sits at x = 0.75 and every individual score is scattered at x = 1.
    The y axis is fixed to [0, 1.05] and the figure is shown immediately.

    Args:
        scores: 1-D array-like of scores.
    """
    fig, ax = plt.subplots(figsize=(3.2, 8))

    box_style = dict(
        positions=[0.75],
        vert=True,
        patch_artist=True,
        medianprops={'color': 'black', 'linewidth': 3},
        boxprops={'facecolor': 'gray', 'edgecolor': 'black'},
        flierprops={'marker': 'D', 'markerfacecolor': 'black', 'markersize': 3, 'linestyle': 'none'},
    )
    ax.boxplot(scores, **box_style)

    ax.set_title('Auto, K=1 | BERT', fontsize=14, weight='bold')
    ax.set_ylabel('Automation Scores', fontsize=12)
    ax.set_xticks([])
    ax.set_ylim(0, 1.05)
    ax.grid(axis='y', linestyle='--', alpha=0.6)

    # Every raw score is drawn at x = 1, next to the box.
    point_x = np.ones_like(scores)
    ax.scatter(
        point_x,
        scores,
        color='gray',
        edgecolor='black',
        s=30,
        alpha=0.9,
    )
    plt.show()

# Flatten the score array to 1-D and draw the box plot.
flattened_scores = scores.flatten()
plot_scores_boxplot(flattened_scores)

In [None]:
import matplotlib.pyplot as plt
# "Greys_r" is the canonical colormap name; the "Grays_r" spelling is only an
# alias on newer Matplotlib releases.
colors = "Greys_r"
plt.rcParams['text.usetex'] = False

score_threshold = 0.4

fig, ax = plt.subplots(1, 1, figsize=(8, 6))
# Keep scores below the threshold; everything else (including NaN) is set to
# 1.0, the top of the colour scale.
highlighted_sq = np.where(sq_score < score_threshold, sq_score, 1.0)
norm = PowerNorm(gamma=1.8, vmin=sq_score.min(), vmax=sq_score.max())
# plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap still exists.
cmap = plt.get_cmap(colors).copy()
cmap.set_bad(color='gray')
im2 = ax.imshow(highlighted_sq, cmap=cmap, aspect='auto', norm=norm)
im2.set_clim(vmin=0, vmax=1)  # overrides the vmin/vmax given to the norm above
cbar = plt.colorbar(im2, ax=ax)
ax.set_xticks(range(12))
ax.set_yticks(range(12))
ax.set_xticklabels([i for i in range(12)], rotation=90)
ax.set_yticklabels([i for i in range(12)])

plt.tight_layout(rect=[0, 0.03, 1, 0.95])
num_highlighted = np.sum(sq_score < score_threshold)
# sq_score.size is the real number of cells; len(sq_score)**2 is only correct
# when the array happens to be square.
title = (
    r'$\mathbf{Highlighted\ Scores}$'
    '\n\nMethod: Refinement w/ K=1'
    f'\n {num_highlighted} scores < {score_threshold} ({num_highlighted/sq_score.size*100:.0f}%) | {model.config.architectures[0]}\n')
plt.title(title)
plt.show()

In [None]:
from nltk import sent_tokenize
import csv

# use bert
torch_model = AutoModel.from_pretrained("bert-base-uncased")
torch_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

num_layers = torch_model.config.num_hidden_layers
num_heads = torch_model.config.num_attention_heads
activations = {}  # (layer, head) -> (pattern name, average score), last pattern wins

file = 'data/small_text.csv'
df = pd.read_csv(file)
sentences = []
for paragraph in df['text']:
    sentences.extend(sent_tokenize(paragraph))
sentences = sentences[:10_000]

excluded_pattern_names = {"adverbial_modulation", "conjunction_based_grouping", "dependencies", "pos_alignment", "semantics_comma_separation"}
# Only these (layer, head) pairs are scored. Iterating all heads and skipping
# every pair except (3, 9) did the same thing with 143 wasted iterations.
target_heads = [(3, 9)]

short = sentences[:8]
csv_file_name = "data/bert_refinement.csv"
# Load the spaCy pipeline once instead of once per sentence.
# NOTE(review): `nlp` is not referenced in this cell; presumably it is read as
# a notebook global elsewhere (e.g. by score_prediction) - confirm.
nlp = spacy.load("en_core_web_sm")
# Append mode: re-running the cell adds rows to the existing CSV.
# The handle gets its own name so it no longer overwrites the `file` path above.
with open(csv_file_name, 'a', newline='') as csv_file:
    writer = csv.writer(csv_file)
    for pattern in patterns:
        if pattern.__name__ in excluded_pattern_names: continue
        print(f"\nCurrently Analyzing pattern: {pattern.__name__}")
        avg_score = []
        for idx, sentence in enumerate(short):
            print(f"\tProcessing sentence {idx}/{len(short)}")
            for i, j in target_heads:
                score = score_prediction(torch_model, torch_tokenizer, (i, j), pattern, sentence, distance="jsd", output=False)
                # Only sentences scoring below 0.55 count towards the average.
                if score < 0.55:
                    avg_score.append((idx, pattern.__name__, i, j, score))

        # Average the retained sentence scores per head and write one row each.
        score_dict = {}
        for idx, pattern_name, i, j, score in avg_score:
            score_dict.setdefault((i, j), []).append((pattern_name, score))
        for (i, j), values in score_dict.items():
            # Local name: rebinding `scores` here would overwrite the
            # notebook-wide per-head score array.
            head_scores = [score for _, score in values]
            avg_score_val = sum(head_scores) / len(head_scores)
            pattern_name = values[0][0]
            activations[(i, j)] = (pattern_name, avg_score_val)
            print(f"Layer {i}, Head {j} - Score: {avg_score_val:.2f}")
            writer.writerow([i, j, pattern.__name__, avg_score_val])

In [None]:
# get all programs from automation_refinement/master_list and load these python functions as patterns = [executable functions]
# loop over heads and using three sentences get an average score for each head for each pattern, save the name and best score of best fitting pattern
# build a matrix (layers * heads) with the best fitting pattern_name and best_Score

In [None]:
# PLOT DIFFERENT SUMMARY SCORES FOR THE MODEL

import matplotlib.pyplot as plt

max_score = 144
raw_scores = [114, 92.05, 56, 77.9, 67]
labels = ['Random Token\n Baseline', 'Automatic\nPrograms', 'Automatic\nw/ Refinement', 'Best Fit\nPrograms', 'Linear Weight\nPrograms']
colors = ['darkred', 'darkblue', '#FF8C00', '#6aa84f', '#800080']

# Bar height is raw / max_score, so a lower raw score gives a lower bar.
# Kept under its own name so the notebook-wide `scores` is not overwritten
# with these five bar heights.
normalized_scores = [(raw / max_score) for raw in raw_scores]
fig, ax = plt.subplots(figsize=(8, 5))
bars = plt.bar(labels, normalized_scores, color=colors, width=0.6)

# Add text labels on top of bars
for bar, raw, normalized in zip(bars, raw_scores, normalized_scores):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width() / 2, height + 0.02,
             f'{normalized:.2f}', ha='center', va='bottom', fontsize=14)
ax.set_facecolor('#F5F5F5')

plt.ylim(0, 1.0)
plt.xticks(fontsize=14)
# Axis-end annotations: top of the scale = bad fit, bottom = good fit.
plt.text(plt.xlim()[0]-0.7, plt.ylim()[1]+0.05, '[bad hypotheses]', ha='left', va='bottom', fontsize=12, color='gray')
plt.text(plt.xlim()[0]-0.7, plt.ylim()[0]-0.13, '[well-fitting\nhypotheses]', ha='left', va='bottom', fontsize=12, color='gray')
plt.ylabel('Normalized Model Scores', fontsize=16, labelpad=20)
plt.grid(axis='y', linestyle='--', alpha=0.6)

plt.tight_layout()
plt.show()

In [None]:
    def forward(self, x, edge_index,n_id, feature_vec):
        """Combine text-encoder output with graph features and classify.

        Args:
            x: Not read anywhere in this method.
            edge_index: Passed (cast to long) to every layer in
                ``self.gcn_layers``; its device is also where the tokenized
                batch is moved.
            n_id: Tensor of indices used to pick entries of ``self.texts``.
            feature_vec: Input to ``self.feature_transform``; the result is the
                initial graph embedding.

        Returns:
            Tuple ``(self.classifier(graph_embedding), graph_embedding)`` where
            ``graph_embedding`` is the output of the last GCN layer.
        """
        transformed_feature = self.feature_transform(feature_vec)

        # Look up and tokenize the texts selected by n_id (padded, truncated to 128 tokens).
        text = [self.texts[i] for i in n_id.cpu().numpy()]
        tokens = self.tokenizer(text, padding=True, truncation=True, max_length=128, return_tensors='pt')

        tokens = tokens.to(edge_index.device)
        input_embeddings = self.text_model.get_input_embeddings()(tokens['input_ids'])
        
        # Non-soft mode: run the text model once, before the GCN loop, and keep
        # position 0 of the last hidden state as the text embedding.
        # NOTE(review): no attention_mask is passed here although the batch is padded - confirm intended.
        if self.soft == False:
            outputs = self.text_model(inputs_embeds=input_embeddings)
            hidden_states = outputs.last_hidden_state
            text_embedding = hidden_states[:, 0, :]

        graph_embedding = transformed_feature
        count = 0  # NOTE(review): never read after this assignment
        
        for gcn_layer in self.gcn_layers:
            edge_index = edge_index.long()
            
            # Soft mode: re-run the text model for every GCN layer with the
            # current graph embedding prepended as one extra leading position.
            if self.soft:
                graph_embedding = graph_embedding.unsqueeze(1)
                modified_embeddings = torch.cat((graph_embedding, input_embeddings), dim=1)
                attention_mask = tokens['attention_mask']
                batch_size = attention_mask.shape[0]
                # Extend the mask with a column of ones for the prepended position.
                new_token_mask = torch.ones((batch_size, 1), dtype=attention_mask.dtype, device=attention_mask.device)
                attention_mask = torch.cat([new_token_mask, attention_mask], dim=1)
                outputs = self.text_model(inputs_embeds=modified_embeddings, attention_mask=attention_mask)
                hidden_states = outputs.last_hidden_state
                # Position 0 is now the prepended graph-embedding slot.
                text_embedding = hidden_states[:, 0, :]

            # presumably cross_attention takes (query, key, value), i.e. graph as query and text as key/value - TODO confirm
            graph_embedding_for_attention = graph_embedding.squeeze(1).unsqueeze(0)  # [1, batch_size, embedding_dim]
            text_embedding_for_attention = text_embedding.unsqueeze(0)  # [1, batch_size, embedding_dim]
            text_to_graph_attention, _ = self.cross_attention(graph_embedding_for_attention, 
                                                              text_embedding_for_attention,text_embedding_for_attention)
            text_to_graph_attention = text_to_graph_attention.squeeze(0)  # [batch_size, embedding_dim]

            # Average the text embedding with the attention output, then apply the GCN layer.
            combined_embedding = (text_embedding + text_to_graph_attention) / 2
            graph_embedding = gcn_layer(combined_embedding, edge_index)

        return self.classifier(graph_embedding),graph_embedding