## Data
10 tasks, 10k training examples per task.

10 epochs, learning rate $1\times 10^{-4}$, batch size = 128

### pip Installation

In [None]:
! pip install pyvene
! pip install nnsight

Check installed package

In [None]:
!pip list

### Set-up

In [None]:
import torch
from torch.utils.data import DataLoader
import random
import copy
import itertools
import numpy as np
import pyvene as pv
from tqdm import tqdm, trange


from sklearn.metrics import classification_report
from transformers import get_linear_schedule_with_warmup

from pyvene import CausalModel
from pyvene.models.mlp.modelings_mlp import MLPConfig
from pyvene import create_mlp_classifier
from pyvene import (
    IntervenableModel,
    VanillaIntervention,
    RotatedSpaceIntervention,
    LowRankRotatedSpaceIntervention,
    RepresentationConfig,
    IntervenableConfig,
)

In [None]:
seed = 516
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

### The WordLogic Task
One task example:
$[(t_2=t_4)\wedge\neg(t_0=t_4)]\vee (t_1 = t_3)$

In [None]:
import pandas as pd

# 加载 GPT-2 词汇表
from transformers import GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
vocab = list(tokenizer.get_vocab().keys())  # 获取所有词汇

# sampling
samples = []
for _ in range(10000):
    t0 = np.random.choice(vocab)
    t1 = t0 if np.random.random() < 0.5 else random.choice(vocab) # t1 有 50% 的概率等于 t0
    t2 = np.random.choice(vocab)
    t3 = t1 if np.random.random() < 0.3 else random.choice(vocab) # t3 有 30% 的概率等于 t1
    t4 = t2 if np.random.random() < 0.5 else random.choice(vocab) # t4 有 50% 的概率等于 t2
    label = ((t2 == t4) and not (t0 == t4)) or (t1 == t3)
    samples.append((t0 + ',' + t1 + ',' + t2 + ',' + t3 + ',' + t4, label))

# save to a file
df = pd.DataFrame(samples, columns=["texts", "labels"])
df.to_csv("sample_data.tsv", sep="\t", index=False)

In [None]:
# construct the hypithesize causal model
variables = ["T0", "T1", "T2", "T3", "T4", "E", "NE", "IS_SYN", "AND", "OR"]

reps = [s[0].split(",") for s in samples]
values = {}
values["T0"] = [lst[0] for lst in reps]
values["T1"] = [lst[1] for lst in reps]
values["T2"] = [lst[2] for lst in reps]
values["T3"] = [lst[3] for lst in reps]
values["T4"] = [lst[4] for lst in reps]
values["E"] = [True, False]
values["NE"] = [True, False]
values["IS_SYN"] = [True, False]
values["AND"] = [True, False]
values["OR"] = [True, False]

parents = {
    "T0": [],
    "T1": [],
    "T2": [],
    "T3": [],
    "T4": [],
    "E": ["T2", "T4"],
    "NE": ["T0", "T4"],
    "IS_SYN": ["T1", "T3"],
    "AND": ["E", "NE"],
    "OR": ["AND", "IS_SYN"]
}

def FILLER():
    return reps[0]

# def IS_SYN(x, y):
#   return flag

functions = {
    "T0": FILLER,
    "T1": FILLER,
    "T2": FILLER,
    "T3": FILLER,
    "T4": FILLER,
    "E": lambda x, y: x == y,
    "NE": lambda x, y: x != y,
    "IS_SYN": lambda x, y: x == y,
    "AND": lambda x, y: x and y,
    "OR": lambda x, y: x or y
}

pos = {
    "T0": (0.2, 0),
    "T1": (0.8, 0),
    "T2": (1.4, 0),
    "T3": (2, 0),
    "T4": (2.6, 0),
    "E": (0.5, 1),
    "NE": (1.4, 1),
    "IS_SYN": (2.3, 1),
    "AND": (1.1, 2),
    "OR": (1.1, 3)
}

equality_model = CausalModel(variables, values, parents, functions, pos=pos)
equality_model.print_structure()

### Multiple Tasks

In [None]:
import pandas as pd

# 加载 GPT-2 词汇表
from transformers import GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
vocab = list(tokenizer.get_vocab().keys())  # 获取所有词汇

# Number of samples
num_samples = 100000

tasks = [
    lambda p, q, r: p and q and r,
    lambda p, q, r: not p and q and r, 
    lambda p, q, r: p and not q and r, #
    lambda p, q, r: not p and not q and r,#
    lambda p, q, r: p and q and not r,
    lambda p, q, r: not p and q and not r, 
    lambda p, q, r: p and not q and not r, 
    lambda p, q, r: not p and not q and not r,
]

samples = [] 
selected_tasks = [] 

for task_id, task in enumerate(tasks): 
  np.random.seed(task_id + 5) 
  single_task_sample =[] 
  num_true = 0 

  # Precompute random choices
  t0_choices = np.random.choice(vocab, num_samples)
  t1_choices = np.random.choice(vocab, num_samples)
  t2_choices = np.random.choice(vocab, num_samples)
  t3_choices = np.random.choice(vocab, num_samples)
  t4_choices = np.random.choice(vocab, num_samples)

  # Generate samples
  for i in range(num_samples):
      t0 = t0_choices[i]
      t1 = t0 if np.random.random() < 0.5 else t1_choices[i]
      t2 = t2_choices[i]
      t3 = t1 if np.random.random() < 0.5 else t3_choices[i]
      t4 = t2 if np.random.random() < 0.5 else t4_choices[i]
      p, q, r = (t2 == t4), (t0 == t4), (t1 == t3)
      label = task(p, q, r)
      if label: 
        num_true += 1 
      single_task_sample.append((f"{t0},{t1},{t2},{t3},{t4}", str(label))) 
  if num_true > 0.25 * num_samples and num_true < 0.75 * num_samples: 
      samples += single_task_sample 
      selected_tasks.append(task_id)

print(selected_tasks)

# Save to a file
df = pd.DataFrame(samples, columns=["texts", "labels"])
df.to_csv(f"sample_data_{num_samples}.tsv", sep="\t", index=False)

## Import the GPT2 models and finetuning GPT2



In [243]:
from transformers import GPT2LMHeadModel, AutoModelForCausalLM, GPT2Tokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling, EarlyStoppingCallback
import torch
from tqdm.auto import tqdm
from torch.utils.data import Dataset, random_split
from sklearn.model_selection import train_test_split
import os
import random
import pandas as pd

The following is the code to finetune GPT2.
1. Load the model

In [269]:
# Load model and tokenizer
model_path_ft = "./ft_model/fine_tuned_gpt2"
if os.path.exists(model_path_ft):
  print(f"Loading existing model from {model_path_ft}")
  model = GPT2LMHeadModel.from_pretrained(model_path_ft)
  tokenizer = GPT2Tokenizer.from_pretrained(model_path_ft)
  print("Model loaded successfully!")
elif os.path.exists("./pretrained_model/pretrained_tuned_gpt2"):
  print(f"Loading existing model from ./pretrained_model/pretrained_tuned_gpt2")
  model = GPT2LMHeadModel.from_pretrained("./pretrained_model/pretrained_tuned_gpt2")
  tokenizer = GPT2Tokenizer.from_pretrained("./pretrained_model/pretrained_tuned_gpt2")
  print("Model loaded successfully!")
else:
  print(f"Did not find existing model from {model_path_ft}")
  print("Loading a new model from hugging face")
  model = GPT2LMHeadModel.from_pretrained('gpt2')
  tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
  tokenizer.pad_token = tokenizer.eos_token
  model.config.pad_token_id = model.config.eos_token_id
  print("Model loaded successfully!")
  # Save the model
  os.mkdir("./pretrained_model/")
  model.save_pretrained("./pretrained_model/pretrained_tuned_gpt2")
  tokenizer.save_pretrained("./pretrained_model/pretrained_tuned_gpt2")

Loading existing model from ./pretrained_model/pretrained_tuned_gpt2
Model loaded successfully!


In [228]:
model.save_pretrained("./pretrained_model/pretrained_tuned_gpt2_medium")
tokenizer.save_pretrained("./pretrained_model/pretrained_tuned_gpt2_medium")

('./pretrained_model/pretrained_tuned_gpt2_medium/tokenizer_config.json',
 './pretrained_model/pretrained_tuned_gpt2_medium/special_tokens_map.json',
 './pretrained_model/pretrained_tuned_gpt2_medium/vocab.json',
 './pretrained_model/pretrained_tuned_gpt2_medium/merges.txt',
 './pretrained_model/pretrained_tuned_gpt2_medium/added_tokens.json')

Prepare the dataset class

In [270]:
class WordLogicDataset(Dataset):
    def __init__(self, texts, tokenizer, max_length):
        # Tokenize with padding and truncation
        self.encodings = tokenizer(texts,
                                 truncation=True,
                                 padding=True,
                                 max_length=max_length,
                                 return_tensors="pt")
        self.texts = texts

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encodings.items()}
        item['labels'] = item['input_ids']
        item['text'] = self.texts[idx]  # Store the original text for comparison
        return item

    def __len__(self):
        return len(self.encodings['input_ids'])

Prepare the data

In [271]:
def formattext(texts, label, context_texts, context_label, context_len):
    context_len =  min(context_len, len(context_texts))
    # pick context_len number of context_texts randomly
    formatted_texts = [f"{text}={label}" for text, label in zip(texts, label)]
    formatted_context_texts = [f"{text}={label}" for text, label in zip(context_texts, context_label)]
    for i, text in enumerate(formatted_texts):
        context_texts = random.sample(formatted_context_texts, context_len)
        context_text = "\n".join(context_texts)
        formatted_texts[i] = f"{context_text}\n{text}"
    return formatted_texts

In [274]:
# Prepare the data
# texts = ["EGA,Duration,for,reportprint,for", "Similar,agency,Examples,drm,Similar", "SPONSORED,foundland,eways,reportprint,SPONSORED", "Colorado,oreAnd,ophe,Timer,esting", "Warren,puter,ActionCode,Hol,Warren", "iffs,omics,Cas,omic,adobe"]
# labels = [1, 0, 1, 1, 0, 1]  # Your labels
df = pd.read_csv("sample_data_100000_2.tsv", sep="\t")
texts = df["texts"].tolist()
labels = [str(label) for label in df["labels"].tolist()]

nums_contexts = 3000
num_task = 2
size_per_task = len(texts) // num_task
format_texts = []
for i in range(num_task):
    context_texts = texts[size_per_task * i: size_per_task * i + nums_contexts]
    context_labels = labels[size_per_task * i: size_per_task * (i) + nums_contexts]
    format_texts += formattext(texts[size_per_task * (i) + nums_contexts: size_per_task *(i+1)], labels[size_per_task * (i) + nums_contexts: size_per_task *(i+1)], context_texts, context_labels, 5)

max_length = 16
# First split: separate test set (80% train+val, 20% test)
train_val_texts, test_texts = train_test_split(
    format_texts, test_size=0.2, random_state=35
)

# Second split: separate train and validation from the remaining 80%
train_texts, val_texts = train_test_split(
    train_val_texts, test_size=0.25, random_state=42  # 0.25 of 80% = 20% of total
)

# Create datasets
train_dataset = WordLogicDataset(train_texts, tokenizer, max_length=max_length)
val_dataset = WordLogicDataset(val_texts, tokenizer, max_length=max_length)
test_dataset = WordLogicDataset(test_texts, tokenizer, max_length=max_length)


In [275]:
print(test_texts[0])
print(size_per_task)
print(test_texts[len(test_texts) // num_task])

Ġmasters,Ġmasters,Ġrematch,Ġmasters,Ġrematch=False
ĠBrilliant,ĠBrilliant,Ġaccents,ĠBrilliant,Ġmastered=True
Ġthou,Ġthou,September,Ġthou,September=False
oaded,Ġimproperly,WOR,Ġasteroid,WOR=False
Ġsentient,ĠHead,Ġairst,ĠHead,Ġairst=False
Ġaccounts,ĠClarence,ĠArtificial,ĠClarence,ĠJuice=True
100000
Pin,Ġwondering,Ġ302,Ġappointment,Ġdissu=False
ĠPCR,ADS,ĠEV,Ġbonded,ĠEV=False
Ġannually,ĠEthan,ĠContinental,ĠHDMI,ham=False
Eat,Eat,ĠdÃ©,Eat,ĠdÃ©=False
Getting,ĠPap,Ġsections,ithing,Ġpiv=False
ighty,ighty,waukee,ighty,waukee=False


In [276]:
print(f"Number of training examples: {len(train_dataset)}")
print(f"Number of validation examples: {len(val_dataset)}")
print(f"Number of test examples: {len(test_dataset)}")
print(f"Number of context examples: {len(context_texts)}")

Number of training examples: 116400
Number of validation examples: 38800
Number of test examples: 38800
Number of context examples: 3000


Training parameters

In [277]:
# Check if CUDA is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")
model.to(device)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, 
    mlm=False
)
# Define training arguments
training_args = TrainingArguments(
    output_dir='./ft_model/results',
    dataloader_num_workers=10,
    num_train_epochs=20,
    per_device_train_batch_size= 128,  # REMEMBER TO CAHNGE THIS WHEN TRAINING on GPU
    per_device_eval_batch_size= 128,
    learning_rate=1e-4,
    warmup_steps = 500,
    weight_decay = 0.01,
    logging_dir='./ft_model/logs',
    evaluation_strategy="epoch",  # Evaluate at the end of each epoch
    save_strategy="epoch",        # Save at the end of each epoch
    load_best_model_at_end=True,  # Load the best model when training ends
    metric_for_best_model="loss"
)

Using device: cuda




In [278]:
class CustomTrainer(Trainer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def training_step(self, model, inputs, return_loss=True):
        """Override training step to display loss in progress bar"""
        outputs = super().training_step(model, inputs, return_loss)
        if hasattr(self, 'progress_bar'):
            # outputs is the loss tensor directly
            self.progress_bar.set_postfix({'loss': outputs.item()})
        return outputs

    def train(self, resume_from_checkpoint=None, **kwargs):
        """Override train to add progress bar"""
        # Calculate total steps
        num_update_steps_per_epoch = len(self.train_dataset) // self.args.per_device_train_batch_size
        total_train_steps = num_update_steps_per_epoch * self.args.num_train_epochs

        # Create progress bar
        self.progress_bar = tqdm(
            total=total_train_steps,
            desc="Training",
            position=0,
            leave=True
        )

        # Train
        result = super().train(resume_from_checkpoint=resume_from_checkpoint, **kwargs)

        # Close progress bar
        self.progress_bar.close()
        return result

    def evaluation_loop(self, dataloader, description, prediction_loss_only, ignore_keys=None, metric_key_prefix="eval"):
        """Override evaluation loop to add progress bar"""
        eval_bar = tqdm(
            total=len(dataloader),
            desc=description,
            position=0,
            leave=True
        )

        output = super().evaluation_loop(
            dataloader,
            description,
            prediction_loss_only,
            ignore_keys,
            metric_key_prefix
        )
        eval_bar.close()
        return output

In [279]:
def sft(model, training_args, train_dataset, val_dataset):
  # Create Trainer
  trainer = CustomTrainer(
      model=model,
      args=training_args,
      train_dataset= train_dataset,
      data_collator=data_collator,
      eval_dataset=val_dataset,
      callbacks=[EarlyStoppingCallback(early_stopping_patience=4)]  # early stopping callback
  )

  # Train the model
  trainer.train()

In [None]:
sft(model, training_args, train_dataset, val_dataset)

Training:   0%|          | 0/18180 [00:00<?, ?it/s]

In [None]:
# Save the model
model.save_pretrained("./ft_model/fine_tuned_gpt2")
tokenizer.save_pretrained("./ft_model/fine_tuned_gpt2")

In [None]:
!zip -r ft_model.zip ft_model/fine_tuned_gpt2

  adding: ft_model_classifier/fine_tuned_gpt2/ (stored 0%)
  adding: ft_model_classifier/fine_tuned_gpt2/config.json (deflated 53%)
  adding: ft_model_classifier/fine_tuned_gpt2/merges.txt (deflated 53%)
  adding: ft_model_classifier/fine_tuned_gpt2/vocab.json (deflated 68%)
  adding: ft_model_classifier/fine_tuned_gpt2/model.safetensors (deflated 7%)
  adding: ft_model_classifier/fine_tuned_gpt2/tokenizer_config.json (deflated 56%)
  adding: ft_model_classifier/fine_tuned_gpt2/special_tokens_map.json (deflated 74%)


For prediction given a sentence

In [None]:
# For inference
def predict(text, model, tokenizer, device):
    text = text.rsplit('=', 1)
    inputs = tokenizer(text[0] + '=', return_tensors="pt").to(device)
    outputs = model.generate(
        inputs['input_ids'],
        max_length=len(inputs['input_ids'][0]) + 2,  # Just enough to generate the label. Two tokens include the label and EOS
        pad_token_id=tokenizer.eos_token_id,
        num_return_sequences=1
    )

    predicted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    #print(predicted_text)
    return predicted_text.rsplit('=',1)[1].rstrip()

In [None]:
def evaluate_model(model, test_texts, tokenizer, device):
    model.eval()
    total_correct = 0
    total_samples = len(test_texts)
    for i in range(total_samples):
        label = predict(test_texts[i], model, tokenizer, device)
        correct_label = test_texts[i].rsplit('=',1)[1]
        #print(len(label), len(correct_label))
        #print(f"{i} sample: predict {label}, correct: {correct_label}")
        if correct_label in label:
            total_correct += 1
        # else:
        #     print(f"Wrong answer! Predict: {label}")
    
    accuracy = total_correct / total_samples
    return accuracy

In [None]:
print(test_texts[0])

In [None]:
print("Starting evaluation...")
results = evaluate_model(model, test_texts, tokenizer, device)
# Print results

In [None]:
print(f"Test Accuracy: {results:.4f}")