In [1]:
import numpy as np
import pandas as pd
import re
import string
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score
from transformers import (set_seed,
                          TrainingArguments,
                          Trainer,
                          GPT2Config,
                          GPT2Tokenizer,
                          AdamW,
                          get_linear_schedule_with_warmup,
                          GPT2ForSequenceClassification)
import gym

# Hyper paramater

In [2]:
max_len = 64 # Max lenght of the text for input
batch_size = 32
epochs = 6
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_labels = 4

# loading data

In [3]:
df = pd.read_json("hf://datasets/NathanGavenski/LunarLander-v2/teacher.jsonl", lines=True)

# Loading model and tokenizer

In [4]:
print('Loading gpt-2 model')
model_config = GPT2Config.from_pretrained(pretrained_model_name_or_path='gpt2', num_labels=4)

print('Loading tokenizer...')
tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model_name_or_path='gpt2')
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

print('Loading model...')
model = GPT2ForSequenceClassification.from_pretrained(pretrained_model_name_or_path='gpt2', config=model_config)
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = model.config.eos_token_id
model.to(device)

Loading gpt-2 model


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Loading model...




model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPT2ForSequenceClassification(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (score): Linear(in_features=768, out_features=4, bias=False)
)

In [5]:
df.head()

Unnamed: 0,obs,actions,rewards,episode_starts
0,"[0.001747226691804, 1.399963617324829, 0.17696...",2,1.145592,1
1,"[0.003545951796695, 1.38917601108551, 0.181636...",2,2.836008,0
2,"[0.005403709597885001, 1.378986597061157, 0.18...",2,3.144166,0
3,"[0.007226371672004001, 1.36941385269165, 0.183...",2,2.81107,0
4,"[0.009204101748764001, 1.360573768615722, 0.19...",2,0.116871,0


In [6]:
df['obs'] = df['obs'].apply(lambda obs: ' '.join([f"{x:.6f}" for x in obs]))

# Dataset and Collator

In [7]:
class DatasetCreator(Dataset):
    def __init__(self, processed_data, train):
        self.data = processed_data
        self.train = train

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        line = self.data.iloc[index]
        if self.train:
            return {'text': line['obs'], 'label': line['actions']}
        else:
            return {'text': line['obs'], 'label': 0}

# Class to tokenize and process the text for input to the dataloader
class GPT2_collator(object):
    def __init__(self, tokenizer, max_seq_len=512):
        self.tokenizer = tokenizer
        self.max_seq_len = max_seq_len
        return

    def __call__(self, sequences):
        texts = [sequence['text'] for sequence in sequences]
        labels = [int(sequence['label']) for sequence in sequences]
        inputs = self.tokenizer(text=texts,
                                return_tensors='pt',
                                padding=True,
                                truncation=True,
                                max_length=self.max_seq_len)
        inputs.update({'labels': torch.tensor(labels)})
        return inputs

# Training function

In [8]:
# Function for training
def train(dataloader, optimizer, scheduler, device):
    global model
    model.train()
    predictions_labels = []
    true_labels = []
    total_loss = 0

    for batch in tqdm(dataloader, total=len(dataloader)):
        true_labels += batch['labels'].numpy().flatten().tolist()
        batch = {k:v.type(torch.long).to(device) for k,v in batch.items()}
        optimizer.zero_grad()
        outputs = model(**batch)
        loss, logits = outputs[:2]
        total_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        predictions_labels += logits.argmax(axis=-1).flatten().tolist()
    avg_epoch_loss = total_loss / len(dataloader)
    return predictions_labels, true_labels, avg_epoch_loss

# Validation function

In [9]:
# Function for validation
def validate(dataloader, device):
    global model
    model.eval()
    predictions_labels = []
    true_labels = []
    total_loss = 0

    for batch in tqdm(dataloader, total=len(dataloader)):
        true_labels += batch['labels'].numpy().flatten().tolist()
        batch = {k:v.type(torch.long).to(device) for k,v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
            loss, logits = outputs[:2]
            total_loss += loss.item()
            predictions_labels += logits.argmax(axis=-1).flatten().tolist()
    avg_epoch_loss = total_loss / len(dataloader)
    return predictions_labels, true_labels, avg_epoch_loss

# Predict Func

In [10]:
def predict(dataloader, device):
    global model
    model.eval()
    predictions_labels = []

    for batch in tqdm(dataloader, total=len(dataloader)):
        batch = {k:v.type(torch.long).to(device) for k,v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
            _, logits = outputs[:2]
            predictions_labels += logits.argmax(axis=-1).flatten().tolist()
    return predictions_labels

In [11]:
df_val = df[0:19000]
df_val.shape

(19000, 4)

In [12]:
gpt2_collator = GPT2_collator(tokenizer=tokenizer, max_seq_len=max_len)

# Train Test split

In [13]:
train_data = DatasetCreator(df, train=True)
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, collate_fn=gpt2_collator)


val_data = DatasetCreator(df_val, train=True)
val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=True, collate_fn=gpt2_collator)

In [14]:
optimizer = AdamW(model.parameters(), lr = 5e-5, eps = 1e-8, weight_decay=0.01)



In [15]:
for x in val_dataloader:
    print(tokenizer.decode(x['input_ids'][0]))
    break

<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>0.049382 -0.001237 -0.000006 -0.000000 -0.000409 0.000002 1.000000 1.000000


# Training

In [16]:
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = 0, num_training_steps = total_steps)
loss = []
accuracy = []
val_loss_list = []
val_accuracy_list = []

for epoch in tqdm(range(epochs)):
    train_labels, true_labels, train_loss = train(train_dataloader, optimizer, scheduler, device)
    train_acc = accuracy_score(true_labels, train_labels)
    print('epoch: %.2f train accuracy %.2f' % (epoch, train_acc))
    loss.append(train_loss)
    accuracy.append(train_acc)

    val_labels, val_true_labels, val_loss = validate(val_dataloader, device)
    val_acc= accuracy_score(val_true_labels, val_labels)
    print('epoch: %.2f validation accuracy %.2f' % (epoch, val_acc))
    val_loss_list.append(val_loss)
    val_accuracy_list.append(val_acc)

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/12000 [00:00<?, ?it/s]

epoch: 0.00 train accuracy 0.80


  0%|          | 0/594 [00:00<?, ?it/s]

epoch: 0.00 validation accuracy 0.86


  0%|          | 0/12000 [00:00<?, ?it/s]

epoch: 1.00 train accuracy 0.87


  0%|          | 0/594 [00:00<?, ?it/s]

epoch: 1.00 validation accuracy 0.90


  0%|          | 0/12000 [00:00<?, ?it/s]

epoch: 2.00 train accuracy 0.90


  0%|          | 0/594 [00:00<?, ?it/s]

epoch: 2.00 validation accuracy 0.93


  0%|          | 0/12000 [00:00<?, ?it/s]

epoch: 3.00 train accuracy 0.92


  0%|          | 0/594 [00:00<?, ?it/s]

epoch: 3.00 validation accuracy 0.93


  0%|          | 0/12000 [00:00<?, ?it/s]

epoch: 4.00 train accuracy 0.93


  0%|          | 0/594 [00:00<?, ?it/s]

epoch: 4.00 validation accuracy 0.96


  0%|          | 0/12000 [00:00<?, ?it/s]

epoch: 5.00 train accuracy 0.94


  0%|          | 0/594 [00:00<?, ?it/s]

epoch: 5.00 validation accuracy 0.97


In [21]:
from sklearn.metrics import confusion_matrix
import numpy as np

def predict_and_get_true_labels(dataloader, device):
    global model
    model.eval()
    predictions_labels = []
    true_labels = []

    for batch in tqdm(dataloader, total=len(dataloader)):
        # Assuming the batch contains 'labels' for true labels
        true_labels += batch['labels'].flatten().tolist()

        batch = {k: v.type(torch.long).to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
            _, logits = outputs[:2]
            predictions_labels += logits.argmax(axis=-1).flatten().tolist()

    return predictions_labels, true_labels

# Assuming dataloader contains true labels under the key 'labels'
predictions, true_labels = predict_and_get_true_labels(val_dataloader, device)

# Create the confusion matrix
conf_matrix = confusion_matrix(true_labels, predictions)

print("Confusion Matrix:")
print(conf_matrix)

  0%|          | 0/594 [00:00<?, ?it/s]

Confusion Matrix:
[[5407   61   53   95]
 [  53 1827   28    2]
 [  31   24 9466   27]
 [  84    8   52 1782]]


In [22]:
print(classification_report(true_labels, predictions))

              precision    recall  f1-score   support

           0       0.97      0.96      0.97      5616
           1       0.95      0.96      0.95      1910
           2       0.99      0.99      0.99      9548
           3       0.93      0.93      0.93      1926

    accuracy                           0.97     19000
   macro avg       0.96      0.96      0.96     19000
weighted avg       0.97      0.97      0.97     19000



In [23]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [24]:
model.push_to_hub("ErnestBeckham/gptController")

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ErnestBeckham/gptController/commit/79bed1060b52548b111a6ef1bb8b05d8c5b7b095', commit_message='Upload GPT2ForSequenceClassification', commit_description='', oid='79bed1060b52548b111a6ef1bb8b05d8c5b7b095', pr_url=None, pr_revision=None, pr_num=None)