In [None]:
import sys
# Colab-only setup: the install and the Drive mount run only when the
# `google.colab` module is already loaded in this interpreter.
# Only `transformers` is version-pinned; the other packages resolve to whatever
# is current at install time.
if 'google.colab' in sys.modules: 
    !pip install datasets transformers==4.37.2 evaluate accelerate optimum auto-gptq
    from google.colab import drive
    drive.mount('/content/drive')

# Location of the input CSV under the Drive mount point used above.
# NOTE(review): this is assigned even outside Colab, where the path presumably
# does not exist - confirm before running locally.
file_path = '/content/drive/My Drive/VSM_BRIMS_03_02.csv'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer

In [None]:
# Load the trial-level data. The location comes from `file_path` (set in the
# first cell) instead of repeating the literal, so the path has one source of truth.
df = pd.read_csv(file_path, header=0)

# Overwrite the CSV's own header names with the names used throughout the notebook.
# Assigning a list of the wrong length raises, which guards against a changed schema.
df.columns = ['task', 'participant', 'trial', 'decision_type', 'choice', 'OEE1', 'OEE2', 'CT1', 'CT2']

print(df.head())

# Fold `choice` and `decision_type` into one integer label.
# NOTE(review): the encoding is only unique per (choice, decision_type) pair if
# decision_type takes values in {0, 1, 2} - confirm against the data.
df['multiclass_target'] = df['choice'] * 3 + df['decision_type']
print(df.head())

   task  participant  trial  decision_type  choice  OEE1  OEE2  CT1  CT2
0     0            0      0              0       1    88    86   46   48
1     0            0      1              1       1    88    86   46   48
2     0            0      2              0       1    88    86   46   48
3     0            0      3              0       1    88    86   46   48
4     0            0      4              0       1    88    86   46   48
   task  participant  trial  decision_type  choice  OEE1  OEE2  CT1  CT2  \
0     0            0      0              0       1    88    86   46   48   
1     0            0      1              1       1    88    86   46   48   
2     0            0      2              0       1    88    86   46   48   
3     0            0      3              0       1    88    86   46   48   
4     0            0      4              0       1    88    86   46   48   

   multiclass_target  
0                  3  
1                  4  
2                  3  
3            

In [18]:
# Sanity check: the prompt-building step groups rows by the `task` column.
task_column_missing = 'task' not in df.columns
if task_column_missing:
    print("Task column is missing. Available columns:", df.columns)
else:
    print("Task column is present.")

Task column is present.


In [None]:
import pandas as pd

# Prepare the question template
# NOTE(review): the 4th and 5th fragments concatenate without a space
# ("...headcount costs.There are two options..."). The text is left untouched
# here because it is the model input and changing it would change results.
question_template = (
    "Our manufacturing line has two sections with potential defect sources: pre-assembly (0) and assembly (1). "
    "Pre-assembly takes {CT1} seconds with an Overall Equipment Effectiveness(OEE) rate of {OEE1}%, while assembly takes {CT2} seconds with an OEE rate of {OEE2}%. "
    "To reduce total assembly time by 4 seconds, we need to identify which section can be shortened with minimal defect increase. "
    "It's important to note that reducing cycle time will also lead to an increase in headcount costs."
    "There are two options: reduce pre-assembly time (0) or reduce assembly time (1).\nQ: Which section do you choose to optimize? A: "
)

# Build exactly one prompt per task, using the parameters found on the task's
# first row (same rule as before). Groups are visited in order of first
# appearance (sort=False), so the printed task ids keep their original order.
prompt_by_task = {}
for task, df_task in df.groupby('task', sort=False):
    print(task)
    prompt_by_task[task] = question_template.format(
        OEE1=df_task['OEE1'].iloc[0],
        OEE2=df_task['OEE2'].iloc[0],
        CT1=df_task['CT1'].iloc[0],
        CT2=df_task['CT2'].iloc[0],
    )

# Look the prompt up per row by task id. The previous version appended prompts
# task-by-task and assigned the list positionally, which only lines up with the
# rows when the frame is already sorted/contiguous by task.
# Rows whose task id has no prompt (e.g. a missing task id) get the placeholder.
text = (
    df['task']
    .map(prompt_by_task)
    .fillna("Data not available for this task.")
    .tolist()
)
assert len(text) == len(df), "every row must receive exactly one prompt"

# Add the generated text as a new column in the DataFrame
df['text'] = text


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31


In [20]:
# Convert the prepared DataFrame (including the `text` prompt column) into a
# `datasets.Dataset`; the bare `dat` on the last line displays its summary.
dat = Dataset.from_pandas(df)
dat

Dataset({
    features: ['task', 'participant', 'trial', 'decision_type', 'choice', 'OEE1', 'OEE2', 'CT1', 'CT2', 'multiclass_target', 'text'],
    num_rows: 2012
})

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the GPTQ-quantized Llama-2 checkpoint and its matching fast tokenizer.
model_ckpt = 'TheBloke/LLama-2-7B-GPTQ'

# device_map="auto" leaves weight placement to the library; the checkpoint's
# "main" revision is requested explicitly.
model = AutoModelForCausalLM.from_pretrained(model_ckpt, revision="main", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_ckpt, use_fast=True)

# Dump the resolved configuration (architecture + quantization settings) for the record.
print(model.config.to_json_string())

Some weights of the model checkpoint at TheBloke/LLama-2-7B-GPTQ were not used when initializing LlamaForCausalLM: ['model.layers.0.mlp.down_proj.bias', 'model.layers.0.mlp.gate_proj.bias', 'model.layers.0.mlp.up_proj.bias', 'model.layers.0.self_attn.k_proj.bias', 'model.layers.0.self_attn.o_proj.bias', 'model.layers.0.self_attn.q_proj.bias', 'model.layers.0.self_attn.v_proj.bias', 'model.layers.1.mlp.down_proj.bias', 'model.layers.1.mlp.gate_proj.bias', 'model.layers.1.mlp.up_proj.bias', 'model.layers.1.self_attn.k_proj.bias', 'model.layers.1.self_attn.o_proj.bias', 'model.layers.1.self_attn.q_proj.bias', 'model.layers.1.self_attn.v_proj.bias', 'model.layers.10.mlp.down_proj.bias', 'model.layers.10.mlp.gate_proj.bias', 'model.layers.10.mlp.up_proj.bias', 'model.layers.10.self_attn.k_proj.bias', 'model.layers.10.self_attn.o_proj.bias', 'model.layers.10.self_attn.q_proj.bias', 'model.layers.10.self_attn.v_proj.bias', 'model.layers.11.mlp.down_proj.bias', 'model.layers.11.mlp.gate_proj.b

{
  "_name_or_path": "TheBloke/LLama-2-7B-GPTQ",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_length": 4096,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "quantization_config": {
    "batch_size": 1,
    "bits": 4,
    "block_name_to_quantize": null,
    "cache_block_outputs": true,
    "damp_percent": 0.01,
    "dataset": null,
    "desc_act": false,
    "exllama_config": {
      "version": 1
    },
    "group_size": 128,
    "max_input_length": null,
    "model_seqlen": null,
    "module_name_preceding_first_block": null,
    "modules_in_block_to_quantize": null,
    "pad_token_id": null,
    "quant_method": "gptq",
    "sym": tru

In [22]:
# Reuse the EOS token for padding so that `padding=True` can be used below.
# NOTE(review): assumes the tokenizer defines no pad token of its own - confirm.
tokenizer.pad_token = tokenizer.eos_token


def batch_tokenizer(batch):
    """Tokenize the `text` field of a batch, padding to the longest item in the batch.

    `truncation=True` was dropped: no `max_length` was supplied and the tokenizer
    has no predefined maximum, so it was ignored (with a warning) and nothing
    was ever truncated. Token outputs are unchanged.
    """
    return tokenizer(batch['text'], padding=True)


#  Tokenizing the dataset
dat = dat.map(batch_tokenizer, batched=True)


Map:   0%|          | 0/2012 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [24]:
# `torch` is first used in this cell, so import it here; otherwise a fresh
# kernel (Restart & Run All) fails with NameError before reaching the later import.
import torch

# Select the compute device: CUDA GPU if present, else Apple MPS, else CPU.
# (This cell only chooses the device; nothing is loaded or moved here.)
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

device

device(type='cuda')

In [None]:
import torch  # used in TextDataset.__getitem__; imported here so the cell does not rely on a later cell
from torch.utils.data import DataLoader
# Aliased on purpose: importing it as `Dataset` would shadow `datasets.Dataset`
# (imported at the top of the notebook) and break re-running `Dataset.from_pandas`.
from torch.utils.data import Dataset as TorchDataset

# Evaluated on identical data to llm-actr, ensure no data leakage
# NOTE: this rebinds `dat` from the full dataset to its train/test split, so the
# cell is not re-runnable on its own - rebuild `dat` first if you need to re-run it.
dat = dat.train_test_split(test_size=0.2, seed=42)
train_texts = dat['train']['text']
train_labels = dat['train']['choice']
test_texts = dat['test']['text']
test_labels = dat['test']['choice']


class TextDataset(TorchDataset):
    """Map-style dataset that tokenizes one prompt at a time.

    Args:
        texts: sequence of prompt strings.
        labels: sequence of integer class labels, aligned with `texts`.
        tokenizer: tokenizer callable used to encode each prompt.
        max_length: every example is padded/truncated to exactly this many tokens.
    """

    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return `input_ids`, `attention_mask` (1-D tensors) and `labels` for item `idx`."""
        text = self.texts[idx]
        label = self.labels[idx]
        # Call the tokenizer directly; `encode_plus` is deprecated in favour of `__call__`.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        return {
            # return_tensors='pt' yields a leading batch axis of size 1; flatten drops it.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }


# NOTE(review): the prompt is a long paragraph; confirm it fits in 128 tokens,
# otherwise truncation=True cuts off the trailing "Q: ... A: " part.
dataset = TextDataset(test_texts, test_labels, tokenizer, max_length=128)
# Evaluation loader: fixed order (no shuffling), 16 prompts per batch.
data_loader = DataLoader(dataset, batch_size=16, shuffle=False)


In [26]:
import torch
import torch.nn as nn

class PretrainedLlaMa(nn.Module):
    """Causal-LM backbone used as a frozen feature extractor plus a linear classifier.

    The backbone is run under `torch.no_grad()`, so only `classifier` can receive
    gradients. The feature for each sequence is the final-layer hidden state of
    its last *attended* token.

    Args:
        base_model: model exposing `config.hidden_size` and returning an object
            with `hidden_states` when called with `output_hidden_states=True`.
        num_classes: number of output classes of the linear head.
    """

    def __init__(self, base_model, num_classes=2):
        super(PretrainedLlaMa, self).__init__()
        self.base_model = base_model
        self.classifier = nn.Linear(base_model.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask=None):
        """Return class logits of shape (batch, num_classes).

        Args:
            input_ids: integer token ids, indexed as (batch, sequence).
            attention_mask: optional 0/1 mask aligned with `input_ids`. When
                omitted, every position is treated as a real token.
        """
        if attention_mask is None:
            # The default used to crash on `None.to(...)`; treat "no mask" as "no padding".
            attention_mask = torch.ones_like(input_ids)
        else:
            attention_mask = attention_mask.to(input_ids.device)

        with torch.no_grad():
            outputs = self.base_model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                output_hidden_states=True
            )
            last_hidden_state = outputs.hidden_states[-1]

        # Index of the last position with mask == 1 in each row. Multiplying the
        # mask by the position index and taking argmax works for both left- and
        # right-padded batches; taking position -1 unconditionally would read a
        # padding token whenever the batch is right-padded.
        positions = torch.arange(attention_mask.shape[1], device=attention_mask.device)
        last_token_idx = (attention_mask.long() * positions).argmax(dim=1)
        last_token_idx = last_token_idx.to(last_hidden_state.device)
        batch_idx = torch.arange(last_hidden_state.shape[0], device=last_hidden_state.device)

        features = last_hidden_state[batch_idx, last_token_idx]
        # Cast to float32 before the head, whatever dtype the backbone produced.
        logits = self.classifier(features.float())

        return logits

# Wrap the loaded causal LM in the classification wrapper, reusing the name `model`.
# NOTE: because `model` is rebound, this line is not idempotent - running it a
# second time would pass the wrapper itself as `base_model`.
model = PretrainedLlaMa(model)

In [27]:
# Move the wrapper (backbone and classifier head) to the selected device;
# as the last expression in the cell, the returned module is displayed.
model.to (device)

PretrainedLlaMa(
  (base_model): LlamaForCausalLM(
    (model): LlamaModel(
      (embed_tokens): Embedding(32000, 4096, padding_idx=0)
      (layers): ModuleList(
        (0-31): 32 x LlamaDecoderLayer(
          (self_attn): LlamaSdpaAttention(
            (rotary_emb): LlamaRotaryEmbedding()
            (k_proj): QuantLinear()
            (o_proj): QuantLinear()
            (q_proj): QuantLinear()
            (v_proj): QuantLinear()
          )
          (mlp): LlamaMLP(
            (act_fn): SiLU()
            (down_proj): QuantLinear()
            (gate_proj): QuantLinear()
            (up_proj): QuantLinear()
          )
          (input_layernorm): LlamaRMSNorm()
          (post_attention_layernorm): LlamaRMSNorm()
        )
      )
      (norm): LlamaRMSNorm()
    )
    (lm_head): Linear(in_features=4096, out_features=32000, bias=False)
  )
  (classifier): Linear(in_features=4096, out_features=2, bias=True)
)

In [None]:
#random performance
# Baseline: the classifier head has not been trained at this point, so this
# measures the untrained wrapper on the held-out test loader.
import numpy as np
import torch.nn as nn

model.eval()
all_preds, all_labels, all_logits = [], [], []

with torch.no_grad():
    for batch in data_loader:
        # Move the batch tensors onto the model's device.
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        logits = model(input_ids=input_ids, attention_mask=attention_mask)

        # Keep results on the CPU so they can be concatenated after the loop.
        all_logits.append(logits.cpu())
        all_preds.append(logits.argmax(dim=1).cpu())
        all_labels.append(labels.cpu())

# Stitch the per-batch tensors together along the batch axis
all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)
all_logits = torch.cat(all_logits)

# Mean negative log-likelihood of the true labels under the softmax of the logits
criterion = nn.NLLLoss()
log_probs = all_logits.log_softmax(dim=-1)
nll_loss = criterion(log_probs, all_labels).item()

print(f"NLL: {nll_loss:.4f}")

NLL: 0.9039
