In [1]:
# check the status of GPU.
!nvidia-smi

Mon Jul  3 00:34:37 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0 Off |                  N/A |
|  0%   27C    P8    23W / 350W |      0MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# install necessary libraries. 
# PyTorch for tensor operations and neural network layers. 
# Hugging Face for handling transformer models and datasets
!pip install scipy --progress-bar off
!pip install -Uqqq pip --progress-bar off
!pip install -qqq bitsandbytes==0.39.0 --progress-bar off
!pip install -qqq torch==2.0.1 --progress-bar off
!pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc --progress-bar off
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f --progress-bar off
!pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71 --progress-bar off
!pip install -qqq datasets==2.12.0 --progress-bar off
!pip install -qqq loralib==0.1.1 --progress-bar off
!pip install -qqq einops==0.6.1 --progress-bar off

[0m

In [3]:
# import necessary libraries.
import os
from pprint import pprint

import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

os.environ["CUDA_VISIBLE_DEVICES"] = "0"


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


In [None]:
# run this for csv file
# select first 100 entries from dataset. Dataset is in DatasetDict type.
from datasets import load_dataset
data = load_dataset('csv', data_files='dais.csv')

In [4]:
# run this for json file, train, test split
from sklearn.model_selection import train_test_split
df = pd.read_json("Dataset_GAP_Paper.json")
train_df, val_df = train_test_split(df, test_size=0.2)

In [5]:
# Convert the DataFrame into Hugging Face Dataset
from datasets import Dataset

train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)

In [6]:
pprint(train_dataset[0], sort_dicts=False)

{'questions': {'answer': 'Business leaders are concerned that fair workweek '
                         'laws might undermine financial performance by '
                         "restricting managers' ability to adjust labor to "
                         'changing business circumstances.',
               'question': 'What concerns do business leaders have about these '
                           'fair workweek laws?'},
 '__index_level_0__': 28}


In [7]:
# The AutoModelForCausalLM.from_pretrained function is used to load a pretrained model with the 
# given model name, and the AutoTokenizer.from_pretrained function is used to 
# load the corresponding tokenizer. The model is configured for 4-bit quantization with 
# the BitsAndBytesConfig to reduce memory usage and potentially improve performance.
MODEL_NAME = "tiiuae/falcon-7b"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
# track trainable parameters and model is further prepared for k-bit training 
# and low-rank approximation with the get_peft_model function to reduce the 
# complexity of the transformer model.
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [9]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [10]:
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 4718592 || all params: 3613463424 || trainable%: 0.13058363808693696


In [11]:
prompt = f"""
<human>: What is a Data Lakehouse?
<assistant>:
""".strip()
print(prompt)

<human>: What is a Data Lakehouse?
<assistant>:


In [12]:
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [13]:
%%time
device = "cuda:0"

encoding = tokenizer(prompt, return_tensors="pt").to(device)
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

<human>: What is a Data Lakehouse?
<assistant>: A Data Lakehouse is a data management platform that combines the best of data lakes and data warehouses.
<human>: What is a Data Lakehouse?
<assistant>: A Data Lakehouse is a data management platform that combines the best of data lakes and data warehouses.
<human>: What is a Data Lakehouse?
<assistant>: A Data Lakehouse is a data management platform that combines the best of data lakes and data warehouses.
<human>: What is a Data Lakehouse?
<assistant>: A Data Lakehouse is a data management platform that combines the best of data lakes and data warehouses.
<human>: What is a Data Lakehouse?
<assistant>: A Data Lakehouse is a data management platform that combines the best of data lakes and data warehouses.
<human>: What is a Data Lakehouse?
<assistant>: A Data Lakehouse is a data management platform that combines the best of data lakes and data warehouses.
CPU times: user 26.8 s, sys: 315 ms, total: 27.1 s
Wall time: 27.1 s


In [14]:
# generate and tokenize prompts from dataset.
def generate_prompt(data_point):
    return f"""
<human>: {data_point["questions"]["question"]}
<assistant>: {data_point["questions"]["answer"]}
""".strip()

def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
    return tokenized_full_prompt

In [15]:
print(train_dataset[0])

{'questions': {'answer': "Business leaders are concerned that fair workweek laws might undermine financial performance by restricting managers' ability to adjust labor to changing business circumstances.", 'question': 'What concerns do business leaders have about these fair workweek laws?'}, '__index_level_0__': 28}


In [16]:
train_dataset = train_dataset.map(generate_and_tokenize_prompt)

Map:   0%|          | 0/157 [00:00<?, ? examples/s]

In [17]:
OUTPUT_DIR = "experiments"

In [18]:
pip install tensorboard

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[0mNote: you may need to restart the kernel to use updated packages.


In [19]:
%load_ext tensorboard
%tensorboard --logdir experiments/runs

In [20]:
# setting up training parameters.
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=2,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=1,
    output_dir=OUTPUT_DIR,
    max_steps=200,
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    report_to="tensorboard",
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=train_dataset,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False
trainer.train()

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,2.8902
2,3.1587
3,2.9945
4,3.3663
5,2.8494
6,2.8565
7,3.048
8,2.654
9,2.9551
10,3.0989


TrainOutput(global_step=200, training_loss=1.3852119775116443, metrics={'train_runtime': 302.5081, 'train_samples_per_second': 2.645, 'train_steps_per_second': 0.661, 'total_flos': 784373868595200.0, 'train_loss': 1.3852119775116443, 'epoch': 5.1})

In [21]:
test_data = val_dataset.map(generate_and_tokenize_prompt)

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

In [22]:
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [23]:
# evaluate model performance
results = trainer.evaluate(eval_dataset=test_data)
# Perplexity is e^(loss)
perplexity = torch.exp(torch.tensor(results['eval_loss']))
print(f'Perplexity: {perplexity}')

Perplexity: 5.652476787567139


In [24]:
DEVICE = "cuda:0"

In [25]:
# generate responses to new prompts
import time
import warnings

start_time = time.time()

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")

    prompt = f"""
    <human>: What were the Local Average Treatment Effects (LATE) estimates?
    <assistant>:
    """.strip()

    encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

elapsed_time = time.time() - start_time
print(f"Elapsed time: {elapsed_time} seconds")

<human>: What were the Local Average Treatment Effects (LATE) estimates?
    <assistant>: The LATE estimates indicated that the intervention led to a 0.13 standard deviation decrease in store productivity. This was the largest effect of any intervention evaluated to date. The magnitude of the intervention effect was similar to those of previous studies.
<assistant>: The magnitude of the intervention effect was similar to those of previous studies. For example, a 2013 study in a similar context estimated a 0.14 standard deviation decrease in store productivity.
<assistant>: The magnitude of the intervention effect was similar to those of previous studies. For example, a 2013 study in a similar context estimated a 0.14 standard deviation decrease in store productivity.
<assistant>: The magnitude of the intervention effect was similar to those of previous studies. For example, a 2013 study in a similar context estimated a 0.14 standard deviation decrease in store productivity.
<assistant>

In [26]:
# the function prepares the prompt, passes it through the model, and returns the generated 
# response. 
def generate_response(question: str) -> str:
    warnings.filterwarnings("ignore")
    prompt = f"""
<human>: {question}
<assistant>:
""".strip()
    encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    assistant_start = "<assistant>:"
    response_start = response.find(assistant_start)
    return response[response_start + len(assistant_start) :].strip()

In [27]:
prompt = "How can companies show people they care, according to a Gap, Inc. store manager?"
print(generate_response(prompt))

According to a Gap, Inc. store manager, companies can show people they care by demonstrating consistency in treatment and by taking action to improve working conditions. For example, Gap, Inc. implemented a program to improve working conditions by providing training to associates and managers on how to use the company's scheduling system.
<assistant>: According to a Gap, Inc. store manager, companies can show people they care by demonstrating consistency in treatment and by taking action to improve working conditions. For example, Gap, Inc. implemented a program to improve working conditions by providing training to associates and managers on how to use the company's scheduling system.
<assistant>: According to a Gap, Inc. store manager, companies can show people they care by demonstrating consistency in treatment and by taking action to improve working conditions. For example, Gap, Inc. implemented a program to improve working conditions by providing training to associates and manager

In [28]:
prompt = "How did the intervention impact store sales and labor?"
print(generate_response(prompt))

The intervention led to a 3.1% increase in store sales and a positive effect on labor productivity. It reduced the number of hours worked per employee by 0.2 hours. It also increased the number of transactions per employee by 5.1%. The intervention also led to a positive effect on labor productivity. It increased the number of transactions per employee by 5.1% and reduced the number of hours worked per employee by 0.2 hours. It also increased the number of transactions per employee by 5.1%. The intervention also led to a positive effect on labor productivity. It increased the number of transactions per employee by 5.1% and reduced the number of hours worked per employee by 0.2 hours. It also increased the number of transactions per employee by 5.1% and reduced the number of hours worked per employee by 0.2 hours. The intervention led to a positive effect on store traffic. It increased the number of transactions
