# Download and import libraries

In [1]:
!pip install transformers datasets accelerate huggingface_hub peft bitsandbytes

Collecting datasets
  Downloading datasets-3.0.1-py3-none-any.whl.metadata (20 kB)
Collecting peft
  Downloading peft-0.13.0-py3-none-any.whl.metadata (13 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.1-py3-none-any.whl (471 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.6/471.6 kB[0m [31m8.9 MB/s[0m eta

In [2]:
import torch
import bitsandbytes as bnb
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from tqdm.auto import tqdm
from huggingface_hub import login
from torch.nn.utils import clip_grad_norm_

# Login to Hugging Face

In [3]:
# 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
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Load and split the NIFTY datset

In [4]:
dataset = load_dataset("raeidsaqur/NIFTY")
train_dataset = dataset['train']
val_dataset = dataset['valid']
test_dataset = dataset['test']

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/5.35k [00:00<?, ?B/s]

train.jsonl:   0%|          | 0.00/40.0M [00:00<?, ?B/s]

test.jsonl:   0%|          | 0.00/1.47M [00:00<?, ?B/s]

valid.jsonl:   0%|          | 0.00/2.52M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1477 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/317 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/317 [00:00<?, ? examples/s]

# Initialize the Llama 3.1-8B Model and Tokenizer

In [5]:
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
)

model_name = "meta-llama/Llama-3.1-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map='auto',
)

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Format prompts

In [53]:
def format_prompt(example):
    prompt = next(
        convo['value'] for convo in example['conversations'] if convo['role'] == 'user'
    )

    return prompt

train_prompts = []
train_labels = []

for example in train_dataset:
    prompt = format_prompt(example)
    train_prompts.append(prompt)

    response = example['label']
    train_labels.append(response.strip())

In [54]:
input_sequences = []
label_sequences = []

for prompt, answer in zip(train_prompts, train_labels):
    input_text = prompt + ' ' + answer
    input_sequences.append(input_text)

    prompt_ids = tokenizer.encode(prompt, add_special_tokens=False)
    answer_ids = tokenizer.encode(answer, add_special_tokens=False)

    labels = [-100] * len(prompt_ids) + answer_ids

    label_sequences.append(labels)

# Prepare and tokenize prompts and labels

In [55]:
max_length = 512

inputs = tokenizer(
    input_sequences,
    max_length=max_length,
    truncation=True,
    padding='max_length',
    return_tensors='pt',
)

def pad_labels(labels, max_length, pad_token_id=-100):
    labels = labels[:max_length]
    labels += [pad_token_id] * (max_length - len(labels))
    return labels

labels = [
    pad_labels(label_seq, max_length=max_length, pad_token_id=-100)
    for label_seq in label_sequences
]

labels = torch.tensor(labels)

inputs['labels'] = labels

inputs = {k: v.to(device) for k, v in inputs.items()}

# Set up training

In [56]:
model.train()

optimizer = bnb.optim.AdamW8bit(model.parameters(), lr=1e-6)

# Training loop

In [62]:
# Training loop parameters
num_epochs = 1
batch_size = 1
num_samples = inputs['input_ids'].size(0)

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    for i in tqdm(range(0, num_samples, batch_size)):
        # Prepare batch
        input_ids = inputs['input_ids'][i:i+batch_size]
        attention_mask = inputs['attention_mask'][i:i+batch_size]
        labels = inputs['labels'][i:i+batch_size]

        optimizer.zero_grad()

        # Forward pass
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Backward pass
        loss.backward()
        clip_grad_norm_(model.parameters(), max_norm=0.5)
        optimizer.step()

        if (i // batch_size) % 100 == 0:
            print(f"Step {i // batch_size}, Loss: {loss.item():.4f}")

            with torch.no_grad():
                generated_ids = model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=3,
                    do_sample=False,
                )

                generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0].strip().lower()

                if "answer:" in generated_text:
                  generated_text = generated_text.split("answer:")[1].strip().split()[0]

                if "rise" in generated_text or "buy" in generated_text:
                    generated_text = "Rise"
                elif "fall" in generated_text or "sell" in generated_text:
                    generated_text = "Fall"
                elif "neutral" in generated_text or "hold" in generated_text:
                    generated_text = "Neutral"


                print("\n--- Example ---")
                print(f"Prompt:\n{train_prompts[i]}")
                print(f"Model's Response:\n{generated_text}")
                print(f"Ground Truth:\n{train_labels[i]}")
                print("--- End of Example ---\n")


Epoch 1/1


  0%|          | 0/1477 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Step 0, Loss: 8.3757

--- Example ---
Prompt:
Project the $SPY index's movement by assessing market data and news headlines from 2010-01-06. Offer a reply as 'Rise', 'Fall', or 'Neutral', along with the predicted percentage change in a newline. Conclude with 'Neutral' if the envisaged modification is under 0.50%.
Context:date,open,high,low,close,adj_close,volume,pct_change,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
2010-01-05,113.26,113.68,112.85,113.63,87.7148,111579900.0,0.0026,0.8299,113.771,109.333,59.6988,177.9755,10.0699,111.237,109.658

China Officials Likely Knew of Bad Milk
Sony's CEO on Strategy in 3-D Technology
Copper Settles at 16-Month High
Gold Ends Near 3-Week High
Kraft Gets Antitrust Clearance
European Stocks Close Flat
M&S Sales Miss Expectations
Yen Boosts Japan's Exporters
Future Group: Value Retail Unit May Consider IPO
Dollar, Yen Gain on Haven Plays
Clothing Sales Sagged in December
Luxury Logos Draw Asian Shoppers
Mexican Stocks Have Rec

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Step 100, Loss: nan

--- Example ---
Prompt:
Assess market data and news headlines from 2010-07-08 to predict the $SPY index's direction. Offer a reply as 'Rise', 'Fall', or 'Neutral', along with the forecasted percentage change in a newline. Finalize with 'Neutral' if the expected shift is less than 0.50%.

Context:date,open,high,low,close,adj_close,volume,pct_change,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
2010-06-28,108.03,108.32,107.14,107.53,83.7443,169218600.0,-0.0032,-0.8943,113.3889,104.8861,43.7429,-64.4041,28.7534,109.3447,113.9583
2010-06-29,106.02,107.51,103.55,104.21,81.1587,373649500.0,-0.0309,-1.2229,113.7177,104.2253,39.9049,-153.5261,43.0789,109.02,113.7158
2010-06-30,103.92,104.88,102.88,103.22,80.3877,284101700.0,-0.0095,-1.5454,113.9611,103.2709,38.8533,-194.1781,45.2741,108.714,113.4522
2010-07-01,103.15,103.49,101.13,102.76,80.0295,382924800.0,-0.0045,-1.8172,114.0666,102.3704,38.3673,-219.2232,50.5942,108.414,113.1922
2010-07-02,103.11,1

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Step 200, Loss: nan

--- Example ---
Prompt:
Analyze market data and news headlines dated 2011-01-06 to forecast the direction of the $SPY index. Deliver a response as 'Fall', 'Neutral', or 'Rise', along with the anticipated percentage change in a newline. Conclude with 'Neutral' if the projected change is below 0.50%.

Context:date,open,high,low,close,adj_close,volume,pct_change,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
2010-12-27,125.13,125.77,125.04,125.65,98.8975,58126000.0,0.0004,1.4599,127.5787,119.4903,63.6983,97.1141,29.8281,122.173,120.2935
2010-12-28,125.9,125.95,125.5,125.83,99.0392,55309100.0,0.0014,1.4479,127.4691,120.2669,64.0376,98.1167,31.4966,122.3607,120.4805
2010-12-29,125.98,126.2,125.9,125.92,99.11,58033100.0,0.0007,1.4292,126.9198,121.5592,64.2107,98.8456,33.8323,122.557,120.6833
2010-12-30,125.8,126.13,125.53,125.72,98.9526,76616900.0,-0.0016,1.3824,126.7589,122.1911,63.5082,89.2655,26.9824,122.809,120.8447
2010-12-31,125.53,125.87,125.33

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Step 300, Loss: nan

--- Example ---
Prompt:
Project the $SPY index's movement by assessing market data and news headlines from 2011-07-11. Conclude with 'Neutral' if the envisaged modification is under 0.50%. Offer a reply as 'Neutral', 'Rise', or 'Fall', along with the predicted percentage change in a newline.

Context:date,open,high,low,close,adj_close,volume,pct_change,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
2011-07-01,132.09,134.1,131.78,133.92,106.3894,202385700.0,0.0148,-0.081,132.3739,125.3161,55.5274,104.5947,17.3925,130.078,131.9507
2011-07-05,133.78,134.08,133.39,133.81,106.302,165936000.0,-0.0008,0.2259,133.2543,124.9127,55.3101,121.1746,17.3925,130.0847,131.9588
2011-07-06,133.49,134.14,133.11,133.97,106.4292,143331600.0,0.0012,0.4766,134.0409,124.6271,55.5718,115.2786,14.8153,130.1483,131.9773
2011-07-07,135.16,135.7,134.88,135.36,107.5334,170464200.0,0.0104,0.7785,135.077,124.285,57.7926,153.238,24.6653,130.262,132.0257
2011-07-08,133.83,135.36

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Step 400, Loss: nan

--- Example ---
Prompt:
To predict the $SPY index's direction, analyze market data and news headlines from 2012-01-10. If the expected alteration is less than 0.50%, end with 'Neutral'. Offer a reply as 'Rise', 'Neutral', or 'Fall', coupled with the forecasted percentage change in a newline.

Context:date,open,high,low,close,adj_close,volume,pct_change,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
2012-01-03,127.76,128.38,127.43,127.5,102.459,193697900.0,0.0159,0.8613,128.6261,120.6209,54.716,122.8262,13.7814,123.3593,123.4745
2012-01-04,127.2,127.81,126.71,127.7,102.6197,127186500.0,0.0016,0.9969,128.8753,120.5197,54.9231,105.2818,8.3875,123.55,123.6743
2012-01-05,127.01,128.23,126.43,128.04,102.8929,173895000.0,0.0027,1.119,129.175,120.398,55.2828,104.8022,11.0179,123.8293,123.8153
2012-01-06,128.2,128.22,127.29,127.71,102.6277,148050000.0,-0.0026,1.1755,129.3358,120.3352,54.8434,105.2071,11.0179,124.1133,123.9488
2012-01-09,128.0,128.18,127.

KeyboardInterrupt: 

# Save the model and tokenizer

In [None]:
model.save_pretrained('llama-3.1-8b-finetuned')
tokenizer.save_pretrained('llama-3.1-8b-finetuned')