# Necessary Installs and Imports

In [1]:
pip install transformers torch onnx openvino-dev[onnx] datasets accelerate openvino peft bitsandbytes trl -U

Collecting transformers
  Downloading transformers-4.42.4-py3-none-any.whl (9.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
Collecting torch
  Downloading torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl (779.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.1/779.1 MB[0m [31m956.0 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting onnx
  Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openvino-dev[onnx]
  Downloading openvino_dev-2024.2.0-15519-py3-none-any.whl (4.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [1]:
!pip show transformers
!pip show accelerate

Name: transformers
Version: 4.44.0
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: e:\coding material\python pr\research\gpt-neo\.venv\lib\site-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
Required-by: peft, trl
Name: accelerate
Version: 0.33.0
Summary: Accelerate
Home-page: https://github.com/huggingface/accelerate
Author: The HuggingFace team
Author-email: zach.mueller@huggingface.co
License: Apache
Location: e:\coding material\python pr\research\gpt-neo\.venv\lib\site-packages
Requires: huggingface-hub, numpy, packaging, psutil, pyyaml, safetensors, torch
Required-by: peft, trl


In [3]:
!pip uninstall -y transformers accelerate
!pip install transformers[torch] accelerate

Found existing installation: transformers 4.42.4
Uninstalling transformers-4.42.4:
  Successfully uninstalled transformers-4.42.4
Found existing installation: accelerate 0.32.1
Uninstalling accelerate-0.32.1:
  Successfully uninstalled accelerate-0.32.1
Collecting transformers[torch]
  Using cached transformers-4.42.4-py3-none-any.whl (9.3 MB)
Collecting accelerate
  Using cached accelerate-0.32.1-py3-none-any.whl (314 kB)
Installing collected packages: transformers, accelerate
Successfully installed accelerate-0.32.1 transformers-4.42.4


In [1]:
import pandas as pd
import torch
from datasets import Dataset
from random import randrange
#from peft import Loraconfig, get_peft_model, AutoPeftModelForCausalLM
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


# Load and Save Pre-trained Model and Tokenizer

In [2]:
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM

# Hub id of the checkpoint to fetch. Other ids noted by the author:
# TinyLlama/TinyLlama-1.1B-Chat-v1.0, EleutherAI/gpt-neo-125m, Qwen/Qwen2-0.5B-Instruct
base_model_id = "EleutherAI/gpt-neo-125m"
# Local folder that the later cells load the base model from.
local_model_dir = "./my_gpt_model"

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model = AutoModelForCausalLM.from_pretrained(base_model_id)

# Save weights first, then the tokenizer; the tokenizer call stays last so the
# cell still displays the tuple of written tokenizer files.
model.save_pretrained(local_model_dir)
tokenizer.save_pretrained(local_model_dir)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


('./my_gpt_model\\tokenizer_config.json',
 './my_gpt_model\\special_tokens_map.json',
 './my_gpt_model\\vocab.json',
 './my_gpt_model\\merges.txt',
 './my_gpt_model\\added_tokens.json',
 './my_gpt_model\\tokenizer.json')

# Convert PyTorch Model to ONNX Format

In [3]:
import torch

# Reload the locally saved base checkpoint and put the model in inference mode.
local_model_dir = "./my_gpt_model"
tokenizer = AutoTokenizer.from_pretrained(local_model_dir)
model = AutoModelForCausalLM.from_pretrained(local_model_dir)
model = model.eval()  # eval() returns the same module, now with training mode off

class ORTModelForOnnxExport(torch.nn.Module):
    """Thin wrapper that makes the wrapped model return a single tensor.

    ``forward`` calls the wrapped model with ``return_dict=False`` and returns
    only the first element of the resulting tuple.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, input_ids, attention_mask):
        """Run the wrapped model and return element 0 of its tuple output."""
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False,
        )
        return outputs[0]

ort_model = ORTModelForOnnxExport(model)

# Example input used only to trace the graph during export.
dummy_input = tokenizer("Hello, world!", return_tensors="pt")

# Only the batch and sequence axes are declared dynamic. The last output axis
# is the vocabulary size, which does not change between calls, so it stays
# static instead of being exported as a symbolic dimension.
dynamic_axes = {
    'input_ids': {0: 'batch_size', 1: 'sequence'},
    'attention_mask': {0: 'batch_size', 1: 'sequence'},
    'output': {0: 'batch_size', 1: 'sequence'}
}

torch.onnx.export(ort_model,
                  (dummy_input.input_ids, dummy_input.attention_mask),
                  "gptneo_model.onnx",
                  input_names=['input_ids', 'attention_mask'],
                  output_names=['output'],
                  dynamic_axes=dynamic_axes,
                  opset_version=11,
                  do_constant_folding=True)

print("Model converted to ONNX format successfully.")

  if (input_shape[-1] > 1 or self.sliding_window is not None) and self.is_causal:
  if past_key_values_length > 0:
  mask_value = torch.tensor(mask_value, dtype=attn_weights.dtype).to(attn_weights.device)


Model converted to ONNX format successfully.


# Convert ONNX Model to OpenVINO IR Format

In [4]:
import openvino as ov
import os

# Give a shape for *both* model inputs and keep the sequence axis dynamic (-1).
# The previous single static [1, 4] shape pinned the IR to exactly four tokens,
# discarding the dynamic sequence axis declared in the ONNX export.
ir_model = ov.convert_model(
    "gptneo_model.onnx",
    input=[
        ("input_ids", ov.PartialShape([1, -1])),
        ("attention_mask", ov.PartialShape([1, -1])),
    ],
)

# Writes gptneo_ir_model.xml plus the matching .bin weights file.
ov.save_model(ir_model, "gptneo_ir_model.xml")

print("Model converted to IR format successfully.")

print("XML file exists:", os.path.exists("gptneo_ir_model.xml"))
print("BIN file exists:", os.path.exists("gptneo_ir_model.bin"))

Model converted to IR format successfully.
XML file exists: True
BIN file exists: True


# Preprocessing the Dataset

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the raw CSV and keep only the two columns used below, in this order.
df = pd.read_csv('medquad.csv').loc[:, ['question', 'answer']]

In [3]:
# Only the 10% hold-out (sample_df) is used afterwards, because the whole
# dataset is very big; the fixed random_state keeps the split repeatable.
train_val_df, sample_df = train_test_split(
    df,
    test_size=0.1,
    random_state=42,
)

In [4]:
# Build the single 'text' column as a new frame instead of mutating the split
# result in place (avoids SettingWithCopyWarning and inplace edits).
qa_text = 'Question:\n' + sample_df['question'] + '\n\nAnswer:\n' + sample_df['answer']
sample_df = qa_text.to_frame(name='text')

# Persist the prepared sample: the next cell reads 'pre_dataset.csv', so without
# this write the notebook only works if the file is left over from an earlier run.
sample_df.to_csv('pre_dataset.csv', index=False)

In [5]:
# Load the prepared text file and wrap it as a Hugging Face Dataset.
df = pd.read_csv('pre_dataset.csv')
train = Dataset.from_pandas(df)

# Number of training rows, printed as a quick sanity check.
train_dataset_size = train.num_rows
print(train_dataset_size)

1642


In [6]:
# Display the Dataset summary (features and row count) as the cell output.
train

Dataset({
    features: ['text'],
    num_rows: 1642
})

# Load and Prepare the Model and Tokenizer for Fine-Tuning

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, TrainerCallback
from datasets import load_dataset, DatasetDict
import torch

# Start from the base checkpoint saved locally earlier in the notebook.
model_name = "./my_gpt_model"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the end-of-sequence token as the padding token, in the model config
# and in the tokenizer.
model.config.pad_token_id = model.config.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

train_dataset_size = len(train)

# Per-device batch size, defined once and passed to TrainingArguments below so
# the epoch estimate and the real training configuration cannot drift apart.
# (Previously the estimate used 4 while training ran with a hard-coded 2.)
batch_size = 2

# max(1, ...) guards against a dataset smaller than one batch, which would
# otherwise make the division further down raise ZeroDivisionError.
steps_per_epoch = max(1, train_dataset_size // batch_size)

# Train for at least 3 epochs, or more if needed to reach about target_steps updates.
# NOTE(review): the trainer is created with packing=True, so the real number of
# steps per epoch can differ from this row-count-based estimate.
target_steps = 300
num_epochs = max(3, target_steps // steps_per_epoch)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=num_epochs,
    per_device_train_batch_size=batch_size,
    # NOTE(review): 500 warmup steps is a large share of a short run; confirm it is intended.
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=100,
    save_total_limit=3,
)

class PrinterCallback(TrainerCallback):
    """Trainer callback that prints each log event on the local main process."""

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Print the training loss when present, otherwise the raw log dict."""
        # Stay silent on non-main local processes and when there is nothing to log.
        if not state.is_local_process_zero or logs is None:
            return

        step = state.global_step
        if 'loss' not in logs:
            print(f"Step {step}: Logs: {logs}")
        else:
            print(f"Step {step}: Training Loss: {logs['loss']:.5f}")

# Create the logging hook first so the trainer call stays compact.
loss_printer = PrinterCallback()

# Supervised fine-tuning on the 'text' column, with sample packing enabled.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train,
    dataset_text_field='text',
    packing=True,
    max_seq_length=1042,
    callbacks=[loss_printer],
)

trainer.train()

# Save the fine-tuned model to the folder the later export/inference cells load from.
trainer.save_model("./fine_tuned_gpt_model")


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
  1%|▏         | 10/771 [08:21<10:01:00, 47.39s/it]

Step 10: Training Loss: 2.27120
{'loss': 2.2712, 'grad_norm': 3.3194222450256348, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.04}


  3%|▎         | 20/771 [30:39<28:27:57, 136.45s/it]

Step 20: Training Loss: 2.26500
{'loss': 2.265, 'grad_norm': 4.622042655944824, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.08}


  4%|▍         | 30/771 [38:40<10:28:57, 50.93s/it] 

Step 30: Training Loss: 2.17860
{'loss': 2.1786, 'grad_norm': 3.095930337905884, 'learning_rate': 3e-06, 'epoch': 0.12}


  5%|▌         | 40/771 [47:49<11:51:41, 58.42s/it]

Step 40: Training Loss: 2.19890
{'loss': 2.1989, 'grad_norm': 2.8253071308135986, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.16}


  6%|▋         | 50/771 [54:45<6:01:55, 30.12s/it] 

Step 50: Training Loss: 2.30610
{'loss': 2.3061, 'grad_norm': 3.6538195610046387, 'learning_rate': 5e-06, 'epoch': 0.19}


  8%|▊         | 60/771 [1:04:11<12:15:15, 62.05s/it]

Step 60: Training Loss: 2.22630
{'loss': 2.2263, 'grad_norm': 2.450727701187134, 'learning_rate': 6e-06, 'epoch': 0.23}


  9%|▉         | 70/771 [1:14:21<11:51:51, 60.93s/it]

Step 70: Training Loss: 2.13330
{'loss': 2.1333, 'grad_norm': 2.8213706016540527, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.27}


 10%|█         | 80/771 [1:24:52<12:05:12, 62.97s/it]

Step 80: Training Loss: 2.11290
{'loss': 2.1129, 'grad_norm': 2.7868173122406006, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.31}


 12%|█▏        | 90/771 [1:33:33<8:27:01, 44.67s/it] 

Step 90: Training Loss: 2.10620
{'loss': 2.1062, 'grad_norm': 4.332728862762451, 'learning_rate': 9e-06, 'epoch': 0.35}


 13%|█▎        | 100/771 [1:40:07<7:18:43, 39.23s/it]

Step 100: Training Loss: 2.06880
{'loss': 2.0688, 'grad_norm': 2.4800851345062256, 'learning_rate': 1e-05, 'epoch': 0.39}


 14%|█▍        | 110/771 [1:46:25<6:48:41, 37.10s/it]

Step 110: Training Loss: 1.93290
{'loss': 1.9329, 'grad_norm': 2.3605234622955322, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.43}


 16%|█▌        | 120/771 [1:52:42<6:49:12, 37.72s/it]

Step 120: Training Loss: 2.01940
{'loss': 2.0194, 'grad_norm': 2.5386953353881836, 'learning_rate': 1.2e-05, 'epoch': 0.47}


 17%|█▋        | 130/771 [2:00:02<7:19:23, 41.13s/it]

Step 130: Training Loss: 1.96980
{'loss': 1.9698, 'grad_norm': 2.450562000274658, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.51}


 18%|█▊        | 140/771 [2:06:25<6:50:02, 38.99s/it]

Step 140: Training Loss: 2.02220
{'loss': 2.0222, 'grad_norm': 2.6121182441711426, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.54}


 19%|█▉        | 150/771 [2:12:37<6:35:28, 38.21s/it]

Step 150: Training Loss: 1.86150
{'loss': 1.8615, 'grad_norm': 2.6697869300842285, 'learning_rate': 1.5e-05, 'epoch': 0.58}


 21%|██        | 160/771 [2:18:53<6:25:55, 37.90s/it]

Step 160: Training Loss: 1.91050
{'loss': 1.9105, 'grad_norm': 2.445535659790039, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.62}


 22%|██▏       | 170/771 [2:25:29<6:31:55, 39.13s/it]

Step 170: Training Loss: 1.84800
{'loss': 1.848, 'grad_norm': 2.4512035846710205, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.66}


 23%|██▎       | 180/771 [2:34:34<9:16:01, 56.45s/it]

Step 180: Training Loss: 1.84450
{'loss': 1.8445, 'grad_norm': 2.5028512477874756, 'learning_rate': 1.8e-05, 'epoch': 0.7}


 25%|██▍       | 190/771 [2:43:44<8:56:22, 55.39s/it]

Step 190: Training Loss: 1.68310
{'loss': 1.6831, 'grad_norm': 2.2647852897644043, 'learning_rate': 1.9e-05, 'epoch': 0.74}


 26%|██▌       | 200/771 [2:53:25<9:51:00, 62.10s/it]

Step 200: Training Loss: 1.65660
{'loss': 1.6566, 'grad_norm': 2.062037467956543, 'learning_rate': 2e-05, 'epoch': 0.78}


 27%|██▋       | 210/771 [3:04:56<8:35:09, 55.10s/it] 

Step 210: Training Loss: 1.67400
{'loss': 1.674, 'grad_norm': 2.871417284011841, 'learning_rate': 2.1e-05, 'epoch': 0.82}


 29%|██▊       | 220/771 [3:12:57<7:18:54, 47.79s/it]

Step 220: Training Loss: 1.79650
{'loss': 1.7965, 'grad_norm': 2.6327884197235107, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.86}


 30%|██▉       | 230/771 [3:22:22<9:26:03, 62.78s/it]

Step 230: Training Loss: 1.86140
{'loss': 1.8614, 'grad_norm': 2.7375996112823486, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.89}


 31%|███       | 240/771 [6:20:20<44:33:34, 302.10s/it]  

Step 240: Training Loss: 1.90990
{'loss': 1.9099, 'grad_norm': 2.4168527126312256, 'learning_rate': 2.4e-05, 'epoch': 0.93}


 32%|███▏      | 250/771 [6:28:53<8:22:00, 57.81s/it]  

Step 250: Training Loss: 1.76550
{'loss': 1.7655, 'grad_norm': 2.8996987342834473, 'learning_rate': 2.5e-05, 'epoch': 0.97}


 34%|███▎      | 260/771 [6:39:02<8:10:23, 57.58s/it] 

Step 260: Training Loss: 1.60000
{'loss': 1.6, 'grad_norm': 2.3106701374053955, 'learning_rate': 2.6000000000000002e-05, 'epoch': 1.01}


 35%|███▌      | 270/771 [6:58:51<9:59:21, 71.78s/it]  

Step 270: Training Loss: 1.63020
{'loss': 1.6302, 'grad_norm': 2.3923351764678955, 'learning_rate': 2.7000000000000002e-05, 'epoch': 1.05}


 36%|███▋      | 280/771 [7:17:02<10:40:13, 78.23s/it] 

Step 280: Training Loss: 1.75120
{'loss': 1.7512, 'grad_norm': 2.867401123046875, 'learning_rate': 2.8000000000000003e-05, 'epoch': 1.09}


 38%|███▊      | 290/771 [7:33:46<10:37:19, 79.50s/it] 

Step 290: Training Loss: 1.57410
{'loss': 1.5741, 'grad_norm': 2.47965669631958, 'learning_rate': 2.9e-05, 'epoch': 1.13}


 39%|███▉      | 300/771 [8:43:56<28:51:42, 220.60s/it]  

Step 300: Training Loss: 1.63270
{'loss': 1.6327, 'grad_norm': 2.3813555240631104, 'learning_rate': 3e-05, 'epoch': 1.17}


 40%|████      | 310/771 [8:48:03<3:52:19, 30.24s/it]  

Step 310: Training Loss: 1.57990
{'loss': 1.5799, 'grad_norm': 2.525441884994507, 'learning_rate': 3.1e-05, 'epoch': 1.21}


 42%|████▏     | 320/771 [8:52:15<3:08:28, 25.07s/it]

Step 320: Training Loss: 1.59680
{'loss': 1.5968, 'grad_norm': 2.5783278942108154, 'learning_rate': 3.2000000000000005e-05, 'epoch': 1.25}


 43%|████▎     | 330/771 [8:56:17<2:56:07, 23.96s/it]

Step 330: Training Loss: 1.59090
{'loss': 1.5909, 'grad_norm': 2.370445489883423, 'learning_rate': 3.3e-05, 'epoch': 1.28}


 44%|████▍     | 340/771 [9:00:07<2:45:24, 23.03s/it]

Step 340: Training Loss: 1.63300
{'loss': 1.633, 'grad_norm': 2.7317776679992676, 'learning_rate': 3.4000000000000007e-05, 'epoch': 1.32}


 45%|████▌     | 350/771 [9:03:57<2:40:53, 22.93s/it]

Step 350: Training Loss: 1.74820
{'loss': 1.7482, 'grad_norm': 2.6831376552581787, 'learning_rate': 3.5e-05, 'epoch': 1.36}


 47%|████▋     | 360/771 [9:07:46<2:36:55, 22.91s/it]

Step 360: Training Loss: 1.60910
{'loss': 1.6091, 'grad_norm': 2.373021125793457, 'learning_rate': 3.6e-05, 'epoch': 1.4}


 48%|████▊     | 370/771 [9:11:35<2:33:14, 22.93s/it]

Step 370: Training Loss: 1.71020
{'loss': 1.7102, 'grad_norm': 2.6271188259124756, 'learning_rate': 3.7e-05, 'epoch': 1.44}


 49%|████▉     | 380/771 [9:15:23<2:28:22, 22.77s/it]

Step 380: Training Loss: 1.40290
{'loss': 1.4029, 'grad_norm': 2.2700958251953125, 'learning_rate': 3.8e-05, 'epoch': 1.48}


 51%|█████     | 390/771 [9:19:12<2:25:10, 22.86s/it]

Step 390: Training Loss: 1.56750
{'loss': 1.5675, 'grad_norm': 2.641772985458374, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.52}


 52%|█████▏    | 400/771 [9:22:59<2:21:07, 22.82s/it]

Step 400: Training Loss: 1.45670
{'loss': 1.4567, 'grad_norm': 2.5417110919952393, 'learning_rate': 4e-05, 'epoch': 1.56}


 53%|█████▎    | 410/771 [9:26:50<2:18:24, 23.00s/it]

Step 410: Training Loss: 1.51170
{'loss': 1.5117, 'grad_norm': 2.533494472503662, 'learning_rate': 4.1e-05, 'epoch': 1.6}


 54%|█████▍    | 420/771 [9:30:37<2:12:26, 22.64s/it]

Step 420: Training Loss: 1.55680
{'loss': 1.5568, 'grad_norm': 2.7900168895721436, 'learning_rate': 4.2e-05, 'epoch': 1.63}


 56%|█████▌    | 430/771 [9:34:26<2:09:50, 22.85s/it]

Step 430: Training Loss: 1.57200
{'loss': 1.572, 'grad_norm': 2.5677456855773926, 'learning_rate': 4.3e-05, 'epoch': 1.67}


 57%|█████▋    | 440/771 [9:38:14<2:05:58, 22.84s/it]

Step 440: Training Loss: 1.53630
{'loss': 1.5363, 'grad_norm': 2.37910795211792, 'learning_rate': 4.4000000000000006e-05, 'epoch': 1.71}


 58%|█████▊    | 450/771 [9:42:03<2:02:27, 22.89s/it]

Step 450: Training Loss: 1.59250
{'loss': 1.5925, 'grad_norm': 2.4967598915100098, 'learning_rate': 4.5e-05, 'epoch': 1.75}


 60%|█████▉    | 460/771 [9:45:53<1:58:57, 22.95s/it]

Step 460: Training Loss: 1.47560
{'loss': 1.4756, 'grad_norm': 2.3414230346679688, 'learning_rate': 4.600000000000001e-05, 'epoch': 1.79}


 61%|██████    | 470/771 [9:49:41<1:53:59, 22.72s/it]

Step 470: Training Loss: 1.29610
{'loss': 1.2961, 'grad_norm': 2.8773648738861084, 'learning_rate': 4.7e-05, 'epoch': 1.83}


 62%|██████▏   | 480/771 [9:53:30<1:51:28, 22.98s/it]

Step 480: Training Loss: 1.50510
{'loss': 1.5051, 'grad_norm': 4.4171462059021, 'learning_rate': 4.8e-05, 'epoch': 1.87}


 64%|██████▎   | 490/771 [9:57:19<1:47:40, 22.99s/it]

Step 490: Training Loss: 1.64220
{'loss': 1.6422, 'grad_norm': 2.3806312084198, 'learning_rate': 4.9e-05, 'epoch': 1.91}


 65%|██████▍   | 500/771 [10:01:08<1:43:48, 22.98s/it]

Step 500: Training Loss: 1.63670
{'loss': 1.6367, 'grad_norm': 2.3975987434387207, 'learning_rate': 5e-05, 'epoch': 1.95}


 66%|██████▌   | 510/771 [10:04:56<1:39:22, 22.85s/it]

Step 510: Training Loss: 1.32370
{'loss': 1.3237, 'grad_norm': 2.392613172531128, 'learning_rate': 4.81549815498155e-05, 'epoch': 1.98}


 67%|██████▋   | 520/771 [10:08:45<1:35:52, 22.92s/it]

Step 520: Training Loss: 1.42050
{'loss': 1.4205, 'grad_norm': 1.9604507684707642, 'learning_rate': 4.6309963099631e-05, 'epoch': 2.02}


 69%|██████▊   | 530/771 [10:12:34<1:31:49, 22.86s/it]

Step 530: Training Loss: 1.35340
{'loss': 1.3534, 'grad_norm': 2.726243495941162, 'learning_rate': 4.4464944649446495e-05, 'epoch': 2.06}


 70%|███████   | 540/771 [10:16:23<1:28:06, 22.89s/it]

Step 540: Training Loss: 1.39850
{'loss': 1.3985, 'grad_norm': 2.099479913711548, 'learning_rate': 4.261992619926199e-05, 'epoch': 2.1}


 71%|███████▏  | 550/771 [10:20:13<1:24:40, 22.99s/it]

Step 550: Training Loss: 1.38600
{'loss': 1.386, 'grad_norm': 2.407332420349121, 'learning_rate': 4.077490774907749e-05, 'epoch': 2.14}


 73%|███████▎  | 560/771 [10:24:02<1:20:30, 22.89s/it]

Step 560: Training Loss: 1.38080
{'loss': 1.3808, 'grad_norm': 2.3637170791625977, 'learning_rate': 3.892988929889299e-05, 'epoch': 2.18}


 74%|███████▍  | 570/771 [10:27:51<1:17:07, 23.02s/it]

Step 570: Training Loss: 1.24370
{'loss': 1.2437, 'grad_norm': 1.9955575466156006, 'learning_rate': 3.7084870848708486e-05, 'epoch': 2.22}


 75%|███████▌  | 580/771 [10:31:40<1:12:39, 22.82s/it]

Step 580: Training Loss: 1.43410
{'loss': 1.4341, 'grad_norm': 2.182145357131958, 'learning_rate': 3.5239852398523984e-05, 'epoch': 2.26}


 77%|███████▋  | 590/771 [10:35:27<1:08:45, 22.79s/it]

Step 590: Training Loss: 1.19150
{'loss': 1.1915, 'grad_norm': 2.0965142250061035, 'learning_rate': 3.339483394833948e-05, 'epoch': 2.3}


 78%|███████▊  | 600/771 [10:39:14<1:04:39, 22.69s/it]

Step 600: Training Loss: 1.03340
{'loss': 1.0334, 'grad_norm': 1.9944266080856323, 'learning_rate': 3.1549815498154986e-05, 'epoch': 2.33}


 79%|███████▉  | 610/771 [10:43:05<1:01:28, 22.91s/it]

Step 610: Training Loss: 1.46430
{'loss': 1.4643, 'grad_norm': 2.201939105987549, 'learning_rate': 2.970479704797048e-05, 'epoch': 2.37}


 80%|████████  | 620/771 [10:46:54<57:33, 22.87s/it]  

Step 620: Training Loss: 1.47950
{'loss': 1.4795, 'grad_norm': 2.5579686164855957, 'learning_rate': 2.7859778597785978e-05, 'epoch': 2.41}


 82%|████████▏ | 630/771 [10:50:43<53:39, 22.83s/it]

Step 630: Training Loss: 1.40940
{'loss': 1.4094, 'grad_norm': 2.8552064895629883, 'learning_rate': 2.6014760147601475e-05, 'epoch': 2.45}


 83%|████████▎ | 640/771 [10:54:31<50:05, 22.94s/it]

Step 640: Training Loss: 1.33920
{'loss': 1.3392, 'grad_norm': 2.412003993988037, 'learning_rate': 2.4169741697416977e-05, 'epoch': 2.49}


 84%|████████▍ | 650/771 [10:58:21<46:17, 22.95s/it]

Step 650: Training Loss: 1.34620
{'loss': 1.3462, 'grad_norm': 1.865411639213562, 'learning_rate': 2.2324723247232474e-05, 'epoch': 2.53}


 86%|████████▌ | 660/771 [11:02:10<42:26, 22.94s/it]

Step 660: Training Loss: 1.34130
{'loss': 1.3413, 'grad_norm': 2.5199527740478516, 'learning_rate': 2.0479704797047972e-05, 'epoch': 2.57}


 87%|████████▋ | 670/771 [11:05:59<38:37, 22.94s/it]

Step 670: Training Loss: 1.32330
{'loss': 1.3233, 'grad_norm': 2.188162088394165, 'learning_rate': 1.863468634686347e-05, 'epoch': 2.61}


 88%|████████▊ | 680/771 [11:09:48<34:44, 22.91s/it]

Step 680: Training Loss: 1.32890
{'loss': 1.3289, 'grad_norm': 2.2854504585266113, 'learning_rate': 1.6789667896678967e-05, 'epoch': 2.65}


 89%|████████▉ | 690/771 [11:13:37<30:47, 22.81s/it]

Step 690: Training Loss: 1.27090
{'loss': 1.2709, 'grad_norm': 2.307363510131836, 'learning_rate': 1.4944649446494467e-05, 'epoch': 2.68}


 91%|█████████ | 700/771 [11:17:27<27:15, 23.03s/it]

Step 700: Training Loss: 1.39030
{'loss': 1.3903, 'grad_norm': 2.5844953060150146, 'learning_rate': 1.3099630996309964e-05, 'epoch': 2.72}


 92%|█████████▏| 710/771 [11:21:16<23:19, 22.95s/it]

Step 710: Training Loss: 1.33180
{'loss': 1.3318, 'grad_norm': 2.1082499027252197, 'learning_rate': 1.1254612546125462e-05, 'epoch': 2.76}


 93%|█████████▎| 720/771 [11:25:05<19:27, 22.90s/it]

Step 720: Training Loss: 1.37810
{'loss': 1.3781, 'grad_norm': 2.180717945098877, 'learning_rate': 9.40959409594096e-06, 'epoch': 2.8}


 95%|█████████▍| 730/771 [11:28:55<15:40, 22.93s/it]

Step 730: Training Loss: 1.46730
{'loss': 1.4673, 'grad_norm': 2.2752676010131836, 'learning_rate': 7.564575645756458e-06, 'epoch': 2.84}


 96%|█████████▌| 740/771 [11:32:45<11:56, 23.11s/it]

Step 740: Training Loss: 1.49150
{'loss': 1.4915, 'grad_norm': 2.3527345657348633, 'learning_rate': 5.7195571955719566e-06, 'epoch': 2.88}


 97%|█████████▋| 750/771 [11:36:35<08:02, 22.99s/it]

Step 750: Training Loss: 1.40940
{'loss': 1.4094, 'grad_norm': 2.2700767517089844, 'learning_rate': 3.874538745387454e-06, 'epoch': 2.92}


 99%|█████████▊| 760/771 [11:40:23<04:12, 22.92s/it]

Step 760: Training Loss: 1.25760
{'loss': 1.2576, 'grad_norm': 2.2671334743499756, 'learning_rate': 2.029520295202952e-06, 'epoch': 2.96}


100%|█████████▉| 770/771 [11:44:14<00:22, 22.97s/it]

Step 770: Training Loss: 1.33300
{'loss': 1.333, 'grad_norm': 1.8313028812408447, 'learning_rate': 1.845018450184502e-07, 'epoch': 3.0}


100%|██████████| 771/771 [11:44:38<00:00, 54.84s/it]

Step 771: Logs: {'train_runtime': 42278.1228, 'train_samples_per_second': 0.036, 'train_steps_per_second': 0.018, 'total_flos': 819722969837568.0, 'train_loss': 1.630171654443951, 'epoch': 3.0}
{'train_runtime': 42278.1228, 'train_samples_per_second': 0.036, 'train_steps_per_second': 0.018, 'train_loss': 1.630171654443951, 'epoch': 3.0}





# Exporting the Fine-Tuned GPT-Neo Model to ONNX Format

In [8]:
import torch

# Load the fine-tuned checkpoint and switch the model to inference mode.
fine_tuned_dir = "./fine_tuned_gpt_model"
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_dir)
model = AutoModelForCausalLM.from_pretrained(fine_tuned_dir)
model = model.eval()  # eval() returns the same module, now with training mode off

class ORTModelForOnnxExport(torch.nn.Module):
    """Export wrapper: forwards ids and mask to the wrapped model, returns output 0."""

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, input_ids, attention_mask):
        """Call the wrapped model with tuple outputs and keep only the first one."""
        call_kwargs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "return_dict": False,
        }
        return self.model(**call_kwargs)[0]

ort_model = ORTModelForOnnxExport(model)

# Example input used only to trace the graph during export.
dummy_input = tokenizer("Hello, world!", return_tensors="pt")

# Only the batch and sequence axes are declared dynamic. The last output axis
# is the vocabulary size, which does not change between calls, so it stays
# static instead of being exported as a symbolic dimension.
dynamic_axes = {
    'input_ids': {0: 'batch_size', 1: 'sequence'},
    'attention_mask': {0: 'batch_size', 1: 'sequence'},
    'output': {0: 'batch_size', 1: 'sequence'}
}

torch.onnx.export(ort_model,
                  (dummy_input.input_ids, dummy_input.attention_mask),
                  "fine_tuned_gptneo_model.onnx",
                  input_names=['input_ids', 'attention_mask'],
                  output_names=['output'],
                  dynamic_axes=dynamic_axes,
                  opset_version=11,
                  do_constant_folding=True)

print("Fine-tuned model converted to ONNX format successfully.")

  if (input_shape[-1] > 1 or self.sliding_window is not None) and self.is_causal:
  if past_key_values_length > 0:
  mask_value = torch.tensor(mask_value, dtype=attn_weights.dtype).to(attn_weights.device)


Fine-tuned model converted to ONNX format successfully.


# Converting the Fine-Tuned GPT-Neo Model to OpenVINO IR Format

In [9]:
# Use the current conversion API. The legacy openvino.tools.mo entry point is
# deprecated and announced for removal in OpenVINO 2025.0.
import openvino as ov

# Batch is fixed to 1 and the sequence axis stays dynamic (-1) for both inputs.
# The ONNX graph has a single output named 'output', so no output selection is needed.
ov_model = ov.convert_model(
    "fine_tuned_gptneo_model.onnx",
    input=[
        ("input_ids", ov.PartialShape([1, -1])),
        ("attention_mask", ov.PartialShape([1, -1])),
    ],
)

# compress_to_fp16=False keeps the weights uncompressed, as the previous
# serialize() call did (save_model compresses to FP16 by default).
ov.save_model(ov_model, "fine_tuned_gptneo_ir_model.xml", compress_to_fp16=False)

print("Fine-tuned model converted to IR format successfully.")

[ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release.
In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. 
Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html
Fine-tuned model converted to IR format successfully.


# Inference with the Fine-Tuned Model in OpenVINO IR Format

In [10]:
import openvino.runtime as ov
from transformers import AutoTokenizer
import numpy as np

# Read the fine-tuned IR and compile it for the CPU device.
core = ov.Core()
model = core.read_model("fine_tuned_gptneo_ir_model.xml")
compiled_model = core.compile_model(model, "CPU")

# One inference request, reused for every generation step.
infer_request = compiled_model.create_infer_request()

# Tokenizer saved alongside the fine-tuned model.
tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_gpt_model")

def generate_text(prompt, max_length=200): # The maximum length is 200; the answer may be short.
    """Greedily continue ``prompt`` using the compiled OpenVINO model.

    Each step feeds the whole sequence generated so far back through the model
    and appends the highest-scoring next token.

    Args:
        prompt: Text to continue.
        max_length: Upper bound on the number of *new* tokens to generate; the
            prompt tokens are not counted. Generation stops earlier if the
            end-of-sequence token is produced.

    Returns:
        The decoded prompt plus continuation, with special tokens removed.
    """
    # Tokenize once and reuse both tensors (previously the prompt was tokenized twice).
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded.input_ids.numpy()
    attention_mask = encoded.attention_mask.numpy()

    # Loop invariants hoisted out of the generation loop.
    eos_token_id = tokenizer.eos_token_id
    one_more_mask = np.ones((1, 1), dtype=attention_mask.dtype)

    for _ in range(max_length):
        inputs = {"input_ids": input_ids, "attention_mask": attention_mask}

        results = infer_request.infer(inputs)
        # Take the first output of the inference result as the logits.
        logits = results[next(iter(results))]

        # Greedy choice: highest-scoring token at the last position.
        next_token_id = int(np.argmax(logits[0, -1, :]))

        # Append the new token and extend the mask, keeping the input dtypes.
        new_token = np.array([[next_token_id]], dtype=input_ids.dtype)
        input_ids = np.concatenate([input_ids, new_token], axis=1)
        attention_mask = np.concatenate([attention_mask, one_more_mask], axis=1)

        if next_token_id == eos_token_id:
            break

    return tokenizer.decode(input_ids[0], skip_special_tokens=True)

# Run the OpenVINO generation helper on a sample question and print the result.
prompt = "What is (are) Schwannomatosis ?"
generated_text = generate_text(prompt)
print("Generated text:", generated_text)

# Reference answer, for comparison with the generated text: Schwannomatosis is a rare form of neurofibromatosis that is primarily characterized by multiple schwannomas (benign tumors of the nervous system) in the absence of bilateral (affecting both sides) vestibular schwannomas. Signs and symptoms of the condition vary based on the size, location and number of schwannomas but may include pain; numbness; tingling; and/or weakness in the fingers and toes. Inherited forms of the disorder account for only 15 percent of all cases. In some of these families, schwannomatosis is caused by changes (mutations) in the SMARCB1 or LZTR1 genes; in other cases, the exact underlying cause is unknown. When inherited, the condition is passed down in an autosomal dominant manner with highly variable expressivity and reduced penetrance. Treatment is based on the signs and symptoms present in each person but may include medications and/or surgery.

Generated text: What is (are) Schwannomatosis?

Answer:
Schwannomatosis is a rare condition that affects the nervous system. The exact cause is unknown, but it is believed to be a disorder of the nervous system. The signs and symptoms of this condition are similar to those of other connective tissue disorders, such as rheumatoid arthritis and multiple sclerosis.
                
Signs and Symptoms
                
Signs and symptoms of Schwannomatosis include:
                
A lump in the back of the neck. The lump can be painful and hard to move. The lump can also be painful and hard to move.
                
A lump in the back of the neck. The lump can be


# Inference with the Fine-Tuned Model in PyTorch

In [11]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model from the fine-tuned checkpoint folder.
fine_tuned_dir = "./fine_tuned_gpt_model"
fine_tokenizer, fine_model = (
    AutoTokenizer.from_pretrained(fine_tuned_dir),
    AutoModelForCausalLM.from_pretrained(fine_tuned_dir),
)

In [16]:
# Choose the device explicitly. `device` was previously never defined, so this
# cell raised NameError on a fresh kernel. Uses the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
fine_model.to(device)

prompt = "What are the symptoms of Crome syndrome ?"

inputs = fine_tokenizer(prompt, return_tensors='pt', truncation=True, padding=True)
input_ids = inputs.input_ids.to(device)
attention_mask = inputs.attention_mask.to(device)

# Greedy decoding. The former `temperature=0.6` argument was dropped because
# temperature only takes effect in sampling mode (do_sample=True); without it
# the value was ignored. Pass do_sample=True together with temperature to sample.
outputs = fine_model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_new_tokens=200,
    pad_token_id=fine_tokenizer.eos_token_id
)



In [17]:
# Decode the first generated sequence (a single prompt was submitted) back to
# text, dropping special tokens, and print it.
generated_text = fine_tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)

What are the symptoms of Crome syndrome?

Answer:
What are the signs and symptoms of Crome syndrome? The Human Phenotype Ontology provides the following list of signs and symptoms for Crome syndrome. If the information is available, the table below includes how often the symptom is seen in people with this condition. You can use the MedlinePlus Medical Dictionary to look up the definitions for these medical terms. Signs and Symptoms Approximate number of patients (when available) Abnormality of the skin - Abnormality of the fingernails - Abnormality of the palate - Abnormality of the palate - Abnormality of the ribs - Abnormality of the tongue - Abnormality of the tongue - Abnormality of the testes - Abnormality of the testes - Abnormality of the testes - Abnormality of the testes - Abnormality of the testes - Abnormality of the testes - Abnormality of the testes - Abnormality of the
