## 1. Tokens

In [None]:
from tiktoken import encoding_for_model

# Initialize encoder for a given model
encoder = encoding_for_model("text-davinci-003")

# Text to encode
text = "The sun is shining"

# Encoding the text
tokens = encoder.encode(text)

# Output the encoded tokens
print(tokens)  # This will output the tokens as integers

# Decode each token back to text individually.
# A single token may carry only part of a multi-byte UTF-8 character, so
# undecodable bytes are replaced instead of raising UnicodeDecodeError.
decoded_tokens = [
    encoder.decode_single_token_bytes(token).decode('utf-8', errors='replace')
    for token in tokens
]

# Output the decoded tokens
print(decoded_tokens)

[464, 4252, 318, 22751]
['The', ' sun', ' is', ' shining']


## 2. The OpenAI API

In [None]:
from openai import OpenAI
from google.colab import userdata
import os

# Read the OpenAI API key from Colab's secret store instead of hard-coding it.
# SECURITY: never commit a literal key to a notebook; any key that was ever
# embedded in this file must be treated as leaked and revoked.
client = OpenAI(api_key=userdata.get('OPENAI_API_KEY'))

# Specify the model you want to use
model = "gpt-3.5-turbo"  # or "gpt-4-1106-preview" for GPT-4 if available

# System and user messages
messages = [
    {"role": "system", "content": 'You are a Tech Support Bot that provides helpful and concise tech advice.'},
    {"role": "user", "content": 'How do I reset my router?'}
]

# Creating a chat completion (temperature=0 is requested for this call)
response = client.chat.completions.create(
    model=model,
    messages=messages,
    temperature=0
)

# Extracting and printing the response message
response_message = response.choices[0].message.content
print(response_message)


To reset your router, locate the reset button on the back or bottom of the router. Use a paperclip or a small pointed object to press and hold the reset button for about 10-15 seconds until the router lights start flashing. This will reset the router to its factory default settings. Remember to reconfigure your network settings after the reset.


## 3. Create our own code interpreter

In [None]:
import ast
import json
import inspect
from pydantic import create_model
from inspect import Parameter

In [None]:
# Function to calculate factorial
def factorial(n: int):
    """Calculates factorial of n"""
    # The docstring is kept to one line because `schema()` forwards `__doc__`
    # verbatim as the function description.
    #
    # n:       a non-negative integer.
    # returns: the product 1 * 2 * ... * n (1 when n is 0).
    # raises:  ValueError when n is negative.
    if n < 0:
        # The recursive version never terminated for negative input.
        raise ValueError("factorial() is not defined for negative values")
    result = 1
    # Iterate instead of recursing so a large n cannot hit the recursion limit.
    for factor in range(2, n + 1):
        result *= factor
    return result

In [None]:
# Function to evaluate arithmetic expressions
def eval_expr(expr: str):
    """Evaluates a basic arithmetic expression"""
    # expr:    source text of a single arithmetic expression, e.g. "2 + 3 * 4".
    # returns: the numeric result on success;
    #          "Invalid expression" when expr contains anything other than
    #          numeric literals and arithmetic operators;
    #          "Error in expression" when it cannot be parsed or evaluated
    #          (syntax error, division by zero, ...).
    #
    # Only these node types may appear in the parsed tree. Names, calls,
    # attribute access, subscripts, etc. are rejected, so the input cannot
    # reach builtins or imported modules.
    permitted_nodes = (
        ast.Expression, ast.BinOp, ast.UnaryOp, ast.Constant,
        ast.Add, ast.Sub, ast.Mult, ast.Div, ast.FloorDiv, ast.Mod, ast.Pow,
        ast.UAdd, ast.USub,
    )
    try:
        tree = ast.parse(expr, mode='eval')
        for node in ast.walk(tree):
            if not isinstance(node, permitted_nodes):
                return "Invalid expression"
            # Literals must be plain numbers (no strings, bytes, bools, None).
            if isinstance(node, ast.Constant) and (
                isinstance(node.value, bool)
                or not isinstance(node.value, (int, float, complex))
            ):
                return "Invalid expression"
        compiled = compile(tree, filename="<ast>", mode="eval")
        # The tree is already restricted to arithmetic; builtins are removed
        # as a second line of defence.
        # NOTE: very large exponents (e.g. 9**9**9) can still be expensive.
        return eval(compiled, {"__builtins__": {}}, {})
    except (SyntaxError, ValueError, TypeError, ArithmeticError,
            RecursionError, MemoryError):
        return "Error in expression"

In [None]:
# Generate schema for functions
def schema(f):
    """Build a function-calling schema dict for the callable `f`.

    Args:
        f: A function whose signature is inspected.

    Returns:
        A dict with `name` (f.__name__), `description` (f.__doc__) and
        `parameters` (the JSON schema of a pydantic model built from the
        function's parameters).
    """
    from typing import Any

    fields = {}
    for param_name, param in inspect.signature(f).parameters.items():
        # `Parameter.empty` is a sentinel: compare by identity, because `==`
        # may be overloaded by the default value's type.
        annotation = Any if param.annotation is Parameter.empty else param.annotation
        # `...` marks the field as required when the parameter has no default.
        default = ... if param.default is Parameter.empty else param.default
        fields[param_name] = (annotation, default)

    model = create_model(f'Input for `{f.__name__}`', **fields)
    # pydantic v2 renamed `.schema()` to `.model_json_schema()`; prefer the
    # new name when available and fall back to the v1 API otherwise.
    if hasattr(model, "model_json_schema"):
        parameters = model.model_json_schema()
    else:
        parameters = model.schema()
    return dict(name=f.__name__, description=f.__doc__, parameters=parameters)

In [None]:
# Assuming the setup for GPT model interaction
def askgpt(question, system=None, functions=None):
    """Print the request that would go to a GPT model and return a canned reply.

    This is a placeholder for a real model call: nothing is sent anywhere.

    Args:
        question: The user question; printed as-is.
        system: Optional system message; printed only when truthy.
        functions: Optional iterable of JSON-serializable function schemas;
            each one is pretty-printed. Defaults to no functions.

    Returns:
        A dict shaped like a chat-completion response whose first choice holds
        a `function_call` to `factorial` with arguments `{"n": 5}`.
    """
    print(f"Question: {question}")
    if system:
        print(f"System: {system}")
    # `None` replaces the former mutable `[]` default.
    for func in functions or []:
        print(f"Function schema: {json.dumps(func, indent=2)}")
    # Placeholder response simulation
    return {"choices": [{"message": {"function_call": {"name": "factorial", "arguments": json.dumps({"n": 5})}}}]}

In [None]:
# Example use of the schema function: print the dict (name, description,
# parameters) that `schema` builds for `factorial`
print("Factorial function schema:")
print(schema(factorial))

# Example use of the eval_expr function on a sample arithmetic expression
print("\nEvaluating expression '2 + 3 * 4':")
print(eval_expr("2 + 3 * 4"))

In [None]:
# Example interaction with the GPT stand-in: `askgpt` only prints the request
# and returns a canned `function_call` reply, so no API call is made here.
c = askgpt("Calculate 5 factorial using the `factorial` function.",
           system="You must use the `factorial` function for calculations.",
           functions=[schema(factorial)])

# Function to call the appropriate Python function based on GPT's response
def call_func(c, allowed_functions=("factorial", "eval_expr")):
    """Dispatch a model `function_call` to the matching Python function.

    Args:
        c: A chat-completion-like dict; the call is read from
            c["choices"][0]["message"]["function_call"], which must hold a
            `name` and a JSON-encoded `arguments` string.
        allowed_functions: Names the model is permitted to invoke.

    Returns:
        Whatever the selected function returns.

    Raises:
        ValueError: If the requested name is not in `allowed_functions`.
        KeyError: If the name is allowed but not defined at module level.
    """
    fc = c["choices"][0]["message"]["function_call"]
    name = fc["name"]
    # The name comes from model output, i.e. untrusted input: without an
    # allow-list it could select any callable living in this module's globals.
    if name not in allowed_functions:
        raise ValueError(f"Function {name!r} is not in the allowed list")
    f = globals()[name]
    args = json.loads(fc["arguments"])
    return f(**args)

# Example function call based on GPT's response: dispatch the returned
# `function_call` through `call_func` and print the result
print("\nCalling function based on GPT's response:")
print(call_func(c))

## 4. Retrieval-Augmented Generation (RAG)

In [None]:
import os
from dotenv import load_dotenv
load_dotenv()

False

In [None]:
from google.colab import userdata

# Set OpenAI API key
import os
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents=SimpleDirectoryReader("/content/drive/MyDrive/Sem2/CMPE258(DL)/data").load_data()

In [None]:
documents

[Document(id_='8e7d1e36-9424-4f24-9ba2-4ee1e5dbfef5', embedding=None, metadata={'page_label': '1', 'file_name': '2310.05421.pdf', 'file_path': '/content/drive/MyDrive/Sem2/CMPE258(DL)/data/2310.05421.pdf', 'file_type': 'application/pdf', 'file_size': 326993, 'creation_date': '2024-02-25', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Submitte d to the 3rd International Conference on “Wom en in Science & Technology: Creating Sustainable Career”  \n28 -30 December, 2023  Automating Customer Ser vice using LangChain  \nBuilding custom open -source GPT Chatbot for organizat ions \nKeivalya Pandya  \n19me439 @bvmengineering.ac.in  \nBirla Vishvakarma Mahavidyalaya, Gujarat, I ndia Prof

In [None]:
index=VectorStoreIndex.from_documents(documents,show_progress=True)

Parsing nodes:   0%|          | 0/11 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/20 [00:00<?, ?it/s]

In [None]:
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x7cb1bff8c040>

In [None]:
query_engine=index.as_query_engine()

In [None]:
from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

# Retriever over the vector index, configured with similarity_top_k=4
# (presumably the 4 most similar nodes per query — confirm against the docs)
retriever=VectorIndexRetriever(index=index,similarity_top_k=4)
# Post-processor configured with a similarity cutoff of 0.80
# (presumably drops retrieved nodes scoring below it — confirm)
postprocessor=SimilarityPostprocessor(similarity_cutoff=0.80)

# Query engine combining the retriever with the similarity post-processor
query_engine=RetrieverQueryEngine(retriever=retriever,
                                  node_postprocessors=[postprocessor])


In [None]:
# NOTE: this replaces the RetrieverQueryEngine built in the previous cell with
# the index's default query engine, so the top-k / similarity-cutoff settings
# configured there do not apply to this query.
query_engine = index.as_query_engine()
# Query text with the "yopu" typo corrected so retrieval sees the intended title
response = query_engine.query("What is attention is all you need?")

In [None]:
from llama_index.core.response.pprint_utils import pprint_response
pprint_response(response,show_source=True)
print(response)

Final Response: Attention is all you need is a phrase commonly
associated with the Transformer model architecture.
______________________________________________________________________
Source Node 1/2
Node ID: 20f8e162-9b3a-4ea1-8238-4fbe48f50d4b
Similarity: 0.7183680086950025
Text: However, as we stan d at the cusp of a new era in customer
service automation, it becomes abundantly clear that  the traditional
methods once hailed as revolutionary, are  gradually becoming
obsolete.   This paper i s an invitation to envision a future wher e
customer service is no t a cost center but a wellspring of  customer
satisfaction and ...
______________________________________________________________________
Source Node 2/2
Node ID: 615be6b9-0cec-43bd-9508-7fbeee0330ad
Similarity: 0.7180131138522393
Text: Conference acronym ’XX, June 13–15, 2023, İstanbul, Turkey
Soygazi and Oguz Figure 2: Questions in the Prompt Analysis. provided
the correct answer. However, LLMMathChain encoun- tered the same i

In [None]:
import os.path
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Reuse a previously persisted index when one exists; otherwise build it
PERSIST_DIR = "./storage"
if os.path.exists(PERSIST_DIR):
    # a saved index is present: restore it from disk
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # first run: read the source documents, index them, then save the index
    documents = SimpleDirectoryReader("/content/drive/MyDrive/Sem2/CMPE258(DL)/data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

# in both cases the index can now be queried
query_engine = index.as_query_engine()
response = query_engine.query("What are transformers?")
print(response)

Transformers are large neural models that have been widely used in natural language processing tasks. They are known for their ability to efficiently handle sequential data by processing the input data as a whole rather than sequentially. Transformers have been utilized in various applications such as language translation, text generation, and information retrieval.


## 5. Fine-tuning with Gemma

In [None]:
import os
import transformers
import torch
from google.colab import userdata
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig, GemmaTokenizer

In [None]:
os.environ["HF_TOKEN"] = userdata.get('HF_TOKEN')

In [None]:
# Identifier of the base checkpoint passed to `from_pretrained` below
model_id = "google/gemma-2b"
# Quantization settings: load weights in 4-bit using the "nf4" quant type,
# with bfloat16 as the compute dtype
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [None]:
# Load the tokenizer and the quantized model for `model_id`, authenticating
# with the HF_TOKEN environment variable set earlier
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ['HF_TOKEN'])
# device_map={"":0} is presumably "place the whole model on GPU 0" — confirm
model = AutoModelForCausalLM.from_pretrained(model_id,
                                             quantization_config=bnb_config,
                                             device_map={"":0},
                                             token=os.environ['HF_TOKEN'])

tokenizer_config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/555 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [None]:
# Prompt for the model to complete (run before any fine-tuning in this notebook)
text = "Quote: Imagination is more,"
device = "cuda:0"
# Tokenize the prompt into PyTorch tensors and move them to the GPU
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 20 new tokens, then decode and print the first sequence
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: Imagination is more, than knowledge.

I am a self-taught artist, born in 1985 in


In [None]:
# Same prompt as the previous cell, without the trailing comma
text = "Quote: Imagination is more"
device = "cuda:0"
# Tokenize the prompt into PyTorch tensors and move them to the GPU
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 20 new tokens, then decode and print the first sequence
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: Imagination is more important than knowledge. Knowledge is limited. Imagination encircles the world.

- Albert Einstein

The


In [None]:
# NOTE(review): transformers appears to treat only truthy values ("1", "true",
# ...) of WANDB_DISABLED as "disable W&B", so "false" would leave logging
# enabled — confirm which behavior is intended.
os.environ["WANDB_DISABLED"] = "false"

In [None]:
# LoRA adapter configuration for causal-LM fine-tuning: rank 8, attached to
# the listed attention (q/k/v/o) and MLP (gate/up/down) projection modules
lora_config = LoraConfig(
    r = 8,
    target_modules = ["q_proj", "o_proj", "k_proj", "v_proj",
                      "gate_proj", "up_proj", "down_proj"],
    task_type = "CAUSAL_LM",
)

In [None]:
from datasets import load_dataset

# Load the English quotes dataset
data = load_dataset("Abirate/english_quotes")
# Tokenize the "quote" column in batches; the tokenizer's outputs are added
# to the dataset alongside the original columns
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)

Downloading readme:   0%|          | 0.00/5.55k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/647k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/2508 [00:00<?, ? examples/s]

In [None]:
data['train']['quote']

['“Be yourself; everyone else is already taken.”',
 "“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.”",
 "“Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.”",
 '“So many books, so little time.”',
 '“A room without books is like a body without a soul.”',
 "“Be who you are and say what you feel, because those who mind don't matter, and those who matter don't mind.”",
 "“You've gotta dance like there's nobody watching,Love like you'll never be hurt,Sing like there's nobody listening,And live like it's heaven on earth.”",
 "“You know you're in love when you can't fall asleep because reality is finally better than your dreams.”",
 '“You only live once, but if you do it right, once is enough.”',
 '“Be the change that you wish to see in the world.”',
 "“In three words I can sum up everything I

In [None]:
def formatting_func(example):
    """Turn dataset rows into "Quote: ... / Author: ..." training texts.

    Args:
        example: Either a batch (a mapping whose 'quote' and 'author' values
            are equal-length lists) or a single row (both values are strings).

    Returns:
        A list with one formatted string per row, each of the form
        "Quote: <quote>" + newline + "Author: <author>".
    """
    quotes, authors = example['quote'], example['author']
    if isinstance(quotes, str):
        # Single row: wrap it so the loop below handles both shapes.
        quotes, authors = [quotes], [authors]
    # Format every row of the batch. Indexing only element [0] would yield a
    # single training text per batch and silently discard the rest.
    return [
        f"Quote: {quote}\nAuthor: {author}"
        for quote, author in zip(quotes, authors)
    ]

In [None]:
data['train']

Dataset({
    features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
    num_rows: 2508
})

In [None]:
# Supervised fine-tuning trainer: attaches the LoRA config to `model` and
# builds training texts from the quotes dataset via `formatting_func`
trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        # 1 sample per device x 4 accumulation steps per optimizer update
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        # training stops after 100 optimizer steps
        max_steps=100,
        learning_rate=2e-4,
        fp16=True,
        # log the training loss at every step
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    peft_config=lora_config,
    formatting_func=formatting_func,
)



Map:   0%|          | 0/2508 [00:00<?, ? examples/s]



In [None]:
trainer.train()

Step,Training Loss
1,1.6802
2,0.6298
3,1.0221
4,1.0306
5,0.4185
6,1.2275
7,1.092
8,0.3321
9,0.5626
10,0.4791


TrainOutput(global_step=100, training_loss=0.1457195576839149, metrics={'train_runtime': 56.7229, 'train_samples_per_second': 7.052, 'train_steps_per_second': 1.763, 'total_flos': 55030401331200.0, 'train_loss': 0.1457195576839149, 'epoch': 66.67})

In [None]:
# Prompt the model again after the `trainer.train()` run above
text = "Quote: A woman is like a tea bag;"
device = "cuda:0"
# Tokenize the prompt into PyTorch tensors and move them to the GPU
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 20 new tokens, then decode and print the first sequence
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: A woman is like a tea bag; you can’t tell how strong she is until you put her in hot water
Author: Eleanor


In [None]:
# A second post-training prompt, using a different quote opening
text = "Quote: Outside of a dog, a book is man's"
device = "cuda:0"
# Tokenize the prompt into PyTorch tensors and move them to the GPU
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 20 new tokens, then decode and print the first sequence
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: Outside of a dog, a book is man's best friend.
Author: Nicolas Chamfort
Quote: The most wasted of all days is one


## 6. llama.cpp

In [None]:
!pip install "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git"

Collecting unsloth[colab]@ git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-2j617h9r/unsloth_7c16e3cebb7540dbbe4ba4d6c1d967e0
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-2j617h9r/unsloth_7c16e3cebb7540dbbe4ba4d6c1d967e0
  Resolved https://github.com/unslothai/unsloth.git to commit 3e4c5a323c16bbda2c92212b790073c4e99c2a55
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting bitsandbytes (from unsloth[colab]@ git+https://github.com/unslothai/unsloth.git)
  Downloading bitsandbytes-0.42.0-py3-none-any.whl (105.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
Collecting xformers@ https://download.

In [None]:
import torch
from unsloth import FastLanguageModel



In [None]:
major_version, minor_version = torch.cuda.get_device_capability()

In [None]:
import torch
# Settings passed to FastLanguageModel.from_pretrained in the next cell
# Requested context length (the load log reports RoPE scaling being applied
# to reach this value from the model's native 2048)
max_seq_length = 4096
# NOTE(review): None presumably lets the loader pick the dtype — confirm
dtype = None
load_in_4bit = True
model_name = "unsloth/tinyllama-bnb-4bit"

In [None]:
# Load the model and its tokenizer together, using the settings defined above
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

config.json:   0%|          | 0.00/1.09k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Llama patching release 2024.2
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.1.0+cu121. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.22.post7. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unsloth: unsloth/tinyllama-bnb-4bit can only handle sequence lengths of at most 2048.
But with kaiokendev's RoPE scaling of 2.0, it can be magically be extended to 4096!
You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` attribute will be overwritten with the one you passed to `from_pretrained`.


model.safetensors:   0%|          | 0.00/762M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/894 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

In [None]:

# Wrap the loaded model with LoRA adapters on the attention (q/k/v/o) and
# MLP (gate/up/down) projection modules
model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = False,
    random_state = 3407, # fixed seed value
    max_seq_length = max_seq_length,
)

Unsloth 2024.2 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from transformers.utils import logging


In [None]:
logging.set_verbosity_info()

In [None]:
DATASET_ID = 'knowrohit07/know_medical_dialogue_v2'

In [None]:
# get model text generate
def get_model_generate(index: int):
    """Generate the model's answer for one `test_dataset` row as a report.

    Relies on the module-level `test_dataset`, `tokenizer` and `model`.
    NOTE(review): `test_dataset` is not defined in the visible part of this
    notebook — confirm it is created before this function is called.

    Args:
        index: Position of the row in `test_dataset` to use.

    Returns:
        A string containing the input prompt, the reference text from the
        dataset's 'output' column and the model's generated text, separated
        by dashed lines.
    """
    medical_condition = test_dataset['instruction'][index]
    treatment_options = test_dataset['output'][index]

    prompt = f'''
    ### Instruction:
    For describe the treatment options the following conversation.

    ### Explaining medical conditions:
    {medical_condition}

    ### Describe the treatment options:
    '''

    input_ids = tokenizer(prompt, return_tensors='pt',truncation=True).input_ids.cuda()

    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=500,
        )

    # Drop the prompt by token count rather than by character count: the
    # decoded text is not guaranteed to reproduce the prompt string
    # character-for-character, so slicing with len(prompt) can cut wrongly.
    generated = outputs[:, input_ids.shape[1]:]
    output = tokenizer.batch_decode(generated.detach().cpu().numpy(), skip_special_tokens=True)[0]

    # 99 dashes, the same separator the previous join-based expression built
    dash_line = '-' * 99
    # The report starts directly with the separator (a stray leading quote
    # character used to be emitted here).
    result = f'''{dash_line}\nINPUT PROMPT:\n{prompt}{dash_line}\nBASELINE HUMAN DESCRIBE THE TREATMENT OPTIONS:\n{treatment_options}\n{dash_line}\nTRAINED MODEL GENERATED TEXT :\n{output}'''

    return result

In [None]:
def format_instruction(medical_condition: str, treatment_options: str):
    """Build one instruction-formatted text from a condition and its treatment.

    Both arguments are stripped of surrounding whitespace before being placed
    under their headings, and the assembled text is stripped again.
    """
    condition = medical_condition.strip()
    treatment = treatment_options.strip()
    # Every line after the first keeps the 12-space indent of the template.
    template = f'''### Instruction:
            For describe the treatment options the following conversation.

            ### Explaining medical conditions:
            {condition}

            ### Describe the treatment options:
            {treatment}
            '''
    return template.strip()

In [None]:
from datasets import Dataset, load_dataset, DatasetDict
dataset = load_dataset(DATASET_ID)

Downloading readme:   0%|          | 0.00/2.48k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.33M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Prompt template with three positional `{}` slots, filled in order with the
# instruction, the input and the response
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence token of the loaded tokenizer, appended to each training text
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Render a batch of rows into prompt texts.

    Reads the "instruction", "input" and "output" columns of `examples`,
    fills `alpaca_prompt` with each row and appends `EOS_TOKEN`.
    Returns a dict with a single "text" key holding the list of texts.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    # Must add EOS_TOKEN, otherwise your generation will go on forever!
    rendered = [alpaca_prompt.format(*row) + EOS_TOKEN for row in rows]
    return {"text": rendered}
pass

from datasets import load_dataset
# Load the train split of the alpaca-cleaned dataset; this rebinds `dataset`,
# replacing whatever was loaded under that name earlier
dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
# Add a "text" column holding the rendered prompt for every row
dataset = dataset.map(formatting_prompts_func, batched = True,)

Downloading readme:   0%|          | 0.00/11.6k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/44.3M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/51760 [00:00<?, ? examples/s]

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from transformers.utils import logging
# Raise the transformers log level to INFO
logging.set_verbosity_info()

# Supervised fine-tuning trainer over the "text" column of `dataset`
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = True, # Packs short sequences together to save time!
    args = TrainingArguments(
        # 2 samples per device x 4 accumulation steps per optimizer update
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_ratio = 0.1,
        num_train_epochs = 1,
        learning_rate = 2e-5,
        # use bf16 when the GPU supports it, otherwise fall back to fp16
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        # log the training loss at every step
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.1,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Generating train split: 0 examples [00:00, ? examples/s]

Using auto half precision backend


In [None]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 3,013 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 376
 "-____-"     Number of trainable parameters = 25,231,360


Step,Training Loss
1,2.3756
2,2.299
3,2.399
4,2.4075
5,2.3467
6,2.3666
7,2.3666
8,2.3894
9,2.3621
10,2.3623




Training completed. Do not forget to share your model on huggingface.co/models =)




In [None]:
# Prepare the model for generation (presumably switches Unsloth to its
# inference mode — confirm against the Unsloth docs)
FastLanguageModel.for_inference(model)
# Render one prompt from the template and tokenize it onto the GPU
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Continue the fibonnaci sequence.", # instruction
        "1, 1, 2, 3, 5, 8", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

# Generate at most 64 new tokens; the decoded batch is the cell's output
outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nContinue the fibonnaci sequence.\n\n### Input:\n1, 1, 2, 3, 5, 8\n\n### Response:\nThe fibonacci sequence is a sequence of numbers that can be generated by starting with 1 and then adding the previous number to the previous number. The sequence is: 1, 1, 2, 3, 5, 8, 13, 22, 44,']

In [None]:
# Free-text question used as the prompt's "input" section.
# NOTE(review): the name `input` shadows the builtin `input()` for the rest of
# the session — consider renaming once all later uses are known.
input = "My daughter ( F, 18 y/o, 5'5', 165lbs) has been feeling poorly for a few weeks. \
She had COVID a couple of months ago and symptoms have are much worse in the last month or so. \
She feels light headed, breathless, dizzy, and her heart rate (HR) goes from ~65 lying down to ~250 when standing. \
Today she tells me HR has been around 195 all day and she feels really lousy. \
What should I do?"

# Prepare the model for generation (presumably switches Unsloth to its
# inference mode — confirm against the Unsloth docs)
FastLanguageModel.for_inference(model)
# Render one prompt from the template and tokenize it onto the GPU
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Label what the parent should do to take care of their daughter.", # instruction
        input, # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

# Generate at most 64 new tokens; the decoded batch is the cell's output
outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

["<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nLabel what the parent should do to take care of their daughter.\n\n### Input:\nMy daughter ( F, 18 y/o, 5'5', 165lbs) has been feeling poorly for a few weeks. She had COVID a couple of months ago and symptoms have are much worse in the last month or so. She feels light headed, breathless, dizzy, and her heart rate (HR) goes from ~65 lying down to ~250 when standing. Today she tells me HR has been around 195 all day and she feels really lousy. What should I do?\n\n### Response:\nThe parent should take care of their daughter by providing her with the necessary medical attention and support. They should make sure that she gets enough rest, eats a healthy diet, and gets plenty of exercise. They should also make sure that she gets enough sleep, and that she has access to a reliable source of"]