In [1]:
!pip uninstall -y numpy scipy transformers datasets bitsandbytes peft pyarrow

Found existing installation: numpy 1.23.5
Uninstalling numpy-1.23.5:
  Successfully uninstalled numpy-1.23.5
Found existing installation: scipy 1.10.1
Uninstalling scipy-1.10.1:
  Successfully uninstalled scipy-1.10.1
[0m

In [1]:
!pip install numpy==1.23.5
!pip install scipy==1.10.1



In [2]:
!pip install -q datasets==2.14.5
!pip install -q bitsandbytes==0.41.1
!pip install -q peft==0.5.0
!pip install -q pyarrow==14.0.1
!pip install -q modelscope
!pip install -q transformers>=4.37.0

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 1.22.0 requires numpy>=1.24.0, but you have numpy 1.23.5 which is incompatible.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 1.22.0 requires numpy>=1.24.0, but you have numpy 1.23.5 which is incompatible.[0m[31m
[0m

In [3]:
from typing import Dict, List as TypingList
from datasets import Dataset, load_dataset, disable_caching
disable_caching()
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel, PeftConfig
from transformers import BitsAndBytesConfig
from transformers.generation import GenerationConfig
import torch

In [4]:
import numpy as np
import scipy
print(f"NumPy version: {np.__version__}")
print(f"SciPy version: {scipy.__version__}")

NumPy version: 1.23.5
SciPy version: 1.10.1


In [6]:
dataset = load_dataset("MBZUAI/LaMini-instruction", split = "train")

In [7]:
small_dataset = dataset.select(i for i in range(200))
print(small_dataset)
print(small_dataset[0])

Dataset({
    features: ['instruction', 'response', 'instruction_source'],
    num_rows: 200
})
{'instruction': 'List 5 reasons why someone should learn to code', 'response': '1. High demand for coding skills in the job market\n2. Increased problem-solving and analytical skills\n3. Ability to develop new products and technologies\n4. Potentially higher earning potential\n5. Opportunity to work remotely and/or freelance', 'instruction_source': 'alpaca'}


In [8]:
prompt_template = """ Below is an instruction that decribes a task, Write a respone that appropriately completes the request, Instruction : {instruction} \n Response:"""
answer_template = """{response}"""

# create a function to add keys in the dictionary for prompt, answer and whole text

def _add_text(rec):
  instructions = rec["instruction"]
  response = rec["response"]
  if not instructions or not response:
    raise ValueError("instruction and response cannot be empty")
  rec["prompt"] = prompt_template.format(instruction = instructions)
  rec["answer"] = answer_template.format(response = response)
  rec["text"] = rec["prompt"] + rec["answer"]

  return rec

small_dataset = small_dataset.map(_add_text)
print(small_dataset[0])


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

{'instruction': 'List 5 reasons why someone should learn to code', 'response': '1. High demand for coding skills in the job market\n2. Increased problem-solving and analytical skills\n3. Ability to develop new products and technologies\n4. Potentially higher earning potential\n5. Opportunity to work remotely and/or freelance', 'instruction_source': 'alpaca', 'prompt': ' Below is an instruction that decribes a task, Write a respone that appropriately completes the request, Instruction : List 5 reasons why someone should learn to code \n Response:', 'answer': '1. High demand for coding skills in the job market\n2. Increased problem-solving and analytical skills\n3. Ability to develop new products and technologies\n4. Potentially higher earning potential\n5. Opportunity to work remotely and/or freelance', 'text': ' Below is an instruction that decribes a task, Write a respone that appropriately completes the request, Instruction : List 5 reasons why someone should learn to code \n Respons

In [9]:
model_id = "Qwen/Qwen2.5-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map = "auto",
torch_dtype = torch.float16,
load_in_8bit = True
 )

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/686 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/27.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

In [11]:
from functools import partial
from transformers import DataCollatorForSeq2Seq

MAX_LENGTH = 256

# Preprocess function to tokenize the input and response
def _preprocess_batch(batch: Dict[str, TypingList[str]]):
    # Corrected 'test_target' to 'text_target'
    model_input = tokenizer(batch["text"], text_target=batch["response"],
                            truncation=True, padding="max_length", max_length=MAX_LENGTH)
    model_input["labels"] = model_input["input_ids"]
    return model_input

# Use partial to create the preprocessing function
_preprocessing_function = partial(_preprocess_batch)

# Apply the preprocessing function to the dataset
encode_small_dataset = small_dataset.map(_preprocessing_function, batched=True, remove_columns=['instruction', 'response', 'prompt', 'answer'])

# Filter the dataset for sequences within the max length
processed_dataset = encode_small_dataset.filter(lambda rec: len(rec["input_ids"]) <= MAX_LENGTH)

# Split dataset into training and test sets
split_dataset = processed_dataset.train_test_split(test_size=0.2, seed=42)

# Create a data collator for sequence-to-sequence tasks
data_collator = DataCollatorForSeq2Seq(model=model, tokenizer=tokenizer, max_length=MAX_LENGTH, pad_to_multiple_of=8, padding="max_length")


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Filter:   0%|          | 0/200 [00:00<?, ? examples/s]

In [12]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training


In [13]:
LORA_R = 256
LORA_ALPHA = 512
LORA_DROPOUT = 0.05

# Define LoRA config

lora_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM"
)

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 80,740,352 || all params: 7,696,356,864 || trainable%: 1.0490723523705878


In [15]:
from transformers import TrainingArguments, Trainer
import bitsandbytes

EPOCHS = 5
LEARNING_RATE = 2e-5
MODEL_SAVE_FOLDER_NAME = "Qwen2.5-7B-lora"


training_args = TrainingArguments(
    output_dir=MODEL_SAVE_FOLDER_NAME,
    overwrite_output_dir=True,
    fp16=True,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    learning_rate=LEARNING_RATE,
    num_train_epochs=EPOCHS,
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2
)

split_dataset = processed_dataset.train_test_split(test_size=0.2, seed=42)

print(split_dataset)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    data_collator=data_collator,
    train_dataset=split_dataset["train"],
    eval_dataset=split_dataset["test"]
)

model.config.use_cache = False

trainer.train()


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


DatasetDict({
    train: Dataset({
        features: ['instruction_source', 'text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 160
    })
    test: Dataset({
        features: ['instruction_source', 'text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 40
    })
})


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss
1,0.3911,0.325654
2,0.2802,0.322351
3,0.2536,0.328688
4,0.2291,0.336598
5,0.2117,0.345978


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


TrainOutput(global_step=800, training_loss=0.2731343936920166, metrics={'train_runtime': 2128.5115, 'train_samples_per_second': 0.376, 'train_steps_per_second': 0.376, 'total_flos': 8787590538854400.0, 'train_loss': 0.2731343936920166, 'epoch': 5.0})

In [16]:
from transformers import pipeline

def postprocess(response):
    message = response.split("Response:")
    if len(message) < 2:
        raise ValueError("Invalid Template for Prompt: The Template should include the term 'Response:'")
    return "".join(message[1:])

inf_pipeline = pipeline(
    task="text-generation",
    model=trainer.model,
    tokenizer=tokenizer,
    max_length=256,
    temperature=0.0,
    trust_remote_code=True,
    device_map="auto"
)


inference_prompt = "Write me a recipe for Dosa"
response = inf_pipeline(prompt_template.format(instruction=inference_prompt))[0]["generated_text"]
formatted_response = postprocess(response)


print(formatted_response)


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCaus

Ingredients:
- 1 cup urad dal
- 1/2 cup rice
- 1 tsp salt
- 3 cups water

Instructions:
1. Soak urad dal and rice in separate bowls for 6-8 hours or overnight.
2. Drain the soaked rice and urad dal and grind them separately into a smooth paste.
3. Mix the urad dal paste and rice paste together and add salt.
4. Add water gradually and mix well to form a thin batter.
5. Let the batter rest for at least 6-8 hours or overnight.
6. Heat a non-stick pan and pour a ladleful of batter onto it.
7. Spread the batter evenly and let it cook until bubbles form on the surface.
8. Flip the dosa and cook until golden brown.
9. Serve hot with your favorite accompaniments.


In [17]:
inference_prompt = "Write me a recipe for Lasagna"
response = inf_pipeline(prompt_template.format(instruction=inference_prompt))[0]["generated_text"]
formatted_response = postprocess(response)


print(formatted_response)

Both `max_new_tokens` (=2048) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Ingredients:
- 1 pound ground beef
- 1 jar (24 oz) of spaghetti sauce
- 1 (15 oz) can of tomato sauce
- 1 (15 oz) can of tomato paste
- 1 tablespoon of olive oil
- 1 tablespoon of sugar
- 1 teaspoon of dried basil
- 1 teaspoon of dried oregano
- 1 teaspoon of salt
- 1/2 teaspoon of black pepper
- 1 pound of lasagna noodles
- 2 cups of shredded mozzarella cheese
- 1 cup of grated Parmesan cheese

Instructions:
1. Preheat the oven to 375°F.
2. In a large skillet, heat the olive oil over medium-high heat. Add the ground beef and cook until browned, breaking it up into small pieces as it cooks.
3. Add the spaghetti sauce, tomato sauce, tomato paste, sugar, basil, oregano, salt, and black pepper to the skillet with the beef. Stir well and let it simmer for 10-15 minutes.
4. Meanwhile, cook the lasagna noodles according to the package instructions. Drain and set aside.
5. In a 9x13 inch baking dish, spread a thin layer of the meat sauce on the bottom.
6. Place a layer of lasagna noodles on t

In [18]:
inference_prompt = "Write me the process to get Schengen visa"
response = inf_pipeline(prompt_template.format(instruction=inference_prompt))[0]["generated_text"]
formatted_response = postprocess(response)


print(formatted_response)

Both `max_new_tokens` (=2048) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


1. Determine if you are eligible for a Schengen visa.
2. Gather the required documents, including a valid passport, visa application form, recent passport-sized photos, proof of financial means, travel insurance, and evidence of return/onward ticket.
3. Schedule an appointment at the nearest Schengen embassy or consulate.
4. Attend the visa interview and answer any questions asked by the consular officer.
5. Wait for the visa decision, which can take up to 30 days.
6. If your visa is approved, collect it from the embassy or consulate and follow the instructions provided.
7. If your visa is denied, you may appeal the decision within the specified timeframe.
