<a href="https://colab.research.google.com/github/RitzKar/Ad_creator/blob/main/Finetuning_Tech16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Fine-tuning

In [None]:
import os
from google.colab import userdata
# Read the Hugging Face token from the Colab secret named 'HF_TOKEN' and
# publish it as an environment variable for the cells that follow.
hf_secret = userdata.get('HF_TOKEN')
os.environ["HF_TOKEN"] = hf_secret

In [None]:
!pip3 install -q -U bitsandbytes
!pip3 install -q -U peft==0.8.2
!pip3 install -q -U trl==0.7.10
!pip3 install -q -U accelerate==0.27.1
!pip3 install -q -U datasets==2.17.0
!pip3 install -q -U transformers==4.38.1

In [None]:
# !pip install accelerate
# !pip install transformers --upgrade

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer

model_id = "google/gemma-7b"
hf_token = os.environ['HF_TOKEN']

# Quantization settings: 4-bit NF4 weights, bfloat16 for the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Both downloads authenticate with the same token; the whole model is mapped
# onto device 0.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    device_map={"": 0},
    quantization_config=bnb_config,
)

In [None]:
# Baseline sample: complete a quote prompt with the model as loaded above.
device = "cuda:0"
text = "Quote: Greed, for lack of a better word,"

inputs = tokenizer(text, return_tensors="pt")
inputs = inputs.to(device)

outputs = model.generate(max_new_tokens=20, **inputs)
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)

In [None]:
# os.environ["WANDB_DISABLED"] = "true"

In [None]:
from peft import LoraConfig

# LoRA adapter settings; passed to SFTTrainer below through `peft_config`.
lora_config = LoraConfig(
    # 'r': rank of the low-rank update matrices that LoRA trains alongside each
    # targeted weight. A higher rank means more trainable parameters and more
    # adapter capacity; 8 keeps the adapter small.
    r=8,

    # 'target_modules': names of the submodules that receive LoRA adapters.
    # - 'q_proj', 'k_proj', 'v_proj': attention query / key / value projections,
    # - 'o_proj': attention output projection,
    # - 'gate_proj', 'up_proj', 'down_proj': the feed-forward (MLP) projections
    #   of each decoder layer.
    # Listing all seven adapts every linear projection in the decoder blocks.
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],

    # 'task_type': tells PEFT which kind of model is being adapted. 'CAUSAL_LM'
    # is a causal language model, i.e. next-token prediction where each token
    # only sees the tokens before it.
    task_type="CAUSAL_LM",
)

In [None]:
from datasets import load_dataset

def _tokenize_quotes(samples):
    """Tokenize the ``quote`` column of a batch of rows."""
    return tokenizer(samples["quote"])


# Load the quotes dataset, then tokenize the quote text batch by batch.
data = load_dataset("Abirate/english_quotes")
data = data.map(_tokenize_quotes, batched=True)

In [None]:
# Print the first three rows of the training split for a quick sanity check.
train_split = data['train']
for row_index in (0, 1, 2):
    print(train_split[row_index])

In [None]:
import transformers
from trl import SFTTrainer

def formatting_func(example):
    """Render quote/author rows as the text strings used for fine-tuning.

    SFTTrainer applies this function through a batched ``Dataset.map``, so
    ``example`` is normally a dict of columns in which ``example['quote']``
    and ``example['author']`` are parallel lists. One string is returned per
    row; formatting only element ``[0]`` would collapse each batch into a
    single training example and raise ``IndexError`` on an empty batch.

    Args:
        example: Mapping with ``'quote'`` and ``'author'`` entries, either
            parallel lists (a batch) or two plain strings (a single row).

    Returns:
        A list of ``"Quote: ...\\nAuthor: ..."`` strings, one per input row.
    """
    quotes, authors = example['quote'], example['author']
    # A single un-batched row carries plain strings; wrap them so zip() pairs
    # whole values instead of iterating character by character.
    if isinstance(quotes, str):
        quotes, authors = [quotes], [authors]
    return [
        f"Quote: {quote}\nAuthor: {author}"
        for quote, author in zip(quotes, authors)
    ]

# Hyper-parameters for a short demonstration run: 10 optimizer steps, each
# accumulating 4 micro-batches of 1 sequence per device, with a 2-step warmup
# and the loss logged at every step.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    max_steps=10,
    warmup_steps=2,
    logging_steps=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    fp16=True,
)

# Supervised fine-tuning with the LoRA settings from `lora_config`; each batch
# is turned into training text by `formatting_func`.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=data["train"],
    formatting_func=formatting_func,
    peft_config=lora_config,
)
trainer.train()

In [None]:
# Sample from the model after fine-tuning, using the same prompt text as the
# pre-training baseline so the two outputs are directly comparable. The prompt
# previously ended in two stray spaces, which changes its tokenization and
# makes it a different prompt from the baseline.
text = "Quote: Greed, for lack of a better word,"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=30)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

## Gemma

In [None]:
# !pip install accelerate
# !pip install transformers --upgrade

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Gemma 2B instruction-tuned checkpoint. This rebinds the `tokenizer`
# and `model` names that the fine-tuning section used for gemma-7b.
gemma_it_checkpoint = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(gemma_it_checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    gemma_it_checkpoint,
    device_map="auto",
)

In [None]:
# Summarisation prompt: an instruction line, the news article to condense, and
# a trailing "Summary:" cue for the model to continue from.
input_text = """Summarize the following text in 3 bullet points:


	Opec+ members led by Saudi Arabia and Russia have extended voluntary cuts to oil production for another three months, as they attempt to boost prices that have remained subdued in spite of geopolitical tensions.

The curbs were due to expire at the end of March but will continue until the end of June, according to Saudi Arabia’s state news agency.

The measures add to a series of output cuts by Opec+ members since 2022 designed to support prices amid rising US production and tepid global demand. Since the latest voluntary cuts came into effect in January they have lowered the combined production targets of members by about 2.2mn barrels a day.

“The decision sends a message of cohesion and confirms that the group is not in a hurry to return supply volumes, supporting the view that when this finally happens, it will be gradual,” said Giacomo Romeo, an analyst at Jefferies.

Brent crude, the international benchmark, has risen by 6 per cent and the US equivalent WTI almost 8 per cent since the latest cuts were first announced at the end of November.

But despite tensions in the Middle East, including the Israel-Hamas war and the attacks on commercial shipping by the Houthis, the oil price remains well below the $100 a barrel level last seen in the summer of 2022.

Traders had largely expected the decision to extend the curbs, with crude oil prices rising last week ahead of the announcement. Brent rose more than 2 per cent last week to close above $83 a barrel on Friday, while WTI closed just under $80 a barrel, a rise of more than 4 per cent.

Opec+ was “trying to keep the market in balance”, said Amrita Sen at Energy Aspects. “Oil prices are a lot more stable . . . but they want to ensure the stability continues,” she said.

Saudi Arabia has shouldered most of the curbs, having cut its production by 1mn b/d since July. In total, the kingdom is producing 2mn b/d a less than it did in October 2022. In January, it dropped its plans to expand its daily oil production capacity by 2027 in a major policy reversal.


Summary:
"""
# Encode the prompt and move it to the device the model actually lives on.
# The model was loaded with device_map="auto", so a hard-coded "cuda" fails on
# a CPU-only runtime and can mismatch the placement chosen at load time.
# Note: despite its name, `input_ids` holds the tokenizer's full output
# (input ids plus attention mask), which is why it is unpacked with ** below.
input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)

# max_length caps the prompt and the generated tokens combined.
outputs = model.generate(**input_ids, max_length=1000)
print(tokenizer.decode(outputs[0]))

In [None]:
# Q&A function

def answer_the_question(question):
    """Generate the model's reply to ``question`` and return it as text.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        question: Prompt text to send to the model.

    Returns:
        The newly generated text only, with special tokens removed. The
        prompt tokens that ``generate`` echoes at the start of its output are
        stripped, so the caller does not see the question repeated.
    """
    # Follow the model's own placement instead of assuming "cuda", and keep the
    # attention mask together with the input ids.
    inputs = tokenizer(question, return_tensors="pt").to(model.device)
    # max_length caps the prompt and the generated tokens combined.
    generated = model.generate(**inputs, max_length=1000)
    # The output sequence starts with the prompt; keep only what follows it.
    prompt_length = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True)
    return answer
# Example call: send one prompt through answer_the_question and print the
# question next to the returned text.
question = "Write a short rap about stanford students taking an LLM class."
answer = answer_the_question(question)
print(f"Question: {question}\nAnswer: {answer}")