<a href="https://colab.research.google.com/github/Praneeth-18/Unsloth---Tuning-and-experimenting-with-LLMs/blob/main/unsloth_assignments_tuning_and_experimenting_with_llms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **a.) Finetuning**

In [None]:
!pip install -q transformers datasets accelerate peft bitsandbytes
!pip install -q torch

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.7/472.7 kB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25h

# **TinyLlama**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
from transformers import BitsAndBytesConfig
import torch

def train_tinyllama():
    # Setup quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        quantization_config=bnb_config,
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    tokenizer.pad_token = tokenizer.eos_token

    # Print available GPU memory
    print(f"GPU memory available: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

    # Load a very small dataset
    dataset = load_dataset("databricks/databricks-dolly-15k", split="train[:10]")

    # Print dataset example to debug
    print("\nDataset example:")
    print(dataset[0])

    # Format data with proper dictionary access
    def format_data(example):
        try:
            # Print example structure
            print("\nProcessing example:")
            print(example)

            return f"### Instruction: {example['instruction']}\n### Response: {example['response']}"
        except Exception as e:
            print(f"Error in format_data: {e}")
            print(f"Example structure: {example}")
            raise e

    # Process and tokenize dataset
    tokenized_dataset = dataset.map(
        lambda examples: {
            'input_ids': tokenizer(
                format_data(examples),
                truncation=True,
                max_length=512,
                padding="max_length",
                return_tensors=None
            )['input_ids'],
            'attention_mask': tokenizer(
                format_data(examples),
                truncation=True,
                max_length=512,
                padding="max_length",
                return_tensors=None
            )['attention_mask']
        }
    )

    # LoRA config
    config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Apply LoRA
    model = get_peft_model(model, config)

    # Training arguments
    training_args = TrainingArguments(
        output_dir="tinyllama_results",
        num_train_epochs=1,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        save_steps=5,
        logging_steps=5,
        learning_rate=2e-4,
        weight_decay=0.01,
        warmup_steps=2,
        logging_dir="./logs",
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    # Train
    trainer.train()

    # Save
    model.save_pretrained("tinyllama_adapter")

    return model, tokenizer

# Test function
def test_model(model, tokenizer, prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Clear GPU memory first
torch.cuda.empty_cache()

# Run training with error handling
try:
    print("Starting training...")
    model, tokenizer = train_tinyllama()

    print("\nTesting model...")
    test_prompt = "Explain what is machine learning in simple terms"
    result = test_model(model, tokenizer, test_prompt)
    print("\nTest Result:")
    print(result)

except Exception as e:
    print(f"An error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

Starting training...
GPU memory available: 15.84 GB

Dataset example:
{'instruction': 'When did Virgin Australia start operating?', 'context': "Virgin Australia, the trading name of Virgin Australia Airlines Pty Ltd, is an Australian-based airline. It is the largest airline by fleet size to use the Virgin brand. It commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route. It suddenly found itself as a major airline in Australia's domestic market after the collapse of Ansett Australia in September 2001. The airline has since grown to directly serve 32 cities in Australia, from hubs in Brisbane, Melbourne and Sydney.", 'response': 'Virgin Australia commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route.', 'category': 'closed_qa'}


Map:   0%|          | 0/10 [00:00<?, ? examples/s]


Processing example:
{'instruction': 'When did Virgin Australia start operating?', 'context': "Virgin Australia, the trading name of Virgin Australia Airlines Pty Ltd, is an Australian-based airline. It is the largest airline by fleet size to use the Virgin brand. It commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route. It suddenly found itself as a major airline in Australia's domestic market after the collapse of Ansett Australia in September 2001. The airline has since grown to directly serve 32 cities in Australia, from hubs in Brisbane, Melbourne and Sydney.", 'response': 'Virgin Australia commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route.', 'category': 'closed_qa'}

Processing example:
{'instruction': 'When did Virgin Australia start operating?', 'context': "Virgin Australia, the trading name of Virgin Australia Airlines Pty Ltd, is an Australian-based airline. It is the largest airline by fleet size to us

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: [32m[41mERROR[0m API key must be 40 characters long, yours was 7


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:


An error occurred: 


Traceback (most recent call last):
  File "<ipython-input-10-118a6cd8fc56>", line 121, in <cell line: 119>
    model, tokenizer = train_tinyllama()
  File "<ipython-input-10-118a6cd8fc56>", line 102, in train_tinyllama
    trainer.train()
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1938, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2202, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 460, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 507, in call_event
    result = getattr(callback, event)(
  File "/usr/local/lib/python3.10/dist-packages/transformers/integrations/integration_utils.py", line 900, in on_t

# **Phi**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
from transformers import BitsAndBytesConfig
import torch

def train_phi():
    print("Starting Phi training setup...")

    # Setup quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load model
    print("Loading Phi model...")
    model = AutoModelForCausalLM.from_pretrained(
        "microsoft/phi-1_5",
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(
        "microsoft/phi-1_5",
        trust_remote_code=True
    )
    tokenizer.pad_token = tokenizer.eos_token

    # Create simple dataset
    print("Creating dataset...")
    train_data = [
        {
            "instruction": "Write a Python function to add two numbers",
            "response": "def add_numbers(a, b):\n    return a + b"
        },
        {
            "instruction": "Create a function to check if a number is prime",
            "response": "def is_prime(n):\n    if n < 2:\n        return False\n    for i in range(2, int(n ** 0.5) + 1):\n        if n % i == 0:\n            return False\n    return True"
        },
        {
            "instruction": "Write a function to reverse a string",
            "response": "def reverse_string(s):\n    return s[::-1]"
        }
    ]

    dataset = Dataset.from_list(train_data)

    print("Dataset example:")
    print(dataset[0])

    # Format data
    def format_data(example):
        return f"### Instruction: {example['instruction']}\n### Response: {example['response']}"

    # Tokenize dataset
    print("Tokenizing dataset...")
    def tokenize_function(examples):
        formatted_text = format_data(examples)
        return tokenizer(
            formatted_text,
            truncation=True,
            max_length=512,
            padding="max_length",
            return_tensors=None
        )

    tokenized_dataset = dataset.map(
        lambda x: tokenizer(
            format_data(x),
            truncation=True,
            max_length=512,
            padding="max_length",
            return_tensors=None
        ),
        remove_columns=dataset.column_names
    )

    print("Setting up LoRA...")
    # LoRA configuration
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Apply LoRA
    model = get_peft_model(model, lora_config)

    print("Setting up training arguments...")
    # Training arguments
    training_args = TrainingArguments(
        output_dir="./phi_results",
        num_train_epochs=3,           # Train for a few epochs
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=1,              # Log every step
        save_steps=3,                 # Save every 3 steps
        max_steps=10                  # Only train for 10 steps
    )

    print("Initializing trainer...")
    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    print("Starting training...")
    # Train
    trainer.train()

    print("Saving model...")
    # Save the fine-tuned model
    model.save_pretrained("./phi_adapter")

    return model, tokenizer

# Test function
def test_phi(model, tokenizer, prompt):
    print(f"\nTesting with prompt: {prompt}")
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=200,
        temperature=0.7,
        num_return_sequences=1
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Clear GPU memory
print("Clearing GPU memory...")
torch.cuda.empty_cache()

# Main execution
try:
    print("\nStarting Phi training process...")
    model, tokenizer = train_phi()

    # Test the model
    print("\nTesting the model...")
    test_prompts = [
        "Write a Python function to calculate factorial",
        "Create a function to find the maximum number in a list"
    ]

    for prompt in test_prompts:
        result = test_phi(model, tokenizer, prompt)
        print(f"\nPrompt: {prompt}")
        print(f"Result: {result}")

    # Save to Drive
    print("\nSaving to Google Drive...")
    from google.colab import drive
    drive.mount('/content/drive')
    !cp -r ./phi_adapter '/content/drive/MyDrive/'

except Exception as e:
    print(f"\nAn error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

Clearing GPU memory...

Starting Phi training process...
Starting Phi training setup...
Loading Phi model...


config.json:   0%|          | 0.00/736 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.84G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/74.0 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Creating dataset...
Dataset example:
{'instruction': 'Write a Python function to add two numbers', 'response': 'def add_numbers(a, b):\n    return a + b'}
Tokenizing dataset...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Setting up LoRA...


max_steps is given, it will override any value given in num_train_epochs


Setting up training arguments...
Initializing trainer...
Starting training...


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:



An error occurred: 


Traceback (most recent call last):
  File "<ipython-input-11-8a5ea4b83429>", line 144, in <cell line: 142>
    model, tokenizer = train_phi()
  File "<ipython-input-11-8a5ea4b83429>", line 117, in train_phi
    trainer.train()
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1938, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2202, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 460, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 507, in call_event
    result = getattr(callback, event)(
  File "/usr/local/lib/python3.10/dist-packages/transformers/integrations/integration_utils.py", line 900, in on_train_begin
 

# **Gemma**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
from transformers import BitsAndBytesConfig
import torch

def train_gemma():
    print("Starting Gemma training setup...")

    # Setup quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load model
    print("Loading Gemma model...")
    model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2b",
        quantization_config=bnb_config,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
    tokenizer.pad_token = tokenizer.eos_token

    # Create simple QA dataset
    print("Creating dataset...")
    train_data = [
        {
            "question": "What is machine learning?",
            "answer": "Machine learning is a branch of artificial intelligence that enables computers to learn from data and improve their performance without being explicitly programmed."
        },
        {
            "question": "How does photosynthesis work?",
            "answer": "Photosynthesis is the process where plants convert sunlight, water, and carbon dioxide into glucose and oxygen, providing energy for the plant to grow."
        },
        {
            "question": "What causes earthquakes?",
            "answer": "Earthquakes are caused by the movement of tectonic plates beneath Earth's surface. When these plates move or collide, they release energy in the form of seismic waves."
        },
        {
            "question": "What is the theory of relativity?",
            "answer": "The theory of relativity, developed by Einstein, describes how space and time are related and how mass and energy are equivalent (E=mc²)."
        }
    ]

    dataset = Dataset.from_list(train_data)

    print("Dataset example:")
    print(dataset[0])

    # Format data
    def format_data(example):
        return f"<start_of_turn>user\nQuestion: {example['question']}<end_of_turn>\n<start_of_turn>model\nAnswer: {example['answer']}<end_of_turn>"

    # Tokenize dataset
    print("Tokenizing dataset...")
    tokenized_dataset = dataset.map(
        lambda x: tokenizer(
            format_data(x),
            truncation=True,
            max_length=512,
            padding="max_length",
            return_tensors=None
        ),
        remove_columns=dataset.column_names
    )

    print("Setting up LoRA...")
    # LoRA configuration
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Apply LoRA
    model = get_peft_model(model, lora_config)

    print("Setting up training arguments...")
    # Training arguments
    training_args = TrainingArguments(
        output_dir="./gemma_results",
        num_train_epochs=3,           # Train for a few epochs
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=1,              # Log every step
        save_steps=3,                 # Save every 3 steps
        max_steps=10                  # Only train for 10 steps
    )

    print("Initializing trainer...")
    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    print("Starting training...")
    # Train
    trainer.train()

    print("Saving model...")
    # Save the fine-tuned model
    model.save_pretrained("./gemma_adapter")

    return model, tokenizer

# Test function
def test_gemma(model, tokenizer, question):
    print(f"\nTesting with question: {question}")
    prompt = f"<start_of_turn>user\nQuestion: {question}<end_of_turn>\n<start_of_turn>model\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=200,
        temperature=0.7,
        num_return_sequences=1
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Clear GPU memory
print("Clearing GPU memory...")
torch.cuda.empty_cache()

# Main execution
try:
    print("\nStarting Gemma training process...")
    model, tokenizer = train_gemma()

    # Test the model
    print("\nTesting the model...")
    test_questions = [
        "What is the speed of light?",
        "How does the human brain work?",
        "What is quantum computing?"
    ]

    for question in test_questions:
        result = test_gemma(model, tokenizer, question)
        print(f"\nQuestion: {question}")
        print(f"Answer: {result}")

    # Save to Drive
    print("\nSaving to Google Drive...")
    from google.colab import drive
    drive.mount('/content/drive')
    !cp -r ./gemma_adapter '/content/drive/MyDrive/'

except Exception as e:
    print(f"\nAn error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

# Optional: Convert for Ollama
def convert_for_ollama():
    print("\nConverting for Ollama...")
    !git clone https://github.com/ggerganov/llama.cpp
    %cd llama.cpp
    !pip install -r requirements.txt
    !python convert-lora-to-ggml.py ../gemma_adapter/adapter_config.json

Clearing GPU memory...

Starting Gemma training process...
Starting Gemma training setup...
Loading Gemma model...


config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

Creating dataset...
Dataset example:
{'question': 'What is machine learning?', 'answer': 'Machine learning is a branch of artificial intelligence that enables computers to learn from data and improve their performance without being explicitly programmed.'}
Tokenizing dataset...


Map:   0%|          | 0/4 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


Setting up LoRA...
Setting up training arguments...
Initializing trainer...
Starting training...


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:



An error occurred: 


Traceback (most recent call last):
  File "<ipython-input-12-bb977126c7a6>", line 135, in <cell line: 133>
    model, tokenizer = train_gemma()
  File "<ipython-input-12-bb977126c7a6>", line 107, in train_gemma
    trainer.train()
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1938, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2202, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 460, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 507, in call_event
    result = getattr(callback, event)(
  File "/usr/local/lib/python3.10/dist-packages/transformers/integrations/integration_utils.py", line 900, in on_train_beg

# **Qwen-1.5**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
from transformers import BitsAndBytesConfig
import torch

def train_qwen():
    print("Starting Qwen training setup...")

    # Setup quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load model
    print("Loading Qwen model...")
    model = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen1.5-0.5B",
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(
        "Qwen/Qwen1.5-0.5B",
        trust_remote_code=True
    )
    tokenizer.pad_token = tokenizer.eos_token

    # Create simple dataset for summarization
    print("Creating dataset...")
    train_data = [
        {
            "text": "The sun is a massive ball of gas primarily composed of hydrogen and helium. It undergoes nuclear fusion in its core, releasing enormous amounts of energy in the form of light and heat. This energy travels through space and reaches Earth, providing the heat and light necessary for life.",
            "summary": "The sun is a giant gas sphere that produces energy through nuclear fusion, providing Earth with essential light and heat."
        },
        {
            "text": "Artificial Intelligence (AI) is a field of computer science focused on creating intelligent machines that can perform tasks typically requiring human intelligence. These tasks include visual perception, speech recognition, decision-making, and language translation. AI systems learn from experience using machine learning algorithms.",
            "summary": "AI is computer technology that mimics human intelligence, performing tasks like recognition, decision-making, and translation through learning algorithms."
        },
        {
            "text": "Climate change refers to long-term shifts in global weather patterns and average temperatures. It's primarily caused by human activities releasing greenhouse gases into the atmosphere. These gases trap heat, leading to global warming, rising sea levels, and extreme weather events.",
            "summary": "Climate change involves long-term weather and temperature changes, mainly caused by human-produced greenhouse gases leading to global warming."
        }
    ]

    dataset = Dataset.from_list(train_data)

    print("Dataset example:")
    print(dataset[0])

    # Format data
    def format_data(example):
        return f"<|im_start|>user\nText to summarize: {example['text']}<|im_end|>\n<|im_start|>assistant\nSummary: {example['summary']}<|im_end|>"

    # Tokenize dataset
    print("Tokenizing dataset...")
    tokenized_dataset = dataset.map(
        lambda x: tokenizer(
            format_data(x),
            truncation=True,
            max_length=512,
            padding="max_length",
            return_tensors=None
        ),
        remove_columns=dataset.column_names
    )

    print("Setting up LoRA...")
    # LoRA configuration
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Apply LoRA
    model = get_peft_model(model, lora_config)

    print("Setting up training arguments...")
    # Training arguments
    training_args = TrainingArguments(
        output_dir="./qwen_results",
        num_train_epochs=3,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=1,
        save_steps=3,
        max_steps=10
    )

    print("Initializing trainer...")
    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    print("Starting training...")
    # Train
    trainer.train()

    print("Saving model...")
    # Save the fine-tuned model
    model.save_pretrained("./qwen_adapter")

    return model, tokenizer

# Test function
def test_qwen(model, tokenizer, text):
    print(f"\nTesting with text: {text[:100]}...")  # Print first 100 chars
    prompt = f"<|im_start|>user\nText to summarize: {text}<|im_end|>\n<|im_start|>assistant\nSummary:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=200,
        temperature=0.7,
        num_return_sequences=1
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Clear GPU memory
print("Clearing GPU memory...")
torch.cuda.empty_cache()

# Main execution
try:
    print("\nStarting Qwen training process...")
    model, tokenizer = train_qwen()

    # Test the model
    print("\nTesting the model...")
    test_texts = [
        "Neural networks are computational systems inspired by biological neural networks in human brains. They consist of interconnected nodes that process and transmit information, allowing the system to learn patterns and make predictions.",
        "The water cycle, also known as the hydrologic cycle, describes the continuous movement of water on Earth. Water evaporates from surfaces, forms clouds, falls as precipitation, and flows through rivers back to the oceans."
    ]

    for text in test_texts:
        result = test_qwen(model, tokenizer, text)
        print(f"\nOriginal Text: {text[:100]}...")
        print(f"Generated Summary: {result}")

    # Save to Drive
    print("\nSaving to Google Drive...")
    from google.colab import drive
    drive.mount('/content/drive')
    !cp -r ./qwen_adapter '/content/drive/MyDrive/'

except Exception as e:
    print(f"\nAn error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

# Optional: Convert for Ollama
def convert_for_ollama():
    print("\nConverting for Ollama...")
    !git clone https://github.com/ggerganov/llama.cpp
    %cd llama.cpp
    !pip install -r requirements.txt
    !python convert-lora-to-ggml.py ../qwen_adapter/adapter_config.json

Clearing GPU memory...

Starting Qwen training process...
Starting Qwen training setup...
Loading Qwen model...


config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Creating dataset...
Dataset example:
{'text': 'The sun is a massive ball of gas primarily composed of hydrogen and helium. It undergoes nuclear fusion in its core, releasing enormous amounts of energy in the form of light and heat. This energy travels through space and reaches Earth, providing the heat and light necessary for life.', 'summary': 'The sun is a giant gas sphere that produces energy through nuclear fusion, providing Earth with essential light and heat.'}
Tokenizing dataset...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Setting up LoRA...


max_steps is given, it will override any value given in num_train_epochs


Setting up training arguments...
Initializing trainer...
Starting training...


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:



An error occurred: 


Traceback (most recent call last):
  File "<ipython-input-13-7c3b3ffeebf2>", line 135, in <cell line: 133>
    model, tokenizer = train_qwen()
  File "<ipython-input-13-7c3b3ffeebf2>", line 107, in train_qwen
    trainer.train()
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1938, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2202, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 460, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 507, in call_event
    result = getattr(callback, event)(
  File "/usr/local/lib/python3.10/dist-packages/transformers/integrations/integration_utils.py", line 900, in on_train_begin

# **Mistral-7B-Instruct-v0.2**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
from transformers import BitsAndBytesConfig
import torch

def train_mistral():
    print("Starting Mistral training setup...")

    # Setup quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load model
    print("Loading Mistral model...")
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-v0.1",
        quantization_config=bnb_config,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
    tokenizer.pad_token = tokenizer.eos_token

    # Create simple instruction dataset
    print("Creating dataset...")
    train_data = [
        {
            "instruction": "Explain quantum computing",
            "response": "Quantum computing uses quantum mechanics principles like superposition and entanglement to perform computations. Unlike classical computers that use bits (0 or 1), quantum computers use quantum bits or qubits that can exist in multiple states simultaneously, potentially solving complex problems much faster."
        },
        {
            "instruction": "What is machine learning?",
            "response": "Machine learning is a branch of artificial intelligence where computer systems learn and improve from experience without explicit programming. They identify patterns in data to make predictions and decisions, becoming more accurate over time."
        },
        {
            "instruction": "How does blockchain work?",
            "response": "Blockchain is a decentralized digital ledger that records transactions across a network of computers. Each block contains transaction data and links to the previous block, creating a chain. The system is secure because changes require consensus from the network, making it difficult to alter records."
        }
    ]

    dataset = Dataset.from_list(train_data)

    print("Dataset example:")
    print(dataset[0])

    # Format data with Mistral chat template
    def format_data(example):
        return f"<s>[INST] {example['instruction']} [/INST] {example['response']}</s>"

    # Tokenize dataset
    print("Tokenizing dataset...")
    tokenized_dataset = dataset.map(
        lambda x: tokenizer(
            format_data(x),
            truncation=True,
            max_length=512,
            padding="max_length",
            return_tensors=None
        ),
        remove_columns=dataset.column_names
    )

    print("Setting up LoRA...")
    # LoRA configuration
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj"
        ],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Apply LoRA
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()  # Print trainable parameters

    print("Setting up training arguments...")
    # Training arguments
    training_args = TrainingArguments(
        output_dir="./mistral_results",
        num_train_epochs=3,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=1,
        save_steps=3,
        max_steps=10,
        fp16=True,  # Use mixed precision
        optim="paged_adamw_8bit"  # Use 8-bit optimizer
    )

    print("Initializing trainer...")
    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    print("Starting training...")
    # Train
    trainer.train()

    print("Saving model...")
    # Save the fine-tuned model
    model.save_pretrained("./mistral_adapter")

    return model, tokenizer

# Test function
def test_mistral(model, tokenizer, instruction):
    print(f"\nTesting with instruction: {instruction}")
    prompt = f"<s>[INST] {instruction} [/INST]"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=200,
        temperature=0.7,
        num_return_sequences=1,
        do_sample=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Memory management function
def clear_memory():
    import gc
    gc.collect()
    torch.cuda.empty_cache()
    if torch.cuda.is_available():
        print(f"GPU Memory available: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# Main execution
try:
    print("\nClearing memory...")
    clear_memory()

    print("\nStarting Mistral training process...")
    model, tokenizer = train_mistral()

    # Test the model
    print("\nTesting the model...")
    test_instructions = [
        "Explain how solar panels work",
        "What is the theory of evolution?",
        "How does the internet work?"
    ]

    for instruction in test_instructions:
        result = test_mistral(model, tokenizer, instruction)
        print(f"\nInstruction: {instruction}")
        print(f"Response: {result}")

    # Save to Drive
    print("\nSaving to Google Drive...")
    from google.colab import drive
    drive.mount('/content/drive')
    !cp -r ./mistral_adapter '/content/drive/MyDrive/'

except Exception as e:
    print(f"\nAn error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

# Convert for Ollama
def convert_for_ollama():
    !git clone https://github.com/ggerganov/llama.cpp
    %cd llama.cpp
    !pip install -r requirements.txt
    !python convert-lora-to-ggml.py ../mistral_adapter/adapter_config.json

    # Create Modelfile with proper string formatting
    modelfile_content = '''
FROM mistral:latest
ADAPTER ./ggml-adapter-model.bin
TEMPLATE """<s>[INST] {{ .Prompt }} [/INST]"""
PARAMETER stop "[INST]"
PARAMETER stop "</s>"
'''

    with open("Modelfile", "w") as f:
        f.write(modelfile_content)

    print("Created Modelfile for Ollama")


Clearing memory...
GPU Memory available: 15.84 GB

Starting Mistral training process...
Starting Mistral training setup...
Loading Mistral model...

An error occurred: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-v0.1.
403 Client Error. (Request ID: Root=1-67229ce1-08c0444f27bd206e0e06af16;283224c8-c909-4d2c-9460-5e77c7cca34e)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json.
Access to model mistralai/Mistral-7B-v0.1 is restricted and you are not in the authorized list. Visit https://huggingface.co/mistralai/Mistral-7B-v0.1 to ask for access.


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
    response.raise_for_status()
  File "/usr/local/lib/python3.10/dist-packages/requests/models.py", line 1024, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 403 Client Error: Forbidden for url: https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py", line 402, in cached_file
    resolved_file = hf_hub_download(
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_deprecation.py", line 101, in inner_f
    return f(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
    return fn(*args, **kwargs)


# **b.) Continued pretraining**

In [None]:
!pip install unsloth

Collecting unsloth
  Downloading unsloth-2024.10.7-py3-none-any.whl.metadata (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth-zoo (from unsloth)
  Downloading unsloth_zoo-2024.10.5-py3-none-any.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.28.post2-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting triton>=3.0.0 (from unsloth)
  Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.8.14-py3-none-any.whl.metadata (8.4 kB)
Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,<=0.11.1,>=0.7.9 (from unsloth)
  Downloading trl-0.11.1-py3-none-any.whl.metadata (12 kB)
Collecting hf-transfer (from unsloth)
  Downloa

In [None]:
from huggingface_hub import login
from getpass import getpass

# Prompt for Hugging Face token securely
hf_token = getpass("Please enter your Hugging Face token: ")

# Authenticate with Hugging Face
login(token=hf_token)

# Now load your model
from unsloth import FastLanguageModel

model_name = "mistralai/Mistral-7B"  # Replace with the model name

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,
    dtype="float16",
    load_in_4bit=True,
    token=hf_token  # Pass token for private model access
)


Please enter your Hugging Face token: ··········
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


RuntimeError: mistralai/Mistral-7B is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [None]:
from unsloth import FastLanguageModel

# Define your model name (e.g., a smaller Hugging Face model for Colab)
model_name = "mistralai/Mistral-7B"  # Replace with the desired model

# Load the model and tokenizer with settings optimized for Colab
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,
    dtype="float16",  # Helps reduce memory usage
    load_in_4bit=True,  # Further reduces memory load
    token=os.environ["HUGGINGFACE_TOKEN"]  # Fetches token from environment
)


NameError: name 'os' is not defined

# **c.) Chat templates**

In [1]:
# Required imports
!pip install transformers datasets accelerate bitsandbytes trl peft
!pip install -q pytorch_lightning wandb

import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import pandas as pd
from typing import List, Dict
import json

# 1. Basic Setup and Configurations
def setup_model_and_tokenizer(base_model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
    """Setup the model and tokenizer with appropriate configurations"""

    # QLoRA configurations
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=False
    )

    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        quantization_config=bnb_config,
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

# 2. Chat Templates for Different Tasks

def classification_template(example: Dict) -> str:
    """Template for classification tasks"""
    return f"""<|system|>You are a helpful AI assistant that performs classification tasks.</s>
<|user|>{example['text']}</s>
<|assistant|>The category for this text is: {example['label']}</s>"""

def conversation_template(example: Dict) -> str:
    """Template for conversational data"""
    dialog = ""
    for turn in example['conversation']:
        if turn['role'] == 'user':
            dialog += f"<|user|>{turn['content']}</s>"
        else:
            dialog += f"<|assistant|>{turn['content']}</s>"
    return f"<|system|>You are a helpful AI assistant.</s>{dialog}"

# 3. Dataset Preparation Functions

def prepare_classification_dataset(dataset_name: str, text_column: str, label_column: str):
    """Prepare a dataset for classification fine-tuning"""
    dataset = load_dataset(dataset_name)

    def format_example(example):
        return {
            'text': example[text_column],
            'label': example[label_column]
        }

    return dataset.map(format_example)

def prepare_conversation_dataset(conversations: List[Dict]):
    """Prepare a dataset for conversational fine-tuning"""
    formatted_data = []
    for conv in conversations:
        formatted_data.append({
            'conversation': conv
        })
    return formatted_data

# 4. Extended Context Size Implementation

def extend_context_size(model, tokenizer, target_context_size=8192):
    """Extend the context size of the model"""
    model.config.max_position_embeddings = target_context_size
    tokenizer.model_max_length = target_context_size

    # Interpolate position embeddings
    orig_pos_emb = model.get_input_embeddings().weight
    new_pos_emb = torch.nn.Parameter(
        torch.zeros(target_context_size, orig_pos_emb.size(1))
    )

    # Linear interpolation
    steps = target_context_size // orig_pos_emb.size(0)
    for i in range(target_context_size):
        orig_idx = i // steps
        next_idx = min(orig_idx + 1, orig_pos_emb.size(0) - 1)
        alpha = (i % steps) / steps
        new_pos_emb.data[i] = (1 - alpha) * orig_pos_emb.data[orig_idx] + alpha * orig_pos_emb.data[next_idx]

    model.get_input_embeddings().weight = new_pos_emb
    return model, tokenizer

# 5. Multi-Dataset Fine-tuning

def prepare_mixed_dataset(datasets: List[Dict]):
    """Combine multiple datasets for single fine-tuning"""
    combined_data = []

    for dataset_info in datasets:
        data = dataset_info['data']
        template_fn = dataset_info['template']

        formatted_data = [template_fn(example) for example in data]
        combined_data.extend(formatted_data)

    return combined_data

# 6. Training Setup

def setup_training(model, dataset, output_dir="./results"):
    """Setup LoRA and training arguments"""

    # LoRA configuration
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Prepare model for training
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)

    # Training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        weight_decay=0.001,
        logging_steps=10,
        save_strategy="epoch",
        evaluation_strategy="epoch"
    )

    return model, training_args

# Example usage:

# 1. Classification Example
"""
model, tokenizer = setup_model_and_tokenizer()

# Load and prepare classification dataset
classification_data = prepare_classification_dataset(
    "emotion",
    text_column="text",
    label_column="label"
)

# Format data with classification template
formatted_data = [classification_template(example) for example in classification_data]

# Setup and train
model, training_args = setup_training(model, formatted_data)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=formatted_data
)
trainer.train()
"""

# 2. Conversational Example
"""
# Example conversation data
conversations = [
    {
        'conversation': [
            {'role': 'user', 'content': 'Hello!'},
            {'role': 'assistant', 'content': 'Hi there! How can I help you today?'}
        ]
    }
]

conv_data = prepare_conversation_dataset(conversations)
formatted_conv_data = [conversation_template(example) for example in conv_data]
"""

# 3. Extended Context Example
"""
model, tokenizer = setup_model_and_tokenizer()
model, tokenizer = extend_context_size(model, tokenizer, target_context_size=8192)
"""

# 4. Multi-Dataset Example
"""
datasets = [
    {
        'data': classification_data,
        'template': classification_template
    },
    {
        'data': conv_data,
        'template': conversation_template
    }
]

combined_dataset = prepare_mixed_dataset(datasets)
"""

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting trl
  Downloading trl-0.12.0-py3-none-any.whl.metadata (10 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Collecting transformers
  Downloading transformers-4.46.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?2

"\ndatasets = [\n    {\n        'data': classification_data,\n        'template': classification_template\n    },\n    {\n        'data': conv_data,\n        'template': conversation_template\n    }\n]\n\ncombined_dataset = prepare_mixed_dataset(datasets)\n"

# **d.) Reward Modeling**

In [2]:
# Install required packages
!pip install torch transformers datasets accelerate bitsandbytes trl peft wandb

import torch
import numpy as np
from dataclasses import dataclass
from typing import Dict, List, Optional
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer
import wandb

# 1. Basic Setup
def setup_model_and_tokenizer(base_model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
    """Initialize model and tokenizer with 4-bit quantization"""

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=False
    )

    model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

# 2. ORPO Implementation

@dataclass
class ORPOConfig:
    """Configuration for ORPO training"""
    beta: float = 0.1  # Temperature parameter
    lambda_entropy: float = 0.1  # Entropy regularization coefficient
    gamma: float = 0.99  # Discount factor
    clip_range: float = 0.2  # Policy clip range

class ORPOTrainer:
    def __init__(
        self,
        model,
        ref_model,
        tokenizer,
        reward_model,
        config: ORPOConfig,
        training_args: TrainingArguments
    ):
        self.model = model
        self.ref_model = ref_model
        self.tokenizer = tokenizer
        self.reward_model = reward_model
        self.config = config
        self.training_args = training_args

    def compute_reward(self, responses):
        """Compute rewards using the reward model"""
        with torch.no_grad():
            rewards = self.reward_model(responses)
        return rewards

    def compute_kl_divergence(self, policy_logits, ref_logits):
        """Compute KL divergence between policy and reference model"""
        policy_probs = torch.softmax(policy_logits, dim=-1)
        ref_probs = torch.softmax(ref_logits, dim=-1)
        kl_div = torch.sum(policy_probs * (torch.log(policy_probs) - torch.log(ref_probs)), dim=-1)
        return kl_div

    def train_step(self, batch):
        """Single ORPO training step"""
        # Get model outputs
        policy_outputs = self.model(batch["input_ids"])
        ref_outputs = self.ref_model(batch["input_ids"])

        # Compute rewards
        rewards = self.compute_reward(policy_outputs.logits)

        # Compute KL divergence
        kl_div = self.compute_kl_divergence(policy_outputs.logits, ref_outputs.logits)

        # Compute ORPO loss
        advantages = rewards - self.config.beta * kl_div

        # Policy loss with clipping
        ratio = torch.exp(policy_outputs.logits - ref_outputs.logits)
        clipped_ratio = torch.clamp(ratio, 1 - self.config.clip_range, 1 + self.config.clip_range)

        policy_loss = -torch.min(
            ratio * advantages,
            clipped_ratio * advantages
        ).mean()

        # Entropy regularization
        entropy = -(torch.softmax(policy_outputs.logits, dim=-1) *
                   torch.log_softmax(policy_outputs.logits, dim=-1)).sum(dim=-1).mean()

        total_loss = policy_loss - self.config.lambda_entropy * entropy

        return total_loss

    def train(self, train_dataset):
        """Full training loop"""
        for epoch in range(self.training_args.num_train_epochs):
            total_loss = 0
            for batch in train_dataset:
                loss = self.train_step(batch)
                total_loss += loss.item()

                # Optimization step
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.training_args.max_grad_norm)
                self.training_args.optimizer.step()
                self.training_args.optimizer.zero_grad()

            print(f"Epoch {epoch}: Average Loss = {total_loss / len(train_dataset)}")

# 3. DPO Implementation

def prepare_dpo_dataset(dataset):
    """Prepare dataset for DPO training"""
    return {
        "prompt": dataset["prompt"],
        "chosen": dataset["chosen_response"],
        "rejected": dataset["rejected_response"]
    }

def train_dpo(
    model,
    ref_model,
    tokenizer,
    dataset,
    output_dir="./dpo_results",
    num_train_epochs=3,
    per_device_train_batch_size=4
):
    """Train model using DPO"""

    # Prepare DPO training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=4,
        learning_rate=2e-5,
        weight_decay=0.001,
        logging_steps=10,
        save_strategy="epoch",
        evaluation_strategy="epoch",
        remove_unused_columns=False
    )

    # Initialize DPO trainer
    dpo_trainer = DPOTrainer(
        model=model,
        ref_model=ref_model,
        args=training_args,
        beta=0.1,  # Temperature parameter for DPO loss
        train_dataset=dataset,
        tokenizer=tokenizer,
        max_length=512,
        max_prompt_length=128
    )

    # Train the model
    dpo_trainer.train()

# 4. Example Usage

def main():
    # Initialize wandb
    wandb.init(project="tinyllama-rlhf")

    # Setup models
    model, tokenizer = setup_model_and_tokenizer()
    ref_model, _ = setup_model_and_tokenizer()  # Reference model

    # Prepare LoRA
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)

    # Example dataset (modify according to your data)
    dataset = load_dataset("anthropic/hh-rlhf")  # Example RLHF dataset

    # Training arguments
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=4,
        learning_rate=2e-5,
        weight_decay=0.001,
        logging_steps=10,
        save_strategy="epoch"
    )

    # For ORPO training
    orpo_config = ORPOConfig()
    orpo_trainer = ORPOTrainer(
        model=model,
        ref_model=ref_model,
        tokenizer=tokenizer,
        reward_model=None,  # Add your reward model here
        config=orpo_config,
        training_args=training_args
    )

    # For DPO training
    dpo_dataset = prepare_dpo_dataset(dataset)
    train_dpo(
        model=model,
        ref_model=ref_model,
        tokenizer=tokenizer,
        dataset=dpo_dataset
    )

if __name__ == "__main__":
    main()

# 5. Example Dataset Format

"""
Example dataset format for preference data:

{
    "prompt": "What is the capital of France?",
    "chosen_response": "The capital of France is Paris.",
    "rejected_response": "The capital of France is London."
}

For ORPO, you'll also need a reward model that can score responses.
"""



[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:


Abort: 

# **e.) continued fine tuning**

In [3]:
# Required installations
!pip install transformers datasets accelerate bitsandbytes peft wandb

import os
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    LoraConfig,
    get_peft_model,
    PeftModel,
    PeftConfig,
    prepare_model_for_kbit_training
)
from datasets import load_dataset, Dataset
import wandb
from typing import Optional, Dict
import json

class CheckpointManager:
    """Handles checkpoint loading, saving, and merging operations"""

    def __init__(
        self,
        base_model_id: str,
        checkpoint_path: Optional[str] = None,
        adapter_path: Optional[str] = None
    ):
        self.base_model_id = base_model_id
        self.checkpoint_path = checkpoint_path
        self.adapter_path = adapter_path

    def setup_quantization(self):
        """Setup 4-bit quantization configuration"""
        return BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=False
        )

    def load_base_model(self):
        """Load the base model with quantization"""
        print(f"Loading base model: {self.base_model_id}")

        model = AutoModelForCausalLM.from_pretrained(
            self.base_model_id,
            quantization_config=self.setup_quantization(),
            device_map="auto",
            trust_remote_code=True
        )

        tokenizer = AutoTokenizer.from_pretrained(
            self.base_model_id,
            trust_remote_code=True
        )
        tokenizer.pad_token = tokenizer.eos_token

        return model, tokenizer

    def load_checkpoint(self):
        """Load model from checkpoint"""
        print(f"Loading checkpoint: {self.checkpoint_path}")

        model = AutoModelForCausalLM.from_pretrained(
            self.checkpoint_path,
            quantization_config=self.setup_quantization(),
            device_map="auto",
            trust_remote_code=True
        )

        tokenizer = AutoTokenizer.from_pretrained(
            self.checkpoint_path,
            trust_remote_code=True
        )
        tokenizer.pad_token = tokenizer.eos_token

        return model, tokenizer

    def load_and_merge_lora(self, model):
        """Load and merge LoRA adapter with the model"""
        print(f"Loading and merging LoRA adapter: {self.adapter_path}")

        model = PeftModel.from_pretrained(
            model,
            self.adapter_path
        )

        # Merge LoRA weights with base model
        model = model.merge_and_unload()

        return model

    def save_checkpoint(self, model, tokenizer, output_dir: str):
        """Save model checkpoint"""
        print(f"Saving checkpoint to: {output_dir}")

        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # Save model and tokenizer
        model.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)

class ContinuedTrainer:
    """Handles continued fine-tuning process"""

    def __init__(
        self,
        model,
        tokenizer,
        training_args: TrainingArguments,
        dataset_config: Dict
    ):
        self.model = model
        self.tokenizer = tokenizer
        self.training_args = training_args
        self.dataset_config = dataset_config

    def prepare_dataset(self):
        """Prepare dataset for continued training"""
        if self.dataset_config.get("path"):
            # Load from local file
            if self.dataset_config["path"].endswith('.json'):
                with open(self.dataset_config["path"], 'r') as f:
                    data = json.load(f)
                dataset = Dataset.from_dict(data)
            else:
                dataset = load_dataset(
                    "json",
                    data_files=self.dataset_config["path"]
                )["train"]
        else:
            # Load from Hugging Face hub
            dataset = load_dataset(
                self.dataset_config["name"],
                split="train"
            )

        # Apply preprocessing
        def preprocess(examples):
            # Apply custom template if provided
            if "template" in self.dataset_config:
                texts = [
                    self.dataset_config["template"].format(**example)
                    for example in examples
                ]
            else:
                texts = examples[self.dataset_config["text_column"]]

            # Tokenize
            tokenized = self.tokenizer(
                texts,
                truncation=True,
                padding="max_length",
                max_length=self.dataset_config.get("max_length", 512)
            )

            return tokenized

        processed_dataset = dataset.map(
            preprocess,
            batched=True,
            remove_columns=dataset.column_names
        )

        return processed_dataset

    def setup_lora(self):
        """Setup LoRA for continued training"""
        lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )

        self.model = prepare_model_for_kbit_training(self.model)
        self.model = get_peft_model(self.model, lora_config)

        return self.model

    def train(self):
        """Execute training"""
        # Prepare dataset
        dataset = self.prepare_dataset()

        # Setup data collator
        data_collator = DataCollatorForLanguageModeling(
            tokenizer=self.tokenizer,
            mlm=False
        )

        # Initialize trainer
        trainer = Trainer(
            model=self.model,
            args=self.training_args,
            train_dataset=dataset,
            data_collator=data_collator
        )

        # Start training
        trainer.train()

        return trainer

def main():
    # Initialize wandb
    wandb.init(project="continued-finetuning")

    # Configuration
    config = {
        "base_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "checkpoint_path": "path/to/your/checkpoint",  # Optional
        "adapter_path": "path/to/your/lora",  # Optional
        "output_dir": "./continued_training",
        "dataset_config": {
            "path": "path/to/your/data.json",  # Or use name from HF hub
            "text_column": "text",
            "max_length": 512,
            "template": "{instruction}\n{input}\n{output}"  # Optional
        }
    }

    # Initialize checkpoint manager
    checkpoint_manager = CheckpointManager(
        base_model_id=config["base_model_id"],
        checkpoint_path=config.get("checkpoint_path"),
        adapter_path=config.get("adapter_path")
    )

    # Load model and tokenizer
    if config.get("checkpoint_path"):
        model, tokenizer = checkpoint_manager.load_checkpoint()
    else:
        model, tokenizer = checkpoint_manager.load_base_model()

    # Merge LoRA if provided
    if config.get("adapter_path"):
        model = checkpoint_manager.load_and_merge_lora(model)

    # Training arguments
    training_args = TrainingArguments(
        output_dir=config["output_dir"],
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        learning_rate=2e-5,
        weight_decay=0.001,
        logging_steps=10,
        save_strategy="epoch",
        evaluation_strategy="no",
        lr_scheduler_type="cosine",
        warmup_steps=100
    )

    # Initialize trainer
    trainer = ContinuedTrainer(
        model=model,
        tokenizer=tokenizer,
        training_args=training_args,
        dataset_config=config["dataset_config"]
    )

    # Setup LoRA for continued training
    model = trainer.setup_lora()

    # Start training
    trainer.train()

    # Save final checkpoint
    checkpoint_manager.save_checkpoint(
        model=model,
        tokenizer=tokenizer,
        output_dir=config["output_dir"]
    )

if __name__ == "__main__":
    main()

"""
Example JSON data format:
{
    "data": [
        {
            "instruction": "Translate the following English text to French",
            "input": "Hello, how are you?",
            "output": "Bonjour, comment allez-vous?"
        },
        ...
    ]
}
"""



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:


Abort: 

# **f.) finetune unsloth for mental health development chatbot**

In [4]:
# Required installations
!pip install -q unsloth[cu121] torch==2.1.2 transformers==4.37.2 accelerate==0.26.1 bitsandbytes==0.42.0
!pip install datasets wandb trl

import torch
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    BitsAndBytesConfig
)
from unsloth import FastLanguageModel
import pandas as pd
import json
from typing import Dict, List, Optional
import wandb
from datetime import datetime

class MentalHealthDataProcessor:
    """Handles mental health dataset preparation and processing"""

    def __init__(self, tokenizer, max_length: int = 512):
        self.tokenizer = tokenizer
        self.max_length = max_length

        # Define conversation templates
        self.templates = {
            "basic": (
                "<|system|>You are a supportive AI assistant trained to provide "
                "mental health information and emotional support. Always encourage "
                "professional help for serious concerns.</s>\n"
                "<|user|>{user_input}</s>\n"
                "<|assistant|>{assistant_response}</s>"
            ),
            "therapeutic": (
                "<|system|>You are a supportive AI assistant. Focus on empathy, "
                "validation, and encouraging professional help when needed. Never "
                "provide medical advice or diagnosis.</s>\n"
                "<|user|>{user_input}</s>\n"
                "<|assistant|>{assistant_response}</s>"
            )
        }

    def load_custom_dataset(self, file_path: str) -> Dataset:
        """Load dataset from JSON file"""
        if file_path.endswith('.json'):
            with open(file_path, 'r') as f:
                data = json.load(f)
        elif file_path.endswith('.csv'):
            data = pd.read_csv(file_path).to_dict('records')

        return Dataset.from_dict({
            'conversations': data
        })

    def format_conversation(self, example: Dict, template_type: str = "basic") -> str:
        """Format conversation using template"""
        template = self.templates[template_type]
        return template.format(
            user_input=example['user_input'],
            assistant_response=example['assistant_response']
        )

    def preprocess_dataset(self, dataset: Dataset, template_type: str = "basic"):
        """Preprocess and tokenize dataset"""

        def process_example(example):
            # Format conversation
            text = self.format_conversation(example, template_type)

            # Tokenize
            encodings = self.tokenizer(
                text,
                truncation=True,
                padding="max_length",
                max_length=self.max_length,
                return_tensors="pt"
            )

            return {
                "input_ids": encodings["input_ids"][0],
                "attention_mask": encodings["attention_mask"][0],
                "labels": encodings["input_ids"][0].clone()
            }

        return dataset.map(process_example, remove_columns=dataset.column_names)

class MentalHealthModelTrainer:
    """Handles model training for mental health chatbot"""

    def __init__(
        self,
        base_model_id: str,
        output_dir: str,
        max_seq_length: int = 512,
        load_in_4bit: bool = True
    ):
        self.base_model_id = base_model_id
        self.output_dir = output_dir
        self.max_seq_length = max_seq_length
        self.load_in_4bit = load_in_4bit

        # Initialize model and tokenizer
        self.setup_model_and_tokenizer()

    def setup_model_and_tokenizer(self):
        """Initialize model and tokenizer using Unsloth"""
        print(f"Loading model: {self.base_model_id}")

        # Load model and tokenizer using Unsloth
        self.model, self.tokenizer = FastLanguageModel.from_pretrained(
            model_name=self.base_model_id,
            max_seq_length=self.max_seq_length,
            load_in_4bit=self.load_in_4bit,
            trust_remote_code=True
        )

        # Add special tokens for chat format
        special_tokens = {
            "pad_token": "<|pad|>",
            "sep_token": "</s>",
            "additional_special_tokens": ["<|system|>", "<|user|>", "<|assistant|>"]
        }
        self.tokenizer.add_special_tokens(special_tokens)
        self.model.resize_token_embeddings(len(self.tokenizer))

    def prepare_training_args(
        self,
        num_train_epochs: int = 3,
        per_device_train_batch_size: int = 4,
        gradient_accumulation_steps: int = 4
    ):
        """Prepare training arguments"""
        return TrainingArguments(
            output_dir=self.output_dir,
            num_train_epochs=num_train_epochs,
            per_device_train_batch_size=per_device_train_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            learning_rate=2e-5,
            weight_decay=0.01,
            logging_steps=10,
            save_strategy="epoch",
            evaluation_strategy="epoch",
            report_to="wandb",
            remove_unused_columns=False,
            lr_scheduler_type="cosine",
            warmup_steps=100,
            optim="adamw_torch",
            fp16=True,
            load_best_model_at_end=True,
            metric_for_best_model="loss"
        )

    def train(
        self,
        train_dataset: Dataset,
        val_dataset: Optional[Dataset] = None,
        training_args: Optional[TrainingArguments] = None
    ):
        """Execute training"""
        if training_args is None:
            training_args = self.prepare_training_args()

        # Configure Unsloth training
        self.model = FastLanguageModel.get_peft_model(
            self.model,
            r=16,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
            lora_alpha=16,
            lora_dropout=0.05,
            task_type="CAUSAL_LM"
        )

        # Train the model
        self.model.train()

        trainer = self.model.get_trainer(
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            args=training_args,
            max_seq_length=self.max_seq_length,
            packing=True
        )

        trainer.train()

        return trainer

def main():
    # Initialize wandb
    wandb.init(project="mental-health-chatbot")

    # Configuration
    config = {
        "base_model_id": "microsoft/phi-2",  # or another suitable base model
        "output_dir": "./mental_health_bot",
        "dataset_path": "path/to/your/mental_health_data.json",
        "max_seq_length": 512,
        "num_train_epochs": 3,
        "batch_size": 4
    }

    # Initialize trainer
    trainer = MentalHealthModelTrainer(
        base_model_id=config["base_model_id"],
        output_dir=config["output_dir"],
        max_seq_length=config["max_seq_length"]
    )

    # Initialize data processor
    data_processor = MentalHealthDataProcessor(
        tokenizer=trainer.tokenizer,
        max_length=config["max_seq_length"]
    )

    # Load and process dataset
    dataset = data_processor.load_custom_dataset(config["dataset_path"])
    processed_dataset = data_processor.preprocess_dataset(dataset, template_type="therapeutic")

    # Split dataset
    train_val = processed_dataset.train_test_split(test_size=0.1)

    # Training arguments
    training_args = trainer.prepare_training_args(
        num_train_epochs=config["num_train_epochs"],
        per_device_train_batch_size=config["batch_size"]
    )

    # Train model
    trainer.train(
        train_dataset=train_val["train"],
        val_dataset=train_val["test"],
        training_args=training_args
    )

    # Save model
    trainer.model.save_pretrained(config["output_dir"])
    trainer.tokenizer.save_pretrained(config["output_dir"])

"""
Example dataset format:
{
    "conversations": [
        {
            "user_input": "I've been feeling really anxious lately and having trouble sleeping.",
            "assistant_response": "I hear how difficult this has been for you. Anxiety and sleep problems can be really challenging. While I can share some general relaxation techniques, it's important to know that a mental health professional can provide personalized strategies. Would you like to talk about some simple calming techniques that others have found helpful?"
        },
        {
            "user_input": "Sometimes I feel overwhelmed by everything I need to do.",
            "assistant_response": "It's completely normal to feel overwhelmed sometimes. Let's break this down together. Could you tell me more about what's on your plate right now? We can explore some manageable steps while remembering that it's also okay to reach out to a counselor or therapist for additional support."
        }
    ]
}
"""

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.5/56.5 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

ModuleNotFoundError: No module named 'unsloth'

# **g.) use unsloth to finetune a model and export to ollama and show inference**

In [9]:
# Install required packages
!pip install -q unsloth accelerate transformers datasets bitsandbytes
!curl https://ollama.ai/install.sh | sh  # Install Ollama

import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)
from unsloth import FastLanguageModel
import json
import subprocess
import os
from typing import Dict, List, Optional
import requests

class ModelTrainer:
    """Handles model training and export to Ollama"""
    def __init__(
        self,
        base_model_id: str,
        output_dir: str,
        max_seq_length: int = 512,
        load_in_4bit: bool = True
    ):
        self.base_model_id = base_model_id
        self.output_dir = output_dir
        self.max_seq_length = max_seq_length
        self.load_in_4bit = load_in_4bit

        # Create output directory
        os.makedirs(output_dir, exist_ok=True)

        # Initialize model and tokenizer
        self.setup_model_and_tokenizer()

    def setup_model_and_tokenizer(self):
        """Initialize model and tokenizer using Unsloth"""
        print(f"Loading model: {self.base_model_id}")

        # Load model and tokenizer using Unsloth
        self.model, self.tokenizer = FastLanguageModel.from_pretrained(
            model_name=self.base_model_id,
            max_seq_length=self.max_seq_length,
            load_in_4bit=self.load_in_4bit,
            trust_remote_code=True
        )

        # Add special tokens if needed
        special_tokens = {
            "pad_token": "<|pad|>",
            "eos_token": "</s>",
            "bos_token": "<s>"
        }
        self.tokenizer.add_special_tokens(special_tokens)
        self.model.resize_token_embeddings(len(self.tokenizer))

    def prepare_dataset(self, dataset_path: str):
        """Prepare dataset for training"""
        if dataset_path.endswith('.json'):
            with open(dataset_path, 'r') as f:
                data = json.load(f)
            dataset = load_dataset('json', data_files=dataset_path)
        else:
            dataset = load_dataset(dataset_path)

        def preprocess(examples):
            # Assuming format: instruction, input, output
            prompts = [
                f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n{output}"
                for instruction, input_text, output in zip(
                    examples['instruction'],
                    examples['input'],
                    examples['output']
                )
            ]

            return self.tokenizer(
                prompts,
                truncation=True,
                padding="max_length",
                max_length=self.max_seq_length
            )

        processed_dataset = dataset.map(
            preprocess,
            batched=True,
            remove_columns=dataset.column_names
        )

        return processed_dataset

    def train(
        self,
        train_dataset,
        num_train_epochs: int = 3,
        per_device_train_batch_size: int = 4,
        gradient_accumulation_steps: int = 4
    ):
        """Train the model"""
        # Configure Unsloth training
        self.model = FastLanguageModel.get_peft_model(
            self.model,
            r=16,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
            lora_alpha=16,
            lora_dropout=0.05,
            task_type="CAUSAL_LM"
        )

        # Training arguments
        training_args = {
            "num_train_epochs": num_train_epochs,
            "per_device_train_batch_size": per_device_train_batch_size,
            "gradient_accumulation_steps": gradient_accumulation_steps,
            "learning_rate": 2e-5,
            "output_dir": self.output_dir,
            "save_steps": 100
        }

        # Train the model
        trainer = self.model.get_trainer(
            train_dataset=train_dataset,
            args=training_args,
            max_seq_length=self.max_seq_length,
            packing=True
        )

        trainer.train()
        return trainer

    def export_to_gguf(self):
        """Export model to GGUF format"""
        # Save model in Hugging Face format first
        self.model.save_pretrained(self.output_dir)
        self.tokenizer.save_pretrained(self.output_dir)

        # Convert to GGUF using llama.cpp
        gguf_output = os.path.join(self.output_dir, "model.gguf")
        command = (
            f"python -m llama_cpp.convert_hf_to_gguf "
            f"--outfile {gguf_output} "
            f"--input {self.output_dir}"
        )
        subprocess.run(command, shell=True, check=True)

        return gguf_output

    def export_to_ollama(self, model_name: str, api_base: str = "http://localhost:11434"):
        """Export model to Ollama"""
        gguf_path = self.export_to_gguf()

        # Upload model to Ollama
        url = f"{api_base}/api/upload"
        files = {"model": open(gguf_path, "rb")}
        response = requests.post(url, files=files)
        response.raise_for_status()
        model_id = response.json()["model_id"]

        # Update model configuration
        url = f"{api_base}/api/v1/push"
        url = f"{api_base}/api/v1/push"
        url = f"{api_base}/api/v1/push"
        payload = {
            "name": model_name,
            "base_model": model_id,
            "format": "ggml"
        }
        response = requests.post(url, json=payload)
        response.raise_for_status()
        print("Model configuration updated successfully.")
        return model_id


class OllamaInference:
    """Handles inference with Ollama"""
    def __init__(self, model_name: str, api_base: str = "http://localhost:11434"):
        self.model_name = model_name
        self.api_base = api_base

    def generate(
        self,
        prompt: str,
        system: str = "",
        temperature: float = 0.7,
        max_tokens: int = 500
    ) -> str:
        """Generate response using Ollama API"""
        url = f"{self.api_base}/api/generate"

        payload = {
            "model": self.model_name,
            "prompt": prompt,
            "system": system,
            "temperature": temperature,
            "max_tokens": max_tokens
        }

        response = requests.post(url, json=payload)
        response.raise_for_status()

        # Collect streaming response
        full_response = ""
        for line in response.iter_lines():
            if line:
                json_response = json.loads(line)
                full_response += json_response.get("response", "")

                if json_response.get("done", False):
                    break

        return full_response.strip()

    def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7
    ) -> str:
        """Chat with the model using message format"""
        url = f"{self.api_base}/api/chat"

        payload = {
            "model": self.model_name,
            "messages": messages,
            "temperature": temperature
        }

        response = requests.post(url, json=payload)
        response.raise_for_status()

        return response.json()["message"]["content"]


def main():
    # Configuration
    config = {
        "base_model_id": "microsoft/phi-2",  # or another suitable base model
        "output_dir": "./exported_model",
        "dataset_path": "path/to/your/dataset.json",
        "ollama_model_name": "my-custom-model"
    }

    # Initialize trainer
    trainer = ModelTrainer(
        base_model_id=config["base_model_id"],
        output_dir=config["output_dir"]
    )

    # Prepare dataset and train
    dataset = trainer.prepare_dataset(config["dataset_path"])
    trainer.train(dataset)

    # Export to Ollama
    trainer.export_to_ollama(config["ollama_model_name"])

    # Initialize inference
    inference = OllamaInference(config["ollama_model_name"])

    # Example usage
    system_prompt = "You are a helpful AI assistant."
    user_prompt = "What is the capital of France?"

    # Generation example
    response = inference.generate(
        prompt=user_prompt,
        system=system_prompt
    )
    print("Generated Response:", response)

    # Chat example
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    chat_response = inference.chat(messages)
    print("Chat Response:", chat_response)


if __name__ == "__main__":
    main()


# Example dataset format:
"""
{
    "data": [
        {
            "instruction": "Answer the following question",
            "input": "What is the capital of France?",
            "output": "The capital of France is Paris."
        }
    ]
}
"""

# Usage Guide:
"""
1. Start Ollama server:
   ollama serve

2. Run the script to train and export:
   python script.py

3. Run inference using either the Python API:
   inference = OllamaInference("my-custom-model")
   response = inference.generate("What is the capital of France?")

4. Or use CLI:
   ollama run my-custom-model "What is the capital of France?"
"""

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.7/164.7 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m49.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.5/209.5 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.4/318.4 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.8/109.8 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.7/16.7 MB[0m [31m56.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx