<a href="https://colab.research.google.com/github/Praneeth-18/Unsloth---Tuning-and-experimenting-with-LLMs/blob/main/unsloth_assignments_tuning_and_experimenting_with_llms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q transformers datasets accelerate peft bitsandbytes
!pip install -q torch

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.7/472.7 kB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25h

# **TinyLlama**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
from transformers import BitsAndBytesConfig
import torch

def train_tinyllama():
    # Setup quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        quantization_config=bnb_config,
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    tokenizer.pad_token = tokenizer.eos_token

    # Print available GPU memory
    print(f"GPU memory available: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

    # Load a very small dataset
    dataset = load_dataset("databricks/databricks-dolly-15k", split="train[:10]")

    # Print dataset example to debug
    print("\nDataset example:")
    print(dataset[0])

    # Format data with proper dictionary access
    def format_data(example):
        try:
            # Print example structure
            print("\nProcessing example:")
            print(example)

            return f"### Instruction: {example['instruction']}\n### Response: {example['response']}"
        except Exception as e:
            print(f"Error in format_data: {e}")
            print(f"Example structure: {example}")
            raise e

    # Process and tokenize dataset
    tokenized_dataset = dataset.map(
        lambda examples: {
            'input_ids': tokenizer(
                format_data(examples),
                truncation=True,
                max_length=512,
                padding="max_length",
                return_tensors=None
            )['input_ids'],
            'attention_mask': tokenizer(
                format_data(examples),
                truncation=True,
                max_length=512,
                padding="max_length",
                return_tensors=None
            )['attention_mask']
        }
    )

    # LoRA config
    config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Apply LoRA
    model = get_peft_model(model, config)

    # Training arguments
    training_args = TrainingArguments(
        output_dir="tinyllama_results",
        num_train_epochs=1,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        save_steps=5,
        logging_steps=5,
        learning_rate=2e-4,
        weight_decay=0.01,
        warmup_steps=2,
        logging_dir="./logs",
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    # Train
    trainer.train()

    # Save
    model.save_pretrained("tinyllama_adapter")

    return model, tokenizer

# Test function
def test_model(model, tokenizer, prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Clear GPU memory first
torch.cuda.empty_cache()

# Run training with error handling
try:
    print("Starting training...")
    model, tokenizer = train_tinyllama()

    print("\nTesting model...")
    test_prompt = "Explain what is machine learning in simple terms"
    result = test_model(model, tokenizer, test_prompt)
    print("\nTest Result:")
    print(result)

except Exception as e:
    print(f"An error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

Starting training...
GPU memory available: 15.84 GB

Dataset example:
{'instruction': 'When did Virgin Australia start operating?', 'context': "Virgin Australia, the trading name of Virgin Australia Airlines Pty Ltd, is an Australian-based airline. It is the largest airline by fleet size to use the Virgin brand. It commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route. It suddenly found itself as a major airline in Australia's domestic market after the collapse of Ansett Australia in September 2001. The airline has since grown to directly serve 32 cities in Australia, from hubs in Brisbane, Melbourne and Sydney.", 'response': 'Virgin Australia commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route.', 'category': 'closed_qa'}


Map:   0%|          | 0/10 [00:00<?, ? examples/s]


Processing example:
{'instruction': 'When did Virgin Australia start operating?', 'context': "Virgin Australia, the trading name of Virgin Australia Airlines Pty Ltd, is an Australian-based airline. It is the largest airline by fleet size to use the Virgin brand. It commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route. It suddenly found itself as a major airline in Australia's domestic market after the collapse of Ansett Australia in September 2001. The airline has since grown to directly serve 32 cities in Australia, from hubs in Brisbane, Melbourne and Sydney.", 'response': 'Virgin Australia commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route.', 'category': 'closed_qa'}

Processing example:
{'instruction': 'When did Virgin Australia start operating?', 'context': "Virgin Australia, the trading name of Virgin Australia Airlines Pty Ltd, is an Australian-based airline. It is the largest airline by fleet size to us

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: [32m[41mERROR[0m API key must be 40 characters long, yours was 7


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:


An error occurred: 


Traceback (most recent call last):
  File "<ipython-input-10-118a6cd8fc56>", line 121, in <cell line: 119>
    model, tokenizer = train_tinyllama()
  File "<ipython-input-10-118a6cd8fc56>", line 102, in train_tinyllama
    trainer.train()
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1938, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2202, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 460, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 507, in call_event
    result = getattr(callback, event)(
  File "/usr/local/lib/python3.10/dist-packages/transformers/integrations/integration_utils.py", line 900, in on_t

# **Phi**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
from transformers import BitsAndBytesConfig
import torch

def train_phi():
    print("Starting Phi training setup...")

    # Setup quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load model
    print("Loading Phi model...")
    model = AutoModelForCausalLM.from_pretrained(
        "microsoft/phi-1_5",
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(
        "microsoft/phi-1_5",
        trust_remote_code=True
    )
    tokenizer.pad_token = tokenizer.eos_token

    # Create simple dataset
    print("Creating dataset...")
    train_data = [
        {
            "instruction": "Write a Python function to add two numbers",
            "response": "def add_numbers(a, b):\n    return a + b"
        },
        {
            "instruction": "Create a function to check if a number is prime",
            "response": "def is_prime(n):\n    if n < 2:\n        return False\n    for i in range(2, int(n ** 0.5) + 1):\n        if n % i == 0:\n            return False\n    return True"
        },
        {
            "instruction": "Write a function to reverse a string",
            "response": "def reverse_string(s):\n    return s[::-1]"
        }
    ]

    dataset = Dataset.from_list(train_data)

    print("Dataset example:")
    print(dataset[0])

    # Format data
    def format_data(example):
        return f"### Instruction: {example['instruction']}\n### Response: {example['response']}"

    # Tokenize dataset
    print("Tokenizing dataset...")
    def tokenize_function(examples):
        formatted_text = format_data(examples)
        return tokenizer(
            formatted_text,
            truncation=True,
            max_length=512,
            padding="max_length",
            return_tensors=None
        )

    tokenized_dataset = dataset.map(
        lambda x: tokenizer(
            format_data(x),
            truncation=True,
            max_length=512,
            padding="max_length",
            return_tensors=None
        ),
        remove_columns=dataset.column_names
    )

    print("Setting up LoRA...")
    # LoRA configuration
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Apply LoRA
    model = get_peft_model(model, lora_config)

    print("Setting up training arguments...")
    # Training arguments
    training_args = TrainingArguments(
        output_dir="./phi_results",
        num_train_epochs=3,           # Train for a few epochs
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=1,              # Log every step
        save_steps=3,                 # Save every 3 steps
        max_steps=10                  # Only train for 10 steps
    )

    print("Initializing trainer...")
    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    print("Starting training...")
    # Train
    trainer.train()

    print("Saving model...")
    # Save the fine-tuned model
    model.save_pretrained("./phi_adapter")

    return model, tokenizer

# Test function
def test_phi(model, tokenizer, prompt):
    print(f"\nTesting with prompt: {prompt}")
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=200,
        temperature=0.7,
        num_return_sequences=1
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Clear GPU memory
print("Clearing GPU memory...")
torch.cuda.empty_cache()

# Main execution
try:
    print("\nStarting Phi training process...")
    model, tokenizer = train_phi()

    # Test the model
    print("\nTesting the model...")
    test_prompts = [
        "Write a Python function to calculate factorial",
        "Create a function to find the maximum number in a list"
    ]

    for prompt in test_prompts:
        result = test_phi(model, tokenizer, prompt)
        print(f"\nPrompt: {prompt}")
        print(f"Result: {result}")

    # Save to Drive
    print("\nSaving to Google Drive...")
    from google.colab import drive
    drive.mount('/content/drive')
    !cp -r ./phi_adapter '/content/drive/MyDrive/'

except Exception as e:
    print(f"\nAn error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

Clearing GPU memory...

Starting Phi training process...
Starting Phi training setup...
Loading Phi model...


config.json:   0%|          | 0.00/736 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.84G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/74.0 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Creating dataset...
Dataset example:
{'instruction': 'Write a Python function to add two numbers', 'response': 'def add_numbers(a, b):\n    return a + b'}
Tokenizing dataset...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Setting up LoRA...


max_steps is given, it will override any value given in num_train_epochs


Setting up training arguments...
Initializing trainer...
Starting training...


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:



An error occurred: 


Traceback (most recent call last):
  File "<ipython-input-11-8a5ea4b83429>", line 144, in <cell line: 142>
    model, tokenizer = train_phi()
  File "<ipython-input-11-8a5ea4b83429>", line 117, in train_phi
    trainer.train()
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1938, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2202, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 460, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 507, in call_event
    result = getattr(callback, event)(
  File "/usr/local/lib/python3.10/dist-packages/transformers/integrations/integration_utils.py", line 900, in on_train_begin
 

# **Gemma**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
from transformers import BitsAndBytesConfig
import torch

def train_gemma():
    print("Starting Gemma training setup...")

    # Setup quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load model
    print("Loading Gemma model...")
    model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2b",
        quantization_config=bnb_config,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
    tokenizer.pad_token = tokenizer.eos_token

    # Create simple QA dataset
    print("Creating dataset...")
    train_data = [
        {
            "question": "What is machine learning?",
            "answer": "Machine learning is a branch of artificial intelligence that enables computers to learn from data and improve their performance without being explicitly programmed."
        },
        {
            "question": "How does photosynthesis work?",
            "answer": "Photosynthesis is the process where plants convert sunlight, water, and carbon dioxide into glucose and oxygen, providing energy for the plant to grow."
        },
        {
            "question": "What causes earthquakes?",
            "answer": "Earthquakes are caused by the movement of tectonic plates beneath Earth's surface. When these plates move or collide, they release energy in the form of seismic waves."
        },
        {
            "question": "What is the theory of relativity?",
            "answer": "The theory of relativity, developed by Einstein, describes how space and time are related and how mass and energy are equivalent (E=mc²)."
        }
    ]

    dataset = Dataset.from_list(train_data)

    print("Dataset example:")
    print(dataset[0])

    # Format data
    def format_data(example):
        return f"<start_of_turn>user\nQuestion: {example['question']}<end_of_turn>\n<start_of_turn>model\nAnswer: {example['answer']}<end_of_turn>"

    # Tokenize dataset
    print("Tokenizing dataset...")
    tokenized_dataset = dataset.map(
        lambda x: tokenizer(
            format_data(x),
            truncation=True,
            max_length=512,
            padding="max_length",
            return_tensors=None
        ),
        remove_columns=dataset.column_names
    )

    print("Setting up LoRA...")
    # LoRA configuration
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Apply LoRA
    model = get_peft_model(model, lora_config)

    print("Setting up training arguments...")
    # Training arguments
    training_args = TrainingArguments(
        output_dir="./gemma_results",
        num_train_epochs=3,           # Train for a few epochs
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=1,              # Log every step
        save_steps=3,                 # Save every 3 steps
        max_steps=10                  # Only train for 10 steps
    )

    print("Initializing trainer...")
    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    print("Starting training...")
    # Train
    trainer.train()

    print("Saving model...")
    # Save the fine-tuned model
    model.save_pretrained("./gemma_adapter")

    return model, tokenizer

# Test function
def test_gemma(model, tokenizer, question):
    print(f"\nTesting with question: {question}")
    prompt = f"<start_of_turn>user\nQuestion: {question}<end_of_turn>\n<start_of_turn>model\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=200,
        temperature=0.7,
        num_return_sequences=1
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Clear GPU memory
print("Clearing GPU memory...")
torch.cuda.empty_cache()

# Main execution
try:
    print("\nStarting Gemma training process...")
    model, tokenizer = train_gemma()

    # Test the model
    print("\nTesting the model...")
    test_questions = [
        "What is the speed of light?",
        "How does the human brain work?",
        "What is quantum computing?"
    ]

    for question in test_questions:
        result = test_gemma(model, tokenizer, question)
        print(f"\nQuestion: {question}")
        print(f"Answer: {result}")

    # Save to Drive
    print("\nSaving to Google Drive...")
    from google.colab import drive
    drive.mount('/content/drive')
    !cp -r ./gemma_adapter '/content/drive/MyDrive/'

except Exception as e:
    print(f"\nAn error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

# Optional: Convert for Ollama
def convert_for_ollama():
    print("\nConverting for Ollama...")
    !git clone https://github.com/ggerganov/llama.cpp
    %cd llama.cpp
    !pip install -r requirements.txt
    !python convert-lora-to-ggml.py ../gemma_adapter/adapter_config.json

Clearing GPU memory...

Starting Gemma training process...
Starting Gemma training setup...
Loading Gemma model...


config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

Creating dataset...
Dataset example:
{'question': 'What is machine learning?', 'answer': 'Machine learning is a branch of artificial intelligence that enables computers to learn from data and improve their performance without being explicitly programmed.'}
Tokenizing dataset...


Map:   0%|          | 0/4 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


Setting up LoRA...
Setting up training arguments...
Initializing trainer...
Starting training...


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:



An error occurred: 


Traceback (most recent call last):
  File "<ipython-input-12-bb977126c7a6>", line 135, in <cell line: 133>
    model, tokenizer = train_gemma()
  File "<ipython-input-12-bb977126c7a6>", line 107, in train_gemma
    trainer.train()
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1938, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2202, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 460, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 507, in call_event
    result = getattr(callback, event)(
  File "/usr/local/lib/python3.10/dist-packages/transformers/integrations/integration_utils.py", line 900, in on_train_beg

# **Qwen-1.5**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
from transformers import BitsAndBytesConfig
import torch

def train_qwen():
    print("Starting Qwen training setup...")

    # Setup quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load model
    print("Loading Qwen model...")
    model = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen1.5-0.5B",
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(
        "Qwen/Qwen1.5-0.5B",
        trust_remote_code=True
    )
    tokenizer.pad_token = tokenizer.eos_token

    # Create simple dataset for summarization
    print("Creating dataset...")
    train_data = [
        {
            "text": "The sun is a massive ball of gas primarily composed of hydrogen and helium. It undergoes nuclear fusion in its core, releasing enormous amounts of energy in the form of light and heat. This energy travels through space and reaches Earth, providing the heat and light necessary for life.",
            "summary": "The sun is a giant gas sphere that produces energy through nuclear fusion, providing Earth with essential light and heat."
        },
        {
            "text": "Artificial Intelligence (AI) is a field of computer science focused on creating intelligent machines that can perform tasks typically requiring human intelligence. These tasks include visual perception, speech recognition, decision-making, and language translation. AI systems learn from experience using machine learning algorithms.",
            "summary": "AI is computer technology that mimics human intelligence, performing tasks like recognition, decision-making, and translation through learning algorithms."
        },
        {
            "text": "Climate change refers to long-term shifts in global weather patterns and average temperatures. It's primarily caused by human activities releasing greenhouse gases into the atmosphere. These gases trap heat, leading to global warming, rising sea levels, and extreme weather events.",
            "summary": "Climate change involves long-term weather and temperature changes, mainly caused by human-produced greenhouse gases leading to global warming."
        }
    ]

    dataset = Dataset.from_list(train_data)

    print("Dataset example:")
    print(dataset[0])

    # Format data
    def format_data(example):
        return f"<|im_start|>user\nText to summarize: {example['text']}<|im_end|>\n<|im_start|>assistant\nSummary: {example['summary']}<|im_end|>"

    # Tokenize dataset
    print("Tokenizing dataset...")
    tokenized_dataset = dataset.map(
        lambda x: tokenizer(
            format_data(x),
            truncation=True,
            max_length=512,
            padding="max_length",
            return_tensors=None
        ),
        remove_columns=dataset.column_names
    )

    print("Setting up LoRA...")
    # LoRA configuration
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Apply LoRA
    model = get_peft_model(model, lora_config)

    print("Setting up training arguments...")
    # Training arguments
    training_args = TrainingArguments(
        output_dir="./qwen_results",
        num_train_epochs=3,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=1,
        save_steps=3,
        max_steps=10
    )

    print("Initializing trainer...")
    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    print("Starting training...")
    # Train
    trainer.train()

    print("Saving model...")
    # Save the fine-tuned model
    model.save_pretrained("./qwen_adapter")

    return model, tokenizer

# Test function
def test_qwen(model, tokenizer, text):
    print(f"\nTesting with text: {text[:100]}...")  # Print first 100 chars
    prompt = f"<|im_start|>user\nText to summarize: {text}<|im_end|>\n<|im_start|>assistant\nSummary:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=200,
        temperature=0.7,
        num_return_sequences=1
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Clear GPU memory
print("Clearing GPU memory...")
torch.cuda.empty_cache()

# Main execution
try:
    print("\nStarting Qwen training process...")
    model, tokenizer = train_qwen()

    # Test the model
    print("\nTesting the model...")
    test_texts = [
        "Neural networks are computational systems inspired by biological neural networks in human brains. They consist of interconnected nodes that process and transmit information, allowing the system to learn patterns and make predictions.",
        "The water cycle, also known as the hydrologic cycle, describes the continuous movement of water on Earth. Water evaporates from surfaces, forms clouds, falls as precipitation, and flows through rivers back to the oceans."
    ]

    for text in test_texts:
        result = test_qwen(model, tokenizer, text)
        print(f"\nOriginal Text: {text[:100]}...")
        print(f"Generated Summary: {result}")

    # Save to Drive
    print("\nSaving to Google Drive...")
    from google.colab import drive
    drive.mount('/content/drive')
    !cp -r ./qwen_adapter '/content/drive/MyDrive/'

except Exception as e:
    print(f"\nAn error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

# Optional: Convert for Ollama
def convert_for_ollama():
    print("\nConverting for Ollama...")
    !git clone https://github.com/ggerganov/llama.cpp
    %cd llama.cpp
    !pip install -r requirements.txt
    !python convert-lora-to-ggml.py ../qwen_adapter/adapter_config.json

Clearing GPU memory...

Starting Qwen training process...
Starting Qwen training setup...
Loading Qwen model...


config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Creating dataset...
Dataset example:
{'text': 'The sun is a massive ball of gas primarily composed of hydrogen and helium. It undergoes nuclear fusion in its core, releasing enormous amounts of energy in the form of light and heat. This energy travels through space and reaches Earth, providing the heat and light necessary for life.', 'summary': 'The sun is a giant gas sphere that produces energy through nuclear fusion, providing Earth with essential light and heat.'}
Tokenizing dataset...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Setting up LoRA...


max_steps is given, it will override any value given in num_train_epochs


Setting up training arguments...
Initializing trainer...
Starting training...


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:



An error occurred: 


Traceback (most recent call last):
  File "<ipython-input-13-7c3b3ffeebf2>", line 135, in <cell line: 133>
    model, tokenizer = train_qwen()
  File "<ipython-input-13-7c3b3ffeebf2>", line 107, in train_qwen
    trainer.train()
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1938, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2202, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 460, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py", line 507, in call_event
    result = getattr(callback, event)(
  File "/usr/local/lib/python3.10/dist-packages/transformers/integrations/integration_utils.py", line 900, in on_train_begin

# **Mistral-7B-Instruct-v0.2**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
from transformers import BitsAndBytesConfig
import torch

def train_mistral():
    print("Starting Mistral training setup...")

    # Setup quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load model
    print("Loading Mistral model...")
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-v0.1",
        quantization_config=bnb_config,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
    tokenizer.pad_token = tokenizer.eos_token

    # Create simple instruction dataset
    print("Creating dataset...")
    train_data = [
        {
            "instruction": "Explain quantum computing",
            "response": "Quantum computing uses quantum mechanics principles like superposition and entanglement to perform computations. Unlike classical computers that use bits (0 or 1), quantum computers use quantum bits or qubits that can exist in multiple states simultaneously, potentially solving complex problems much faster."
        },
        {
            "instruction": "What is machine learning?",
            "response": "Machine learning is a branch of artificial intelligence where computer systems learn and improve from experience without explicit programming. They identify patterns in data to make predictions and decisions, becoming more accurate over time."
        },
        {
            "instruction": "How does blockchain work?",
            "response": "Blockchain is a decentralized digital ledger that records transactions across a network of computers. Each block contains transaction data and links to the previous block, creating a chain. The system is secure because changes require consensus from the network, making it difficult to alter records."
        }
    ]

    dataset = Dataset.from_list(train_data)

    print("Dataset example:")
    print(dataset[0])

    # Format data with Mistral chat template
    def format_data(example):
        return f"<s>[INST] {example['instruction']} [/INST] {example['response']}</s>"

    # Tokenize dataset
    print("Tokenizing dataset...")
    tokenized_dataset = dataset.map(
        lambda x: tokenizer(
            format_data(x),
            truncation=True,
            max_length=512,
            padding="max_length",
            return_tensors=None
        ),
        remove_columns=dataset.column_names
    )

    print("Setting up LoRA...")
    # LoRA configuration
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj"
        ],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Apply LoRA
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()  # Print trainable parameters

    print("Setting up training arguments...")
    # Training arguments
    training_args = TrainingArguments(
        output_dir="./mistral_results",
        num_train_epochs=3,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=1,
        save_steps=3,
        max_steps=10,
        fp16=True,  # Use mixed precision
        optim="paged_adamw_8bit"  # Use 8-bit optimizer
    )

    print("Initializing trainer...")
    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    print("Starting training...")
    # Train
    trainer.train()

    print("Saving model...")
    # Save the fine-tuned model
    model.save_pretrained("./mistral_adapter")

    return model, tokenizer

# Test function
def test_mistral(model, tokenizer, instruction):
    print(f"\nTesting with instruction: {instruction}")
    prompt = f"<s>[INST] {instruction} [/INST]"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=200,
        temperature=0.7,
        num_return_sequences=1,
        do_sample=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Memory management function
def clear_memory():
    import gc
    gc.collect()
    torch.cuda.empty_cache()
    if torch.cuda.is_available():
        print(f"GPU Memory available: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# Main execution
try:
    print("\nClearing memory...")
    clear_memory()

    print("\nStarting Mistral training process...")
    model, tokenizer = train_mistral()

    # Test the model
    print("\nTesting the model...")
    test_instructions = [
        "Explain how solar panels work",
        "What is the theory of evolution?",
        "How does the internet work?"
    ]

    for instruction in test_instructions:
        result = test_mistral(model, tokenizer, instruction)
        print(f"\nInstruction: {instruction}")
        print(f"Response: {result}")

    # Save to Drive
    print("\nSaving to Google Drive...")
    from google.colab import drive
    drive.mount('/content/drive')
    !cp -r ./mistral_adapter '/content/drive/MyDrive/'

except Exception as e:
    print(f"\nAn error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

# Convert for Ollama
def convert_for_ollama():
    !git clone https://github.com/ggerganov/llama.cpp
    %cd llama.cpp
    !pip install -r requirements.txt
    !python convert-lora-to-ggml.py ../mistral_adapter/adapter_config.json

    # Create Modelfile with proper string formatting
    modelfile_content = '''
FROM mistral:latest
ADAPTER ./ggml-adapter-model.bin
TEMPLATE """<s>[INST] {{ .Prompt }} [/INST]"""
PARAMETER stop "[INST]"
PARAMETER stop "</s>"
'''

    with open("Modelfile", "w") as f:
        f.write(modelfile_content)

    print("Created Modelfile for Ollama")


Clearing memory...
GPU Memory available: 15.84 GB

Starting Mistral training process...
Starting Mistral training setup...
Loading Mistral model...

An error occurred: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-v0.1.
403 Client Error. (Request ID: Root=1-67229ce1-08c0444f27bd206e0e06af16;283224c8-c909-4d2c-9460-5e77c7cca34e)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json.
Access to model mistralai/Mistral-7B-v0.1 is restricted and you are not in the authorized list. Visit https://huggingface.co/mistralai/Mistral-7B-v0.1 to ask for access.


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
    response.raise_for_status()
  File "/usr/local/lib/python3.10/dist-packages/requests/models.py", line 1024, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 403 Client Error: Forbidden for url: https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py", line 402, in cached_file
    resolved_file = hf_hub_download(
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_deprecation.py", line 101, in inner_f
    return f(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
    return fn(*args, **kwargs)


# **b.) Continued pretraining**

In [None]:
!pip install unsloth

Collecting unsloth
  Downloading unsloth-2024.10.7-py3-none-any.whl.metadata (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth-zoo (from unsloth)
  Downloading unsloth_zoo-2024.10.5-py3-none-any.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.28.post2-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting triton>=3.0.0 (from unsloth)
  Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.8.14-py3-none-any.whl.metadata (8.4 kB)
Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,<=0.11.1,>=0.7.9 (from unsloth)
  Downloading trl-0.11.1-py3-none-any.whl.metadata (12 kB)
Collecting hf-transfer (from unsloth)
  Downloa

In [None]:
from huggingface_hub import login
from getpass import getpass

# Prompt for Hugging Face token securely
hf_token = getpass("Please enter your Hugging Face token: ")

# Authenticate with Hugging Face
login(token=hf_token)

# Now load your model
from unsloth import FastLanguageModel

model_name = "mistralai/Mistral-7B"  # Replace with the model name

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,
    dtype="float16",
    load_in_4bit=True,
    token=hf_token  # Pass token for private model access
)


Please enter your Hugging Face token: ··········
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


RuntimeError: mistralai/Mistral-7B is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [None]:
from unsloth import FastLanguageModel

# Define your model name (e.g., a smaller Hugging Face model for Colab)
model_name = "mistralai/Mistral-7B"  # Replace with the desired model

# Load the model and tokenizer with settings optimized for Colab
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,
    dtype="float16",  # Helps reduce memory usage
    load_in_4bit=True,  # Further reduces memory load
    token=os.environ["HUGGINGFACE_TOKEN"]  # Fetches token from environment
)


NameError: name 'os' is not defined