print("🔧 Installing packages (Colab-safe versions)...")
import subprocess
import sys
import os

# First, ensure we have clean installations.
# Removal is best-effort: pip's exit status is ignored (check=False), so a
# package that is simply not installed does not stop the cell.
packages_to_uninstall = ["bitsandbytes", "triton"]
for pkg in packages_to_uninstall:
    try:
        subprocess.run([sys.executable, "-m", "pip", "uninstall", pkg, "-y"],
                      capture_output=True, check=False)
    except Exception as e:
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit still interrupt
        # the cell, and the failure is reported instead of silently dropped.
        print(f"⚠️ Could not uninstall {pkg}: {e}")

# Install core packages without problematic dependencies.
# Each requirement is installed by its own pip invocation so that one failing
# package is reported without aborting the remaining installs.
core_packages = [
    "transformers>=4.47.0",
    "torch",
    "datasets",
    "peft>=0.8.0",
    "trl>=0.7.0",
    "accelerate",
    "huggingface_hub",
]

for package in core_packages:
    install_cmd = [sys.executable, "-m", "pip", "install", package]
    try:
        result = subprocess.run(install_cmd, capture_output=True, text=True, timeout=300)
        # A non-zero exit status is reported together with pip's stderr
        if result.returncode != 0:
            print(f"⚠️ {package} had issues: {result.stderr}")
        else:
            print(f"✅ {package} installed")
    except Exception as e:
        # Covers the 300 s timeout as well as failures to launch pip at all
        print(f"⚠️ {package} installation failed: {e}")

# Import the libraries every later step depends on.
print("📦 Importing libraries...")
try:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
    from datasets import Dataset
    from peft import LoraConfig, get_peft_model
    from huggingface_hub import login
    print("✅ Core imports successful")
except ImportError as e:
    print(f"❌ Import error: {e}")
    # There is no alternative import path: everything below uses these names,
    # so stop here instead of failing later with an unrelated NameError.
    raise

# Try importing TRL with fallbacks.
# TRL_AVAILABLE records whether SFTTrainer can be used further down.
try:
    from trl import SFTTrainer
except ImportError:
    TRL_AVAILABLE = False
    print("⚠️ TRL SFTTrainer not available, will use alternative")
else:
    TRL_AVAILABLE = True
    print("✅ TRL SFTTrainer available")

print(f"🖥️ PyTorch: {torch.__version__}")
print(f"🚀 CUDA available: {torch.cuda.is_available()}")

# Set device and memory management: default to the CPU, switch to the GPU
# (reporting its memory and clearing the allocator cache) when CUDA is present.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
    print(f"💾 GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    torch.cuda.empty_cache()

# Login to Hugging Face.
# The access token is taken from the HF_TOKEN environment variable (or typed
# in interactively) instead of being committed in the notebook source.
# NOTE(review): the token that used to be hard-coded here has been exposed and
# should be revoked in the Hugging Face account settings.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    from getpass import getpass
    HF_TOKEN = getpass("Hugging Face token: ").strip()
if not HF_TOKEN:
    raise RuntimeError("No Hugging Face token provided; set the HF_TOKEN environment variable.")
# Export it so libraries that read HF_TOKEN from the environment see it too
os.environ["HF_TOKEN"] = HF_TOKEN
login(token=HF_TOKEN)
print("✅ Logged in to Hugging Face")

# Model loading with better error handling.
# Three tiers, each tried only if the previous one raised:
#   1. SmolLM3-3B loaded with low_cpu_mem_usage=True (fp16 on CUDA, fp32 on CPU)
#   2. SmolLM3-3B loaded in fp32 with low_cpu_mem_usage=False, then cast
#   3. SmolLM2-1.7B in fp32 (outer except)
# On exit `model_name`, `tokenizer` and `model` describe whichever tier succeeded.
model_name = "HuggingFaceTB/SmolLM3-3B"
print(f"🔄 Loading {model_name}...")

try:
    # Load tokenizer first
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        use_fast=True
    )

    # Fix tokenizer - CRITICAL: Use a different pad token
    # (only applied when the tokenizer does not already define one)
    if tokenizer.pad_token is None:
        # Use a special token for padding instead of EOS
        tokenizer.add_special_tokens({'pad_token': '<|pad|>'})

    print("✅ Tokenizer loaded")

    # Load model with conservative settings
    print("📦 Loading model (this may take 5-10 minutes)...")

    try:
        # First attempt: Load normally
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            device_map=None,  # Don't use device_map to avoid meta tensor issues
            use_cache=False  # Disable caching for training
        )

        # Manually move to device after loading
        if torch.cuda.is_available():
            model = model.to(device)

    except Exception as e:
        # Any failure of the first attempt (including the .to(device) move)
        # lands here and triggers the second loading strategy.
        print(f"⚠️ Standard loading failed: {e}")
        print("🔄 Trying alternative loading method...")

        # Alternative: Load without low_cpu_mem_usage
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # Use float32 for compatibility
            trust_remote_code=True,
            low_cpu_mem_usage=False,
            use_cache=False
        )

        # Move to device and convert to appropriate dtype
        if torch.cuda.is_available():
            model = model.to(device)
            # Cast to half precision only when the GPU's major compute capability is >= 7
            if torch.cuda.get_device_capability()[0] >= 7:  # Check if supports fp16
                model = model.half()

    # Resize token embeddings if we added new tokens
    # (i.e. the tokenizer now holds more entries than the model's configured vocabulary)
    if len(tokenizer) > model.config.vocab_size:
        model.resize_token_embeddings(len(tokenizer))

    # Ensure model is on the correct device after resizing
    if torch.cuda.is_available():
        model = model.to(device)

    print("✅ Model loaded successfully")
    print(f"📊 Model size: {sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters")
    # Device and dtype are read from the first parameter only
    print(f"🎯 Model device: {next(model.parameters()).device}")
    print(f"📊 Model dtype: {next(model.parameters()).dtype}")

except Exception as e:
    # Reached when the tokenizer load, both model-loading attempts, or the
    # post-processing above failed. Errors raised below are NOT caught.
    print(f"❌ Model loading failed: {e}")
    print("🔄 Trying with smaller fallback model...")

    # Fallback to smaller model
    # NOTE: model_name is rebound, so later references use the fallback name
    model_name = "HuggingFaceTB/SmolLM2-1.7B"
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.add_special_tokens({'pad_token': '<|pad|>'})

    # The fallback is always loaded in float32 and is not cast afterwards
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,
        trust_remote_code=True,
        low_cpu_mem_usage=False,
        use_cache=False
    )

    if len(tokenizer) > model.config.vocab_size:
        model.resize_token_embeddings(len(tokenizer))

    if torch.cuda.is_available():
        model = model.to(device)

# Create training dataset with proper formatting
print("📚 Preparing training data...")

# Seed exchanges as (user prompt, assistant reply) pairs; they are expanded
# below into the role/content message format used by the rest of the script.
_seed_exchanges = [
    # Greetings and introductions
    ("Hello",
     "Hello! How can I help you today?"),
    ("Hi there",
     "Hi! I'm here to assist you. What would you like to know?"),
    ("Good morning",
     "Good morning! I hope you're having a great day. How can I help?"),

    # Questions about capabilities
    ("What can you do?",
     "I can help with questions, provide explanations, assist with writing, and have conversations on various topics. What would you like help with?"),
    ("Tell me about yourself",
     "I'm an AI assistant designed to be helpful, informative, and engaging. I enjoy helping people with their questions and tasks."),

    # Helpful responses
    ("I need help with something",
     "I'd be happy to help! Could you tell me more about what you need assistance with?"),
    ("Can you explain this?",
     "Of course! I'd be glad to explain. What specifically would you like me to clarify?"),

    # Polite interactions
    ("Thank you",
     "You're very welcome! I'm glad I could help. Is there anything else I can assist you with?"),
    ("Thanks for your help",
     "My pleasure! I'm always here if you need more assistance."),

    # Knowledge queries
    ("Tell me a fun fact",
     "Here's a fun fact: Octopuses have three hearts and blue blood! Two hearts pump blood to their gills, while the third pumps blood to the rest of their body."),
    ("How are you?",
     "I'm doing well, thank you for asking! I'm ready to help you with whatever you need."),
]

# Better training examples with clear conversation boundaries:
# one two-message conversation (user turn, assistant turn) per seed exchange.
training_conversations = [
    [{"role": "user", "content": prompt},
     {"role": "assistant", "content": reply}]
    for prompt, reply in _seed_exchanges
]

# Create expanded dataset with better formatting
def create_training_text(conversation):
    """Render a conversation as plain text for training.

    Each message becomes one line: ``User: <content>`` when its role is
    ``"user"`` and ``Assistant: <content>`` for any other role. The literal
    ``<|endoftext|>`` marker is appended after the last line.
    """
    rendered_turns = [
        f"{'User' if turn['role'] == 'user' else 'Assistant'}: {turn['content']}\n"
        for turn in conversation
    ]
    return "".join(rendered_turns) + "<|endoftext|>"  # Clear end marker

# Create training data: one {"text": ...} record per seed conversation
training_data = [
    {"text": create_training_text(conversation)}
    for conversation in training_conversations
]

# Multiply dataset for more training examples (the same records repeated)
training_data *= 20  # 220 examples total

dataset = Dataset.from_list(training_data)

# Filter out any problematic samples: keep texts strictly between 10 and 1000 characters
dataset = dataset.filter(lambda sample: 10 < len(sample["text"]) < 1000)

print(f"✅ Dataset ready: {len(dataset)} training samples")
print(f"📝 Sample text: {dataset[0]['text'][:100]}...")

# Configure LoRA for memory efficiency
print("⚙️ Setting up LoRA...")

# More conservative LoRA config: small rank, adapters on the attention projections only
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]  # Standard attention modules
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=attention_projections,
    r=4,  # Smaller rank for stability
    lora_alpha=8,
    lora_dropout=0.1,
    bias="none",
)

# Apply LoRA, then report how many parameters remain trainable
model = get_peft_model(model, lora_config)
param_counts = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(count for count, _ in param_counts)
trainable_params = sum(count for count, is_trainable in param_counts if is_trainable)
print(f"📊 Trainable parameters: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")

# Training configuration with better settings
print("🏋️ Configuring training...")

# Mixed-precision fp16 is enabled only on CUDA GPUs with major compute capability >= 7
use_fp16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 7

training_args = TrainingArguments(
    # --- Output / checkpointing ---
    output_dir="./smollm3_fixed",
    save_strategy="steps",
    save_steps=25,
    push_to_hub=False,
    report_to="none",
    logging_steps=5,
    # --- Schedule ---
    # NOTE(review): both an epoch count and a step cap are set; per the
    # TrainingArguments docs a positive max_steps takes precedence — confirm intent.
    num_train_epochs=2,
    max_steps=50,  # Fewer steps for testing
    warmup_steps=5,
    lr_scheduler_type="cosine",
    # --- Optimisation ---
    optim="adamw_torch",
    learning_rate=5e-5,  # Lower learning rate for stability
    weight_decay=0.01,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    # --- Precision / memory ---
    fp16=use_fp16,
    bf16=False,
    gradient_checkpointing=False,  # Disable to avoid caching issues
    # --- Data loading ---
    dataloader_pin_memory=False,
    remove_unused_columns=False,
)

# Create trainer with improved error handling.
# If SFTTrainer cannot be constructed, TRL_AVAILABLE is cleared so the
# standard-Trainer path below takes over.
if TRL_AVAILABLE:
    print("🚀 Using SFTTrainer...")
    try:
        from trl import SFTTrainer

        # Custom formatting function for better control
        def formatting_prompts_func(examples):
            """Return the batch's texts, appending the end marker where it is missing."""
            end_marker = "<|endoftext|>"
            return [
                sample if sample.endswith(end_marker) else sample + end_marker
                for sample in examples["text"]
            ]

        trainer = SFTTrainer(
            model=model,
            args=training_args,
            train_dataset=dataset,
            tokenizer=tokenizer,
            formatting_func=formatting_prompts_func,
            packing=False,
            max_seq_length=128,  # Shorter sequences
            dataset_num_proc=1,  # Single process to avoid issues
        )
        print("✅ SFTTrainer initialized")
    except Exception as e:
        print(f"⚠️ SFTTrainer failed: {e}")
        TRL_AVAILABLE = False

if not TRL_AVAILABLE:
    print("🔄 Using standard Trainer...")
    from transformers import Trainer, DataCollatorForLanguageModeling

    # Tokenize dataset properly - fix the batching issue
    def tokenize_function(examples):
        """Tokenize a batch of texts one by one, without padding.

        Returns ``input_ids`` plus ``labels`` that are independent copies of
        the ids (labels equal inputs for causal language modelling).
        Sequences are truncated to 128 tokens.
        """
        # Each text is encoded on its own, so sequences keep their natural lengths
        encoded_ids = [
            tokenizer(
                text,
                truncation=True,
                max_length=128,
                padding=False,
                add_special_tokens=True
            )["input_ids"]
            for text in examples["text"]
        ]
        return {
            "input_ids": encoded_ids,
            "labels": [ids.copy() for ids in encoded_ids],
        }

    # Replace the raw text column with the tokenized columns
    tokenized_dataset = dataset.map(
        tokenize_function,
        batched=True,
        batch_size=1000,  # Process in smaller batches
        remove_columns=dataset.column_names,
    )

    # Custom data collator that handles variable lengths properly
    def custom_data_collator(features):
        """Pad a list of tokenized examples into one batch of tensors.

        Args:
            features: list of dicts, each with ``input_ids`` and ``labels``
                lists of token ids.

        Returns:
            Dict with ``input_ids``, ``labels`` and ``attention_mask`` as
            ``torch.long`` tensors. Every row is padded to the longest
            sequence in the batch, capped at 128 tokens.

        The incoming feature lists are never modified: padded rows are built
        as new lists, so collating the same examples again cannot accumulate
        padding from an earlier batch.
        """
        # Batch width: longest sequence in the batch, capped at 128
        max_length = min(max(len(f["input_ids"]) for f in features), 128)
        pad_id = tokenizer.pad_token_id

        padded_input_ids = []
        padded_labels = []
        attention_masks = []

        for feature in features:
            # Slicing both truncates over-long rows and copies the data
            ids = list(feature["input_ids"][:max_length])
            lbls = list(feature["labels"][:max_length])
            pad_length = max_length - len(ids)

            padded_input_ids.append(ids + [pad_id] * pad_length)
            # -100 is ignored in loss computation
            padded_labels.append(lbls + [-100] * pad_length)
            # 1 marks real tokens, 0 marks padding
            attention_masks.append([1] * len(ids) + [0] * pad_length)

        return {
            "input_ids": torch.tensor(padded_input_ids, dtype=torch.long),
            "labels": torch.tensor(padded_labels, dtype=torch.long),
            "attention_mask": torch.tensor(attention_masks, dtype=torch.long)
        }

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=custom_data_collator,
    )

# Start training with better error handling
print("🚀 Starting training...")
try:
    # Make sure gradient checkpointing is off before training.
    # The guard checks for the method that is actually called.
    if hasattr(model, 'gradient_checkpointing_disable'):
        model.gradient_checkpointing_disable()

    trainer.train()
    print("✅ Training completed successfully!")

except Exception as e:
    print(f"❌ Training failed: {e}")
    print("💡 This might be due to memory issues or model compatibility")
    import traceback
    traceback.print_exc()
    # Stop here: carrying on would save and upload weights that were never
    # (fully) trained while reporting the run as finished.
    raise

# Save model
print("💾 Saving model...")
try:
    # Local save first, so a failure here is not misreported as an upload problem
    trainer.save_model()
    tokenizer.save_pretrained("./smollm3_fixed")
except Exception as e:
    print(f"⚠️ Local save error: {e}")
else:
    try:
        # Upload to HuggingFace.
        # `token=` replaces the deprecated `use_auth_token=` keyword.
        model.push_to_hub(
            "soupstick/smollm3-fixed",
            private=True,
            token=HF_TOKEN
        )
        tokenizer.push_to_hub(
            "soupstick/smollm3-fixed",
            private=True,
            token=HF_TOKEN
        )
        print("✅ Model saved and uploaded!")

    except Exception as e:
        # Only reached after the local save succeeded, so this message is accurate
        print(f"⚠️ Upload error: {e}")
        print("Model saved locally in './smollm3_fixed'")

# Test the model with better generation settings
print("🧪 Testing the fine-tuned model...")
try:
    # Ensure model is in eval mode
    model.eval()

    test_prompts = [
        "User: Hello\nAssistant:",
        "User: How are you?\nAssistant:",
        "User: What can you do?\nAssistant:"
    ]

    for test_prompt in test_prompts:
        print(f"\n🔤 Testing: {test_prompt}")

        # Tokenize with an attention mask so generate() does not have to guess it
        encoded = tokenizer(test_prompt, return_tensors="pt").to(device)
        prompt_length = encoded["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = model.generate(
                input_ids=encoded["input_ids"],
                attention_mask=encoded["attention_mask"],
                max_new_tokens=30,
                do_sample=True,
                temperature=0.8,
                top_p=0.9,
                top_k=50,
                repetition_penalty=1.1,  # Prevent repetition
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                use_cache=True  # Enable cache for generation
            )

        # Decode only the newly generated tokens. Cutting the decoded string by
        # len(test_prompt) is unreliable because decoding the prompt tokens is
        # not guaranteed to reproduce the prompt character for character.
        new_text = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()

        print(f"🤖 Response: {new_text}")

except Exception as e:
    print(f"⚠️ Testing failed: {e}")
    import traceback
    traceback.print_exc()

print("\n🎉 Fixed fine-tuning completed!")
print(f"📍 Model: {model_name}")
print("🔗 HuggingFace: soupstick/smollm3-fixed")
print("🚀 Ready for your RAG chatbot!")

# Clean up memory.
# Run the garbage collector first: empty_cache() can only return cached GPU
# blocks that are no longer occupied, so unreachable tensors must be collected
# before the cache is emptied.
import gc
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
print("🧹 Memory cleaned up")

# Fine Tuning V2

In [1]:
print("🔧 Installing packages (Colab-safe versions)...")
import subprocess
import sys
import os

# Uninstall problematic packages cleanly if present.
# Best-effort: pip's exit status is ignored and launch errors are suppressed.
packages_to_uninstall = ["triton"]
for pkg in packages_to_uninstall:
    uninstall_cmd = [sys.executable, "-m", "pip", "uninstall", pkg, "-y"]
    try:
        subprocess.run(uninstall_cmd, capture_output=True, check=False)
    except Exception:
        pass

# Core packages with specific version constraints for compatibility
core_packages = [
    # Training stack
    "torch>=2.1.0",             # stable torch version with CUDA support
    "transformers>=4.53.0",     # supports newer LLMs like phi-3, smolLM
    "datasets>=2.18.0",         # stable for Dataset()
    "peft>=0.8.0",              # LoRA and parameter-efficient tuning
    "trl>=0.7.0",               # for SFTTrainer
    "accelerate>=0.25.0",       # required for trainer
    "huggingface_hub>=0.21.4",  # for model upload
    # Data collection helpers
    "feedparser>=6.0.11",       # for RSS parsing
    "ddgs>=1.0.5",              # updated DuckDuckGo search wrapper
    "requests",                 # used in RSS/HTTP fetches
    # Quantization
    "bitsandbytes>=0.41.3"
]

# Install one requirement per pip call and report each outcome separately
for package in core_packages:
    pip_cmd = [sys.executable, "-m", "pip", "install", package]
    try:
        result = subprocess.run(pip_cmd, capture_output=True, text=True, timeout=300)
        status_line = (
            f"✅ {package} installed"
            if result.returncode == 0
            else f"⚠️ {package} had issues: {result.stderr}"
        )
        print(status_line)
    except Exception as e:
        # Covers the 300 s timeout as well as failures to launch pip
        print(f"❌ {package} installation failed: {e}")

# Import core libraries
print("📦 Importing libraries...")
try:
    import torch
    import requests
    import feedparser
    from ddgs import DDGS
    from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
    from datasets import Dataset
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    from huggingface_hub import login
    print("✅ Core imports successful")
except ImportError as e:
    print(f"❌ Import error: {e}")
    # The following statements use these names unconditionally, so stop here
    # instead of failing later with an unrelated NameError.
    raise

# Import TRL trainer with fallback flag.
# TRL_AVAILABLE records whether SFTTrainer could be imported.
try:
    from trl import SFTTrainer
except ImportError:
    TRL_AVAILABLE = False
    print("⚠️ TRL SFTTrainer not available, will use alternative")
else:
    TRL_AVAILABLE = True
    print("✅ TRL SFTTrainer available")

# Environment info: torch version, CUDA availability and (if present) GPU 0 memory
cuda_ready = torch.cuda.is_available()
print(f"🖥️ PyTorch: {torch.__version__}")
print(f"🚀 CUDA available: {cuda_ready}")
if cuda_ready:
    print(f"💾 GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

🔧 Installing packages (Colab-safe versions)...
✅ torch>=2.1.0 installed
✅ transformers>=4.53.0 installed
✅ datasets>=2.18.0 installed
✅ peft>=0.8.0 installed
✅ trl>=0.7.0 installed
✅ accelerate>=0.25.0 installed
✅ huggingface_hub>=0.21.4 installed
✅ feedparser>=6.0.11 installed
✅ ddgs>=1.0.5 installed
✅ requests installed
✅ bitsandbytes>=0.41.3 installed
📦 Importing libraries...


2025-07-21 12:05:39.197793: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753099539.563227      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753099539.672742      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


✅ Core imports successful
✅ TRL SFTTrainer available
🖥️ PyTorch: 2.6.0+cu124
🚀 CUDA available: True
💾 GPU memory: 15.8 GB


In [2]:
import time
from datetime import datetime
from typing import List, Dict
import os
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
from kaggle_secrets import UserSecretsClient
from huggingface_hub import login
import torch
from trl import SFTTrainer

# Login to Hugging Face with the token stored in Kaggle secrets.
# NOTE(review): a token used to be hard-coded here; it has been exposed and
# should be revoked in the Hugging Face account settings.
secret_label = "HF_TOKEN"  # Make sure this label matches your secret name in Kaggle
secret_value = UserSecretsClient().get_secret(secret_label)

# Export the token as well: the upload cell reads it back via os.getenv("HF_TOKEN")
os.environ["HF_TOKEN"] = secret_value
login(token=secret_value)

In [6]:
# --- Config ---
MODEL_NAME = "HuggingFaceTB/SmolLM3-3B"
BATCH_SIZE = 4
LR = 2e-5
EPOCHS = 2
USE_4BIT = True
MAX_SAMPLES = 1000  # Total samples for quick fine-tuning
CUTOFF_DATE = datetime(2025, 7, 20)
OFFLOAD_DIR = "/content/offload"

# --- Tokenizer & Model ---
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the end-of-sequence token for padding
tokenizer.pad_token = tokenizer.eos_token

# --- Quantization Config ---
# Build a real BitsAndBytesConfig (it was imported but a plain dict was passed),
# and pass no quantization config at all when 4-bit loading is switched off.
from transformers import BitsAndBytesConfig
bnb_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
    )
    if USE_4BIT
    else None
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16,
    offload_folder=OFFLOAD_DIR,
    quantization_config=bnb_config,
    trust_remote_code=True
)

# --- QLoRA ---
# Prepare the loaded model for k-bit training, then attach LoRA adapters to
# the attention and MLP projection layers listed below.
model = prepare_model_for_kbit_training(model)

lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
    "gate_proj", "up_proj", "down_proj",     # MLP projections
]
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)
model = get_peft_model(model, lora_config)

# --- Dataset Preparation ---
dataset = load_dataset("mlabonne/guanaco-llama2-1k", split="train")

# --- Training Setup ---
# Checkpoints (one per epoch) and the final model share the same directory.
output_dir = "./smollm3-qlora-ft"
training_args = TrainingArguments(
    output_dir=output_dir,
    save_strategy="epoch",
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    learning_rate=LR,
    # NOTE(review): training runs in bf16 while the model is loaded with
    # torch.float16 and a float16 4-bit compute dtype — confirm this mix is intended.
    bf16=True,
    logging_steps=10,
    report_to="none",
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)

# --- Fine-Tune ---
trainer.train()
trainer.save_model(output_dir)
print("✅ Fine-tuning complete.")

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/289 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/182 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001-9ad84bb9cf65a4(…):   0%|          | 0.00/967k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
  return fn(*args, **kwargs)


Step,Training Loss
10,1.5
20,1.4619
30,1.4468
40,1.3503
50,1.4651
60,1.3775
70,1.3386
80,1.4542
90,1.6007
100,1.274


  return fn(*args, **kwargs)


✅ Fine-tuning complete.


In [8]:
# Upload the fine-tuned model and its tokenizer to a private Hub repository,
# authenticating with the token found in the HF_TOKEN environment variable.
repo_id = "soupstick/smollm3-qlora-ft"
hub_token = os.getenv("HF_TOKEN")
for artifact in (model, tokenizer):
    artifact.push_to_hub(repo_id, private=True, token=hub_token)
print("✅ Model and tokenizer pushed to", repo_id)

Uploading...:   0%|          | 0.00/60.5M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Uploading...:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

✅ Model and tokenizer pushed to soupstick/smollm3-qlora-ft


# Fine Tuning V3