# Part 1: Inference

In [None]:
# Install the Hugging Face transformers library (pretrained NLP models and pipelines)
!pip install transformers

In [None]:
from transformers import pipeline

# Pretrained checkpoint from the Hugging Face Hub. Any other text-generation
# checkpoint (including your own fine-tuned model) can be substituted here.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
task = "text-generation"
prompt = "Explain how neural networks work in simple terms"

# Create a text-generation pipeline
generator = pipeline(task, model=model_name)

# Generate one continuation for the prompt.
# max_new_tokens bounds only the newly generated tokens; max_length would also
# count the prompt tokens, so the real generation budget would shrink as the
# prompt grows.
results = generator(prompt, max_new_tokens=100, truncation=True, num_return_sequences=1)

# Index the key directly so an unexpected result shape raises a KeyError
# instead of silently printing "None".
text = results[0]["generated_text"]
print(f"Generated text:\n{text}")

# Part 2: Fine-Tuning

In [None]:
# Install the libraries needed for fine-tuning: transformers (models), datasets (data handling), and wandb (experiment logging)
!pip install -q transformers datasets wandb

In [None]:
from google.colab import userdata
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
)
import wandb

# Authenticate with Weights & Biases (W&B); the API key is read from Colab secrets
wandb_api_key = userdata.get('WANDB_API_KEY')
wandb.login(key=wandb_api_key)

# Checkpoint that will be fine-tuned
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Tiny hand-written corpus: four sentences for training ...
train_data = dict(
    text=[
        "Artificial Intelligence is transforming industries by automating tasks and enabling smarter decision-making.",
        "Machine learning algorithms learn patterns from data to make predictions and improve over time.",
        "Neural networks, inspired by the human brain, are the backbone of modern deep learning systems.",
        "Self-driving cars rely on AI to navigate roads, detect obstacles, and make real-time decisions.",
    ],
)

# ... and two held-out sentences for validation
val_data = dict(
    text=[
        "AI has the potential to solve some of the world's most pressing challenges, from healthcare to climate change.",
        "The collaboration between humans and AI will define the next era of technological innovation.",
    ],
)

In [None]:
# Fetch the tokenizer that matches the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Padding requires a pad token; tokenizers that ship without one (e.g. GPT-2's)
# fall back to reusing the end-of-sequence token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Fetch the pretrained causal language model
model = AutoModelForCausalLM.from_pretrained(model_name)

# Wrap the raw dicts as Hugging Face Dataset objects
train_dataset, val_dataset = (
    Dataset.from_dict(split) for split in (train_data, val_data)
)

# Every example is truncated or padded to exactly this many tokens
max_length = 64

def tokenize_function(examples):
    """Tokenize the "text" field of a batch to fixed-length sequences.

    Sequences longer than ``max_length`` are truncated and shorter ones are
    padded up to ``max_length``. Returns the tokenizer's output unchanged.
    """
    encoded = tokenizer(
        examples["text"],
        max_length=max_length,
        padding="max_length",
        truncation=True,
    )
    return encoded

# Apply tokenization to datasets
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_val = val_dataset.map(tokenize_function, batched=True)


def add_labels(example):
    """Build next-token labels for one tokenized example, ignoring padding.

    The labels mirror ``input_ids`` because a causal LM learns to predict the
    next token. Positions where ``attention_mask`` is 0 are padding and are set
    to -100, the index the loss ignores, so the model is not trained to
    predict pad tokens (every example is padded to a fixed length, so padding
    can make up most of a sequence).

    Args:
        example: A single tokenized row containing "input_ids" and
            "attention_mask" lists of equal length.

    Returns:
        A dict with a single "labels" list to be added as a new column.
    """
    return {
        "labels": [
            token_id if attended else -100
            for token_id, attended in zip(example["input_ids"], example["attention_mask"])
        ]
    }


# Add the labels column to both splits
tokenized_train = tokenized_train.map(add_labels)
tokenized_val = tokenized_val.map(add_labels)

In [None]:
# Configure the fine-tuning run
training_args = TrainingArguments(
    # Checkpoint directory; the model is also uploaded to the Hugging Face Hub
    output_dir="AI-Tuned-DeepSeek-R1-Distill-Qwen-1.5B",
    push_to_hub=True,
    # Optimisation settings: a single epoch with small batches to limit memory use
    num_train_epochs=1,
    learning_rate=1e-4,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    # Evaluate, log and checkpoint on the same 2-step cadence
    eval_strategy="steps",
    eval_steps=2,
    logging_steps=2,
    save_steps=2,
    # Reload the best checkpoint once training finishes
    load_best_model_at_end=True,
    # Send metrics to Weights & Biases
    report_to=["wandb"],
)

# Build the Trainer from the model, tokenizer and tokenized splits
trainer = Trainer(
    args=training_args,
    model=model,
    processing_class=tokenizer,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
)

# Run fine-tuning; metrics are logged to W&B and the validation split is evaluated periodically
trainer.train()