# Part 0: Setup

In [1]:
# Install PEFT along with dependencies
!pip install -q peft transformers accelerate bitsandbytes

In [12]:
import os
from dotenv import load_dotenv
import torch
import platform
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import pandas as pd


In [3]:
# RUN THIS CELL ONLY IF RUNNING ON PACE-ICE
# override the huggingface cache path and nltk cache path
dirs = {
    "HF_HOME":"~/scratch/hf_cache",
    "TRITON_CACHE_DIR":"~/scratch/triton_cache",
    "TORCHINDUCTOR_CACHE_DIR":"~/scratch/inductor_cache",
    'NLTK_DATA':"~/scratch/nltk_data"
}

for name in dirs:
    d = dirs[name]
    path = os.path.expanduser(d)
    print(name)
    print(path)
    os.makedirs(path, exist_ok=True)
    # making sure the cache dirs are rwx for owner
    os.chmod(path, 0o700)
    os.environ[name] = path
print("Make sure the cache files are in ~/scratch/ so quota doesn't exceed limit!")

HF_HOME
/home/hice1/yhsu72/scratch/hf_cache
TRITON_CACHE_DIR
/home/hice1/yhsu72/scratch/triton_cache
TORCHINDUCTOR_CACHE_DIR
/home/hice1/yhsu72/scratch/inductor_cache
NLTK_DATA
/home/hice1/yhsu72/scratch/nltk_data
Make sure the cache files are in ~/scratch/ so quota doesn't exceed limit!


In [None]:
# Change this to your own token (or save in .env)
os.environ['HF_TOKEN'] = ''

In [5]:
load_dotenv()  # loads HF_TOKEN into environment
print("‚úÖ Hugging Face token loaded from environment.")

‚úÖ Hugging Face token loaded from environment.


In [6]:
print("=== üß† Environment Info ===")
print(f"Python version: {platform.python_version()}")
print(f"PyTorch version: {torch.__version__}")
print("-----------------------------")

# Check for CUDA (NVIDIA GPUs)
if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print(f"‚úÖ CUDA is available. Number of GPUs: {num_gpus}")

    for i in range(num_gpus):
        gpu_name = torch.cuda.get_device_name(i)
        total_mem = torch.cuda.get_device_properties(i).total_memory / (1024**3)
        print(f"  ‚Ä¢ GPU {i}: {gpu_name} ({total_mem:.2f} GB VRAM)")

    # Also show current GPU and free memory
    current_gpu = torch.cuda.current_device()
    print(f"\nUsing GPU: {torch.cuda.get_device_name(current_gpu)}")
    free_mem, total_mem = torch.cuda.mem_get_info()
    print(f"Available VRAM: {free_mem/1e9:.2f} GB / {total_mem/1e9:.2f} GB")

# Check for Apple Silicon (MPS)
elif torch.backends.mps.is_available():
    print("‚úÖ Running on Apple Silicon (MPS backend).")

# Check for ROCm (AMD GPUs)
elif torch.version.hip is not None:
    print("‚úÖ ROCm (AMD GPU) detected.")

# Otherwise fallback to CPU
else:
    print("‚ö†Ô∏è No GPU detected ‚Äî running on CPU only.")
    print("This will be very slow for large models like Llama-3.1-8B.")

print("-----------------------------")

# Confirm torch default device
default_device = "cuda" if torch.cuda.is_available() else (
    "mps" if torch.backends.mps.is_available() else "cpu"
)
print(f"Default torch device: {default_device}")

=== üß† Environment Info ===
Python version: 3.10.13
PyTorch version: 2.8.0+cu128
-----------------------------
‚úÖ CUDA is available. Number of GPUs: 2
  ‚Ä¢ GPU 0: NVIDIA H200 (139.80 GB VRAM)
  ‚Ä¢ GPU 1: NVIDIA H200 (139.80 GB VRAM)

Using GPU: NVIDIA H200
Available VRAM: 149.56 GB / 150.11 GB
-----------------------------
Default torch device: cuda


In [25]:

# --- 3. Model name on Hugging Face Hub ---
model_name = "meta-llama/Llama-3.1-8B"

# --- 4. (Optional) Authenticate if model is gated/private ---
# from huggingface_hub import login
# login(token="YOUR_HF_TOKEN")

print("Loading tokenizer and model‚Ä¶")

# --- 5. Load tokenizer ---
# Tokenizer converts text ‚Üî tokens. Must match model for correct vocabulary.
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # ensure padding works

# --- 6. Load model in full bf16 precision (no quantization) ---
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",      # Automatically distributes layers across GPUs
    torch_dtype=torch.bfloat16,  # Use bf16 for all layers
    low_cpu_mem_usage=True,       # Stream weights directly to GPU to reduce CPU RAM footprint
    trust_remote_code=True        # Needed if the repo includes custom code
)

print("‚úÖ Model loaded successfully!")


Loading tokenizer and model‚Ä¶


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

‚úÖ Model loaded successfully!


In [8]:

# --- 8. Simple inference test ---
prompt = """### Instruction:
Explain the difference between left-wing and right-wing economic policies.

### Response:"""

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=150,                  # control output length
    do_sample=True,                      # enables some randomness
    temperature=0.7,                     # mild creativity
    top_p=0.9,                           # nucleus sampling
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.2               # prevent repeated text
)

result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(result)

### Instruction:
Explain the difference between left-wing and right-wing economic policies.

### Response: 

In a democratic society, people are free to express their opinions on political issues. However, there is no consensus on what constitutes ‚Äúleft‚Äù or ‚Äúright.‚Äù In general terms, however, we can say that those who advocate for greater government control of the economy tend toward leftist positions while conservatives argue against such regulation by advocating private enterprise as opposed to state interventionism.


For instance, consider how different approaches towards taxation might represent divergent ideologies within this spectrum; libertarians believe strongly in minimal taxes since they view excessive governmental spending (particularly welfare programs) negatively whereas social democrats want higher tax rates because they see these funds being used wisely through public services like education which benefits everyone regardless income level rather than just some 

# Part 1: Load & Prepare Data
Data when loaded in:
```
{
    "instruction": "Write a political news story from a {left/ right/ center} perspective based on the headline.", 
    "input": "Headline: ...",
    "output": "..."
}
```

**Steps to process it:**
1. Combine into 1 traning text
2. Tokenize text

In [15]:
cwd = os.getcwd()

# build full paths
data_path = os.path.join(cwd, "train.jsonl")   # input file
# Option 1: Load using pandas ‚Äî easiest for inspection
train_df = pd.read_json(data_path, lines=True)

print("‚úÖ Loaded train.jsonl successfully!")
print(f"Number of samples: {len(df)}\n")
print("üìä Preview:")
display(df.head())

‚úÖ Loaded train.jsonl successfully!
Number of samples: 2852

üìä Preview:


Unnamed: 0,instruction,input,output
0,Write a political news story from a right pers...,Headline: If Democrats Flip House What Will Th...,Quotes displayed in real-time or delayed by at...
1,Write a political news story from a center per...,Headline: Putin Marks Russias Victory Day Spee...,It has become an annual event - the military p...
2,Write a political news story from a left persp...,Headline: Media Industry Ap Criticized After D...,Copyright 2025 The Associated Press. All Right...
3,Write a political news story from a left persp...,Headline: Economy And Jobs Will There Be Reces...,"As the Fed wrestles with inflation, experts wo..."
4,Write a political news story from a right pers...,Headline: Technology Plans Tesla Tunnel Nashvi...,The project will be similar to one already in ...


In [16]:
def format_prompt(example):
    """
    Combine instruction, input, and output into one training text.
    """
    return (
        f"### Instruction:\n{example['instruction'].strip()}\n\n"
        f"### Input:\n{example['input'].strip()}\n\n"
        f"### Response:\n{example['output'].strip()}"
    )

In [17]:
# Apply formatting to DataFrames
train_df["text"] = train_df.apply(format_prompt, axis=1)

# Preview one formatted sample
print(train_df["text"].iloc[0][:800])  # show first 800 chars

### Instruction:
Write a political news story from a right perspective based on the headline.

### Input:
Headline: If Democrats Flip House What Will They Prioritize

### Response:
Quotes displayed in real-time or delayed by at least 15 minutes. Market data provided byFactset.
          Powered and implemented byFactSet Digital Solutions.Legal Statement.

This material may not be published, broadcast, rewritten, or redistributed. ¬©2025 FOX News Network, LLC. All rights reserved.FAQ-New Privacy Policy

If the Democrats win a majority in theHouse, it will be Democrats who chair key committees. And since the chair is picked largely on the basis of seniority, we know who they will be.

So let me introduce the Democrats who will become very powerful, if the Democrats win next week.

Maxine Wate


In [20]:
from datasets import Dataset
def tokenize_function(example):
    return tokenizer(
        example["text"],
        truncation=True,
        max_length=1024,   # adjust based on GPU memory
        padding="max_length"
    )
# Convert to HF Dataset and tokenize
train_dataset = Dataset.from_pandas(train_df[["text"]])
tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])

# Check shape and sample
print(tokenized_train)

print("============ Sanity Check - decoding encoded tokens ============")
print(tokenizer.decode(tokenized_train[0]["input_ids"][:200]))

Map:   0%|          | 0/2852 [00:00<?, ? examples/s]

Dataset({
    features: ['__index_level_0__', 'input_ids', 'attention_mask'],
    num_rows: 2852
})
<|begin_of_text|>### Instruction:
Write a political news story from a right perspective based on the headline.

### Input:
Headline: If Democrats Flip House What Will They Prioritize

### Response:
Quotes displayed in real-time or delayed by at least 15 minutes. Market data provided byFactset.
          Powered and implemented byFactSet Digital Solutions.Legal Statement.

This material may not be published, broadcast, rewritten, or redistributed. ¬©2025 FOX News Network, LLC. All rights reserved.FAQ-New Privacy Policy

If the Democrats win a majority in theHouse, it will be Democrats who chair key committees. And since the chair is picked largely on the basis of seniority, we know who they will be.

So let me introduce the Democrats who will become very powerful, if the Democrats win next week.

Maxine Waters will chair the Financial Services Committee. Known for her slogan "Impeach 45" 

# Part 2: LoRA fine-tuning
**Steps**
1. Set up LoRA model
2. Set up training arguments
3. Prepare a Data Collator
4. Set up Trainer
5. Run Training
6. Save new weights

In [9]:
# Set up Lora model for fine-tuning
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16,                # rank of the LoRA matrices
    lora_alpha=32,       # scaling factor
    target_modules=["q_proj", "v_proj"],  # which layers to fine-tune
    lora_dropout=0.05,   # dropout for LoRA
    bias="none",         # keep bias frozen
    task_type="CAUSAL_LM" # type of task
)

# Wrap base model with PEFT
model = get_peft_model(model, lora_config) # freezes original layer
model.print_trainable_parameters()  # confirm only LoRA params are trainable

trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848


In [22]:
# Define Training Arguments
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="outputs/lora",         # where to save checkpoints & LoRA adapters
    per_device_train_batch_size=8,     # batch size per GPU
    gradient_accumulation_steps=4,     # effective batch size = 8 * 4 * 2 GPUs = 64
    learning_rate=3e-4,                # LoRA-friendly default
    num_train_epochs=3,                # 3 epochs, increase if dataset is larger
    logging_steps=50,
    save_strategy="epoch",
    save_total_limit=2,
    bf16=True,                          # optional: H200 supports bf16, can improve speed
    dataloader_drop_last=True,           # drop incomplete batch to avoid OOM
    report_to="none",                    # change to 'wandb' if using W&B
    remove_unused_columns=False,
    ddp_find_unused_parameters=False,   # improves multi-GPU performance
    gradient_checkpointing=True,        # reduce memory usage for large models
    warmup_steps=50,                    # optional warmup
    optim="paged_adamw_32bit",          # memory-efficient optimizer
    lr_scheduler_type="cosine"          # smooth LR schedule
)


print("‚úÖ Training arguments set up")


‚úÖ Training arguments set up


In [23]:
# Prepare Data Collator (Coverts list of examples into tensors)
from transformers import DataCollatorForLanguageModeling

# Collator for causal LM
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # False because this is causal LM, not masked LM
)

print("‚úÖ Data collator ready")

‚úÖ Data collator ready


In [27]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    data_collator=data_collator,
    processing_class=tokenizer
)

print("‚úÖ Trainer ready")

‚úÖ Trainer ready


In [29]:
# Begin training
trainer.train()

# Save the LoRA adapter after training
model.save_pretrained("outputs/lora_adapter")

print("‚úÖ Training complete and LoRA adapter saved")


Step,Training Loss
50,2.24
100,2.4916
150,1.917
200,1.3239
250,0.5324


‚úÖ Training complete and LoRA adapter saved


# Part 3: Quick Tests

In [36]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# --- Paths ---
model_path = "./outputs/lora_adapter"  # folder with your safetensor shards
tokenizer_path = "meta-llama/Llama-3.1-8B"  # or same folder if tokenizer saved locally

# --- Load tokenizer ---
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
tokenizer.pad_token = tokenizer.eos_token  # ensure padding token is set

# --- Load full model ---
trained_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",     # automatically place layers on GPUs
    torch_dtype=torch.bfloat16,  # use bfloat16 for efficiency
    trust_remote_code=True
)

print("‚úÖ Model loaded from safetensor shards")



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

‚úÖ Model loaded from safetensor shards


In [37]:

# --- Test generation ---
prompt = """### Instruction:
Write a political news story from a right perspective based on the headline.

### Input:
Headline: Georgia Tech student working on NLP project

### Response:
"""

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = trained_model.generate(
        **inputs,
        max_new_tokens=300,
        temperature=0.7,
        do_sample=True,
        top_p=0.9
    )

generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(generated_text)

### Instruction:
Write a political news story from a right perspective based on the headline.

### Input:
Headline: Georgia Tech student working on NLP project

### Response:
This material may not be published, broadcast, rewritten, or redistributed. ¬©2025 FOX News Network, LLC. All rights reserved. Quotes displayed in real-time or delayed by at least 15 minutes. Market data provided byFactset. Powered and implemented byFactSet Digital Solutions.Legal Statement. Mutual Fund and ETF data provided byRefinitiv Lipper.

Fox News Flash top headlines are here. Check out what's clicking on Foxnews.com.

AGeorgiahigh school student who is working on a "nonpartisan"artificial intelligence (AI) project with a "federal agency" has sparked concerns over the privacy and security of the teen's personal data.

The teen, who is in his 20s, is a junior at Georgetown University's Center for Global and Social Justice Science. He has been on the cusp of completing his work on the ChatGPT-4R chatbot, a pr