In [None]:

from datasets import load_dataset

# Sizes of the positional train/test split taken from the shuffled corpus.
TRAIN_SIZE = 4500
TEST_SIZE = 500

dataset = load_dataset(
    "json",
    data_files="/kaggle/input/trump-datasettt/trump_neutral_pairs.json"
)["train"]

# Fail early with a readable message instead of an index error inside select().
if len(dataset) < TRAIN_SIZE + TEST_SIZE:
    raise ValueError(
        f"Expected at least {TRAIN_SIZE + TEST_SIZE} records, found {len(dataset)}"
    )

# A fixed seed keeps the train/test partition identical across runs.
dataset = dataset.shuffle(seed=42)

train_dataset = dataset.select(range(TRAIN_SIZE))
test_dataset  = dataset.select(range(TRAIN_SIZE, TRAIN_SIZE + TEST_SIZE))

print(len(train_dataset), len(test_dataset))


def format_example(ex):
    """Render one record as a two-line training prompt.

    Note the field naming in the data: ``ex["output"]`` fills the
    ``Neutral:`` line and ``ex["input"]`` fills the ``Styled:`` line.

    Args:
        ex: Mapping with ``"output"`` and ``"input"`` entries.

    Returns:
        The string ``"Neutral: <output>\\nStyled: <input>"``.
    """
    template = "Neutral: {}\nStyled: {}"
    return template.format(ex["output"], ex["input"])

# Attach the formatted prompt as a "text" column on both splits.
train_dataset, test_dataset = (
    split.map(lambda row: {"text": format_example(row)})
    for split in (train_dataset, test_dataset)
)


from transformers import AutoTokenizer

# Checkpoint name shared by the tokenizer and the model loaded further down.
MODEL_NAME = "gpt2-medium"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the end-of-sequence token for padding (presumably because this
# checkpoint defines no dedicated pad token -- TODO confirm). Padding and EOS
# therefore share one token id everywhere below.
tokenizer.pad_token = tokenizer.eos_token

def tokenize(batch):
    """Tokenize the ``"text"`` column of a batch to a fixed length.

    Every sequence is truncated and padded to exactly 256 tokens.

    Args:
        batch: Mapping whose ``"text"`` entry holds the strings to encode.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the batch.
    """
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 256,
    }
    return tokenizer(batch["text"], **encode_options)

# Raw columns that are dropped once token ids exist.
_SOURCE_COLUMNS = ["input", "output", "text"]

# Training split: tokenize, switch to torch tensors, drop the raw columns.
train_tok = train_dataset.map(tokenize, batched=True)
train_tok.set_format("torch")
train_tok = train_tok.remove_columns(_SOURCE_COLUMNS)

# Held-out split: same three steps.
test_tok = test_dataset.map(tokenize, batched=True)
test_tok.set_format("torch")
test_tok = test_tok.remove_columns(_SOURCE_COLUMNS)


def add_labels(batch):
    """Add causal-LM ``labels`` to a batch, ignoring padding in the loss.

    Labels start as a copy of ``input_ids``. Positions where
    ``attention_mask`` is 0 are padding; they are set to -100 so the loss
    skips them. The first padding position of each row is kept, because the
    pad token is the EOS token and that single position is the only signal
    that teaches the model where to stop.

    Args:
        batch: Mapping with a tensor ``"input_ids"`` and, normally, a tensor
            ``"attention_mask"`` of the same shape.

    Returns:
        The same mapping with a ``"labels"`` tensor added. If no
        ``"attention_mask"`` is present, labels are a plain copy.
    """
    labels = batch["input_ids"].clone()

    if "attention_mask" in batch:
        pad_mask = batch["attention_mask"] == 0
        # The running count of pads equals 1 exactly at the first pad in a row.
        first_pad = pad_mask & (pad_mask.cumsum(dim=-1) == 1)
        labels[pad_mask & ~first_pad] = -100

    batch["labels"] = labels
    return batch

# Attach the "labels" column to both tokenized splits.
train_tok, test_tok = (
    split.map(add_labels, batched=True)
    for split in (train_tok, test_tok)
)


from transformers import AutoModelForCausalLM
import torch

# Load the base causal LM; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto"
)

# Training-time settings, applied before the model is wrapped with LoRA below.
# The cache is switched off and gradient checkpointing is switched on.
# NOTE(review): use_cache stays False on this object, so the generate() calls
# later in this cell also run without the cache unless they override it.
model.config.use_cache = False
model.gradient_checkpointing_enable()
# Presumably required so gradients reach the adapter weights through the
# checkpointed blocks when base weights are frozen -- confirm in PEFT docs.
model.enable_input_require_grads()


from peft import LoraConfig, get_peft_model, TaskType

# LoRA adapter settings: rank 16, alpha 32, dropout 0.05, no bias training,
# applied to every module whose name matches "c_attn" or "c_proj".
lora_cfg = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["c_attn", "c_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model and report how many parameters are trainable.
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()


from transformers import TrainingArguments, Trainer

# Hyperparameters gathered in one place. Effective batch size per device is
# per_device_train_batch_size * gradient_accumulation_steps = 16.
_training_options = dict(
    output_dir="./lora_trump_medium",
    num_train_epochs=2,
    learning_rate=2e-4,
    warmup_steps=30,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    fp16=False,
    logging_steps=20,
    save_steps=80,
    disable_tqdm=False,
    report_to="none",
)
training_args = TrainingArguments(**_training_options)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tok,
    eval_dataset=test_tok,
    tokenizer=tokenizer,
)

# Checkpoints are written under output_dir every save_steps optimizer steps.
trainer.train()


neutral_file = "/kaggle/input/trump-neutral-texts/neutral.txt"

# Collect every non-blank line, with surrounding whitespace removed.
neutral_lines = []
with open(neutral_file, "r") as f:
    for raw_line in f:
        stripped = raw_line.strip()
        if stripped:
            neutral_lines.append(stripped)

print("Loaded:", len(neutral_lines))

def stylize(text):
    """Generate a styled rewrite of one neutral sentence.

    Builds the same ``Neutral: ...\\nStyled:`` prompt used in training,
    samples a continuation, and returns only the newly generated text.

    Args:
        text: Neutral sentence to rewrite.

    Returns:
        The generated continuation with surrounding whitespace stripped.
    """
    prompt = f"Neutral: {text}\nStyled:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    # The model is left in training mode after Trainer.train(); switch to eval
    # so dropout is disabled while generating.
    model.eval()
    with torch.no_grad():
        output = model.generate(
            **inputs,
            # Budget for new tokens only, so a long prompt cannot use it up.
            max_new_tokens=128,
            # temperature/top_p are ignored unless sampling is enabled.
            do_sample=True,
            temperature=0.8,
            top_p=0.9,
            # The config disables the cache for training; re-enable it here.
            use_cache=True,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the tokens after the prompt rather than splitting the full
    # text on "Styled:", which breaks if the marker appears again.
    continuation = output[0][prompt_len:]
    return tokenizer.decode(continuation, skip_special_tokens=True).strip()

responses = [stylize(t) for t in neutral_lines]

# One response per line, in the same order as neutral_lines. Whitespace runs
# (including newlines inside a generation) are collapsed to single spaces so
# each response stays on exactly one line.
with open("responses.txt", "w", encoding="utf-8") as f:
    for r in responses:
        f.write(" ".join(r.split()) + "\n")

print("Saved responses.txt")


# Print the first ten held-out examples next to the model's prediction.
for idx in range(10):
    sample = test_dataset[idx]
    neutral, real = sample["output"], sample["input"]
    pred = stylize(neutral)

    for label, value in (("Neutral:", neutral), ("Pred:", pred), ("Real:", real)):
        print(label, value)
    print("----------")

In [None]:
import json
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "gpt2-medium"
LORA_PATH = "/kaggle/working/lora_trump_medium/checkpoint-560"

# Base weights are loaded in half precision; placement is left to device_map.
_load_kwargs = {"torch_dtype": torch.float16, "device_map": "auto"}
base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **_load_kwargs)

# Tokenizer of the same checkpoint, with EOS reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token

# Attach the saved LoRA adapter and switch to inference mode.
model = PeftModel.from_pretrained(base, LORA_PATH)
model.eval()

print("Model loaded")

dataset = load_dataset(
    "json",
    data_files="/kaggle/input/trump-datasettt/trump_neutral_pairs.json"
)["train"]

print("Total records:", len(dataset))

# The training cell shuffled with seed=42 and trained on rows 0..4499 of the
# shuffled order. Taking the tail of the unshuffled file would therefore
# mostly re-use training rows. Reproduce the same shuffle and evaluate only on
# rows that were held out.
TRAIN_SIZE = 4500
EVAL_SIZE = 100

shuffled = dataset.shuffle(seed=42)
if len(shuffled) <= TRAIN_SIZE:
    raise ValueError(
        f"Need more than {TRAIN_SIZE} records for a held-out subset, found {len(shuffled)}"
    )

eval_stop = min(TRAIN_SIZE + EVAL_SIZE, len(shuffled))
test_dataset = shuffled.select(range(TRAIN_SIZE, eval_stop))

print("Test size:", len(test_dataset))

neutral_out = []
styled_out = []

# Sampling is stochastic; seed it so the saved outputs can be regenerated.
torch.manual_seed(42)

for ex in test_dataset:
    neutral = ex["output"]
    prompt = f"Neutral: {neutral}\nStyled:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=60,
            # temperature/top_p are ignored unless sampling is enabled.
            do_sample=True,
            temperature=0.7,
            top_p=0.92,
            # Set explicitly: pad and EOS share one token id in this setup.
            pad_token_id=tokenizer.eos_token_id
        )

    decoded = tokenizer.decode(out[0], skip_special_tokens=True)

    # Keep only what follows the first "Styled:" marker (the prompt's own).
    if "Styled:" in decoded:
        decoded = decoded.split("Styled:", 1)[1].strip()

    neutral_out.append(neutral)
    styled_out.append(decoded)

# The two files are line-parallel: line k of one corresponds to line k of the
# other. Whitespace runs (including embedded newlines) are collapsed to single
# spaces so every record occupies exactly one line.
with open("neutral_test.txt", "w", encoding="utf-8") as f:
    for line in neutral_out:
        f.write(" ".join(line.split()) + "\n")

with open("styled_test_generated.txt", "w", encoding="utf-8") as f:
    for line in styled_out:
        f.write(" ".join(line.split()) + "\n")

print("Saved neutral_test.txt and styled_test_generated.txt")

In [None]:
import torch
import json
import os
import math
from torch import nn
from safetensors.torch import load_file
from transformers import AutoTokenizer, AutoModelForCausalLM
from openai import OpenAI
from peft import PeftModel

# Read the key from the environment rather than embedding it in the notebook,
# where it would be saved and shared with the file.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


def get_neutral_response(user_message):
    """Ask the remote model for a one-sentence neutral reply.

    Args:
        user_message: Text passed as the request ``input``.

    Returns:
        The ``output_text`` attribute of the API response.
    """
    system_prompt = " ".join([
        "You are a neutral assistant.",
        "Always respond with exactly one short sentence (max 12 words).",
        "Do not write more than one sentence.",
    ])
    reply = client.responses.create(
        model="gpt-5.2",
        instructions=system_prompt,
        input=user_message,
    )
    return reply.output_text


# Base checkpoint name and the directory of the saved LoRA adapter checkpoint.
BASE_MODEL = "gpt2-medium"
LORA_PATH = "/kaggle/working/lora_trump_medium/checkpoint-560"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Reuse EOS as the padding token, so padding and EOS share one token id.
tokenizer.pad_token = tokenizer.eos_token

print("Loading base model...")
# No dtype is passed here, so the library default precision is used.
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto"
)

print("Loading LoRA...")
# Wrap the base model with the adapter weights, then switch to eval mode.
model = PeftModel.from_pretrained(base, LORA_PATH)
model.eval()

print("LoRA successfully loaded!")


def stylize_trump(neutral_text):
    """Rewrite a neutral sentence with the LoRA model using greedy decoding.

    Args:
        neutral_text: Sentence placed on the ``Neutral:`` line of the prompt.

    Returns:
        The text after the first ``Styled:`` marker in the decoded output,
        stripped. If the marker is absent, the decoded output unchanged.
    """
    encoded = tokenizer(
        f"Neutral: {neutral_text}\nStyled:", return_tensors="pt"
    ).to(model.device)

    generation_options = {
        "max_new_tokens": 80,
        "pad_token_id": tokenizer.eos_token_id,
        "do_sample": False,
    }
    with torch.no_grad():
        generated = model.generate(**encoded, **generation_options)

    full_text = tokenizer.decode(generated[0], skip_special_tokens=True)
    _, marker, tail = full_text.partition("Styled:")
    return tail.strip() if marker else full_text


print("\n=== TRUMP CHAT (TRUE LoRA) READY ===\n")

# Interactive loop: neutral answer from the remote model, then restyled
# locally. Type "exit" (any case) to quit.
while True:
    try:
        user_msg = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted kernel ends the chat without a traceback.
        break

    if user_msg.lower() == "exit":
        break
    if not user_msg:
        # Do not spend an API call on an empty message.
        continue

    neutral = get_neutral_response(user_msg)
    print("\nNeutral response:")
    print(neutral)

    styled = stylize_trump(neutral)
    print("\nTrump-style response:")
    print(styled)
    print("----------------------------------------\n")