# LeLM — Fine-tuned LLM for NBA Hot Takes

All-in-one Colab notebook for scraping, processing, training, and inference.

**Runtime**: T4 GPU (free tier) for Qwen3-8B, or A100 (Pro) for Qwen3-14B.

**Usage**: Run cells top to bottom. Mount Google Drive to persist checkpoints.

In [1]:
# Install dependencies
!pip install -q requests unsloth transformers trl datasets peft bitsandbytes pyyaml

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m432.3/432.3 kB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m92.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.1/423.1 kB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m506.8/506.8 kB[0m [31m34.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.7/60.7 MB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m566.4/566.4 kB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m


In [2]:
# Mount Google Drive for persistence
from google.colab import drive
drive.mount('/content/drive')

import os
DRIVE_DIR = '/content/drive/MyDrive/LeLM'
os.makedirs(DRIVE_DIR, exist_ok=True)

Mounted at /content/drive


In [3]:
# Configuration
import torch

# Choose model based on available GPU
gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'
print(f'GPU: {gpu_name}')
print(f'VRAM: {torch.cuda.get_device_properties(0).total_mem / 1e9:.1f} GB')

if 'A100' in gpu_name:
    MODEL_NAME = 'unsloth/Qwen3-14B-bnb-4bit'
    print('Using Qwen3-14B (A100 detected)')
elif 'T4' in gpu_name:
    MODEL_NAME = 'unsloth/Qwen3-8B-bnb-4bit'
    print('Using Qwen3-8B (T4 detected)')
else:
    MODEL_NAME = 'unsloth/Qwen3-4B-bnb-4bit'
    print(f'Using Qwen3-4B (fallback for {gpu_name})')

MAX_SEQ_LENGTH = 2048

SYSTEM_PROMPT = (
    "You are an unapologetically bold NBA analyst who lives for hot takes. "
    "You speak with absolute conviction, back up your claims with stats and game knowledge, "
    "but aren't afraid to be controversial. You have strong opinions on player legacies, "
    "team strategies, and playoff predictions. Your style is passionate, entertaining, "
    "and occasionally provocative — like a mix of Skip Bayless's confidence, Charles Barkley's "
    "humor, and Zach Lowe's basketball IQ. Never hedge. Never be boring. Every take should "
    "make someone want to argue with you."
)

GPU: CPU


AssertionError: Torch not compiled with CUDA enabled

## Step 1: Scrape Reddit Data

Uses Reddit's public JSON endpoints — no API key needed.

In [None]:
import json
import time
import praw
from pathlib import Path

RAW_FILE = Path(DRIVE_DIR) / 'reddit_posts.jsonl'

reddit = praw.Reddit(
    client_id=os.environ['REDDIT_CLIENT_ID'],
    client_secret=os.environ['REDDIT_CLIENT_SECRET'],
    user_agent=os.environ['REDDIT_USER_AGENT'],
)

subreddit = reddit.subreddit('nba')
scraped_ids = set()

QUERIES = [
    'hot take', 'unpopular opinion', 'overrated underrated',
    'bold prediction', 'worst take', 'GOAT debate',
]

with open(RAW_FILE, 'a') as f:
    for query in QUERIES:
        print(f'Searching: {query}...')
        for post in subreddit.search(query, sort='top', limit=500):
            if post.id in scraped_ids:
                continue
            f.write(json.dumps({
                'id': post.id, 'type': 'post',
                'title': post.title, 'selftext': post.selftext,
                'score': post.score, 'created_utc': post.created_utc,
            }) + '\n')
            scraped_ids.add(post.id)

            post.comments.replace_more(limit=0)
            for c in sorted(post.comments.list(), key=lambda c: c.score, reverse=True)[:10]:
                if c.id not in scraped_ids:
                    f.write(json.dumps({
                        'id': c.id, 'type': 'comment',
                        'post_title': post.title, 'body': c.body,
                        'score': c.score, 'created_utc': c.created_utc,
                    }) + '\n')
                    scraped_ids.add(c.id)
        time.sleep(2)

print(f'Total scraped: {len(scraped_ids)} items -> {RAW_FILE}')

import json
import time
import requests
from pathlib import Path

RAW_FILE = Path(DRIVE_DIR) / 'reddit_posts.jsonl'
HEADERS = {'User-Agent': 'LeLM-scraper/1.0'}

QUERIES = [
    'hot take', 'unpopular opinion', 'overrated underrated',
    'bold prediction', 'worst take', 'GOAT debate',
]

def fetch_json(url, params=None):
    for attempt in range(3):
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        if resp.status_code == 429:
            wait = int(resp.headers.get('Retry-After', 10))
            print(f'  Rate limited, waiting {wait}s...')
            time.sleep(wait)
            continue
        resp.raise_for_status()
        return resp.json()
    raise RuntimeError(f'Failed after retries: {url}')

scraped_ids = set()

with open(RAW_FILE, 'a') as f:
    for query in QUERIES:
        print(f'Searching: {query}...')
        after = None
        for page in range(5):
            params = {'q': query, 'restrict_sr': 'on', 'sort': 'top', 't': 'all', 'limit': 100}
            if after:
                params['after'] = after
            data = fetch_json('https://www.reddit.com/r/nba/search.json', params)
            posts = data['data']['children']
            if not posts:
                break

            for pw in posts:
                if pw['kind'] != 't3':
                    continue
                d = pw['data']
                if d['id'] in scraped_ids:
                    continue
                f.write(json.dumps({
                    'id': d['id'], 'type': 'post',
                    'title': d.get('title', ''), 'selftext': d.get('selftext', ''),
                    'score': d.get('score', 0), 'created_utc': d.get('created_utc', 0),
                }) + '\n')
                scraped_ids.add(d['id'])

                # Fetch top comments
                time.sleep(2)
                try:
                    cdata = fetch_json(f"https://www.reddit.com/r/nba/comments/{d['id']}.json",
                                       {'sort': 'top', 'limit': 10})
                    if len(cdata) >= 2:
                        for child in cdata[1]['data'].get('children', [])[:10]:
                            if child['kind'] != 't1':
                                continue
                            cd = child['data']
                            if cd['id'] not in scraped_ids:
                                f.write(json.dumps({
                                    'id': cd['id'], 'type': 'comment',
                                    'post_title': d.get('title', ''), 'body': cd.get('body', ''),
                                    'score': cd.get('score', 0), 'created_utc': cd.get('created_utc', 0),
                                }) + '\n')
                                scraped_ids.add(cd['id'])
                except Exception as e:
                    print(f'  Comment error: {e}')

            after = data['data'].get('after')
            if not after:
                break
            time.sleep(2)

print(f'Total scraped: {len(scraped_ids)} items -> {RAW_FILE}')

In [None]:
import re
import random
from collections import Counter

DIRECT_TEMPLATES = [
    "Give me your hottest NBA take right now.",
    "What's your most controversial NBA opinion?",
    "Drop an NBA hot take that would get you yelled at on Twitter.",
    "Give me a spicy NBA take.",
    "What's your boldest NBA prediction?",
    "Hit me with an unpopular NBA opinion.",
]

TOPIC_TEMPLATES = [
    "What's your hot take on {topic}?",
    "Give me your most controversial opinion about {topic}.",
    "Drop a spicy take about {topic}.",
]

def clean_text(text):
    text = re.sub(r'https?://\S+', '', text)
    text = re.sub(r'/?(u|r)/\w+', '', text)
    text = re.sub(r'\[removed\]|\[deleted\]', '', text)
    text = re.sub(r'edit:.*$', '', text, flags=re.IGNORECASE | re.DOTALL)
    return re.sub(r'\s+', ' ', text).strip()

def trigram_jaccard(a, b):
    def tri(s): return Counter(s.lower()[i:i+3] for i in range(len(s)-2))
    ta, tb = tri(a), tri(b)
    if not ta or not tb: return 0.0
    return sum((ta & tb).values()) / sum((ta | tb).values())

# Load raw data
items = [json.loads(l) for l in open(RAW_FILE)]
print(f'Raw items: {len(items)}')

# Filter
filtered = []
for item in items:
    if item['type'] == 'post':
        text = clean_text(f"{item['title']} {item.get('selftext', '')}")
        if item.get('score', 0) < 10: continue
    else:
        text = clean_text(item.get('body', ''))
        if item.get('score', 0) < 25: continue
    if len(text) < 50 or len(text) > 1500: continue
    if any(m in text.lower() for m in ['i am a bot', 'beep boop']): continue
    topic = item.get('title', item.get('post_title', ''))
    filtered.append((text, topic))
print(f'After filter: {len(filtered)}')

# Deduplicate
unique = []
for text, topic in filtered:
    if not any(trigram_jaccard(text, u[0]) > 0.8 for u in unique):
        unique.append((text, topic))
print(f'After dedup: {len(unique)}')

# Format
rng = random.Random(42)
examples = []
for text, topic in unique:
    if rng.random() < 0.4 or not topic:
        user_msg = rng.choice(DIRECT_TEMPLATES)
    else:
        user_msg = rng.choice(TOPIC_TEMPLATES).format(topic=topic)
    examples.append({'messages': [
        {'role': 'system', 'content': SYSTEM_PROMPT},
        {'role': 'user', 'content': user_msg},
        {'role': 'assistant', 'content': text},
    ]})
rng.shuffle(examples)

# Split
split = int(len(examples) * 0.95)
train_data, val_data = examples[:split], examples[split:]

TRAIN_FILE = Path(DRIVE_DIR) / 'train.jsonl'
VAL_FILE = Path(DRIVE_DIR) / 'val.jsonl'
for path, data in [(TRAIN_FILE, train_data), (VAL_FILE, val_data)]:
    with open(path, 'w') as f:
        for ex in data: f.write(json.dumps(ex) + '\n')

print(f'Train: {len(train_data)} | Val: {len(val_data)}')

## Step 3: Fine-tune with QLoRA

In [None]:
from unsloth import FastLanguageModel
from datasets import Dataset
from trl import SFTTrainer, SFTConfig

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,
    load_in_4bit=True,
)

model = FastLanguageModel.get_peft_model(
    model,
    r=64,
    lora_alpha=128,
    lora_dropout=0,
    target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'],
    bias='none',
    use_gradient_checkpointing='unsloth',
)

In [None]:
# Load datasets
train_dataset = Dataset.from_list([json.loads(l) for l in open(TRAIN_FILE)])
val_dataset = Dataset.from_list([json.loads(l) for l in open(VAL_FILE)])
print(f'Train: {len(train_dataset)} | Val: {len(val_dataset)}')

def formatting_func(example):
    return tokenizer.apply_chat_template(
        example['messages'], tokenize=False, add_generation_prompt=False
    )

OUTPUT_DIR = os.path.join(DRIVE_DIR, 'lelm-adapter')

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    args=SFTConfig(
        output_dir=OUTPUT_DIR,
        num_train_epochs=3,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        lr_scheduler_type='cosine',
        warmup_steps=10,
        optim='adamw_8bit',
        weight_decay=0.01,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=10,
        save_strategy='epoch',
        eval_strategy='epoch',
        seed=42,
        max_seq_length=MAX_SEQ_LENGTH,
        dataset_text_field='text',
    ),
    formatting_func=formatting_func,
)

trainer.train()

In [None]:
# Save adapter
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f'Adapter saved to {OUTPUT_DIR}')

## Step 4: Inference

In [None]:
FastLanguageModel.for_inference(model)

def generate(prompt, max_new_tokens=512):
    messages = [
        {'role': 'system', 'content': SYSTEM_PROMPT},
        {'role': 'user', 'content': prompt},
    ]
    inputs = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors='pt'
    ).to(model.device)
    outputs = model.generate(
        input_ids=inputs, max_new_tokens=max_new_tokens,
        temperature=0.8, top_p=0.9, do_sample=True,
    )
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True).strip()

# Demo prompts
demos = [
    "Give me your hottest LeBron James take.",
    "Is Nikola Jokic the best player in the NBA right now?",
    "Who's the most overrated player in the league?",
    "Give me your boldest Finals prediction.",
    "Is the 3-point revolution ruining basketball?",
]

for prompt in demos:
    print(f'\n>> {prompt}')
    print('-' * 40)
    print(generate(prompt))
    print()

In [None]:
# Interactive — type your own prompts
prompt = "Who will win the championship this year?"  # @param {type:"string"}
print(generate(prompt))