<a href="https://colab.research.google.com/github/BraedynL0530/PortfolioWebsite/blob/master/NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# README Summary Generator - FIXED VERSION
# Key fixes: Progress visibility, faster model, better error handling
#
# This cell reads README records from training_data.json, asks a pretrained
# causal language model for a summary of each one, and writes the results
# (with periodic checkpoints) to Google Drive.

# SETUP
# Mount Google Drive at /content/drive; the input, checkpoint and output
# paths configured further down all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

# NOTE: this is an IPython shell escape, not Python syntax, so the cell only
# runs inside a notebook. It installs the packages imported just below.
!pip install -q transformers torch accelerate safetensors

# CONFIGURATION
import json
import os
import sys
from pathlib import Path
from tqdm.auto import tqdm
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Flush stdout up front so Colab shows output as soon as it is produced.
sys.stdout.flush()

# --- Locations on the mounted Drive -----------------------------------------
DRIVE_BASE = '/content/drive/MyDrive/readme_training'
INPUT_FILE = f'{DRIVE_BASE}/training_data.json'            # READMEs to summarize
CHECKPOINT_FILE = f'{DRIVE_BASE}/summaries_checkpoint.json'  # partial results for resuming
OUTPUT_FILE = f'{DRIVE_BASE}/summaries_final.json'         # finished result set

os.makedirs(DRIVE_BASE, exist_ok=True)

# --- Model selection --------------------------------------------------------
# Keep exactly ONE of the MODEL_NAME lines below uncommented.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # RECOMMENDED: Fast, no auth
# MODEL_NAME = "microsoft/phi-2"  # Good quality, medium speed
# MODEL_NAME = "google/flan-t5-base"  # Very fast, different architecture
# MODEL_NAME = "TheBloke/vicuna-7B-1.1-HF"  # Slow but higher quality

# --- Run parameters ---------------------------------------------------------
BATCH_SIZE = 1  # one README per generation call, for readable progress output
CHECKPOINT_INTERVAL = 5  # write the checkpoint file after every 5 summaries

print(
    f"‚úÖ Model: {MODEL_NAME}",
    f"‚úÖ Checkpoint every: {CHECKPOINT_INTERVAL}",
    f"‚úÖ Drive path: {DRIVE_BASE}",
    sep="\n",
)
sys.stdout.flush()

# LOAD MODEL
# Fetch the tokenizer and half-precision weights for MODEL_NAME. Any failure
# is reported with a hint and then re-raised so the cell stops here.
print("\nüì¶ Loading model... (this may take 2-5 minutes)")
sys.stdout.flush()

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, device_map="auto", dtype=torch.float16, low_cpu_mem_usage=True
    )

    # Reuse the end-of-sequence token for padding when the tokenizer has no
    # pad token, so generate() can be given a pad_token_id later.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print("‚úÖ Model loaded successfully!")
    print(f"‚úÖ Using device: {model.device}")
    sys.stdout.flush()

except Exception as e:
    print(
        f"‚ùå MODEL LOAD FAILED: {e}",
        "Try using: TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        sep="\n",
    )
    sys.stdout.flush()
    raise

# LOAD DATA
print("\nüìÇ Loading README data...")
sys.stdout.flush()

with open(INPUT_FILE, 'r') as f:
    readmes_data = json.load(f)

print(f"‚úÖ Loaded {len(readmes_data)} READMEs")
sys.stdout.flush()

# Resume support: a checkpoint from an earlier run tells us which records
# already have a summary. A missing checkpoint file simply means a first run.
try:
    with open(CHECKPOINT_FILE, 'r') as f:
        processed_summaries = json.load(f)
except FileNotFoundError:
    processed_summaries = []
    processed_indices = set()
    print("üìã Starting fresh")
else:
    processed_indices = {s['id'] for s in processed_summaries}
    print(f"üìã Resuming: {len(processed_summaries)} already done")

sys.stdout.flush()

# Records without an 'id' get their position in the input file as the id.
for i, item in enumerate(readmes_data):
    item.setdefault('id', i)

# Whatever is not in the checkpoint yet still has to be summarized.
remaining = list(filter(lambda r: r['id'] not in processed_indices, readmes_data))
print(f"üìä Remaining: {len(remaining)}")
sys.stdout.flush()

# SUMMARY GENERATOR
def generate_summary(readme_text, max_length=1500):
    """Generate a one-paragraph summary of a README with the loaded model.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        readme_text: README contents as a string.
        max_length: Maximum number of README characters placed in the prompt;
            longer text is cut at this length and suffixed with "...".

    Returns:
        The summary as a single line of text. If tokenization, generation or
        decoding fails, the error is printed and a string starting with
        "Error generating summary: " is returned instead of raising.
    """

    # Truncate
    if len(readme_text) > max_length:
        readme_text = readme_text[:max_length] + "..."

    # Improved prompt for better technical summaries
    prompt = f"""Write a technical summary of this GitHub project as a single paragraph with 3-4 complete sentences.

Your summary should cover:
- What the project does and its main purpose
- Technologies used (programming languages, frameworks, libraries) - only mention what's in the README
- Key features or implementation details

Do NOT use numbered lists or bullet points. Write in natural paragraph form.

README:
{readme_text}

Summary:"""

    try:
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=2048
        ).to(model.device)
        prompt_token_count = inputs["input_ids"].shape[1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,  # Reduced for speed
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # For a causal LM, generate() returns the prompt ids followed by the
        # continuation. Slice the prompt off by token count before decoding:
        # searching the decoded text for "Summary:" or cutting len(prompt)
        # characters breaks whenever decoding does not reproduce the prompt
        # exactly or the README itself contains that marker.
        new_token_ids = outputs[0][prompt_token_count:]
        summary = tokenizer.decode(new_token_ids, skip_special_tokens=True)

        # The model sometimes repeats a heading before answering; keep only
        # the text after the last such heading.
        for marker in ("Technical Summary:", "Summary:"):
            if marker in summary:
                summary = summary.split(marker)[-1]
                break

        # Clean up: collapse to a single line
        summary = summary.replace('\n', ' ').strip()

        # Only trim when unreasonably long (over 1000 chars): cut at the last
        # sentence end inside the first 1000 characters, provided that keeps
        # more than 500 characters.
        if len(summary) > 1000:
            last_period = summary[:1000].rfind('.')
            if last_period > 500:
                summary = summary[:last_period + 1]

        return summary

    except Exception as e:
        # Best effort by design: report the problem and hand back a marker
        # string so one bad README does not abort a long run.
        print(f"Generation error: {e}")
        return f"Error generating summary: {str(e)[:100]}"

# MAIN PROCESSING LOOP
# generate_summary() reports a failure by returning text with this prefix.
ERROR_SUMMARY_PREFIX = "Error generating summary:"


def _write_json_atomically(path, payload):
    """Dump ``payload`` as JSON to ``path`` without exposing a half-written file.

    The data goes to a sibling temporary file first and is then moved over
    ``path`` with ``os.replace``. If the session dies during the dump, the
    previous file at ``path`` is left as it was.
    """
    tmp_path = f"{path}.tmp"
    with open(tmp_path, 'w') as f:
        json.dump(payload, f, indent=2)
    os.replace(tmp_path, path)


print("\nüöÄ Starting generation...\n")
print("=" * 60)
sys.stdout.flush()

unsaved = 0  # summaries added since the checkpoint file was last written

for i, readme_data in enumerate(remaining):
    repo_label = readme_data.get('repo_name', 'Unknown')
    try:
        print(f"\n[{i+1}/{len(remaining)}] Processing: {repo_label}")
        sys.stdout.flush()

        # Generate
        summary = generate_summary(readme_data['readme'])

        # A failed generation must not be stored as if it were a summary:
        # it would end up as a training label and the README would be
        # treated as done. Leaving it out means the next run retries it.
        if summary.startswith(ERROR_SUMMARY_PREFIX):
            print(f"\n‚ùå FAILED {repo_label}: {summary}")
            sys.stdout.flush()
            continue

        # Save result
        readme_data['summary'] = summary
        processed_summaries.append(readme_data)
        unsaved += 1

        # Show FULL summary for first 5, then preview for rest
        if i < 5:
            print(f"   ‚úì FULL: {summary}")
        else:
            preview = summary[:100] + "..." if len(summary) > 100 else summary
            print(f"   ‚úì {preview}")
        sys.stdout.flush()

        # Checkpoint once CHECKPOINT_INTERVAL new summaries have accumulated
        if unsaved >= CHECKPOINT_INTERVAL:
            _write_json_atomically(CHECKPOINT_FILE, processed_summaries)
            unsaved = 0
            print(f"\nüíæ CHECKPOINT SAVED: {len(processed_summaries)} summaries")
            print("=" * 60)
            sys.stdout.flush()

    except Exception as e:
        # Keep going: one bad record should not end the whole run.
        print(f"\n‚ùå FAILED {repo_label}: {e}")
        sys.stdout.flush()
        continue

# FINAL SAVE
print("\n" + "=" * 60)
print("üíæ Saving final results...")
sys.stdout.flush()

_write_json_atomically(CHECKPOINT_FILE, processed_summaries)
_write_json_atomically(OUTPUT_FILE, processed_summaries)

print(f"""
‚ú® COMPLETE! ‚ú®

üìä Stats:
   Total: {len(processed_summaries)} summaries
   Checkpoint: {CHECKPOINT_FILE}
   Final: {OUTPUT_FILE}

üéØ Next: Download from Google Drive and train your model!
""")
sys.stdout.flush()

# PREVIEW
print("\nüìã Sample summaries:")
for number, item in enumerate(processed_summaries[:3], start=1):
    print(f"\n{number}. {item.get('repo_name', 'Unknown')} ({item.get('stars', 0)} ‚≠ê)")
    print(f"   {item['summary']}")
sys.stdout.flush()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
✅ Checkpoint every: 5
✅ Drive path: /content/drive/MyDrive/readme_training

📦 Loading model... (this may take 2-5 minutes)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

✅ Model loaded successfully!
✅ Using device: cuda:0

📂 Loading README data...
✅ Loaded 6317 READMEs
📋 Resuming: 6317 already done
📊 Remaining: 0

🚀 Starting generation...


💾 Saving final results...

✨ COMPLETE! ✨

📊 Stats:
   Total: 6317 summaries
   Checkpoint: /content/drive/MyDrive/readme_training/summaries_checkpoint.json
   Final: /content/drive/MyDrive/readme_training/summaries_final.json

🎯 Next: Download from Google Drive and train your model!


📋 Sample summaries:

1. Unknown (0 ⭐)
   The WebArchivePlayer is a desktop tool which provides a simple point-and-click wrapper for viewing any web archive file (in WARC and ARC format). To create a web archive (WARC) file of your own, simply browse any page and download the recorded WARC file. The player allows users to pick one or more ARC/WARC from their local machine and browse the contents from any browser. No internet connection is necessary in

2. Unknown (0 ⭐)
   - RVM / Capistrano integrat

In [5]:
from torch.autograd import forward_ad
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import math
from dataclasses import  dataclass
torch.manual_seed(42)

#config
@dataclass
class config:
    """Hyperparameters read by the attention, MLP, block and model classes."""

    # Number of distinct token ids; sizes the token embedding and output layer.
    vocab_size: int
    # Context window: the longest sequence the model accepts in one pass.
    block_size: int
    # How many transformer blocks are stacked.
    n_layer: int
    # Attention heads per block; must divide n_embd evenly.
    n_head: int
    # Width of the vector that represents each token.
    n_embd: int
    # Dropout probability used for regularisation.
    dropout: float
    # Token id treated as padding.
    pad_token_id: int = 50256



"""
Self-attention: part 1 of the transformer.
Q, K, V = query, key, value. They help use the two embeddings to learn different meanings for words and give them different vectors even when the word is the same.
Below is the theory version; the class is optimized: it condenses the projections into one large projection and splits the result. Other than that it is nearly identical, just more efficient.
"""

"""
#learnable compenets
q_prog = nn.Linear(C, C, bias =False)
k_prog = nn.Linear(C, C, bias =False)
v_prog = nn.Linear(C, C, bias =False)

#weights
q_prog.weight.data = torch.randn(C,C)
k_prog.weight.data = torch.randn(C,C)
v_prog.weight.data = torch.randn(C,C)

#preform projection
q = q_prog(x)
k = k_prog(x)
v = v_prog(x)

scores = q @ k.transpose(-2,-1)
print("scores",scores)



Attention(Q, K, V) = softmax(Q K^T / sqrt(d_k)) V

d_k = k.size(-1)#last dimesion of
scaled_scores = scores / math.sqrt(d_k)
attention_weights = F.softmax(scaled_scores, dim=1)
print("scaled scores", scaled_scores)
print("scaled scores -> percentages", attention_weights)

# aggreation Last part of attention!
output = attention_weights @ v
print("output!:",output)

"""

# Core logic for MultiHead
class CausalSelfAttention(nn.Module):
    """Multi-head self-attention in which a position only sees itself and earlier positions."""

    def __init__(self, config: config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        self.n_head = config.n_head
        self.n_embd = config.n_embd

        # A single fused projection produces queries, keys and values at once.
        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=False)

        # Lower-triangular matrix of ones, shaped (1, 1, block_size, block_size)
        # so it broadcasts over batch and heads. It is registered as a buffer
        # named "bias" and is used in forward() as the causal mask.
        lower_triangle = torch.ones(config.block_size, config.block_size).tril()
        self.register_buffer(
            "bias",
            lower_triangle.view(1, 1, config.block_size, config.block_size),
        )

        # Output projection applied after the heads are merged back together.
        self.c_proj = nn.Linear(config.n_embd, config.n_embd)

    def forward(self, x):
        """Apply causal attention to ``x`` of shape (batch, time, channels)."""
        batch, steps, width = x.size()
        per_head = width // self.n_head

        def to_heads(t):
            # (batch, time, channels) -> (batch, heads, time, per_head)
            return t.view(batch, steps, self.n_head, per_head).transpose(1, 2)

        q, k, v = map(to_heads, self.c_attn(x).split(width, dim=2))

        # Scaled dot-product scores between every pair of positions.
        scores = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(per_head))

        # Positions in the future are set to -inf so softmax gives them weight 0.
        is_future = self.bias[:, :, :steps, :steps] == 0
        weights = F.softmax(scores.masked_fill(is_future, float("-inf")), dim=-1)

        # Weighted sum of values, then heads are concatenated again.
        merged = (weights @ v).transpose(1, 2).contiguous().view(batch, steps, width)

        return self.c_proj(merged)


class MLP(nn.Module):
    """Feed-forward part of a block: widen 4x, GELU, narrow back, dropout."""

    def __init__(self, config: config):
        super().__init__()
        hidden_width = 4 * config.n_embd
        # Expansion gives the network a wider space in which to combine features.
        self.fc = nn.Linear(config.n_embd, hidden_width)
        # Projection back to n_embd so the result can be added to the residual.
        self.proj = nn.Linear(hidden_width, config.n_embd)
        self.drop = nn.Dropout(config.dropout)

    def forward(self, x):
        # GELU between the two linear layers keeps them from collapsing into
        # a single linear map.
        return self.drop(self.proj(F.gelu(self.fc(x))))

class Block(nn.Module):
    """One transformer layer: attention and MLP, each wrapped in a residual connection.

    Both sub-layers compute ``x + f(LayerNorm(x))`` rather than ``f(x)``, so
    each one adds to what is already in ``x`` instead of replacing it.
    """

    def __init__(self, config: config):
        super().__init__()
        self.ln_1 = nn.LayerNorm(config.n_embd)
        self.attn = CausalSelfAttention(config)
        self.ln_2 = nn.LayerNorm(config.n_embd)
        self.mlp = MLP(config)

    def forward(self, x):
        # Residual step 1: add the attention output to the input.
        attended = x + self.attn(self.ln_1(x))
        # Residual step 2: add the feed-forward output on top of that.
        return attended + self.mlp(self.ln_2(attended))

In [19]:
class NLP(nn.Module):
    """Decoder-only transformer language model built from ``Block`` layers.

    Token and position embeddings are summed, passed through ``n_layer``
    blocks and a final LayerNorm, and projected to vocabulary logits by a
    head that shares its weight matrix with the token embedding.
    """

    def __init__(self, config: config):
        super().__init__()
        # Input
        self.wte = nn.Embedding(config.vocab_size, config.n_embd)  # token embedding
        self.wpe = nn.Embedding(config.block_size, config.n_embd)  # position embedding
        self.drop = nn.Dropout(config.dropout)
        self.config = config
        self.pad_token_id = config.pad_token_id

        # Processing: the blocks run one after another, each refining the
        # representation produced by the previous one.
        self.h = nn.ModuleList([Block(config) for _ in range(config.n_layer)])

        # Output layers
        self.ln_f = nn.LayerNorm(config.n_embd)  # final layer norm
        # Language-model head: one raw score per vocabulary entry for every
        # position. Training uses all positions; generation only the last.
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # Weight tying: the head and the token embedding are the same tensor.
        self.lm_head.weight = self.wte.weight

        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Initialize weights with small random values"""
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
        elif isinstance(module, nn.LayerNorm):
            torch.nn.init.zeros_(module.bias)
            torch.nn.init.ones_(module.weight)

    def forward(self, idx, targets=None):
        """Compute logits, and the loss when ``targets`` is given.

        Args:
            idx: Token ids of shape (batch, time); ``time`` must not exceed
                ``config.block_size``.
            targets: Optional label ids with the same shape as ``idx``.
                ``logits[:, t]`` is scored directly against ``targets[:, t]``;
                no shift is applied here, so for next-token training the
                caller has to pass labels that are already moved one position
                to the left. Entries equal to -100 are ignored.

        Returns:
            ``(logits, loss)`` where ``logits`` has shape
            (batch, time, vocab_size) and ``loss`` is ``None`` without targets.
        """
        _, T = idx.size()

        assert T <= self.config.block_size, f"Sequence length {T} exceeds block_size {self.config.block_size}"

        pos = torch.arange(T, device=idx.device).unsqueeze(0)
        x = self.drop(self.wte(idx) + self.wpe(pos))

        for block in self.h:
            x = block(x)

        logits = self.lm_head(self.ln_f(x))

        loss = None
        if targets is not None:
            loss = F.cross_entropy(
                logits.view(-1, logits.size(-1)),
                targets.view(-1),
                ignore_index=-100
            )

        return logits, loss

    @torch.no_grad()
    def summarize(self, idx, max_new_tokens, temperature=1.0, top_k=None, eos_token_id=None):
        """Sample a continuation of ``idx`` one token at a time.

        Args:
            idx: Prompt token ids of shape (batch, time).
            max_new_tokens: Upper bound on the number of sampled tokens.
            temperature: Divides the logits before sampling; must be > 0.
            top_k: If given, sample only from the ``top_k`` most likely tokens.
            eos_token_id: Token id that ends a sequence. Defaults to
                ``pad_token_id``, which is what the previous stop check
                compared against.

        Returns:
            ``idx`` with the sampled tokens appended. Generation stops once
            every row has produced the end token; that final end token is not
            appended. In a batch, rows that finish early are filled with the
            end token until the remaining rows finish.

        Raises:
            ValueError: If ``temperature`` is not positive.
        """
        if temperature <= 0:
            raise ValueError(f"temperature must be > 0, got {temperature}")

        eos_id = self.pad_token_id if eos_token_id is None else eos_token_id
        finished = torch.zeros(idx.size(0), dtype=torch.bool, device=idx.device)

        for _ in range(max_new_tokens):
            # Feed at most block_size tokens: the position embedding has no
            # entries beyond that.
            idx_cond = idx[:, -self.config.block_size:]

            logits, _ = self(idx_cond)
            logits = logits[:, -1, :] / temperature

            # Never sample padding, unless padding doubles as the end token.
            # Masking it in that case made the stop condition unreachable, so
            # generation always ran for the full max_new_tokens.
            if self.pad_token_id != eos_id:
                logits[:, self.pad_token_id] = -float('inf')

            # Optional: top-k sampling
            if top_k is not None:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                logits[logits < v[:, [-1]]] = -float('inf')

            probs = F.softmax(logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)

            # Rows that already ended keep emitting the end token so the
            # batch stays rectangular.
            next_token[finished] = eos_id
            finished |= next_token.squeeze(1) == eos_id
            if finished.all():
                break

            idx = torch.cat((idx, next_token), dim=1)

        return idx

In [None]:
from transformers import GPT2Tokenizer
from torch.optim.lr_scheduler import CosineAnnealingLR
import joblib
import json
import os
from google.colab import drive

#let ai add checkpointing
drive.mount('/content/drive')

# Setup paths
DRIVE_BASE = '/content/drive/MyDrive/model_checkpoints'
os.makedirs(DRIVE_BASE, exist_ok=True)

CHECKPOINT_PATH = f'{DRIVE_BASE}/training_checkpoint.pt'   # resumable training state
FINAL_MODEL_PATH = f'{DRIVE_BASE}/v0.7_final.joblib'       # model after the last epoch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# OUTPUT_FILE is not defined in this cell: it comes from the summary
# generation cell, which therefore has to have run in this session.
with open(OUTPUT_FILE) as file:
    data = json.load(file)

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# Use the end-of-sequence token for padding as well.
tokenizer.pad_token = tokenizer.eos_token

print(f"Tokenizer vocab_size: {len(tokenizer)}")

config_instance = config(
    vocab_size=len(tokenizer),
    block_size=1024,
    n_layer=6,
    n_head=12,
    n_embd=768,
    dropout=0.2,
    pad_token_id=tokenizer.pad_token_id
)

print(f"Config vocab_size: {config_instance.vocab_size}")

model = NLP(config_instance).to(device)

print(f"Model wte shape: {model.wte.weight.shape}")

BATCH_SIZE = 5
NUM_EPOCHS = 3
GRAD_ACCUM_STEPS = 4  # micro-batches accumulated per optimizer step
IGNORE_INDEX = -100   # label value that the loss skips

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

# The scheduler is stepped once per optimizer step, i.e. once every
# GRAD_ACCUM_STEPS batches, so T_max has to count optimizer steps. Counting
# batches instead stretched the cosine over about GRAD_ACCUM_STEPS times too
# many steps and the learning rate barely decayed.
# -(-a // b) is integer ceil(a / b); it includes the trailing partial batch.
batches_per_epoch = -(-len(data) // BATCH_SIZE)
optimizer_steps_per_epoch = -(-batches_per_epoch // GRAD_ACCUM_STEPS)
scheduler = CosineAnnealingLR(
    optimizer,
    T_max=max(1, NUM_EPOCHS * optimizer_steps_per_epoch),
)

# Resume from checkpoint if exists
start_epoch = 0
start_batch = 0

if os.path.exists(CHECKPOINT_PATH):
    print(f"\nüìÇ Found checkpoint! Loading from {CHECKPOINT_PATH}")
    # map_location lets a checkpoint written on GPU be resumed on a CPU-only
    # session (and the other way round).
    checkpoint = torch.load(CHECKPOINT_PATH, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # NOTE(review): presumably this also restores the T_max stored in the
    # checkpoint, overriding the value computed above - confirm.
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    start_epoch = checkpoint['epoch']
    start_batch = checkpoint['batch']
    print(f"‚úÖ Resuming from Epoch {start_epoch + 1}, Batch {start_batch}")
else:
    print("\nüÜï Starting fresh training")

def _build_training_pair(readme_tokens, summary_tokens, seq_len, pad_id, ignore_index):
    """Return ``(inputs, labels)``, each ``seq_len`` long, for one example.

    ``inputs`` is README tokens followed by summary tokens, cut to ``seq_len``
    and right-padded with ``pad_id``. ``labels[t]`` is the token that follows
    ``inputs[t]`` when that next token belongs to the summary, and
    ``ignore_index`` everywhere else (README positions, the last real token,
    padding).
    """
    tokens = (readme_tokens + summary_tokens)[:seq_len]
    prompt_len = min(len(readme_tokens), len(tokens))

    # Step 1: mark which tokens should be learned (summary only).
    aligned = [ignore_index] * prompt_len + tokens[prompt_len:]
    # Step 2: move the labels one position to the left. The model compares
    # logits[t] with labels[t] without shifting, and position t can attend to
    # its own token, so unshifted labels only teach it to copy its input.
    labels = (aligned[1:] + [ignore_index])[:len(tokens)]

    pad_len = seq_len - len(tokens)
    return tokens + [pad_id] * pad_len, labels + [ignore_index] * pad_len


for epoch in range(start_epoch, NUM_EPOCHS):
    print(f"\n{'='*60}")
    print(f"EPOCH {epoch + 1}/{NUM_EPOCHS}")
    print(f"{'='*60}\n")

    if epoch == start_epoch and start_batch > 0:
        print(f"‚è© Skipping to batch {start_batch}...")

    optimizer.zero_grad()

    # Integer ceil: the range() below also yields the trailing partial batch,
    # so it has to be counted for progress output and leftover handling.
    total_batches = -(-len(data) // BATCH_SIZE)

    for i in range(0, len(data), BATCH_SIZE):
        step = i // BATCH_SIZE

        # Skip already processed batches if resuming
        if epoch == start_epoch and step < start_batch:
            continue

        batch = data[i:i+BATCH_SIZE]
        input_ids = []
        target_ids = []

        for item in batch:
            readme_tokens = tokenizer.encode(
                item["readme"],
                max_length=768,
                truncation=True,
                add_special_tokens=False
            )
            summary_tokens = tokenizer.encode(
                " SUMMARY: " + item["summary"],
                max_length=255,
                truncation=True,
                add_special_tokens=False
            )
            # End-of-sequence marker so the model learns where a summary stops.
            summary_tokens = summary_tokens + [tokenizer.eos_token_id]

            tokens, targets = _build_training_pair(
                readme_tokens,
                summary_tokens,
                1024,
                tokenizer.pad_token_id,
                IGNORE_INDEX,
            )
            input_ids.append(tokens)
            target_ids.append(targets)

        input_ids = torch.tensor(input_ids, dtype=torch.long).to(device)
        target_ids = torch.tensor(target_ids, dtype=torch.long).to(device)

        logits, loss = model(input_ids, targets=target_ids)
        # Scale so the accumulated gradient is an average over the window.
        loss = loss / GRAD_ACCUM_STEPS
        loss.backward()

        if (step + 1) % GRAD_ACCUM_STEPS == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        # Log every 50 batches
        if step % 50 == 0:
            progress = (step / total_batches) * 100
            print(f"[Epoch {epoch+1}] Batch {step}/{total_batches} ({progress:.1f}%) | Loss: {(loss.item() * GRAD_ACCUM_STEPS):.4f}")

        # Save checkpoint every 100 batches.
        # 100 is a multiple of GRAD_ACCUM_STEPS (4), so at this point batch
        # `step` has been accumulated but not applied; resuming at `step`
        # re-runs exactly that batch. Keep the interval a multiple of
        # GRAD_ACCUM_STEPS if either value is changed.
        if step % 100 == 0 and step > 0:
            print(f"\nüíæ Saving checkpoint...")
            torch.save({
                'epoch': epoch,
                'batch': step,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'loss': loss.item() * GRAD_ACCUM_STEPS,
            }, CHECKPOINT_PATH)
            print(f"‚úÖ Checkpoint saved to Drive\n")

        del logits, loss, input_ids, target_ids

    # Handle leftover gradients: the last accumulation window is incomplete
    # when the real batch count is not a multiple of GRAD_ACCUM_STEPS.
    if total_batches % GRAD_ACCUM_STEPS != 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    # Save after each epoch
    print(f"\n{'='*60}")
    print(f"‚úÖ Epoch {epoch + 1} Complete!")
    print(f"{'='*60}")

    epoch_model_path = f'{DRIVE_BASE}/v0.7_epoch{epoch+1}.joblib'
    joblib.dump(model, epoch_model_path)
    print(f"üíæ Epoch model saved: {epoch_model_path}")

    # Update checkpoint for next epoch
    torch.save({
        'epoch': epoch + 1,
        'batch': 0,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
    }, CHECKPOINT_PATH)

    # Quick test after each epoch
    print(f"\nüß™ Testing model...")
    model.eval()
    test_readme = data[0]["readme"][:500]
    prompt = f"{test_readme} SUMMARY:"
    tokens = tokenizer.encode(prompt, return_tensors='pt').to(device)

    with torch.no_grad():
        generated = model.summarize(tokens, max_new_tokens=60, temperature=0.8)

    result = tokenizer.decode(generated[0], skip_special_tokens=True)
    print(f"Sample output: {result.split('SUMMARY:')[-1].strip()}\n")
    model.train()

# Final save
print(f"\nüéâ Training Complete!")
joblib.dump(model, FINAL_MODEL_PATH)
print(f"üíæ Final model saved: {FINAL_MODEL_PATH}")

# Clean up checkpoint: training finished, so there is nothing left to resume.
if os.path.exists(CHECKPOINT_PATH):
    os.remove(CHECKPOINT_PATH)
    print(f"üóëÔ∏è Training checkpoint cleaned up")

In [None]:
import torch
from transformers import GPT2Tokenizer
import joblib
from google.colab import drive

# Inference demo: load the model saved at the end of training and summarize
# one hand-written README.
drive.mount('/content/drive')

DRIVE_BASE = '/content/drive/MyDrive/model_checkpoints'
FINAL_MODEL_PATH = f'{DRIVE_BASE}/v0.7_final.joblib'

# SECURITY NOTE(review): joblib.load unpickles the file, which can execute
# arbitrary code. Only load model files you produced yourself.
# The NLP class and its building blocks must already be defined in this
# session, otherwise unpickling cannot rebuild the object.
model = joblib.load(FINAL_MODEL_PATH)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# Same padding setup as in the training cell.
tokenizer.pad_token = tokenizer.eos_token

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
model.eval()  # disable dropout for generation

# Test README
test_readme = """
 React UI Components

A comprehensive React component library for building modern web applications with ease.

Features

Pre-built Components: Includes buttons, forms, modals, tooltips, and navigation components
TypeScript Support: Fully typed components with IntelliSense support
Dark Mode: Built-in theming system with automatic dark mode detection
Responsive Design: Mobile-first components that adapt to any screen size
Accessibility: WCAG 2.1 compliant with proper ARIA labels

Installation


 Quick Start


How It Works

The library uses React hooks and context API for state management. Each component is built with styled-components for CSS-in-JS styling. The theming system uses CSS variables that can be toggled via a ThemeProvider wrapper. All components are tree-shakeable to minimize bundle size.

Documentation

Visit our docs at https://docs.example.com
"""

# Same "<readme> SUMMARY:" layout that the training cell uses for its
# per-epoch sample, so the model continues with the summary text.
prompt = f"{test_readme} SUMMARY:"
tokens = tokenizer.encode(prompt, return_tensors='pt').to(device)

# Generate
with torch.no_grad():
    generated = model.summarize(tokens, max_new_tokens=50, temperature=1.1)  # author note: just realized I spelled it wrong

result = tokenizer.decode(generated[0], skip_special_tokens=True)
print(f"Input: {test_readme}\n")
print(f"Full output: {result}\n")
# Everything after the last "SUMMARY:" marker is the generated part.
print(f"Summary only: {result.split('SUMMARY:')[-1].strip()}")


In [None]:
import json

# Load the generated summaries for a manual quality check.
# OUTPUT_FILE is defined by the summary-generation cell, so that cell has to
# have run in this session.
with open(OUTPUT_FILE) as f:
    data = json.load(f)

# Show up to the first 3 entries. Slicing instead of indexing 0..2 keeps this
# from raising IndexError when the file holds fewer than three records.
for i, item in enumerate(data[:3]):
    print(f"\n{'='*60}")
    print(f"README #{i+1}:")
    print(f"First 200 chars: {item['readme'][:200]}")
    print(f"\nSUMMARY: {item['summary']}")
    print(f"{'='*60}")