<a href="https://colab.research.google.com/github/22f3002718/ai-notebooks/blob/main/starcoder2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q transformers accelerate


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face Hub identifier of the checkpoint loaded below.
model_id = "bigcode/starcoder2-3b"

# Load tokenizer (converts text to tokens and back)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load model (uses float16 to run efficiently on Colab GPUs)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",           # Placement is chosen at load time (a GPU when one is available)
    torch_dtype=torch.float16    # Half precision: halves weight memory relative to float32
)
# Put the model in evaluation mode; this notebook only runs inference.
model.eval()


In [None]:
prompt = "def is_prime(n):\n    \"\"\"Check if a number is prime.\"\"\"\n"

# Tokenize the prompt and move the tensors to the device the model lives on.
# The model is loaded with device_map="auto", so its placement is decided at
# load time; hard-coding "cuda" fails on a CPU-only runtime.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate complete function
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=128,                     # enough tokens for logic
        do_sample=True,                         # enables randomness
        temperature=0.7,                        # balance creativity
        top_k=50,
        eos_token_id=tokenizer.eos_token_id,    # stop properly
        pad_token_id=tokenizer.eos_token_id,    # avoid warning
    )

# Full decoded sequence (prompt followed by the completion).
generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Isolate the completion at the token level. Slicing the decoded string by
# len(prompt) is only correct when decoding reproduces the prompt character
# for character, which the tokenizer does not guarantee.
prompt_token_count = inputs["input_ids"].shape[1]
completion = tokenizer.decode(
    outputs[0][prompt_token_count:], skip_special_tokens=True
)

# Print only the generated part, ignoring repeat of the prompt
print("\n🧠 Generated Code:\n")
print(completion.strip())


In [None]:
def run(prompt, max_tokens=100):
    """Greedily complete ``prompt`` and return the decoded text.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        prompt: Text to continue.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded output sequence (prompt followed by the completion),
        with special tokens removed and surrounding whitespace stripped.
    """
    # Follow the model's actual placement instead of assuming a GPU:
    # the model is loaded with device_map="auto".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=False,  # greedy decoding keeps the tests repeatable
            # Same explicit pad token as the other generate() calls in this
            # notebook, which avoids the missing-pad-token warning.
            pad_token_id=tokenizer.eos_token_id,
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded.strip()

# 1️⃣ LENGTH & LATENCY
# `time` was never imported in this notebook, so the timing call raised
# NameError, which the handler below reported as a generation failure.
import time

print("🔍 Length + Latency Test")
for length in [128, 256, 512, 768, 1024]:
    # `length` is a nominal size: the prompt repeats a fixed comment line
    # length // 10 times; the real token count is not measured here.
    test_prompt = "def example():\n    # " + "This is a comment.\n    # " * (length // 10)
    try:
        # perf_counter is a monotonic clock intended for measuring intervals.
        start = time.perf_counter()
        output = run(test_prompt, max_tokens=20)
        end = time.perf_counter()
    except Exception as e:
        # Stop at the first failing size; larger prompts are not attempted.
        print(f"❌ {length} tokens → Failure | 💥 {e}")
        break
    else:
        print(f"✅ {length} tokens → Success | ⏱ {end - start:.2f}s")

# 2️⃣ COMPLEXITY TEST
print("\n🔍 Complexity Prompt Test")
# Comment-only prompts, ordered from concrete to increasingly underspecified.
prompts = [
    "# Add two numbers",        # simple
    "# Do something useful",    # vague
    "# Implement greatness",    # abstract
]

for p in prompts:
    out = run(p, max_tokens=60)
    # Heuristic only: the output counts as code if it contains a function
    # definition keyword or an assignment/comparison character.
    success = any(marker in out for marker in ("def", "="))
    if success:
        status = "✅ Success"
    else:
        status = "❌ Failure"
    print(f"\nPrompt: {p}\n{status}\n📝 Output:\n{out}")

# 3️⃣ STABILITY TEST (minor changes to prompt)
print("\n🔍 Prompt Stability Test")
base = "def reverse_string(s):"
variation = "def reverse_str(s):"

out1 = run(base, 60)
out2 = run(variation, 60)


def _completion_lines(text, prompt_text, max_lines=2):
    """Return the first ``max_lines`` lines of ``text`` after ``prompt_text``.

    If ``text`` does not start with ``prompt_text`` it is used unchanged.
    """
    if text.startswith(prompt_text):
        text = text[len(prompt_text):]
    return text.strip().splitlines()[:max_lines]


# run() returns prompt + completion. The two prompts differ on their first
# line, so comparing the raw outputs could never report "Stable". Compare
# only what the model generated after each prompt.
same = _completion_lines(out1, base) == _completion_lines(out2, variation)
status = "✅ Stable" if same else "❌ Unstable"
print(f"\nPrompt 1: {base}\n📝 {out1}\n\nPrompt 2: {variation}\n📝 {out2}\n{status}")

# 4️⃣ AUTOCOMPLETION TEST
print("\n🔍 Code Autocompletion Test")
# A factorial function cut off right after the base-case condition.
prompt = 'def factorial(n):\n    """Returns factorial"""\n    if n == 0:'
out = run(prompt, 60)
# Heuristic: a plausible completion has a return statement and a multiplication.
success = all(token in out for token in ("return", "*"))
status = "❌ Failure" if not success else "✅ Success"
print(f"Prompt:\n{prompt}\n\n📝 Output:\n{out}\n{status}")

# 5️⃣ SYNTAX CHECK TEST (does it produce valid Python code?)
print("\n🔍 Syntax Check Test")

import ast

def is_valid_syntax(code):
    """Return True if ``code`` parses as Python source, False otherwise.

    Only parsing is attempted; the code is never executed.

    Args:
        code: Source text to check.

    Returns:
        True when ``ast.parse`` accepts ``code``; False when parsing fails.
    """
    try:
        ast.parse(code)
    except (SyntaxError, ValueError, TypeError, RecursionError, MemoryError):
        # Parse failures only. A bare ``except:`` would also swallow
        # KeyboardInterrupt and SystemExit, making the cell uninterruptible.
        return False
    return True

# A complete one-line function; the model may add up to 10 more tokens.
prompt = "def is_even(n):\n    return n % 2 == 0"
output = run(prompt, max_tokens=10)
# The whole output (prompt plus continuation) must parse as Python.
syntax_ok = is_valid_syntax(output)
if syntax_ok:
    status = "✅ Valid Python"
else:
    status = "❌ Invalid"
print(f"\n📝 Output:\n{output}\n{status}")


In [None]:
# ✅ Install Transformers and Accelerate if not already installed
!pip install -q transformers accelerate

# ✅ Load StarCoder2-3B from Hugging Face
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face Hub identifier of the checkpoint loaded below.
model_id = "bigcode/starcoder2-3b"

# trust_remote_code is deliberately not passed: it allows Python code shipped
# in the model repository to run locally, and the earlier cell of this
# notebook loads the same checkpoint without it.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",            # placement is chosen at load time
    torch_dtype=torch.float16,    # half precision to reduce memory use
)

# ✅ Code generation function
def generate_code(prompt, max_new_tokens=128):
    """Sample a completion for ``prompt`` and return the decoded text.

    Uses the notebook-level ``tokenizer`` and ``model``. Sampling is enabled,
    so repeated calls with the same prompt can return different text.

    Args:
        prompt: Source-code prefix to continue.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded output sequence (prompt followed by the completion),
        with special tokens removed.
    """
    # Follow the model's actual placement instead of assuming a GPU:
    # the model is loaded with device_map="auto".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            # EOS is also passed as the pad token id.
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# ✅ Prompts to test: Python, Java, Go, React
# Each value is the opening of a definition in that language, left for the
# model to continue.
prompts = {
    "Python": "def fibonacci(n):",
    "Java": (
        "public class Factorial {\n"
        "    public static int factorial(int n) {"
    ),
    "Go": (
        'package main\n'
        'import "fmt"\n'
        '\n'
        'func isPrime(n int) bool {'
    ),
    "React (JSX)": (
        "import React, { useState } from 'react';\n"
        "\n"
        "function Counter() {"
    ),
}

# ✅ Run generation for each language
for lang in prompts:
    prompt = prompts[lang]
    print(f"\n🧠 {lang} Prompt:\n{prompt}")
    code = generate_code(prompt)
    print(f"\n💻 {lang} Output:\n{code}")
