In [1]:
# ============================================================================
# Util code : Restart the kernel
# ============================================================================
import IPython
#IPython.Application.instance().kernel.do_shutdown(True)

In [2]:
# ============================================================================
# Install Required Libraries
# ============================================================================
!pip install -q --upgrade bitsandbytes accelerate transformers

In [3]:
# ============================================================================
# Import Libraries
# ============================================================================
import torch
import json
from IPython.display import Markdown, display, update_display
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


import os

from google.colab import drive

# Make Google Drive reachable under /content/drive/ for reading the dataset
# and writing the result files.
drive.mount('/content/drive/')

# Show what is stored in the notebooks folder (rendered as the cell output)
notebook_folder = '/content/drive/MyDrive/Colab_Notebooks'
os.listdir(notebook_folder)


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


['Untitled0.ipynb',
 'Copy of Preprocessing.ipynb',
 'Untitled',
 'Untitled1.ipynb',
 'Untitled2.ipynb',
 'Untitled3.ipynb',
 'Untitled4.ipynb',
 'Copy of starcoder2-3b.ipynb',
 'starcoder2-7b.ipynb',
 'Dataset.json',
 'Output_MultiShot.json',
 'IR_NLTK_Preprocessing (1).ipynb',
 'Output_ZeroShot.json',
 'Starcoder2-7b v4.ipynb']

In [4]:
# ============================================================================
# Load StarCoder2 Model
# ============================================================================

# Checkpoint to fetch from the Hugging Face Hub
model_name = "bigcode/starcoder2-7b"

# 4-bit quantization settings: the weights are loaded in a compressed form so
# the model fits in far less memory.
quant_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",              # NF4 quantization type (minor performance improvement)
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for computation
    bnb_4bit_use_double_quant=True,         # double quantization for a bit more memory saving
    load_in_4bit=True,                      # 4-bit loading for a large memory saving
)

# Tokenizer: reuse the end-of-sequence token as the padding token
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Model: let device_map="auto" decide the placement instead of calling
# .to("cuda"), which is the better fit for quantized models.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=quant_config,
)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
# ============================================================================
# Generate Function
# ============================================================================
def generate(full_prompt, max_new_tokens=128, temperature=0.2, top_p=0.9):
    """Sample a completion for ``full_prompt`` and return only the answer text.

    The prompt is tokenized as-is (no chat template), the global ``model``
    samples a continuation, and only the newly generated tokens are decoded.
    Everything from the first ``---`` separator onwards is dropped, so a
    follow-up example that the model may append after its answer is not
    mistaken for the answer itself.

    Args:
        full_prompt: Complete prompt text, e.g. as built by ``create_prompt``.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature (low values give more consistent output).
        top_p: Nucleus-sampling threshold.

    Returns:
        The generated answer with surrounding whitespace stripped.
    """
    # Move the inputs to wherever the model was placed (it is loaded with
    # device_map="auto") instead of assuming a device called "cuda".
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    input_ids = inputs["input_ids"]

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=inputs["attention_mask"],  # mask produced by the tokenizer
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,  # required for temperature / top_p to take effect
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; slice them off so
    # the answer is never located by searching the prompt text for "Output:".
    prompt_len = input_ids.shape[-1]
    completion = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    # Keep only the text before the first example separator.
    return completion.split("---")[0].strip()




In [6]:
# Load Dataset from Drive. The encoding is given explicitly so the JSON text is
# decoded as UTF-8 regardless of the platform's default encoding.
with open('/content/drive/MyDrive/Colab_Notebooks/Dataset.json', 'r', encoding='utf-8') as f:
    dataset = json.load(f)

In [7]:
# ============================================================================
# Create Prompt Function
# ============================================================================
def create_prompt(sample, examples=0, pool=None):
    """Build the text prompt for one sample, optionally with n-shot examples.

    The prompt always starts with the system message. With ``examples > 0`` it
    is followed by up to ``examples`` solved demonstrations (each terminated by
    a ``---`` separator) and then the actual query; otherwise the query follows
    the system message directly. The query ends with ``Output:`` so the model
    continues with the answer.

    Args:
        sample: Mapping with "instruction" and "input" keys; an "id" key, when
            present, is used to keep the sample out of its own demonstrations.
        examples: Number of demonstrations to include (0 means zero-shot).
        pool: Iterable of mappings with "instruction", "input" and "output"
            keys to draw demonstrations from. Defaults to the global
            ``dataset``.

    Returns:
        The complete prompt string.
    """
    system_message = (
        "You are an expert code assistant. Generate accurate, clean, and efficient code.\n"
        "\n"
        "Rules:\n"
        "- Provide ONLY the code solution\n"
        "- No explanations before or after the code\n"
        "- Use proper indentation and formatting\n"
        "- Follow best practices\n"
        "\n"
    )

    # The query itself (without the answer) is the same for 0-shot and n-shot.
    query = (
        f"Instruction: {sample['instruction']}\n"
        "Input:\n"
        f"{sample['input']}\n"
        "Output:\n"
    )

    # 0-shot - direct task
    if examples <= 0:
        return system_message + query

    if pool is None:
        pool = dataset

    # Only exclude by id when the sample actually has one; otherwise two
    # missing ids would compare equal and every demonstration would be dropped.
    sample_id = sample.get("id")

    shots = []
    for item in pool:
        if sample_id is not None and item.get("id") == sample_id:
            continue  # never show the test sample as its own example

        shots.append(
            f"Instruction: {item['instruction']}\n"
            "Input:\n"
            f"{item['input']}\n"
            "Output:\n"
            f"{item['output']}\n"
            "\n"
            "---\n"
            "\n"
        )

        if len(shots) >= examples:
            break

    return "".join(
        [system_message, "Here are example solutions:\n\n", *shots, "Now solve this:\n\n", query]
    )

In [8]:
# ============================================================================
# Zero-Shot Prompt
# ============================================================================
def ZeroShot(max_tests=110):
    """Run zero-shot inference over the dataset and save the predictions.

    Walks the global ``dataset`` in order, builds an example-free prompt for
    each item with ``create_prompt``, generates a prediction with ``generate``
    and stops once ``max_tests`` items have been handled or the dataset is
    exhausted. All collected records are then written to
    ``Output_ZeroShot.json`` on Drive.

    Args:
        max_tests: Upper bound on the number of items to evaluate.

    Returns:
        None (the results are saved to file).
    """
    rule = "=" * 80
    print("\n" + rule)
    print("StarCoder2 Zero-Shot Prompting")
    print(rule)

    # One record per evaluated item; its length doubles as the progress counter.
    records = []
    for entry in dataset:
        if len(records) >= max_tests:
            break

        answer = generate(create_prompt(entry))
        records.append({
            "id": entry["id"],
            "input": entry["input"],
            "expected": entry["output"],
            "actual": answer
        })
        print(f"Processed ID: {entry['id']} ({len(records)}/{max_tests})")

    print(f"\nTotal tests processed: {len(records)}")

    # Persist everything in one go on Drive
    with open('/content/drive/MyDrive/Colab_Notebooks/Output_ZeroShot.json', 'w') as out_file:
        json.dump(records, out_file, indent=4)

In [9]:
# ============================================================================
# Multi-Shot Prompting
# ============================================================================
def MultiShot(nShots, max_tests=150):
    """
    Run n-shot inference over the dataset and save the predictions.

    Items whose "id" is less than or equal to ``nShots`` are skipped; every
    other item gets a prompt from ``create_prompt(item, nShots)`` and a
    prediction from ``generate`` until ``max_tests`` items have been handled
    or the dataset is exhausted. The collected records are written to
    ``Output_MultiShot.json`` on Drive.

    NOTE(review): skipping by id presumably keeps the demonstrations out of the
    evaluated set; that only holds if ids are 1-based and follow the dataset
    order - confirm against Dataset.json.

    Args:
        nShots: Number of examples to include in the prompt (0 for zero-shot)
        max_tests: Maximum number of test samples to process (default: 150)

    Returns:
        None (saves results to file)
    """
    rule = "=" * 80
    print("\n" + rule)
    print("StarCoder2 Multi-Shot Prompting")
    print(rule)

    # One record per evaluated item; its length doubles as the progress counter.
    records = []
    for entry in dataset:
        # Leave out the low-id items reserved as examples
        if entry["id"] <= nShots:
            continue

        if len(records) >= max_tests:
            break

        answer = generate(create_prompt(entry, nShots))
        records.append({
            "id": entry["id"],
            "input": entry["input"],
            "expected": entry["output"],
            "actual": answer
        })
        print(f"Processed ID: {entry['id']} ({len(records)}/{max_tests})")

    print(f"\nTotal tests processed: {len(records)}")

    # Persist everything in one go on Drive
    with open('/content/drive/MyDrive/Colab_Notebooks/Output_MultiShot.json', 'w') as out_file:
        json.dump(records, out_file, indent=4)

    print("Results saved to Output_MultiShot.json")


In [14]:
def run_custom(
    input_text="",
    nShots=0,
    instruction="Summarize the purpose of this C++ code in one or two sentences.",
):
    """Run the model on a hand-written input instead of a dataset entry.

    Wraps ``input_text`` in an ad-hoc sample, builds the prompt with
    ``create_prompt`` and returns what ``generate`` produces for it.

    Args:
        input_text: Text placed in the "Input:" section of the prompt.
        nShots: Number of demonstrations to include in the prompt.
        instruction: Task description placed in the "Instruction:" section.
            Defaults to the C++ summarization instruction.

    Returns:
        The prediction returned by ``generate``.
    """
    sample = {
        # A very large id, chosen so it does not collide with ids in the
        # dataset (create_prompt excludes items sharing the sample's id).
        "id": 10**9,
        "instruction": instruction,
        "input": input_text,
        "output": "",
    }

    return generate(create_prompt(sample, examples=nShots))

In [15]:
# C++ program to summarize: reads n and prints the sum 1 + 2 + ... + n
cpp_snippet = """int main() {
    int n;
    cin >> n;
    int sum = 0;
    for (int i = 1; i <= n; i++) {
        sum += i;
    }
    cout << sum << endl;
    return 0;
}"""

# nShots=5: five examples from the dataset are used as n-shot demonstrations
code = run_custom(input_text=cpp_snippet, nShots=5)
print(code)

This program reads an integer from the standard input and calculates the sum of all integers from 1 to the given number. It demonstrates basic input-output operations and arithmetic operations in C++.
