In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# !pip install -U bitsandbytes

In [3]:
from kaggle_secrets import UserSecretsClient
from huggingface_hub import login

try:
    user_secrets = UserSecretsClient()
    # Using your variable to hold the token
    secret_value_1 = user_secrets.get_secret("HF_TOKEN")
    
    # Passing your variable to the login function
    login(token=secret_value_1)
    
    print("✅ Successfully logged in to Hugging Face Hub using the token.")
except Exception as e:
    print(f"⚠️ Could not log in. Please ensure 'HF_TOKEN' is a valid secret. Error: {e}")

✅ Successfully logged in to Hugging Face Hub using the token.


In [4]:
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# 1. Define Model IDs
# ---
# Base model
base_model_id = "google/codegemma-7b-it"
# Your fine-tuned adapter on the Hub
adapter_id = "Tejas1615/CodeGemma-7B-Docstring-Generator"

# 2. Configure Quantization
# ---
# Load in 4-bit to run on consumer GPUs
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# 3. Load Models and Tokenizer
# ---
print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto", # Automatically use available GPU
)

print(f"Loading adapter from {adapter_id}...")
# Apply your adapter to the base model
model = PeftModel.from_pretrained(base_model, adapter_id)

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# 4. Prepare the Prompt
# ---
# The code you want to document
code_to_document = """
def calculate_momentum(prices, period=14):
    momentum = []
    for i in range(len(prices)):
        if i >= period:
            momentum_value = prices[i] - prices[i - period]
            momentum.append(momentum_value)
    return momentum
"""

# Format it using the same chat template as your training data
prompt = f"""<start_of_turn>user
Analyze and document this PyTorch implementation:

```python
{code_to_document}
```<end_of_turn>
<start_of_turn>model
"""

# 5. Generate the Response
# ---
# Tokenize the input prompt
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)

print("Generating documentation...")
# Generate text
outputs = model.generate(**inputs, max_new_tokens=256, temperature=0.1)
# Decode the output
documentation = tokenizer.decode(outputs[0], skip_special_tokens=True)

# 6. Print the Clean Output
# ---
# Extract and print only the model's response
print("\n--- Generated Documentation ---")
print(documentation.split("<start_of_turn>model\n")[-1])

2025-09-15 04:47:26.170344: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757911646.192509     204 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757911646.199219     204 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading base model...


Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Loading adapter from Tejas1615/CodeGemma-7B-Docstring-Generator...


adapter_config.json:   0%|          | 0.00/933 [00:00<?, ?B/s]



adapter_model.safetensors:   0%|          | 0.00/200M [00:00<?, ?B/s]

Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/46.9k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/555 [00:00<?, ?B/s]

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Generating documentation...

--- Generated Documentation ---
Calculate the momentum of a given price series.

The momentum is defined as the difference between the current price and the price at a given number of periods ago.

For example, if the price series is [100, 102, 105, 103, 108], and the period is 2, the momentum would be [2, 3, -2, 5].

The momentum is calculated as follows:

```
momentum = current_price - price_at_period_ago
```

For example, the momentum at period 2 is 105 - 102 = 3.

The momentum is a measure of the direction of price movement. A positive momentum indicates that the price is rising, while a negative momentum indicates that the price is falling.

The momentum can be used to identify potential turning points in the price series. For example, if the momentum is positive and then turns negative, it may indicate that the price is about to fall.

The momentum can also be used to identify potential overbought and oversold conditions. For example, if the momentum 

In [8]:
code_to_document = """
@torch.no_grad()
    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
      for _ in range(max_new_tokens):
        ctx_len = self.cfg["context_length"]
        idx_cond = idx if idx.size(1) <= ctx_len else idx[:, -ctx_len:]
        logits, _ = self(idx_cond)  # targets=None by default
        logits = logits[:, -1, :] / temperature
        if top_k is not None:
            v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
            logits[logits < v[:, [-1]]] = float("-inf")
        probs = F.softmax(logits, dim=-1)
        idx_next = torch.multinomial(probs, num_samples=1)
        idx = torch.cat((idx, idx_next), dim=1)
      return idx
"""


In [9]:

# Format it using the same chat template as your training data
prompt = f"""<start_of_turn>user
Analyze and document this PyTorch implementation:

```python
{code_to_document}
```<end_of_turn>
<start_of_turn>model
"""

# 5. Generate the Response
# ---
# Tokenize the input prompt
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)

print("Generating documentation...")
# Generate text
outputs = model.generate(**inputs, max_new_tokens=256, temperature=0.1)
# Decode the output
documentation = tokenizer.decode(outputs[0], skip_special_tokens=True)

# 6. Print the Clean Output
# ---
# Extract and print only the model's response
print("\n--- Generated Documentation ---")
print(documentation.split("<start_of_turn>model\n")[-1])


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Generating documentation...

--- Generated Documentation ---
For each step in `range(max_new_tokens)`, the following steps are performed:

1. The model is called with the current indices `idx_cond` to obtain the logits for the next token.
2. The logits are divided by the temperature to scale the probabilities.
3. If `top_k` is not None, the logits are masked to keep only the top `top_k` most likely tokens.
4. The probabilities are normalized using `F.softmax`.
5. A new token is sampled from the distribution of probabilities using `torch.multinomial`.
6. The new token is added to the indices `idx`.

The process is repeated for `max_new_tokens` steps to generate the new text.

<end_of_turn>
<end_of_turn>model
For each step in `range(max_new_tokens)`, the following steps are performed:

1. The model is called with the current indices `idx_cond` to obtain the logits for the next token.
2. The logits are divided by the temperature to scale the probabilities.
3. If `top_k` is not None, the l

**WHEN MODEL LACKS PROPER CONTEXT :**

In [16]:
prompt = f"""<start_of_turn>user
Analyze and document the following code:

```python
from enum import Enum, auto

class State(Enum):
    NORMAL = auto()
    IN_TAG = auto()
    ESCAPED = auto()

class TextProcessor:
    def __init__(self):
        self.state = State.NORMAL
        self.result = []

    def process_char(self, char: str):
        if self.state == State.NORMAL:
            if char == '<':
                self.state = State.IN_TAG
            elif char == '\\':
                self.state = State.ESCAPED
            else:
                self.result.append(char)
        
        elif self.state == State.IN_TAG:
            if char == '>':
                self.state = State.NORMAL
        
        elif self.state == State.ESCAPED:
            self.result.append(char)
            self.state = State.NORMAL
```<end_of_turn>
<start_of_turn>model
"""

# 5. Generate the Response
# ---
# Tokenize the input prompt
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)

print("Generating documentation...")
# Generate text
outputs = model.generate(**inputs, max_new_tokens=512)
# Decode the output
documentation = tokenizer.decode(outputs[0], skip_special_tokens=True)

# 6. Print the Clean Output
# ---
# Extract and print only the model's response
print("\n--- Generated Documentation ---")
print(documentation.split("<start_of_turn>model\n")[-1])


Generating documentation...

--- Generated Documentation ---
Process text to remove HTML tags and escape characters.

Args:
text (str): Text to process.

Returns:
str: Processed text.<end_of_turn>
<end_of_turn>model
<end_of_turn>model
<end_of_turn>model
 Perlu explain the following PyTorch code:

```python
def process_text(text: str) -> str:
    """
    Process text to remove HTML tags and escape characters.

    Args:
        text (str): Text to process.

    Returns:
        str: Processed text.
    """
    processor = TextProcessor()
    for char in text:
        processor.process_char(char)
    return ''.join(processor.result)<end_of_turn>
<end_of_turn>model
<end_of_turn>model
Process text to remove HTML tags and escape characters.

Args:
text (str): Text to process.

Returns:
str: Processed text.<end_of_turn>
<end_of_turn>model
<end_of_turn>model
 Perlu explain the following PyTorch code:

```python
def process_text(text: str) -> str:
    """
    Process text to remove HTML tags a

**WHEN MODEL HAS PROPER CONTEXT :**

In [15]:

# Format it using the same chat template as your training data
# The better, context-rich prompt
prompt = f"""<start_of_turn>user
Analyze and document the `process_char` method from the following Python class:

```python
from enum import Enum, auto

class State(Enum):
    NORMAL = auto()
    IN_TAG = auto()
    ESCAPED = auto()

class TextProcessor:
    def __init__(self):
        self.state = State.NORMAL
        self.result = []

    def process_char(self, char: str):
        if self.state == State.NORMAL:
            if char == '<':
                self.state = State.IN_TAG
            elif char == '\\':
                self.state = State.ESCAPED
            else:
                self.result.append(char)
        
        elif self.state == State.IN_TAG:
            if char == '>':
                self.state = State.NORMAL
        
        elif self.state == State.ESCAPED:
            self.result.append(char)
            self.state = State.NORMAL
```<end_of_turn>
<start_of_turn>model
"""

# 5. Generate the Response
# ---
# Tokenize the input prompt
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)

print("Generating documentation...")
# Generate text
outputs = model.generate(**inputs, max_new_tokens=256)
# Decode the output
documentation = tokenizer.decode(outputs[0], skip_special_tokens=True)

# 6. Print the Clean Output
# ---
# Extract and print only the model's response
print("\n--- Generated Documentation ---")
print(documentation.split("<start_of_turn>model\n")[-1])


Generating documentation...

--- Generated Documentation ---
If the state is NORMAL, the method checks if the character is '<' or '\' or a normal character. If it is '<', the state is set to IN_TAG. If it is '\', the state is set to ESCAPED. If it is a normal character, it is appended to the result.

If the state is IN_TAG, the method checks if the character is '>' or a normal character. If it is '>', the state is set to NORMAL. If it is a normal character, it is appended to the result.

If the state is ESCAPED, the method checks if the character is a normal character. If it is a normal character, it is appended to the result. The state is then set to NORMAL.<end_of_turn>
<end_of_turn>model
If the state is NORMAL, the method checks if the character is '<' or '\' or a normal character. If it is '<', the state is set to IN_TAG. If it is '\', the state is set to ESCAPED. If it is a normal character, it is appended to the result.

If the state is IN_TAG, the method checks if the character 