In [2]:
%%capture
%pip install -U transformers accelerate

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TextStreamer
import torch


base_model = "/kaggle/input/llama-3.2/transformers/3b-instruct/1"

tokenizer = AutoTokenizer.from_pretrained(base_model)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    return_dict=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

Device set to use cuda:0


In [6]:
messages = [{"role": "user", "content": "Who is Vincent van Gogh?"}]

prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

outputs = pipe(prompt, max_new_tokens=120, do_sample=True)

print(outputs[0]["generated_text"])

<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Who is Vincent van Gogh?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Vincent van Gogh (1853-1890) was a Dutch post-impressionist painter, widely considered one of the greatest painters in history. He is famous for his bold, expressive, and emotionally charged works of art, which continue to inspire and captivate people around the world.

**Early Life and Career**

Born in Groot-Zundert, Netherlands, Van Gogh was the eldest son of a Protestant pastor. He was a quiet, introverted child who suffered from anxiety and depression. After struggling with his faith and career, he became a teacher and later a pastor, but


In [7]:
import pandas as pd
import torch
import os

# Load the Excel file
file_path = '/kaggle/input/sample19/CSV to XLSX Test Data.xlsx'
data = pd.read_excel(file_path)

def extract_corrected_code(response):
    lines = response.split('\n')
    in_code_block = False
    current_block = []
    code_blocks = []
    
    for line in lines:
        if line.strip().startswith("```"):
            # Toggle code block state
            if not in_code_block:
                in_code_block = True
                current_block = []
            else:
                in_code_block = False
                # End of code block, save it
                code_blocks.append('\n'.join(current_block))
        else:
            if in_code_block:
                current_block.append(line)
    
    # Return the last code block if any found
    if code_blocks:
        return code_blocks[-1].strip()
    return ""

def query_model_for_correction(buggy_code, buggy_line):
    messages = [
        {"role": "system", "content": """You are a code fixing assistant. Your task is to identify and correct bugs in Python code.

Example:
Buggy code:
def is_prime(num):
    if num <= 1:
        return False
    for i in range(2, int(num ** 0.5) + 1):
        if num % i === 0:
            return False
    return True

Bug location: if num % i === 0:
Fixed code:
def is_prime(num):
    if num <= 1:
        return False
    for i in range(2, int(num ** 0.5) + 1):
        if num % i == 0:
            return False
    return True

Your task is to carefully analyze the given buggy code, identify the specific problematic line, and generate the correct, optimized version of the code."""},
        {"role": "user", "content": f"Buggy Code:\n{buggy_code}\n\nBuggy Line:\n{buggy_line}\n\nPlease provide the corrected version of this code."}
    ]
   
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")
   
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            do_sample=True,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id
        )
   
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

results = []

for idx, row in data.iterrows():
    print(f"Processing Snippet {idx + 1}...")
   
    buggy_code = row['Buggy Code']
    buggy_line = row['Buggy Line']
   
    if pd.isna(buggy_code) or pd.isna(buggy_line):
        continue
   
    response = query_model_for_correction(buggy_code, buggy_line)
    print(f"Raw Model Response for Snippet {idx + 1}:\n{response}\n{'-' * 50}")
   
    try:
        corrected_code = extract_corrected_code(response)
        if corrected_code:
            results.append({
                'Buggy Code': buggy_code,
                'Buggy Line': buggy_line,
                'Model Generated Correction': corrected_code
            })
            print(f"Corrected Code for Snippet {idx + 1}:\n{corrected_code}\n{'-' * 50}")
        else:
            print(f"No correction found for snippet {idx + 1}")
            results.append({
                'Buggy Code': buggy_code,
                'Buggy Line': buggy_line,
                'Model Generated Correction': 'No correction found'
            })
    except Exception as e:
        print(f"Error processing snippet {idx + 1}: {str(e)}")
        results.append({
            'Buggy Code': buggy_code,
            'Buggy Line': buggy_line,
            'Model Generated Correction': 'Error in generation'
        })

results_df = pd.DataFrame(results)
output_path = "/kaggle/working/Model_Generated_Corrections_New.xlsx"
results_df.to_excel(output_path, index=False, sheet_name="Corrections")
print(f"Model generated corrections saved to: {output_path}")

Processing Snippet 1...
Raw Model Response for Snippet 1:
system

You are a code fixing assistant. Your task is to identify and correct bugs in Python code.

Example:
Buggy code:
def is_prime(num):
    if num <= 1:
        return False
    for i in range(2, int(num ** 0.5) + 1):
        if num % i === 0:
            return False
    return True

Bug location: if num % i === 0:
Fixed code:
def is_prime(num):
    if num <= 1:
        return False
    for i in range(2, int(num ** 0.5) + 1):
        if num % i === 0:
            return False
    return True

Your task is to carefully analyze the given buggy code, identify the specific problematic line, and generate the correct, optimized version of the code.user

Buggy Code:
def pascal(n):
    rows = [[1]]
    for r in range(1, n):
        row = []
        for c in range(0, r + 1):
            upleft = rows[r - 1][c - 1] if c > 0 else 0
            upright = rows[r - 1][c - 1] if c < r else 0
            row.append(upleft + upright)
      

In [8]:
import smtplib
from email.message import EmailMessage

# Email Configuration
sender_email = "siddharthyadav555@gmail.com"  # Replace with your email
sender_password = "lsso oekq txis gzng"        # Replace with your email password or app-specific password
recipient_email = "siddharthyadav555@gmail.com"  # Replace with the recipient's email
file_path = "/kaggle/working/Buggy_Code_Model_Responses.xlsx"  # Replace with your file path

# Create the Email
msg = EmailMessage()
msg["Subject"] = "Buggy Code Model Responses"
msg["From"] = sender_email
msg["To"] = recipient_email
msg.set_content("Attached is the file containing the Buggy Code Model Responses.")

# Attach the File
try:
    with open(file_path, "rb") as f:
        file_data = f.read()
        file_name = file_path.split("/")[-1]
        msg.add_attachment(file_data, maintype="application", subtype="vnd.openxmlformats-officedocument.spreadsheetml.sheet", filename=file_name)
except FileNotFoundError:
    print(f"File not found: {file_path}")
    exit()

# Send the Email
try:
    with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:  # Gmail's SMTP server
        smtp.login(sender_email, sender_password)
        smtp.send_message(msg)
    print(f"Email sent successfully to {recipient_email}")
except Exception as e:
    print(f"Failed to send email: {e}")

File not found: /kaggle/working/Buggy_Code_Model_Responses.xlsx
Email sent successfully to siddharthyadav555@gmail.com


In [8]:
pip install emails

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting emails
  Downloading emails-0.6-py2.py3-none-any.whl.metadata (2.1 kB)
Collecting cssutils (from emails)
  Downloading cssutils-2.11.1-py3-none-any.whl.metadata (8.7 kB)
Collecting chardet (from emails)
  Downloading chardet-5.2.0-py3-none-any.whl.metadata (3.4 kB)
Collecting premailer (from emails)
  Downloading premailer-3.10.0-py2.py3-none-any.whl.metadata (15 kB)
Collecting cssselect (from premailer->emails)
  Downloading cssselect-1.2.0-py2.py3-none-any.whl.metadata (2.2 kB)
Downloading emails-0.6-py2.py3-none-any.whl (56 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.2/56.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading chardet-5.2.0-py3-none-any.whl (199 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.4/199.4 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cssutils-2.11.1-py3-none-any.whl (385 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m385.7/385.7 kB[0m [31m17.0 MB/s