In [2]:
from transformers import AutoTokenizer, T5ForConditionalGeneration
import torch

model_path = "codefix-model"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


T5ForConditionalGeneration(
  (shared): Embedding(32100, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32100, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Drop

In [4]:
import re

def format_c_code(code):
    for_statements = re.findall(r'for\s*\(.*?\)', code)
    protected = {}
    
    for i, stmt in enumerate(for_statements):
        key = f"__FOR_STMT_{i}__"
        protected[key] = stmt
        code = code.replace(stmt, key)

    code = re.sub(r'\s*;\s*', ';\n', code)
    code = re.sub(r'\s*{\s*', ' {\n', code)
    code = re.sub(r'\s*}\s*', '\n}\n', code)

    for key, stmt in protected.items():
        code = code.replace(key, stmt)

    lines = code.splitlines()
    formatted_lines = []
    indent = 0
    for line in lines:
        line = line.strip()
        if line == '}':
            indent -= 1
        formatted_lines.append('    ' * indent + line)
        if line.endswith('{'):
            indent += 1

    return '\n'.join(formatted_lines)


In [6]:
def correct_c_code(buggy_code):
    model.eval()
    inputs = tokenizer(buggy_code, return_tensors="pt", truncation=True, padding=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=256)

    output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return format_c_code(output)


In [12]:
buggy = '#include <stdio.h> int main() { for int i=0; i<10 i++) { printf("%d", i) } return 0; }'
print("🔧 Fixed Code:\n", correct_c_code(buggy))


🔧 Fixed Code:
 #include <stdio.h> int main() {
    for (int i=0; i<10; i++) {
        printf("%d", i);
    }
    printf("%d", i);
}
return 0;
