In [1]:
from openai import OpenAI
import time
import os
from dotenv import load_dotenv

# Load environment variables from a .env file; GEMINI_KEY and GPT_KEY are read
# from the environment via os.getenv in later cells.
load_dotenv()

In [7]:
import google.generativeai as genai

# Configure the Gemini SDK with the API key read from the GEMINI_KEY environment variable.
genai.configure(api_key=os.getenv('GEMINI_KEY'))

def run_gemini(prompt):
  """Ask Gemini which line of a faulty program contains the defect.

  Args:
    prompt: Program text to analyse; sent as the single user message.

  Returns:
    The text of the model's reply.
  """
  # Sampling / output settings for the request.
  generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
  }

  # The task description is a standing instruction rather than a conversation
  # turn, so it is supplied as the system instruction (the same role
  # get_answer uses for the OpenAI model). Seeding it as a prior "user" turn
  # in the chat history produced two consecutive user turns with no model
  # reply between them.
  model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
    system_instruction="Você vai receber um código com um problema lógico de progamação e seu papel é retornar apenas o número da linha com defeito",
  )

  # Single-turn request: no chat session is needed.
  response = model.generate_content(prompt)

  return response.text

In [38]:
from tenacity import retry, stop_after_attempt, wait_fixed

# OpenAI client used by get_answer; the API key is read from the GPT_KEY environment variable.
client = OpenAI(api_key=os.getenv('GPT_KEY'))

@retry(wait=wait_fixed(10), stop=stop_after_attempt(2))
def get_answer(fault_code):
    """Ask gpt-4o-mini for the number of the defective line in a program.

    The request is attempted at most twice, with a fixed 10-second wait
    between attempts (see the tenacity ``retry`` decorator above).

    Args:
        fault_code: Program text sent as the user message.

    Returns:
        The message content of the first choice in the completion.
    """
    system_prompt = "Você vai receber um código com um problema lógico de progamação e seu papel é retornar apenas o número da linha com defeito"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": fault_code},
    ]
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

Download e processamento do dataset

In [182]:
# pandas is not imported by any earlier cell, so bring it into scope here;
# without this the cell fails with NameError on a fresh kernel.
import pandas as pd

# Load the sampled dataset from the CSV file next to the notebook.
df = pd.read_csv("sampled_dataset.csv")

In [183]:
# Display the column labels of the loaded dataset.
df.columns

Index(['ID', 'Pasta', 'faultyVersion', 'correctVersion', 'faultLocation',
       'Dificuldade', 'Dir', 'Date', 'Difficulty'],
      dtype='object')

In [188]:
# Preview the first rows of the dataset.
df.head()

Unnamed: 0,ID,Pasta,faultyVersion,correctVersion,faultLocation,Dificuldade,Dir,Date,Difficulty
0,54981846,abc354_a,H = int(input())\nP = 0\n\nfor i in range(H):\...,H = int(input())\nP = 0\n\nfor i in range(H + ...,4,a,abc354_a/54981846,2024-05-18,21
1,44877017,abc242_a,"#!/usr/bin/env python3\n\nimport sys\n\na, b, ...","#!/usr/bin/env python3\n\nimport sys\n\na, b, ...",9,a,abc242_a/44877017,2022-03-05,37
2,44815699,abc229_b,"import sys\n\na, b = input().split()\n\nn = mi...","import sys\n\na, b = input().split()\n\nn = mi...",7,b,abc229_b/44815699,2021-11-27,42
3,45536785,abc261_a,"l1,r1,l2,r2=map(int,input().split())\nans=0\nf...","l1,r1,l2,r2=map(int,input().split())\nans=0\nf...",3,a,abc261_a/45536785,2022-07-23,51
4,54238360,abc323_b,"N = int(input())\nS = [""""] * N\nfor i in range...","N = int(input())\nS = [""""] * N\nfor i in range...",0,b,abc323_b/54238360,2023-10-07,75


In [187]:
# Order rows by ascending 'Difficulty' and renumber the index from 0.
df = df.sort_values('Difficulty', ignore_index=True)

In [169]:
def add_line_numbers(code_str):
    """Return `code_str` with every line prefixed by its 1-based number and ': '.

    Two-character backslash-n sequences in the input are first converted to
    real newlines, then the text is split on newlines and each piece is
    numbered starting at 1.

    Args:
        code_str: Program text as a single string.

    Returns:
        The numbered lines joined back together with newlines.
    """
    normalized = code_str.replace('\\n', '\n')
    return '\n'.join(
        f"{number}: {text}"
        for number, text in enumerate(normalized.split('\n'), start=1)
    )

# Store the line-numbered form of each faulty program in a 'numberedCode' column.
df['numberedCode'] = df['faultyVersion'].map(add_line_numbers)

In [170]:
# Show the raw faulty program stored at index label 5.
print(df.at[5, 'faultyVersion'])

N = int(input())
list=[]
for i in range(N):
  if (i+1) % 3 == 0:
    list.append("x")
  else:
    list.append("o")
print(" ".join(list))


In [171]:
# Show the line-numbered version of the same program (index label 5).
print(df.at[5, 'numberedCode'])

1: N = int(input())
2: list=[]
3: for i in range(N):
4:   if (i+1) % 3 == 0:
5:     list.append("x")
6:   else:
7:     list.append("o")
8: print(" ".join(list))


Rodar poucas iterações para testes

In [172]:
# get_answer is wrapped by tenacity without reraise, so once its own attempts
# are exhausted the failure surfaces as RetryError rather than the underlying
# exception; that is what must be caught here.
from tenacity import RetryError

MAX_TEST_ROWS = 3   # number of dataset rows sent in this smoke test
MAX_ROUNDS = 3      # times get_answer is invoked per row before giving up
RETRY_DELAY_S = 2   # pause between rounds, in seconds

# Create the result column up front so answers land in an object-typed column
# and rows that fail keep an explicit empty value.
if 'Answer' not in df.columns:
    df['Answer'] = None

# Only the first MAX_TEST_ROWS rows are processed; index labels are those of df,
# so df.at[index, ...] writes back to the right row.
for count, (index, row) in enumerate(df.head(MAX_TEST_ROWS).iterrows(), start=1):
    print("Iteração:", count)
    fault_code = row['numberedCode']
    # Bounded retry: the original `while True` could spin forever on a
    # persistent error.
    for attempt in range(1, MAX_ROUNDS + 1):
        try:
            df.at[index, 'Answer'] = get_answer(fault_code)
            break
        except (RetryError, ValueError) as e:
            if attempt == MAX_ROUNDS:
                # Give up on this row and move on; 'Answer' stays empty.
                print(f"Erro detectado: {e}. Desistindo após {MAX_ROUNDS} tentativas.")
            else:
                print(f"Erro detectado: {e}. Tentando novamente em {RETRY_DELAY_S} segundos...")
                time.sleep(RETRY_DELAY_S)

Iteração: 1
Iteração: 2
Iteração: 3


In [192]:
# Preview the first rows of df.
df.head()

Unnamed: 0,ID,Pasta,faultyVersion,correctVersion,faultLocation,Dificuldade,Dir,Date,Difficulty
0,54981846,abc354_a,H = int(input())\nP = 0\n\nfor i in range(H):\...,H = int(input())\nP = 0\n\nfor i in range(H + ...,4,a,abc354_a/54981846,2024-05-18,21
1,44877017,abc242_a,"#!/usr/bin/env python3\n\nimport sys\n\na, b, ...","#!/usr/bin/env python3\n\nimport sys\n\na, b, ...",9,a,abc242_a/44877017,2022-03-05,37
2,44815699,abc229_b,"import sys\n\na, b = input().split()\n\nn = mi...","import sys\n\na, b = input().split()\n\nn = mi...",7,b,abc229_b/44815699,2021-11-27,42
3,45536785,abc261_a,"l1,r1,l2,r2=map(int,input().split())\nans=0\nf...","l1,r1,l2,r2=map(int,input().split())\nans=0\nf...",3,a,abc261_a/45536785,2022-07-23,51
4,54238360,abc323_b,"N = int(input())\nS = [""""] * N\nfor i in range...","N = int(input())\nS = [""""] * N\nfor i in range...",0,b,abc323_b/54238360,2023-10-07,75
