In [1]:
import os

import huggingface_hub as hf_hub
import pandas as pd
from datasets import load_dataset

In [2]:
# Load the "train" split of the "newfacade/LeetCodeDataset" dataset.
# Login using huggingface token to access this dataset
# NOTE(review): no login call is made in the visible cells; presumably the
# token is expected to be configured outside the notebook — TODO confirm.
ds = load_dataset("newfacade/LeetCodeDataset", split="train")


In [3]:
# Materialise the loaded dataset as a pandas DataFrame and keep a CSV
# snapshot (without the index column) next to the notebook.
df = pd.DataFrame(ds)
df.to_csv("LeetCodeDataset.csv", index=False)

# Preview the first rows. This has to be the last expression of the cell:
# a bare expression in the middle of a cell is evaluated and thrown away.
df.head()

In [None]:
# Keep the first 10 rows of the full frame as a small sample.
# NOTE(review): df_small is not referenced by any other visible cell.
df_small = df.head(10)

In [None]:
# Write the shared prompt text of the dataset into ../test_cases/utils.py.
# `os` is imported in the notebook's import cell.
test_cases_dir = "../test_cases"
os.makedirs(test_cases_dir, exist_ok=True)

unique_prompts = df['prompt'].unique()

utils_py_path = os.path.join(test_cases_dir, "utils.py")

# Fail before touching the file so an empty frame does not leave behind a
# utils.py that contains nothing but the header line.
if len(unique_prompts) == 0:
    raise ValueError("df['prompt'] is empty; nothing to write to utils.py")

# NOTE(review): only the first unique prompt is written even though the
# header says "Combined"; presumably every row shares the same prompt —
# TODO confirm against the dataset.
with open(utils_py_path, 'w', encoding='utf-8') as f:
    f.write("# Combined LeetCode Problem Utils\n")

    f.write(str(unique_prompts[0]))
    f.write("\n\n")

print(f"Created combined utils.py at: {os.path.abspath(utils_py_path)}")


In [None]:
def _extract_assertions(test_code):
    """Return the expression text of each single-line ``assert`` in ``test_code``.

    A line counts as an assertion only when the ``assert`` keyword is
    followed by a non-identifier character, so ``assert x`` and
    ``assert(x)`` match while names such as ``assertEqual(...)`` do not.

    Limitations: statements are recognised line by line, so a multi-line
    assert is not reassembled, and an assert carrying a message
    (``assert x, "msg"``) is returned verbatim including the message.
    """
    keyword = 'assert'
    assertions = []
    for line in test_code.split('\n'):
        line = line.strip()
        if not line.startswith(keyword):
            continue
        rest = line[len(keyword):]
        # Skip a bare `assert` and identifiers that merely start with it.
        if not rest or rest[0].isalnum() or rest[0] == '_':
            continue
        assertions.append(rest.strip())
    return assertions


def _build_accuracy_wrapper(test_code):
    """Return the source of ``calculate_accuracy(candidate)`` for ``test_code``.

    The generated function evaluates every extracted assertion inside its
    own try/except, counts the truthy results and returns
    ``(passed, total, accuracy_percentage)``.
    """
    parts = [
        "def calculate_accuracy(candidate):\n",
        "    \"\"\"\n",
        "    Calculate accuracy by running all test cases and counting pass/fail\n",
        "    Returns: (passed_count, total_count, accuracy_percentage)\n",
        "    \"\"\"\n",
        "    passed = 0\n",
        "    total = 0\n",
        "    \n",
    ]

    for assertion in _extract_assertions(test_code):
        # Embed the message as a repr()'d plain string literal: splicing the
        # raw assertion into an f-string breaks the generated file as soon as
        # the assertion contains quotes, braces or backslashes.
        error_prefix = f"Error in {assertion}: "
        parts.extend([
            "    total += 1\n",
            "    try:\n",
            f"        result = {assertion}\n",
            "        if result:\n",
            "            passed += 1\n",
            "    except Exception as e:\n",
            f"        print({error_prefix!r} + str(e))\n",
            "    \n",
        ])

    parts.append("    accuracy = (passed / total * 100) if total > 0 else 0\n")
    parts.append("    return passed, total, accuracy\n\n")
    return "".join(parts)


# Write one "<task_id>.py" per row: the accuracy wrapper followed by the
# dataset's original test code.
os.makedirs(test_cases_dir, exist_ok=True)

for task_id, test_code in zip(df['task_id'], df['test']):
    filename = f"{task_id}.py"
    filepath = os.path.join(test_cases_dir, filename)

    with open(filepath, 'w', encoding='utf-8') as f:
        # Add accuracy calculation wrapper
        f.write(_build_accuracy_wrapper(test_code))

        # Keep the original check function
        f.write(test_code)
        f.write("\n\n")

        
    

In [5]:
# Select the question-level columns, drop duplicate rows, renumber the index
# and save the result as CSV (without the index column).
df_questions = (
    df[['task_id', 'problem_description', 'starter_code', 'entry_point', 'query']]
    .drop_duplicates()
    .reset_index(drop=True)
)
df_questions.to_csv("LeetCodeQuestions.csv", index=False)

# Preview the first rows. Kept as the last expression so the notebook
# actually renders it instead of discarding the value.
df_questions.head()

In [6]:
# Reload the questions table from "LeetCodeQuestions.csv" (the file written by
# the previous cell) and preview its first rows.
# NOTE(review): this rebinds df_questions to the CSV round-tripped copy —
# presumably a sanity check of the saved file; confirm.
df_questions = pd.read_csv("LeetCodeQuestions.csv")
df_questions.head()

Unnamed: 0,task_id,problem_description,starter_code,entry_point,query
0,two-sum,Given an array of integers nums and an integer...,"class Solution:\n def twoSum(self, nums: Li...",Solution().twoSum,You are an expert Python programmer. You will ...
1,add-two-numbers,You are given two non-empty linked lists repre...,# Definition for singly-linked list.\n# class ...,Solution().addTwoNumbers,You are an expert Python programmer. You will ...
2,longest-substring-without-repeating-characters,"Given a string s, find the length of the longe...",class Solution:\n def lengthOfLongestSubstr...,Solution().lengthOfLongestSubstring,You are an expert Python programmer. You will ...
3,median-of-two-sorted-arrays,Given two sorted arrays nums1 and nums2 of siz...,class Solution:\n def findMedianSortedArray...,Solution().findMedianSortedArrays,You are an expert Python programmer. You will ...
4,longest-palindromic-substring,"Given a string s, return the longest palindrom...",class Solution:\n def longestPalindrome(sel...,Solution().longestPalindrome,You are an expert Python programmer. You will ...
