In [1]:
import os
import json
import requests
from typing import Dict, Optional, Tuple
import time
import re
import random
from collections import defaultdict

# OpenAI API key.
# Prefer exporting OPENAI_API_KEY in the environment so the secret never has to
# be committed with the source; the literal below is only used as a fallback
# when the environment variable is missing or empty.
OPENAI_API_KEY = (
    os.environ.get("OPENAI_API_KEY")
    or "***********************************************"
)

# Supported programming languages: maps the language identifier accepted by
# LeetCodeSolutionGenerator to the display name that is inserted into prompts
# and log messages.
SUPPORTED_LANGUAGES = {
    "cpp": "C++",
    "java": "Java",
    "python": "Python",
    "python3": "Python3",
    "c": "C",
    "csharp": "C#",
    "javascript": "JavaScript",
    "typescript": "TypeScript",
    "php": "PHP",
    "swift": "Swift",
    "kotlin": "Kotlin",
    "dart": "Dart",
    "go": "Go",
    "ruby": "Ruby",
    "scala": "Scala",
    "rust": "Rust",
    "racket": "Racket",
    "erlang": "Erlang",
    "elixir": "Elixir",
    "cangjie": "Cangjie"
}

class LeetCodeSolutionGenerator:
    """Generate LeetCode solutions through the OpenAI chat-completions API.

    For every problem JSON file the generator builds a prompt, asks the model
    for a solution in the configured language, strips Markdown fences and
    explanatory text from the answer, and writes the remaining code to
    ``<output_dir>/<problem file stem>.<extension>``. Counters and per-problem
    status records are kept on the instance for the final report.
    """

    # File extension used for the saved solution of each supported language.
    # Languages missing from this table fall back to "txt".
    FILE_EXTENSIONS = {
        "cpp": "cpp",
        "java": "java",
        "python": "py",
        "python3": "py",
        "c": "c",
        "csharp": "cs",
        "javascript": "js",
        "typescript": "ts",
        "php": "php",
        "swift": "swift",
        "kotlin": "kt",
        "dart": "dart",
        "go": "go",
        "ruby": "rb",
        "scala": "scala",
        "rust": "rs",
        "racket": "rkt",
        "erlang": "erl",
        "elixir": "ex",
        "cangjie": "cj",
    }

    # Markers that may introduce the code section of a model answer. They are
    # tried in order and the first pattern that matches anywhere wins, so a
    # Markdown fence always takes priority over the looser markers.
    _CODE_START_PATTERNS = tuple(re.compile(p) for p in (
        r"```[^\n`]*\n",            # Fence with any tag (```erlang, ```c++, ```c#) or none
        r"'''.*?\n",                # Python multiline string marker
        r'""".*?\n',                # Python multiline string marker
        r"//\s*code start\s*\n",    # Comment marker
        r"#\s*code start\s*\n",     # Python comment marker
    ))

    # Markers that may terminate the code section.
    _CODE_END_PATTERNS = tuple(re.compile(p) for p in (
        r"\n```\s*$",               # Closing fence at the end of the answer
        r"\n```\s*\n",              # Closing fence followed by more text
        r"\n'''\s*$",               # Python end marker
        r'\n"""\s*$',               # Python end marker
        r"\n//\s*code end\s*$",     # Comment end marker
        r"\n#\s*code end\s*$",      # Python comment end marker
    ))

    # 4xx status codes that are still worth retrying (timeout, conflict,
    # rate limit). Every other 4xx is a permanent client error.
    _RETRYABLE_CLIENT_ERRORS = frozenset({408, 409, 429})

    def __init__(self, api_key: str, language: str):
        """
        Initialize solution generator

        Args:
            api_key (str): OpenAI API key
            language (str): Target programming language (a key of
                SUPPORTED_LANGUAGES; matched case-insensitively)

        Raises:
            ValueError: If the language is not in SUPPORTED_LANGUAGES
        """
        # Normalize before validating so that "Erlang" and "erlang" behave the
        # same and the stored key always matches the lookup key.
        normalized = language.strip().lower() if isinstance(language, str) else language
        if normalized not in SUPPORTED_LANGUAGES:
            raise ValueError(f"Unsupported language: {language}. Supported languages: {', '.join(SUPPORTED_LANGUAGES.keys())}")

        self.api_key = api_key
        self.language = normalized
        self.language_name = SUPPORTED_LANGUAGES[normalized]
        self.api_url = "https://api.openai.com/v1/chat/completions"
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }

        # Initialize statistics
        self.total_problems = 0
        self.generated_problems = 0
        self.error_problems = 0
        self.skipped_problems = 0
        self.problem_details = []

    def _read_problem(self, file_path: str) -> Dict:
        """Read and parse LeetCode problem JSON file"""
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _generate_prompt(self, problem_data: Dict) -> str:
        """Generate prompt for OpenAI API based on problem data

        Only the 'title' and 'description' keys of the problem are used;
        missing keys are replaced by empty strings.
        """
        title = problem_data.get('title', '')
        description = problem_data.get('description', '')

        return (
            f"Please provide a {self.language_name} solution code for LeetCode problem '{title}'. "
            "Only provide code that can run directly on the LeetCode platform, "
            "do not include any explanations or comments."
            f"\n\nProblem description:\n{description}"
        )

    def _extract_code(self, solution: str) -> str:
        """Extract pure code from generated answer

        Args:
            solution (str): Raw text returned by the model

        Returns:
            str: The code with fences, language-identification lines, Erlang
                full-line comments (Erlang only) and trailing explanations
                removed. May be empty when the answer contained no code.
        """
        # Locate the start marker; pattern priority decides, not position.
        code_start = -1
        for pattern in self._CODE_START_PATTERNS:
            match = pattern.search(solution)
            if match:
                code_start = match.end()
                break

        # Look for the end marker only AFTER the start marker. Searching from
        # index 0 could hit the opening fence itself and leave the closing
        # fence (and everything after it) in the result. Among all end
        # markers the earliest one closes the block.
        search_from = max(code_start, 0)
        end_matches = (p.search(solution, search_from) for p in self._CODE_END_PATTERNS)
        code_end = min((m.start() for m in end_matches if m), default=-1)

        # Extract main code section
        if code_start >= 0 and code_end >= 0:
            solution = solution[code_start:code_end].strip()
        elif code_start >= 0:
            solution = solution[code_start:].strip()
        elif code_end >= 0:
            solution = solution[:code_end].strip()

        # Remove possible language identification lines
        solution = re.sub(r'^(//|#|--|<!--|/\*|\*)\s*(language|code|implementation|solution)\s*:\s*\w*\s*$',
                          '', solution, flags=re.MULTILINE | re.IGNORECASE)

        # Remove full-line comments (only implemented for Erlang)
        if self.language == "erlang":
            # Erlang uses % for single-line comments
            solution = re.sub(r'^%.*\n?', '', solution, flags=re.MULTILINE)

        # Collapse runs of blank lines into a single blank line
        solution = re.sub(r'\n\s*\n', '\n\n', solution)

        # Remove final explanatory text (if any)
        solution = re.split(r'\n(?:This code|Code explanation|Explanation|note:|Note:)', solution, flags=re.IGNORECASE)[0]

        return solution.strip()

    def _call_openai_api(self, prompt: str) -> Tuple[Optional[str], Optional[str]]:
        """Call OpenAI API to generate solution

        Transient failures (timeouts, connection errors, HTTP 408/409/429 and
        5xx) are retried with exponential backoff plus jitter. Other 4xx
        responses such as 401 cannot succeed on retry, so they fail at once.

        Args:
            prompt (str): User prompt sent to the model

        Returns:
            Tuple[Optional[str], Optional[str]]: ``(content, None)`` on
                success, ``(None, error_message)`` on failure.
        """
        max_retries = 15
        error_details = []

        # Exponential backoff parameters
        base_delay = 5
        max_delay = 120

        # The payload does not change between attempts, so build it once.
        payload = {
            "model": "gpt-4o-mini",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a professional programming assistant specialized in generating runnable solution code for LeetCode problems. Do not include any explanations or comments, only provide code."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "temperature": 0.7,
            "max_tokens": 2000
        }

        # One session for all attempts; the context manager closes its
        # connection pool instead of leaking a new session per retry.
        with requests.Session() as session:
            session.mount('https://', requests.adapters.HTTPAdapter(max_retries=3))

            for attempt in range(max_retries):
                try:
                    response = session.post(
                        self.api_url,
                        headers=self.headers,
                        json=payload,
                        timeout=(20, 60)  # connect 20s, read 60s
                    )

                    if response.status_code == 200:
                        # Parse OpenAI API response
                        response_data = response.json()
                        if 'choices' in response_data and response_data['choices']:
                            message = response_data['choices'][0]['message']
                            content = message.get('content') if message else None
                            if content:
                                return content, None
                        return None, "OpenAI API returned empty response"

                    error_msg = f"HTTP error: {response.status_code} - {response.text[:100] if response.text else 'No response content'}"
                    print(f"Error calling OpenAI API (attempt {attempt+1}/{max_retries}): {error_msg}")
                    error_details.append(error_msg)

                    status = response.status_code
                    if 400 <= status < 500 and status not in self._RETRYABLE_CLIENT_ERRORS:
                        final_error = f"OpenAI API call failed with a non-retryable error\nError details:\n{error_msg}"
                        print(final_error)
                        return None, final_error

                except requests.exceptions.Timeout as e:
                    error_msg = f"Request timeout: {str(e)}"
                    print(f"OpenAI API request timeout (attempt {attempt+1}/{max_retries}): {error_msg}")
                    error_details.append(error_msg)
                except requests.exceptions.ConnectionError as e:
                    error_msg = f"Connection error: {str(e)}"
                    print(f"OpenAI API connection error (attempt {attempt+1}/{max_retries}): {error_msg}")
                    error_details.append(error_msg)
                except Exception as e:
                    error_msg = f"Exception: {str(e)}"
                    print(f"Exception when calling OpenAI API (attempt {attempt+1}/{max_retries}): {error_msg}")
                    error_details.append(error_msg)

                # No point in waiting once the last attempt has failed.
                if attempt == max_retries - 1:
                    break

                # Exponential backoff + random jitter
                delay = min(base_delay * (2 ** attempt), max_delay)
                sleep_time = delay + random.uniform(0, 1)
                print(f"Waiting {sleep_time:.2f} seconds before retry...")
                time.sleep(sleep_time)

        final_error = f"OpenAI API call failed, reached maximum retry attempts ({max_retries})\nError details:\n" + "\n".join(error_details)
        print(final_error)
        return None, final_error

    def _get_problem_slug(self, problem_file: str) -> str:
        """Extract problem slug from filename

        The part before the first '-' (the problem number) is dropped, e.g.
        '48-rotate-image.json' -> 'rotate-image'. Names without '-' are
        returned without their extension.
        """
        stem = os.path.splitext(os.path.basename(problem_file))[0]
        _, separator, slug = stem.partition('-')
        return slug if separator else stem

    def _solution_path(self, problem_name: str, output_dir: str) -> str:
        """Return the path the solution of problem_name is written to"""
        ext = self.FILE_EXTENSIONS.get(self.language, "txt")
        return os.path.join(output_dir, f"{problem_name}.{ext}")

    def _solution_exists(self, problem_name: str, output_dir: str) -> bool:
        """Check if solution already exists

        Besides the language-specific file, a legacy '<name>.txt' file is
        accepted as well, so solutions saved before the extension table
        covered every language are not regenerated.
        """
        candidates = {
            self._solution_path(problem_name, output_dir),
            os.path.join(output_dir, f"{problem_name}.txt"),
        }
        return any(os.path.exists(path) for path in candidates)

    def process_problem(self, problem_file: str, output_dir: str) -> bool:
        """
        Process a single LeetCode problem: only generate solution (no submission)

        Args:
            problem_file (str): Path to problem JSON file
            output_dir (str): Directory to save solutions

        Returns:
            bool: True if the solution was generated or already existed,
                False if generation failed
        """
        self.total_problems += 1
        problem_name = os.path.splitext(os.path.basename(problem_file))[0]
        problem_details = {
            "problem": problem_name,
            "status": "Not processed",
            "details": "N/A"
        }

        try:
            # Check if solution already exists
            if self._solution_exists(problem_name, output_dir):
                self.skipped_problems += 1
                problem_details["status"] = "Already exists, skipped"
                print(f"Solution already exists, skipping problem: {problem_name}")
                return True

            # Read problem data and build the prompt
            problem_data = self._read_problem(problem_file)
            prompt = self._generate_prompt(problem_data)

            # Get solution from OpenAI (with retry mechanism)
            raw_answer, api_error = self._call_openai_api(prompt)

            if not raw_answer:
                error_msg = f"Failed to generate solution: {api_error if api_error else 'Unknown error'}"
                print(error_msg)
                self.error_problems += 1
                problem_details["status"] = "Generation failed"
                problem_details["details"] = api_error if api_error else "OpenAI API call failed"
                return False

            # Extract pure code
            solution = self._extract_code(raw_answer)
            if not solution:
                # Writing an empty file would make every later run skip this
                # problem as "already solved", so report a failure instead.
                error_msg = "Failed to generate solution: no code left after cleaning the model response"
                print(error_msg)
                self.error_problems += 1
                problem_details["status"] = "Generation failed"
                problem_details["details"] = error_msg
                return False

            # Create output directory if it doesn't exist
            os.makedirs(output_dir, exist_ok=True)

            # Save solution
            output_file = self._solution_path(problem_name, output_dir)
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(solution)

            print(f"Successfully generated solution: {output_file}")
            self.generated_problems += 1
            problem_details["status"] = "Generated successfully"
            return True

        except Exception as e:
            import traceback
            error_msg = f"Error processing {problem_file}: {str(e)}\nException stack: {traceback.format_exc()}"
            print(error_msg)
            self.error_problems += 1
            problem_details["status"] = f"Processing exception: {str(e)}"
            problem_details["details"] = error_msg
            return False
        finally:
            # Runs on every exit path so the report lists each problem once.
            self.problem_details.append(problem_details)

            # Print current progress
            print(f"\nCurrent progress: {self.total_problems} problems")
            print(f"Generated: {self.generated_problems} problems, Errors: {self.error_problems} problems, Skipped: {self.skipped_problems} problems")

    def process_all_problems(self, problems_dir: str, output_dir: str, interval: float = 5) -> None:
        """
        Process all LeetCode problems in directory (only generate solutions)

        Args:
            problems_dir (str): Directory containing problem JSON files
            output_dir (str): Directory to save solutions
            interval (float): Seconds to wait after a problem that called the
                API, to avoid rate limiting. Skipped problems and the last
                problem do not wait.
        """
        start_time = time.time()

        # Get list of problem files and shuffle randomly
        problem_files = [f for f in os.listdir(problems_dir) if f.endswith('.json')]
        random.shuffle(problem_files)  # Randomize problem order
        total_files = len(problem_files)
        print(f"Found {total_files} problem files, will process in random order")

        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        for i, filename in enumerate(problem_files):
            problem_file = os.path.join(problems_dir, filename)
            print(f"\n{'='*50}")
            print(f"Processing: {filename} ({i+1}/{total_files})")

            skipped_before = self.skipped_problems
            self.process_problem(problem_file, output_dir)
            was_skipped = self.skipped_problems > skipped_before

            # Throttle only when a request was actually attempted; when
            # resuming a run most problems are skipped and need no pause.
            if not was_skipped and i < total_files - 1 and interval > 0:
                time.sleep(interval)

        # Print statistics after processing is complete
        self.print_statistics(start_time)

    def print_statistics(self, start_time: float) -> None:
        """Print statistics of processing results

        Args:
            start_time (float): time.time() value taken when processing
                started; used to compute the elapsed time
        """
        elapsed_time = time.time() - start_time
        minutes, seconds = divmod(elapsed_time, 60)

        print("\n" + "="*50)
        print(f"LeetCode {self.language_name} Solution Generation Statistics Report")
        print("="*50)
        print(f"Total problems: {self.total_problems}")
        print(f"Generated problems: {self.generated_problems}")
        print(f"Skipped problems: {self.skipped_problems}")
        print(f"Error problems: {self.error_problems}")

        print("\nProblem status distribution:")
        for problem in self.problem_details:
            print(f"- {problem['problem']}: {problem['status']}")

        print("\nError details:")
        for problem in self.problem_details:
            if problem['status'] not in ["Generated successfully", "Already exists, skipped"]:
                print(f"\nProblem: {problem['problem']}")
                print(f"Status: {problem['status']}")
                if problem['details'] != "N/A":
                    print(f"Details: {problem['details']}")

        print(f"\nTotal time: {int(minutes)} minutes {int(seconds)} seconds")
        print("="*50)

def main():
    """Generate solutions for every configured language.

    Reads problems from the 'leetcode_data' directory and writes the
    solutions of each language to '<language>_solutions'. Nothing is done
    when OPENAI_API_KEY is empty or still a masked placeholder.
    """
    # Check if API key is set. A placeholder is a key that is empty or made
    # up only of '*' characters, with or without the usual "sk-" prefix;
    # comparing against one exact literal misses placeholders of other lengths.
    api_key = (OPENAI_API_KEY or "").strip()
    key_body = api_key[3:] if api_key.startswith("sk-") else api_key
    if not key_body or set(key_body) == {"*"}:
        print("Please set your OpenAI API key in the OPENAI_API_KEY variable at the top of the file")
        return

    # Languages to process (only Erlang)
    languages = ["erlang"]

    # Create solution directories for each language and process all problems
    for index, lang in enumerate(languages):
        print(f"\n{'#'*60}")
        print(f"Starting {SUPPORTED_LANGUAGES[lang]} solution processing")
        print(f"{'#'*60}")

        # Initialize generator
        generator = LeetCodeSolutionGenerator(
            api_key=api_key,
            language=lang
        )

        # Set output directory
        output_dir = f"{lang}_solutions"

        # Remember when this language started so that the statistics printed
        # after an interruption report the real elapsed time, not zero.
        start_time = time.time()

        try:
            generator.process_all_problems("leetcode_data", output_dir)
        except KeyboardInterrupt:
            print(f"\n{SUPPORTED_LANGUAGES[lang]} processing interrupted, printing current statistics...")
            generator.print_statistics(start_time)
            # The user asked to stop; do not move on to the next language.
            break
        except Exception as e:
            print(f"{SUPPORTED_LANGUAGES[lang]} processing error: {str(e)}")
            generator.print_statistics(start_time)

        # Wait for a while before processing next language
        if index < len(languages) - 1:
            time.sleep(10)

if __name__ == "__main__":
    main()


############################################################
Starting C++ solution processing
############################################################
Found 3011 problem files, will process in random order

Processing: 2760-longest-even-odd-subarray-with-threshold.json (1/3011)
Successfully generated solution: cpp_solutions/2760-longest-even-odd-subarray-with-threshold.cpp

Current progress: 1 problems
Generated: 1 problems, Errors: 0 problems, Skipped: 0 problems

Processing: 3417-zigzag-grid-traversal-with-skip.json (2/3011)
Successfully generated solution: cpp_solutions/3417-zigzag-grid-traversal-with-skip.cpp

Current progress: 2 problems
Generated: 2 problems, Errors: 0 problems, Skipped: 0 problems

Processing: 48-rotate-image.json (3/3011)
Successfully generated solution: cpp_solutions/48-rotate-image.cpp

Current progress: 3 problems
Generated: 3 problems, Errors: 0 problems, Skipped: 0 problems

Processing: 1206-design-skiplist.json (4/3011)
Successfully generated solutio