In [2]:
import os
import json
import requests
from typing import Dict, Optional, Tuple
import time
import re
import random
from collections import defaultdict

# Gemini API key.
#
# The key is read from the GEMINI_API_KEY environment variable so the secret
# never lives in source control or in exported notebooks. The placeholder
# default is the sentinel main() compares against to detect an unconfigured
# key.
# NOTE(security): any key that was previously hard-coded in this file must be
# treated as compromised and revoked/rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "Paste your API key here")


# Supported programming languages: maps the lowercase language identifier
# accepted by LeetCodeSolutionGenerator to the display name that is inserted
# into the prompt sent to the model.
SUPPORTED_LANGUAGES = {
    "cpp": "C++",
    "java": "Java",
    "python": "Python",
    "python3": "Python3",
    "c": "C",
    "csharp": "C#",
    "javascript": "JavaScript",
    "typescript": "TypeScript",
    "php": "PHP",
    "swift": "Swift",
    "kotlin": "Kotlin",
    "dart": "Dart",
    "go": "Go",
    "ruby": "Ruby",
    "scala": "Scala",
    "rust": "Rust",
    "racket": "Racket",
    "erlang": "Erlang",
    "elixir": "Elixir",
    "cangjie": "Cangjie",
}

class LeetCodeSolutionGenerator:
    """Generate LeetCode solutions with the Gemini API and save them to disk.

    For each problem JSON file the generator builds a prompt from the
    problem's title and description, asks Gemini for a solution in the
    configured language, strips the answer down to bare code and writes it
    to ``<output_dir>/<problem file stem>.<extension>``. Problems whose
    solution file already exists are skipped. Running counters and a
    per-problem status list are kept for the final report.
    """

    # File extension for each supported language identifier. Kept at class
    # level (it is constant); it is still reachable as ``self.FILE_EXTENSIONS``.
    FILE_EXTENSIONS = {
        "rust": "rs",
        "erlang": "erl",
        "python": "py",
        "python3": "py",  # previously missing, so Python3 files were saved as .c
        "cpp": "cpp",
        "java": "java",
        "c": "c",  # C file extension
        "csharp": "cs",
        "javascript": "js",
        "typescript": "ts",
        "php": "php",
        "swift": "swift",
        "kotlin": "kt",
        "dart": "dart",
        "go": "go",
        "ruby": "rb",
        "scala": "scala",
        "racket": "rkt",
        "elixir": "ex",
        "cangjie": "cj",
    }

    def __init__(self, api_key: str, language: str = "c"):  # Changed default language to C
        """
        Initialize solution generator

        Args:
            api_key (str): Gemini API key
            language (str): Target programming language (default: "c").
                Matched case-insensitively against SUPPORTED_LANGUAGES.

        Raises:
            ValueError: If the language is not in SUPPORTED_LANGUAGES.
        """
        # Normalize BEFORE validating so that e.g. "C" or "Python" is accepted;
        # the stored identifier is always lowercase.
        normalized = language.lower() if isinstance(language, str) else language
        if normalized not in SUPPORTED_LANGUAGES:
            raise ValueError(f"Unsupported language: {language}. Supported languages: {', '.join(SUPPORTED_LANGUAGES.keys())}")

        self.api_key = api_key
        self.language = normalized
        self.language_name = SUPPORTED_LANGUAGES[normalized]
        self.api_url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}"
        self.headers = {
            "Content-Type": "application/json"
        }

        # Initialize statistics
        self.total_problems = 0
        self.generated_problems = 0
        self.error_problems = 0
        self.skipped_problems = 0
        self.problem_details = []

    def _read_problem(self, file_path: str) -> Dict:
        """Read and parse LeetCode problem JSON file"""
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _generate_prompt(self, problem_data: Dict) -> str:
        """Generate prompt for Gemini API based on problem data (optimized simplified version)

        Only the 'title' and 'description' keys are read; missing keys are
        treated as empty strings.
        """
        title = problem_data.get('title', '')
        description = problem_data.get('description', '')

        # Simplified prompt to reduce API processing burden
        return f"Please provide a {self.language_name} solution code for LeetCode problem '{title}'. Only provide code that can run directly on LeetCode platform, do not include any explanations or comments.\n\nProblem Description:\n{description}"

    def _extract_code(self, solution: str) -> str:
        """Extract pure code from generated answer (enhanced cleaning logic)

        Steps: cut the text down to the span between a start marker and an
        end marker (Markdown fence, triple quotes or "code start"/"code end"
        comments), drop language-identifier lines and comments, apply the
        C-specific clean-up, collapse blank-line runs and cut trailing
        explanatory prose.

        Args:
            solution (str): Raw text returned by the model.

        Returns:
            str: The cleaned code; may be empty if nothing is left.
        """
        # Possible code block start markers, tried in priority order.
        start_patterns = [
            r"```[^\n`]*\n",     # Fence with any (or no) language tag: ```c, ```c++, ```c#, ```
            r"'''.*?\n",         # Python multi-line string marker
            r'""".*?\n',         # Python multi-line string marker
            r"//\s*code start\s*\n", # English comment marker
            r"#\s*code start\s*\n"   # Python comment marker
        ]

        code_start = -1
        for pattern in start_patterns:
            match = re.search(pattern, solution)
            if match:
                code_start = match.end()
                break

        # Possible code block end markers, tried in priority order.
        end_patterns = [
            r"\n```\s*$",        # Standard end marker
            r"\n```\s*\n",        # Standard end marker followed by newline
            r"\n'''\s*$",         # Python end marker
            r'\n"""\s*$',         # Python end marker
            r"\n//\s*code end\s*$", # English end marker
            r"\n#\s*code end\s*$"   # Python end marker
        ]

        # Look for the end marker only AFTER the start marker. Searching from
        # the beginning would let an unlabeled opening fence ("\n```\n") be
        # mistaken for the closing one. Starting one character early keeps the
        # newline that terminates the opening fence line available, so an
        # empty block ("```c\n```") is still recognized.
        search_from = max(code_start - 1, 0)
        code_end = -1
        for pattern in end_patterns:
            match = re.compile(pattern).search(solution, search_from)
            if match:
                code_end = match.start()
                break

        # Extract main code section
        if code_start >= 0 and code_end >= 0:
            solution = solution[code_start:code_end].strip()
        elif code_start >= 0:
            solution = solution[code_start:].strip()
        elif code_end >= 0:
            solution = solution[:code_end].strip()

        # Remove possible language identifier lines (more comprehensive patterns)
        solution = re.sub(r'^(//|#|--|<!--|/\*|\*)\s*(language|code|implementation|solution)\s*:\s*\w*\s*$',
                          '', solution, flags=re.MULTILINE | re.IGNORECASE)

        # Remove comments (based on different languages). Only comments that
        # start at column 0 are removed for the line-comment forms.
        if self.language in ["python", "python3"]:
            solution = re.sub(r'^#.*\n?', '', solution, flags=re.MULTILINE)
        elif self.language in ["cpp", "java", "c", "csharp", "javascript", "typescript", "go", "rust"]:
            solution = re.sub(r'^//.*\n?', '', solution, flags=re.MULTILINE)
            solution = re.sub(r'/\*.*?\*/', '', solution, flags=re.DOTALL)
        elif self.language == "erlang":
            # Erlang uses % for single-line comments
            solution = re.sub(r'^%.*\n?', '', solution, flags=re.MULTILINE)

        # C-specific cleaning
        if self.language == "c":
            # Remove main function (LeetCode doesn't need it); relies on the
            # closing brace of main being at column 0.
            solution = re.sub(r'int\s+main\s*\([^)]*\)\s*\{[\s\S]*?\n\}', '', solution)
            # Remove test code
            solution = re.sub(r'#ifdef\s+TEST[\s\S]*?#endif', '', solution)
            # Remove every angle-bracket include the model emitted ...
            solution = re.sub(r'#include\s*<[^>]+>', '', solution)
            # ... and, unless a quoted include is still present, prepend a
            # fixed set of standard headers instead.
            if "#include" not in solution:
                solution = "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n" + solution

        # Remove extra blank lines
        solution = re.sub(r'\n\s*\n', '\n\n', solution)

        # Remove trailing explanatory text (if exists)
        solution = re.split(r'\n(?:This code|Code explanation|Explanation|note:|Note:)', solution, flags=re.IGNORECASE)[0]

        return solution.strip()

    def _redact(self, text: str) -> str:
        """Mask the API key in *text* so it never reaches the console or the report.

        The key is part of ``self.api_url``, and exception messages raised by
        the HTTP layer can contain the full request URL.
        """
        if self.api_key:
            return text.replace(self.api_key, "***")
        return text

    @staticmethod
    def _response_text(response_data: Dict) -> Optional[str]:
        """Return the text of the first part of the first candidate, or None.

        Uses tolerant lookups so that a candidate without 'content' or
        'parts' is reported as an empty response instead of raising KeyError.
        """
        candidates = response_data.get('candidates') or []
        if not candidates:
            return None
        content = candidates[0].get('content') or {}
        parts = content.get('parts') or []
        if not parts:
            return None
        return parts[0].get('text') or None

    def _call_gemini_api(self, prompt: str) -> Tuple[Optional[str], Optional[str]]:
        """Call Gemini API to generate solution

        Transient failures (timeouts, connection errors, HTTP 408/429/5xx and
        unexpected exceptions) are retried with exponential backoff plus
        jitter. Other HTTP errors (e.g. 400/401/403/404) fail immediately
        because repeating the same request cannot succeed.

        Args:
            prompt (str): Prompt text sent as the single content part.

        Returns:
            Tuple[Optional[str], Optional[str]]: ``(text, None)`` on success,
            ``(None, error_message)`` on failure.
        """
        max_retries = 15
        error_details = []

        # Exponential backoff parameters
        base_delay = 5
        max_delay = 120

        # The request body does not change between attempts.
        payload = {
            "contents": [
                {
                    "parts": [
                        {"text": prompt}
                    ]
                }
            ],
            "generationConfig": {
                "temperature": 0.7,
                "maxOutputTokens": 2000
            }
        }

        # One session for all attempts, closed when done (previously a new,
        # never-closed session was created on every attempt).
        with requests.Session() as session:
            session.mount('https://', requests.adapters.HTTPAdapter(max_retries=3))

            for attempt in range(1, max_retries + 1):
                retryable = True
                error_msg = ""
                try:
                    response = session.post(
                        self.api_url,
                        headers=self.headers,
                        json=payload,
                        timeout=(20, 60)  # connect 20s, read 60s
                    )

                    if response.status_code == 200:
                        # Parse Gemini API response
                        text = self._response_text(response.json())
                        if text:
                            return text, None
                        return None, "Gemini API returned empty response"

                    status = response.status_code
                    error_msg = self._redact(f"HTTP error: {status} - {response.text[:100] if response.text else 'No response content'}")
                    print(f"Error calling Gemini API (attempt {attempt}/{max_retries}): {error_msg}")
                    error_details.append(error_msg)
                    # Only throttling / timeout / server-side errors are worth retrying.
                    retryable = status in (408, 429) or status >= 500

                except requests.exceptions.Timeout as e:
                    error_msg = self._redact(f"Request timeout: {str(e)}")
                    print(f"Gemini API request timeout (attempt {attempt}/{max_retries}): {error_msg}")
                    error_details.append(error_msg)
                except requests.exceptions.ConnectionError as e:
                    error_msg = self._redact(f"Connection error: {str(e)}")
                    print(f"Gemini API connection error (attempt {attempt}/{max_retries}): {error_msg}")
                    error_details.append(error_msg)
                except Exception as e:
                    error_msg = self._redact(f"Exception: {str(e)}")
                    print(f"Exception occurred while calling Gemini API (attempt {attempt}/{max_retries}): {error_msg}")
                    error_details.append(error_msg)

                if not retryable:
                    final_error = f"Gemini API call failed with a non-retryable error: {error_msg}"
                    print(final_error)
                    return None, final_error

                # No point in waiting after the last attempt.
                if attempt == max_retries:
                    break

                # Exponential backoff + random jitter
                delay = min(base_delay * (2 ** (attempt - 1)), max_delay)
                sleep_time = delay + random.uniform(0, 1)

                print(f"Waiting {sleep_time:.2f} seconds before retry...")
                time.sleep(sleep_time)

        final_error = f"Gemini API call failed, reached maximum retry attempts ({max_retries})\nError details:\n" + "\n".join(error_details)
        print(final_error)
        return None, final_error

    def _get_problem_slug(self, problem_file: str) -> str:
        """Extract problem slug from filename

        The slug is the file stem after its first '-' (e.g.
        "1835-find-xor.json" -> "find-xor"); a stem without '-' is returned
        unchanged.
        """
        filename = os.path.basename(problem_file)
        filename = os.path.splitext(filename)[0]
        if '-' in filename:
            return filename.split('-', 1)[1]
        return filename

    def _solution_path(self, problem_name: str, output_dir: str) -> str:
        """Return the path of the solution file for *problem_name* in *output_dir*."""
        ext = self.FILE_EXTENSIONS.get(self.language, "c")
        return os.path.join(output_dir, f"{problem_name}.{ext}")

    def _solution_exists(self, problem_name: str, output_dir: str) -> bool:
        """Check if solution already exists"""
        return os.path.exists(self._solution_path(problem_name, output_dir))

    def process_problem(self, problem_file: str, output_dir: str) -> bool:
        """
        Process single LeetCode problem: only generate solution (no submission)

        Updates the statistics counters and appends one status record to
        ``self.problem_details`` regardless of the outcome.

        Args:
            problem_file (str): Path to problem JSON file
            output_dir (str): Directory to save solutions

        Returns:
            bool: True if the solution was generated or already existed,
            False if generation failed
        """
        self.total_problems += 1
        problem_name = os.path.splitext(os.path.basename(problem_file))[0]
        problem_details = {
            "problem": problem_name,
            "status": "Not processed",
            "details": "N/A"
        }

        try:
            # Check if solution already exists
            if self._solution_exists(problem_name, output_dir):
                self.skipped_problems += 1
                problem_details["status"] = "Already exists, skipped"
                print(f"Solution already exists, skipping problem: {problem_name}")
                return True

            # Read problem data and build the prompt
            problem_data = self._read_problem(problem_file)
            prompt = self._generate_prompt(problem_data)

            # Get solution from Gemini (with retry mechanism)
            solution, api_error = self._call_gemini_api(prompt)

            if not solution:
                error_msg = f"Failed to generate solution: {api_error if api_error else 'Unknown error'}"
                print(error_msg)
                self.error_problems += 1
                problem_details["status"] = "Generation failed"
                problem_details["details"] = api_error if api_error else "Gemini API call failed"
                return False

            # Extract pure code
            code = self._extract_code(solution)
            if not code:
                # Never write an empty file: it would be counted as a success
                # and the problem would be skipped on every later run.
                error_msg = "Failed to generate solution: no code could be extracted from the Gemini response"
                print(error_msg)
                self.error_problems += 1
                problem_details["status"] = "Generation failed"
                problem_details["details"] = "No code could be extracted from the Gemini response"
                return False

            # Create output directory if it doesn't exist
            os.makedirs(output_dir, exist_ok=True)

            # Save solution
            output_file = self._solution_path(problem_name, output_dir)
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(code)

            print(f"Successfully generated solution: {output_file}")
            self.generated_problems += 1
            problem_details["status"] = "Generated successfully"
            return True

        except Exception as e:
            import traceback
            error_msg = f"Error processing {problem_file}: {str(e)}\nException stack: {traceback.format_exc()}"
            print(error_msg)
            self.error_problems += 1
            problem_details["status"] = f"Processing exception: {str(e)}"
            problem_details["details"] = error_msg
            return False
        finally:
            self.problem_details.append(problem_details)

            # Print current progress
            print(f"\nCurrent progress: {self.total_problems} problems")
            print(f"Generated: {self.generated_problems} problems, Errors: {self.error_problems} problems, Skipped: {self.skipped_problems} problems")

    def process_all_problems(self, problems_dir: str, output_dir: str, request_interval: float = 5.0) -> None:
        """
        Process all LeetCode problems in directory (only generate solutions)

        Args:
            problems_dir (str): Directory containing problem JSON files
            output_dir (str): Directory to save solutions
            request_interval (float): Seconds to wait between two problems to
                avoid request rate limiting (default: 5.0). The wait is
                skipped for problems that were skipped (no request was sent
                for them) and after the last problem.
        """
        start_time = time.time()

        # Get problem file list and shuffle randomly
        problem_files = [f for f in os.listdir(problems_dir) if f.endswith('.json')]
        random.shuffle(problem_files)  # Shuffle problem order randomly
        total_files = len(problem_files)
        print(f"Found {total_files} problem files, will process in random order")

        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        for i, filename in enumerate(problem_files):
            problem_file = os.path.join(problems_dir, filename)
            print(f"\n{'='*50}")
            print(f"Processing: {filename} ({i+1}/{total_files})")

            skipped_before = self.skipped_problems
            self.process_problem(problem_file, output_dir)
            was_skipped = self.skipped_problems != skipped_before

            # Throttle only when the API may actually have been called;
            # sleeping after every skipped file wastes hours on a resumed run.
            if not was_skipped and i < total_files - 1 and request_interval > 0:
                time.sleep(request_interval)

        # Print statistics after processing completes
        self.print_statistics(start_time)

    def print_statistics(self, start_time: float) -> None:
        """Print statistics of processing results

        Args:
            start_time (float): ``time.time()`` value taken when processing
                started; used to compute the elapsed time.
        """
        elapsed_time = time.time() - start_time
        minutes, seconds = divmod(elapsed_time, 60)

        print("\n" + "="*50)
        print(f"LeetCode {self.language_name} Solution Generation Statistics Report")
        print("="*50)
        print(f"Total problems: {self.total_problems}")
        print(f"Generated problems: {self.generated_problems}")
        print(f"Skipped problems: {self.skipped_problems}")
        print(f"Error problems: {self.error_problems}")

        print("\nProblem status distribution:")
        for problem in self.problem_details:
            print(f"- {problem['problem']}: {problem['status']}")

        print("\nError details:")
        for problem in self.problem_details:
            # Everything that is neither a success nor a skip is an error.
            if problem['status'] not in ["Generated successfully", "Already exists, skipped"]:
                print(f"\nProblem: {problem['problem']}")
                print(f"Status: {problem['status']}")
                if problem['details'] != "N/A":
                    print(f"Details: {problem['details']}")

        print(f"\nTotal time: {int(minutes)} minutes {int(seconds)} seconds")
        print("="*50)

def main():
    """Generate C solutions for every problem in ``leetcode_data``.

    Solutions are written to ``c_solutions``. If the run is interrupted or
    fails, the statistics collected so far are printed with the real elapsed
    time before returning.
    """
    # Check if API key is set (empty or still the placeholder means unset)
    if not GEMINI_API_KEY or GEMINI_API_KEY == "Paste your API key here":
        print("Please set your API key in the GEMINI_API_KEY variable at the beginning of the file")
        return

    # Initialize generator (using C language)
    generator = LeetCodeSolutionGenerator(
        api_key=GEMINI_API_KEY,
        language="c"  # Changed to C language
    )

    # Process all problems
    problems_dir = "leetcode_data"
    output_dir = "c_solutions"  # Changed output directory

    # Remember when the run started so that the report printed after an
    # interruption or error shows the actual elapsed time instead of 0.
    start_time = time.time()

    try:
        generator.process_all_problems(problems_dir, output_dir)
    except KeyboardInterrupt:
        print("\nProgram interrupted, printing current statistics...")
        generator.print_statistics(start_time)
    except Exception as e:
        print(f"Program error: {str(e)}")
        generator.print_statistics(start_time)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Found 3011 problem files, will process in random order

Processing: LCP 37-zui-xiao-ju-xing-mian-ji.json (1/3011)
Solution already exists, skipping problem: LCP 37-zui-xiao-ju-xing-mian-ji

Current progress: 1 problems
Generated: 0 problems, Errors: 0 problems, Skipped: 1 problems

Processing: 1835-find-xor-sum-of-all-pairs-bitwise-and.json (2/3011)
Solution already exists, skipping problem: 1835-find-xor-sum-of-all-pairs-bitwise-and

Current progress: 2 problems
Generated: 0 problems, Errors: 0 problems, Skipped: 2 problems

Processing: 2217-find-palindrome-with-fixed-length.json (3/3011)
Solution already exists, skipping problem: 2217-find-palindrome-with-fixed-length

Current progress: 3 problems
Generated: 0 problems, Errors: 0 problems, Skipped: 3 problems

Processing: 590-n-ary-tree-postorder-traversal.json (4/3011)
Solution already exists, skipping problem: 590-n-ary-tree-postorder-traversal

Current progress: 4 problems
Generated: 0 problems, Errors: 0 problems, Skipped: 4 prob