In [1]:
import os
import json
import tempfile
import shutil
import subprocess
import ast
from pathlib import Path
from typing import List, Dict, Tuple
import google.generativeai as genai


def generate_coverage_report(
    code_file_path: str,
    test_cases_list: List[str],
    output_path: str
) -> Dict:
    """
    Generate coverage report for code file with given test cases.
    
    Args:
        code_file_path: Path to Python file to test
        test_cases_list: List of test assertions like ["assert candidate(...) == ...", ...]
        output_path: Directory to save coverage reports
        
    Returns:
        Dictionary with:
        - line_coverage: float (percentage)
        - branch_coverage: float or None (percentage)
        - tests_passed: int
        - tests_failed: int
        - uncovered_lines: List[int] - line numbers not covered
        - uncovered_branches: List[Tuple[int, int]] - (line_number, branch_id) not covered
        - output_dir: str - where reports were saved
    """
    print(f"\n{'='*80}")
    print(f"Generating Coverage Report")
    print(f"{'='*80}\n")
    print(f"Code file: {code_file_path}")
    print(f"Test cases: {len(test_cases_list)}")
    
    # Create output directory
    os.makedirs(output_path, exist_ok=True)
    
    # Create temporary directory
    temp_dir = tempfile.mkdtemp(prefix='coverage_')
    
    try:
        # Copy code file
        code_filename = Path(code_file_path).name
        code_module = Path(code_file_path).stem
        temp_code = os.path.join(temp_dir, code_filename)
        shutil.copy2(code_file_path, temp_code)
        
        # Get function name
        with open(code_file_path, 'r') as f:
            content = f.read()
        
        func_name = 'candidate'
        tree = ast.parse(content)
        for node in ast.walk(tree):
            if isinstance(node, ast.FunctionDef):
                if not node.name.startswith('_'):
                    func_name = node.name
                    break
        
        # Create test file
        test_file = os.path.join(temp_dir, 'test_solution.py')
        test_content = f"""import sys
from {code_module} import {func_name}

candidate = {func_name}
skjkasdkd = {func_name}

"""
        
        for i, test_case in enumerate(test_cases_list):
            test_code = test_case.strip()
            if not test_code.startswith('assert'):
                test_code = f"assert {test_code}"
            
            test_content += f"""
def test_case_{i}():
    {test_code}
"""
        
        with open(test_file, 'w') as f:
            f.write(test_content)
        
        # Run pytest with coverage
        coverage_json = os.path.join(temp_dir, 'coverage.json')
        html_dir = os.path.join(temp_dir, 'htmlcov')
        
        cmd = [
            'pytest', test_file,
            f'--cov={code_module}',
            '--cov-branch',
            '--cov-report=term-missing',
            f'--cov-report=json:{coverage_json}',
            f'--cov-report=html:{html_dir}',
            '-v', '--tb=short', '-p', 'no:warnings'
        ]
        
        result = subprocess.run(
            cmd, capture_output=True, text=True, cwd=temp_dir, timeout=30
        )
        
        # Parse results
        line_cov = 0.0
        branch_cov = None
        uncovered_lines = []
        uncovered_branches = []
        
        if os.path.exists(coverage_json):
            with open(coverage_json, 'r') as f:
                cov_data = json.load(f)
            
            for file_path, data in cov_data.get('files', {}).items():
                if code_filename in file_path:
                    summary = data.get('summary', {})
                    line_cov = summary.get('percent_covered', 0.0)
                    
                    if 'num_branches' in summary and summary['num_branches'] > 0:
                        covered = summary.get('covered_branches', 0)
                        total = summary.get('num_branches', 0)
                        if total > 0:
                            branch_cov = (covered / total) * 100
                    
                    uncovered_lines = data.get('missing_lines', [])
                    
                    # Parse missing branches as list of tuples
                    branches = data.get('missing_branches', [])
                    for branch in branches:
                        if len(branch) >= 2:
                            uncovered_branches.append((branch[0], branch[1]))
                    break
        
        # Parse test results
        import re
        passed_match = re.search(r'(\d+) passed', result.stdout)
        failed_match = re.search(r'(\d+) failed', result.stdout)
        
        tests_passed = int(passed_match.group(1)) if passed_match else 0
        tests_failed = int(failed_match.group(1)) if failed_match else 0
        
        if tests_passed == 0 and tests_failed == 0:
            tests_passed = len(test_cases_list) if 'passed' in result.stdout.lower() else 0
            tests_failed = len(test_cases_list) - tests_passed
        
        # Save outputs
        with open(os.path.join(output_path, 'coverage_report.txt'), 'w') as f:
            f.write(result.stdout + "\n\n" + result.stderr)
        
        if os.path.exists(coverage_json):
            shutil.copy2(coverage_json, os.path.join(output_path, 'coverage.json'))
        
        if os.path.exists(html_dir):
            html_out = os.path.join(output_path, 'htmlcov')
            if os.path.exists(html_out):
                shutil.rmtree(html_out)
            shutil.copytree(html_dir, html_out)
        
        # Print results
        print(f"\n{'='*80}")
        print("COVERAGE RESULTS")
        print(f"{'='*80}")
        print(f"Tests: {tests_passed}/{len(test_cases_list)} passed")
        print(f"Line Coverage: {line_cov:.1f}%")
        if branch_cov is not None:
            print(f"Branch Coverage: {branch_cov:.1f}%")
        print(f"\nUncovered Lines: {uncovered_lines}")
        print(f"Uncovered Branches: {uncovered_branches}")
        print(f"\nReports saved to: {output_path}/")
        print(f"{'='*80}\n")
        
        return {
            'line_coverage': line_cov,
            'branch_coverage': branch_cov,
            'tests_passed': tests_passed,
            'tests_failed': tests_failed,
            'total_tests': len(test_cases_list),
            'uncovered_lines': uncovered_lines,
            'uncovered_branches': uncovered_branches,
            'output_dir': output_path
        }
        
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def generate_enhanced_tests_prompt(
    code: str,
    current_testcases: List[str],
    uncovered_lines: List[int],
    uncovered_branches: List[Tuple[int, int]]
) -> str:
    """
    Generate prompt for LLM to create tests targeting uncovered lines/branches.
    
    Args:
        code: Source code as string
        current_testcases: List of existing test assertions
        uncovered_lines: List of line numbers not covered
        uncovered_branches: List of (line_number, branch_id) tuples not covered
        
    Returns:
        Prompt string for LLM
    """
    # Add line numbers to code
    code_lines = code.split('\n')
    numbered_code = '\n'.join([f"{i+1:3d}: {line}" for i, line in enumerate(code_lines)])
    
    # Format uncovered branches
    branch_info = ""
    if uncovered_branches:
        branch_dict = {}
        for line, branch in uncovered_branches:
            if line not in branch_dict:
                branch_dict[line] = []
            branch_dict[line].append(branch)
        
        branch_info = "\n**Uncovered Branches:**\n"
        for line, branches in sorted(branch_dict.items()):
            branch_info += f"  - Line {line}: branches {branches}\n"
    
    prompt = f"""You are a test generation expert. Generate NEW test cases to increase code coverage.

**Code (with line numbers):**
```python
{numbered_code}
```

**Uncovered Lines:** {uncovered_lines}
{branch_info}

**Existing Tests (DO NOT duplicate):**
```python
{chr(10).join(current_testcases[:10])}
{'...' if len(current_testcases) > 10 else ''}
```

**Task:**
Generate 3-5 NEW test cases that specifically target the uncovered lines {uncovered_lines} and branches listed above.

**Requirements:**
1. Each test must target SPECIFIC uncovered lines/branches
2. Test edge cases: empty inputs, None, boundary values, negative numbers
3. Test both True and False paths of conditionals
4. Format: "assert candidate(...) == expected_value"
5. DO NOT duplicate existing tests

**Output Format (JSON):**
Return ONLY valid JSON in this exact format:
{{
    "testcases": [
        "assert candidate(...) == ...",
        "assert candidate(...) == ...",
        "assert candidate(...) == ..."
    ]
}}

Generate the JSON now:"""
    
    return prompt


In [None]:
combined_tests=['assert candidate([2, 3, 4, 1, 2, 4]) == 1',
 'assert candidate([-1, -2, -3]) == -6',
 'assert candidate([-1, -2, -3, 2, -10]) == -14',
 'assert candidate([-9999999999999999]) == -9999999999999999',
 'assert candidate([0, 10, 20, 1000000]) == 0',
 'assert candidate([-1, -2, -3, 10, -5]) == -6',
 'assert candidate([100, -1, -2, -3, 10, -5]) == -6',
 'assert candidate([10, 11, 13, 8, 3, 4]) == 3',
 'assert candidate([100, -33, 32, -1, 0, -2]) == -33',
 'assert candidate([-10]) == -10',
 'assert candidate([7]) == 7',
 'assert candidate([1, -1]) == -1',
 'assert candidate([]) == 0',
 'assert candidate([0]) == 0',
 'assert candidate([1]) == 1',
 'assert candidate([-1]) == -1',
 'assert candidate([2, -1, 2]) == -1']

code_file = "codes_enhanced/gemma_Self_Planning/HumanEval_114.py"
test_cases = combined_tests
output_dir = "Task2/test_coverage_output_better_testcases_114"
    
    # Step 1: Generate coverage report
result = generate_coverage_report(code_file, test_cases, output_dir)

In [4]:
%cd ..

/Users/suyashmaniyar/Desktop/UMass/Courses/SoftwareEngineering/Assignment_02/HumanEval_Analysis_Final


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [7]:
def extract_testcases_from_llm_response(
    llm_response: str,
    existing_testcases: List[str]
) -> List[str]:
    """
    Extract test cases from LLM response and combine with existing tests.
    
    Args:
        llm_response: Raw response from LLM
        existing_testcases: Current list of test cases
        
    Returns:
        Combined list of test cases (existing + new unique ones)
    """
    new_testcases = []
    
    try:
        # Try to parse as JSON
        # Remove markdown code blocks if present
        response = llm_response.strip()
        if '```json' in response:
            response = response.split('```json')[1].split('```')[0].strip()
        elif '```' in response:
            response = response.split('```')[1].split('```')[0].strip()
        
        data = json.loads(response)
        new_testcases = data.get('testcases', [])
        
    except json.JSONDecodeError:
        # Fallback: extract assert statements
        lines = llm_response.split('\n')
        for line in lines:
            line = line.strip()
            if line.startswith('assert candidate('):
                # Clean up
                line = line.replace('```python', '').replace('```', '').strip()
                if line.endswith(','):
                    line = line[:-1]
                if line.endswith('"') or line.endswith("'"):
                    # Remove quotes if wrapped
                    if line.startswith('"') or line.startswith("'"):
                        line = line[1:-1]
                new_testcases.append(line)
    
    # Deduplicate
    def normalize(test):
        return "".join(test.split()).lower()
    
    existing_normalized = {normalize(t) for t in existing_testcases}
    unique_new = []
    
    for test in new_testcases:
        if normalize(test) not in existing_normalized:
            unique_new.append(test)
            existing_normalized.add(normalize(test))
    
    # Combine
    combined = existing_testcases + unique_new
    
    print(f"\nExtracted {len(new_testcases)} test cases from LLM")
    print(f"Unique new tests: {len(unique_new)}")
    print(f"Total tests: {len(combined)}")
    
    return combined


# Example usage
if __name__ == "__main__":
    # Example
    code_file = "codes_enhanced/gemma_Self_Planning/HumanEval_114.py"
    test_cases =  [
        "assert candidate([2, 3, 4, 1, 2, 4]) == 1",
        "assert candidate([-1, -2, -3]) == -6",
        "assert candidate([-1, -2, -3, 2, -10]) == -14",
        "assert candidate([-9999999999999999]) == -9999999999999999",
        "assert candidate([0, 10, 20, 1000000]) == 0",
        "assert candidate([-1, -2, -3, 10, -5]) == -6",
        "assert candidate([100, -1, -2, -3, 10, -5]) == -6",
        "assert candidate([10, 11, 13, 8, 3, 4]) == 3",
        "assert candidate([100, -33, 32, -1, 0, -2]) == -33",
        "assert candidate([-10]) == -10",
        "assert candidate([7]) == 7",
        "assert candidate([1, -1]) == -1"
    ]
    output_dir = "Task2/test_coverage_output"
    
    # Step 1: Generate coverage report
    result = generate_coverage_report(code_file, test_cases, output_dir)
    
    # Step 2: Read code
    with open(code_file, 'r') as f:
        code = f.read()
    
    # Step 3: Generate prompt
    prompt = generate_enhanced_tests_prompt(
        code,
        test_cases,
        result['uncovered_lines'],
        result['uncovered_branches']
    )
    
    print("\n" + "="*80)
    print("GENERATED PROMPT:")
    print("="*80)
    print(prompt)
    
 



Generating Coverage Report

Code file: codes_enhanced/gemma_Self_Planning/HumanEval_114.py
Test cases: 12

COVERAGE RESULTS
Tests: 12/12 passed
Line Coverage: 85.7%
Branch Coverage: 75.0%

Uncovered Lines: [15]
Uncovered Branches: [(14, 15)]

Reports saved to: Task2/test_coverage_output/


GENERATED PROMPT:
You are a test generation expert. Generate NEW test cases to increase code coverage.

**Code (with line numbers):**
```python
  1: # The problem asks to find the minimum sum of any non-empty sub-array within a given array of integers.
  2: # We can use Kadane's algorithm to solve this problem efficiently. Kadane's algorithm is typically used for finding the maximum sub-array sum,
  3: # but we can modify it to find the minimum sub-array sum by tracking the minimum sum so far instead of the maximum sum.
  4: # The core idea is to iterate through the array and keep track of the current minimum sum ending at each position and the overall minimum sum encountered so far.
  5: 
  6: def 

In [None]:
genai.configure(api_key=YOUR_API_KEY)
MODEL_NAME = "gemma-3-27b-it"

model = genai.GenerativeModel(
    MODEL_NAME,
    generation_config={
        "temperature": 2,
        "max_output_tokens": 2048,
    },
)
response = model.generate_content(prompt)
llm_response = response.text


combined_tests = extract_testcases_from_llm_response(llm_response, test_cases)
print(f"\nCombined test cases: {len(combined_tests)}")


Extracted 5 test cases from LLM
Unique new tests: 5
Total tests: 17

Combined test cases: 17


In [13]:
print(prompt)

You are a test generation expert. Generate NEW test cases to increase code coverage.

**Code (with line numbers):**
```python
  1: # The problem asks to find the minimum sum of any non-empty sub-array within a given array of integers.
  2: # We can use Kadane's algorithm to solve this problem efficiently. Kadane's algorithm is typically used for finding the maximum sub-array sum,
  3: # but we can modify it to find the minimum sub-array sum by tracking the minimum sum so far instead of the maximum sum.
  4: # The core idea is to iterate through the array and keep track of the current minimum sum ending at each position and the overall minimum sum encountered so far.
  5: 
  6: def minSubArraySum(nums):
  7:     """
  8:     Given an array of integers nums, find the minimum sum of any non-empty sub-array
  9:     of nums.
 10:     Example
 11:     minSubArraySum([2, 3, 4, 1, 2, 4]) == 1
 12:     minSubArraySum([-1, -2, -3]) == -6
 13:     """
 14:     if not nums:
 15:         return 0 

In [12]:
print(llm_response)

```json
{
    "testcases": [
        "assert candidate([]) == 0",
        "assert candidate([0]) == 0",
        "assert candidate([1]) == 1",
        "assert candidate([-1]) == -1",
        "assert candidate([2, -1, 2]) == -1"
    ]
}
```


In [10]:
combined_tests

['assert candidate([2, 3, 4, 1, 2, 4]) == 1',
 'assert candidate([-1, -2, -3]) == -6',
 'assert candidate([-1, -2, -3, 2, -10]) == -14',
 'assert candidate([-9999999999999999]) == -9999999999999999',
 'assert candidate([0, 10, 20, 1000000]) == 0',
 'assert candidate([-1, -2, -3, 10, -5]) == -6',
 'assert candidate([100, -1, -2, -3, 10, -5]) == -6',
 'assert candidate([10, 11, 13, 8, 3, 4]) == 3',
 'assert candidate([100, -33, 32, -1, 0, -2]) == -33',
 'assert candidate([-10]) == -10',
 'assert candidate([7]) == 7',
 'assert candidate([1, -1]) == -1',
 'assert candidate([]) == 0',
 'assert candidate([0]) == 0',
 'assert candidate([1]) == 1',
 'assert candidate([-1]) == -1',
 'assert candidate([2, -1, 2]) == -1']

In [None]:
combined_tests=['assert candidate([2, 3, 4, 1, 2, 4]) == 1',
 'assert candidate([-1, -2, -3]) == -6',
 'assert candidate([-1, -2, -3, 2, -10]) == -14',
 'assert candidate([-9999999999999999]) == -9999999999999999',
 'assert candidate([0, 10, 20, 1000000]) == 0',
 'assert candidate([-1, -2, -3, 10, -5]) == -6',
 'assert candidate([100, -1, -2, -3, 10, -5]) == -6',
 'assert candidate([10, 11, 13, 8, 3, 4]) == 3',
 'assert candidate([100, -33, 32, -1, 0, -2]) == -33',
 'assert candidate([-10]) == -10',
 'assert candidate([7]) == 7',
 'assert candidate([1, -1]) == -1',
 'assert candidate([]) == 0',
 'assert candidate([0]) == 0',
 'assert candidate([1]) == 1',
 'assert candidate([-1]) == -1',
 'assert candidate([2, -1, 2]) == -1']

code_file = "codes_enhanced/gemma_Self_Planning/HumanEval_114.py"
test_cases = combined_tests
output_dir = "Task2/test_coverage_output_better_testcases_114"
    
    # Step 1: Generate coverage report
result = generate_coverage_report(code_file, test_cases, output_dir)


Generating Coverage Report

Code file: codes_enhanced/gemma_Self_Planning/HumanEval_114.py
Test cases: 17

COVERAGE RESULTS
Tests: 17/17 passed
Line Coverage: 100.0%
Branch Coverage: 100.0%

Uncovered Lines: []
Uncovered Branches: []

Reports saved to: Task2/test_coverage_output_better_testcases_114/



In [None]:
code_file = "codes_enhanced/gemma_Self_Planning/HumanEval_114.py"
test_cases = combined_tests
output_dir = "Task2/test_coverage_output_better_testcases_114"
    
    # Step 1: Generate coverage report
result = generate_coverage_report(code_file, test_cases, output_dir)

In [26]:
code_file = "/Users/suyashmaniyar/Desktop/UMass/Courses/SoftwareEngineering/Assignment_02/HumanEval_Analysis_Final/Task2/APPS_codes/APPS_4467.py"
test_cases =  [
      "assert candidate('3\\n2 0\\n3 1\\n1 3\\n4 2\\n0 4\\n5 5') == '2'",
      "assert candidate('3\\n0 0\\n1 1\\n5 2\\n2 3\\n3 4\\n4 5') == '2'",
      "assert candidate('2\\n2 2\\n3 3\\n0 0\\n1 1') == '0'",
      "assert candidate('5\\n0 0\\n7 3\\n2 2\\n4 8\\n1 6\\n8 5\\n6 9\\n5 4\\n9 1\\n3 7') == '5'",
      "assert candidate('5\\n0 0\\n1 1\\n5 5\\n6 6\\n7 7\\n2 2\\n3 3\\n4 4\\n8 8\\n9 9') == '4'"
    ]
output_dir = "/Users/suyashmaniyar/Desktop/UMass/Courses/SoftwareEngineering/Assignment_02/HumanEval_Analysis_Final/Task2/APPS_reports/apps_4467"
    
    # Step 1: Generate coverage report
result = generate_coverage_report(code_file, test_cases, output_dir)


Generating Coverage Report

Code file: /Users/suyashmaniyar/Desktop/UMass/Courses/SoftwareEngineering/Assignment_02/HumanEval_Analysis_Final/Task2/APPS_codes/APPS_4467.py
Test cases: 5

COVERAGE RESULTS
Tests: 5/5 passed
Line Coverage: 85.4%
Branch Coverage: 77.8%

Uncovered Lines: [21, 37, 44]
Uncovered Branches: [(20, 21), (32, 28), (36, 37), (43, 44)]

Reports saved to: /Users/suyashmaniyar/Desktop/UMass/Courses/SoftwareEngineering/Assignment_02/HumanEval_Analysis_Final/Task2/APPS_reports/apps_4467/

