Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions levelup/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import json
import re
from pathlib import Path
from typing import Optional

import google.generativeai as genai
import pdfplumber
import typer

from levelup import config
from levelup.prompts import get_resume_analysis_prompt

# Typer application object; commands are registered on it via @app.command().
app = typer.Typer(
    name="levelup",
    help="AI-powered CV analysis from the command line.",
)

# Report languages accepted by the --language option, in alphabetical order.
LANGUAGES: list[str] = [
    "Czech", "Danish", "Dutch", "English",
    "Finnish", "French", "German", "Greek",
    "Italian", "Kurdish (Kurmanji)", "Polish", "Portuguese",
    "Russian", "Spanish", "Swedish", "Turkish",
    "Ukrainian",
]


def _extract_text(pdf_path: Path) -> str:
    """Return the text of every page in the PDF, joined by newlines.

    A page whose ``extract_text()`` result is falsy (``None`` or an empty
    string) contributes an empty string. Leading and trailing whitespace is
    stripped from the combined text, so the result may be ``""``.
    """
    page_texts = []
    with pdfplumber.open(pdf_path) as document:
        for page in document.pages:
            page_texts.append(page.extract_text() or "")
    combined = "\n".join(page_texts)
    return combined.strip()


# Pull the text of a JSON object out of raw LLM output.
# First looks for a Markdown code fence (optionally tagged "json",
# case-insensitive) that wraps an object; if there is none, falls back to a
# brace-delimited span of the raw text. `block` ends up holding the candidate
# JSON text, or None when the text contains no "{...}" span at all.
def _extract_json(raw: str) -> dict | None:
fence = re.search(r"```(?:json)?\s*({[\s\S]*?})\s*```", raw, re.IGNORECASE)
if fence:
block = fence.group(1)
else:
# Greedy match: the span runs from the first "{" to the LAST "}" in `raw`,
# so trailing text that itself contains braces is captured as well.
match = re.search(r"\{[\s\S]*\}", raw)
block = match.group(0) if match else None
Comment on lines +47 to +48
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Greedy regex may over-match if trailing text contains braces.

The pattern \{[\s\S]*\} matches from the first { to the last } in the string. If the LLM response includes trailing text with braces (e.g., {"score": 90} Note: see {docs}), this captures invalid content and causes parsing to fail or return corrupted data.

Consider using a balanced-brace approach or iteratively trying to parse progressively smaller substrings:

Proposed fix: iterative JSON boundary detection
 def _extract_json(raw: str) -> dict | None:
     fence = re.search(r"```(?:json)?\s*({[\s\S]*?})\s*```", raw, re.IGNORECASE)
     if fence:
         block = fence.group(1)
     else:
-        match = re.search(r"\{[\s\S]*\}", raw)
-        block = match.group(0) if match else None
+        # Find first '{' and try parsing from there to each subsequent '}'
+        start = raw.find("{")
+        if start == -1:
+            return None
+        block = None
+        for i, ch in enumerate(raw[start:], start):
+            if ch == "}":
+                candidate = raw[start : i + 1]
+                try:
+                    result = json.loads(candidate)
+                    if isinstance(result, dict):
+                        return result
+                except json.JSONDecodeError:
+                    continue
+        return None
     if not block:
         return None
     try:
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@levelup/cli.py` around lines 47 - 48, The current greedy regex that sets
block = match.group(0) if match else None can over-match; replace it with an
iterative JSON boundary detection: find the first '{' in raw (return None if
none), then iterate from that start index to each subsequent '}' building
candidate substrings, try json.loads(candidate) for each, and when a candidate
successfully parses to a dict return that result; if none parse return None.
Update the logic that assigns block/result to use this approach (references:
variable raw, block and the parsing branch that currently uses re.search) so
trailing braces in raw no longer corrupt the extracted JSON.

# No candidate JSON text was found, so there is nothing to parse.
if not block:
return None
try:
result = json.loads(block)
# Only a JSON object (dict) is accepted; any other JSON value yields None.
return result if isinstance(result, dict) else None
except json.JSONDecodeError:
# Malformed JSON is reported to the caller as None rather than raised.
return None


@app.command()
def analyze(
    resume: Path = typer.Argument(..., help="Path to the PDF resume file."),
    language: str = typer.Option(
        "English", "--language", "-l", help="Report language."
    ),
    role: Optional[str] = typer.Option(
        None, "--role", "-r", help="Target role for the analysis."
    ),
    output: Optional[Path] = typer.Option(
        None, "--output", "-o", help="Save JSON output to a file."
    ),
) -> None:
    """Analyze a PDF resume with Gemini and print or save the JSON report.

    Without --output, the JSON report is the only thing written to stdout;
    progress and error messages go to stderr. Any failure exits with status 1.
    """
    # --- Input validation: fail fast before any expensive work. ---
    if not resume.exists():
        typer.echo(f"Error: file not found: {resume}", err=True)
        raise typer.Exit(1)

    if language not in LANGUAGES:
        typer.echo(
            f"Error: unsupported language '{language}'.\nAvailable: {', '.join(LANGUAGES)}",
            err=True,
        )
        raise typer.Exit(1)

    if not config.GEMINI_API_KEY:
        typer.echo("Error: GEMINI_API_KEY is not set.", err=True)
        raise typer.Exit(1)

    # --- Text extraction. ---
    # Progress goes to stderr so that `levelup resume.pdf > report.json`
    # produces a file containing nothing but valid JSON.
    typer.echo("Extracting text from PDF...", err=True)
    try:
        text = _extract_text(resume)
    except Exception as e:
        # Top-level CLI boundary: report any PDF failure as a clean error.
        typer.echo(f"Error reading PDF: {e}", err=True)
        raise typer.Exit(1) from e

    if not text:
        typer.echo("Error: could not extract text from the PDF.", err=True)
        raise typer.Exit(1)

    # --- LLM call. ---
    typer.echo("Analyzing resume...", err=True)
    genai.configure(api_key=config.GEMINI_API_KEY)
    model = genai.GenerativeModel("gemini-2.0-flash-lite")
    prompt = get_resume_analysis_prompt(text, language, role)

    try:
        response = model.generate_content(prompt)
        # Reading `response.text` stays inside the try so that a failure
        # while accessing it is reported the same way as a failed request.
        raw = (response.text or "").strip()
    except Exception as e:
        typer.echo(f"Error calling LLM: {e}", err=True)
        raise typer.Exit(1) from e

    # --- Parse and emit the report. ---
    result = _extract_json(raw)
    if not result:
        typer.echo("Error: could not parse the analysis response.", err=True)
        raise typer.Exit(1)

    output_json = json.dumps(result, indent=2, ensure_ascii=False)

    if output is not None:
        try:
            output.write_text(output_json, encoding="utf-8")
        except OSError as e:
            # E.g. missing parent directory or no write permission: show a
            # one-line error instead of a traceback.
            typer.echo(f"Error writing output file: {e}", err=True)
            raise typer.Exit(1) from e
        typer.echo(f"Results saved to {output}")
    else:
        typer.echo(output_json)


def main() -> None:
    """Console-script entry point: run the Typer application."""
    # Delegates all argument parsing and command dispatch to Typer.
    app()
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"alembic>=1.17.1",
"typer>=0.12.0",
"celery>=5.5.3",
"dotenv>=0.9.9",
"fastapi>=0.120.4",
Expand All @@ -33,6 +34,9 @@ dependencies = [
"pdfplumber==0.11.8"
]

[project.scripts]
levelup = "levelup.cli:main"

[dependency-groups]
dev = [
# docs
Expand Down
Loading
Loading