diff --git a/.github/ISSUE_TEMPLATE/plot-request.yml b/.github/ISSUE_TEMPLATE/plot-request.yml index fda2b3a463..fe70c65d8c 100644 --- a/.github/ISSUE_TEMPLATE/plot-request.yml +++ b/.github/ISSUE_TEMPLATE/plot-request.yml @@ -1,6 +1,6 @@ name: Plot Request description: Propose a new plot type for pyplots -title: "Plot: " +title: "" labels: ["plot-request"] body: - type: markdown @@ -11,9 +11,9 @@ body: Describe the plot you'd like to see. Our AI will: 1. Check for similar existing plots 2. Assign a descriptive spec ID (e.g., `scatter-regression-linear`) - 3. Generate implementations once approved + 3. Create the specification for review - **No need to specify an ID** - just describe what you want! + **After submission:** A maintainer will review and add the `approved` label to start code generation. - type: textarea id: description diff --git a/.github/ISSUE_TEMPLATE/plot-update.yml b/.github/ISSUE_TEMPLATE/plot-update.yml index d063391c63..e04524702b 100644 --- a/.github/ISSUE_TEMPLATE/plot-update.yml +++ b/.github/ISSUE_TEMPLATE/plot-update.yml @@ -1,6 +1,6 @@ name: Plot Update description: Request updates or regeneration of an existing plot -title: "[spec-id] [update] " +title: "[SPEC-ID] [update] " labels: ["plot-request", "update"] body: - type: markdown @@ -8,21 +8,30 @@ body: value: | ## Plot Update Request - Use this form to update or regenerate an existing plot. + Use this to update or regenerate an existing plot. - **Title Format:** - - `[scatter-basic] [update] Regenerate` → Regenerate all 9 libraries - - `[scatter-basic] [update:seaborn] Fix colors` → Regenerate only seaborn + **Important:** Replace `SPEC-ID` in the title with the actual spec ID! - Replace `[spec-id]` in the title with the actual spec ID (e.g., `scatter-basic`). 
+ **Title Examples:** + - `[scatter-basic] [update] Regenerate all` - All 9 libraries + - `[scatter-basic] [update:seaborn] Fix colors` - Only seaborn - After submitting, a maintainer will add the `approved` label to trigger regeneration. + **After submission:** A maintainer will review and add the `approved` label to start regeneration. + + - type: input + id: spec_id + attributes: + label: Spec ID + description: "The spec ID to update (e.g., scatter-basic). Must match the title!" + placeholder: "scatter-basic" + validations: + required: true - type: dropdown id: target_library attributes: - label: Target Library (optional) - description: Update only a specific library, or leave empty for all. If you select a specific library, change [update] to [update:library] in the title. + label: Target Library + description: "Update specific library or all? If specific, change [update] to [update:library] in title." options: - All libraries - matplotlib @@ -35,7 +44,7 @@ body: - highcharts - letsplot validations: - required: false + required: true - type: dropdown id: update_type @@ -54,7 +63,7 @@ body: id: changes attributes: label: Requested Changes - description: Describe the changes to the spec or implementation (Claude will update the spec first if needed) + description: Describe the changes to the spec or implementation placeholder: | - Add grid lines for better readability - Change default color scheme to colorblind-friendly diff --git a/.github/workflows/bot-ai-review.yml b/.github/workflows/bot-ai-review.yml index 0799b4c90c..07d82c2ffb 100644 --- a/.github/workflows/bot-ai-review.yml +++ b/.github/workflows/bot-ai-review.yml @@ -202,12 +202,12 @@ jobs: ### Your Task - 1. **Read the spec file**: `specs/${{ steps.pr.outputs.spec_id }}.md` + 1. **Read the spec file**: `plots/${{ steps.pr.outputs.spec_id }}/spec.md` - Note all quality criteria listed - Understand the expected visual output 2. 
**Read the ${{ steps.pr.outputs.library }} implementation**: - - `plots/${{ steps.pr.outputs.library }}/*/${{ steps.pr.outputs.spec_id }}/default.py` + - `plots/${{ steps.pr.outputs.spec_id }}/implementations/${{ steps.pr.outputs.library }}.py` 3. **Read library-specific rules**: - `prompts/library/${{ steps.pr.outputs.library }}.md` diff --git a/.github/workflows/bot-auto-merge.yml b/.github/workflows/bot-auto-merge.yml index c8eda5b1ab..82f919b5ea 100644 --- a/.github/workflows/bot-auto-merge.yml +++ b/.github/workflows/bot-auto-merge.yml @@ -282,7 +282,7 @@ jobs: - **Not Feasible:** $NOT_FEASIBLE ### Links - - **Spec:** \`specs/$SPEC_ID.md\` + - **Spec:** \`plots/$SPEC_ID/spec.md\` - **Parent Issue:** #$MAIN_ISSUE Closes #$MAIN_ISSUE @@ -394,8 +394,8 @@ jobs: SPEC_ID="${{ steps.extract.outputs.spec_id }}" LIBRARY="${{ steps.extract.outputs.library }}" - # Find implementation file for this library - IMPL_FILE=$(find plots/$LIBRARY -name "default.py" -path "*/$SPEC_ID/*" 2>/dev/null | head -1 || echo "") + # Get implementation file path + IMPL_FILE="plots/${SPEC_ID}/implementations/${LIBRARY}.py" echo "impl_file=$IMPL_FILE" >> $GITHUB_OUTPUT - name: Update sub-issue to merged @@ -621,8 +621,8 @@ jobs: exit 0 fi - # Find implementation file - IMPL_FILE=$(find plots/$LIBRARY -name "default.py" -path "*/${SPEC_ID}/*" 2>/dev/null | head -1 || echo "") + # Get implementation file path + IMPL_FILE="plots/${SPEC_ID}/implementations/${LIBRARY}.py" # Update labels gh issue edit "$SUB_ISSUE" \ @@ -736,7 +736,7 @@ jobs: - **Not Feasible:** $NOT_FEASIBLE ### Links - - **Spec:** \`specs/$SPEC_ID.md\` + - **Spec:** \`plots/$SPEC_ID/spec.md\` - **Parent Issue:** #$MAIN_ISSUE Closes #$MAIN_ISSUE diff --git a/.github/workflows/bot-auto-tag.yml b/.github/workflows/bot-auto-tag.yml index ce9b6a6eee..db1bb49501 100644 --- a/.github/workflows/bot-auto-tag.yml +++ b/.github/workflows/bot-auto-tag.yml @@ -97,7 +97,7 @@ jobs: SPEC_ID="${{ steps.extract_spec.outputs.spec_id }}" # Read 
spec file - SPEC_FILE="specs/${SPEC_ID}.md" + SPEC_FILE="plots/${SPEC_ID}/spec.md" if [ -f "$SPEC_FILE" ]; then SPEC_CONTENT=$(cat "$SPEC_FILE") echo "spec_exists=true" >> $GITHUB_OUTPUT @@ -106,8 +106,8 @@ jobs: SPEC_CONTENT="" fi - # Find plot files - PLOT_FILES=$(find plots -name "*.py" -path "*/${SPEC_ID}/*" 2>/dev/null | head -5) + # Find implementation files + PLOT_FILES=$(find "plots/${SPEC_ID}/implementations" -name "*.py" 2>/dev/null | head -5) echo "plot_files<<EOF" >> $GITHUB_OUTPUT echo "$PLOT_FILES" >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT @@ -121,7 +121,7 @@ jobs: SPEC_ID="${{ steps.extract_spec.outputs.spec_id }}" # Read spec content - SPEC_CONTENT=$(cat "specs/${SPEC_ID}.md") + SPEC_CONTENT=$(cat "plots/${SPEC_ID}/spec.md") # Read first plot file as example PLOT_FILE=$(echo "${{ steps.read_files.outputs.plot_files }}" | head -1) diff --git a/.github/workflows/bot-validate-request.yml b/.github/workflows/bot-validate-request.yml.disabled similarity index 98% rename from .github/workflows/bot-validate-request.yml rename to .github/workflows/bot-validate-request.yml.disabled index d84f0d0d44..290687ea72 100644 --- a/.github/workflows/bot-validate-request.yml +++ b/.github/workflows/bot-validate-request.yml.disabled @@ -112,7 +112,7 @@ jobs: ### Process 1. Read `prompts/spec-id-generator.md` for full instructions - 2. List all existing specs in `specs/` directory + 2. List all existing specs in `plots/` directory (each subfolder has a spec.md) 3. Analyze the request and check for duplicates 4. Post comment using `gh issue comment ${{ github.event.issue.number }} --body "..."` 5. 
If NEW: Update title with `gh issue edit ${{ github.event.issue.number }} --title "..."` diff --git a/.github/workflows/ci-plottest.yml b/.github/workflows/ci-plottest.yml index 8cbf8c07df..2dbaf668d2 100644 --- a/.github/workflows/ci-plottest.yml +++ b/.github/workflows/ci-plottest.yml @@ -6,7 +6,7 @@ on: types: [ opened, synchronize, reopened ] paths: - 'plots/**/*.py' - - 'specs/**/*.md' + - 'plots/**/spec.md' jobs: test-plots: diff --git a/.github/workflows/gen-create-spec.yml b/.github/workflows/gen-create-spec.yml.disabled similarity index 89% rename from .github/workflows/gen-create-spec.yml rename to .github/workflows/gen-create-spec.yml.disabled index 33507b03a9..c0f84f14a2 100644 --- a/.github/workflows/gen-create-spec.yml +++ b/.github/workflows/gen-create-spec.yml.disabled @@ -143,14 +143,14 @@ jobs: SPEC_ID: ${{ steps.extract_spec.outputs.spec_id }} IS_UPDATE: ${{ steps.check.outputs.is_update }} run: | - if [ -f "specs/$SPEC_ID.md" ]; then + if [ -f "plots/$SPEC_ID/spec.md" ]; then echo "exists=true" >> $GITHUB_OUTPUT - echo "::notice::Spec file exists: specs/$SPEC_ID.md" + echo "::notice::Spec file exists: plots/$SPEC_ID/spec.md" else echo "exists=false" >> $GITHUB_OUTPUT # For updates, spec MUST exist if [[ "$IS_UPDATE" == "true" ]]; then - echo "::error::Update request but spec file does not exist: specs/$SPEC_ID.md" + echo "::error::Update request but spec file does not exist: plots/$SPEC_ID/spec.md" exit 1 fi echo "::notice::Spec file does not exist yet (will be created)" @@ -222,23 +222,27 @@ jobs: ### Instructions - 1. Read the template: `specs/.template.md` - 2. Read an example spec for reference: `specs/scatter-basic.md` - 3. Read the spec validator rules: `prompts/spec-validator.md` + 1. Read the template: `prompts/templates/spec.md` + 2. Read the metadata template: `prompts/templates/metadata.yaml` + 3. Read an example spec for reference: `plots/scatter-basic/spec.md` + 4. Read the spec validator rules: `prompts/spec-validator.md` - 4. 
Create the spec file at: `specs/${{ steps.extract_spec.outputs.spec_id }}.md` + 5. Create the plot directory and files: + - Create directory: `plots/${{ steps.extract_spec.outputs.spec_id }}/` + - Create spec file: `plots/${{ steps.extract_spec.outputs.spec_id }}/spec.md` + - Create metadata file: `plots/${{ steps.extract_spec.outputs.spec_id }}/metadata.yaml` + - Create implementations folder: `plots/${{ steps.extract_spec.outputs.spec_id }}/implementations/` - Follow the template structure exactly - - Keep it simple and focused (description, data, tags, use cases) - - Include example data if helpful (inline or dataset reference) + - Keep it simple and focused (Description, Applications, Data, Notes) - Include realistic use cases with domain context - 5. Do NOT commit or push - just create the file + 6. Do NOT commit or push - just create the files ### Quality Requirements - - All required sections: Title, Description, Data, Tags, Use Cases + - Spec must have sections: Title, Description, Applications, Data, Notes - Description should clearly explain what the plot visualizes - - Data section should list required columns with types - - Use cases should be realistic and varied + - Data section should list required variables with types + - Applications should be realistic and varied - Keep it concise - AI uses central prompts for implementation details - name: Commit and push spec file @@ -261,9 +265,9 @@ jobs: exit 0 fi - # For new specs: check if spec file was created - if [ -f "specs/$SPEC_ID.md" ]; then - git add "specs/$SPEC_ID.md" + # For new specs: check if spec directory was created + if [ -d "plots/$SPEC_ID" ] && [ -f "plots/$SPEC_ID/spec.md" ]; then + git add "plots/$SPEC_ID/" # Check if there are changes to commit if git diff --cached --quiet; then @@ -273,10 +277,10 @@ jobs: Created from issue #$ISSUE_NUMBER" git push -u origin "$BRANCH" - echo "::notice::Pushed spec file to $BRANCH" + echo "::notice::Pushed spec files to $BRANCH" fi else - echo 
"::error::Spec file was not created by Claude" + echo "::error::Spec directory was not created by Claude" exit 1 fi @@ -313,7 +317,7 @@ jobs: **Spec ID:** \`$SPEC_ID\` **Branch:** \`$BRANCH\` - **File:** \`specs/$SPEC_ID.md\` + **Directory:** \`plots/$SPEC_ID/\` ### Next Steps Triggering code generation for all 9 libraries... diff --git a/.github/workflows/gen-library-impl.yml b/.github/workflows/gen-library-impl.yml index 327311e7ad..fc637d46b8 100644 --- a/.github/workflows/gen-library-impl.yml +++ b/.github/workflows/gen-library-impl.yml @@ -158,29 +158,27 @@ jobs: 2. `prompts/default-style-guide.md` - Visual style requirements (colors, fonts, dimensions) 3. `prompts/quality-criteria.md` - Quality requirements 4. `prompts/library/${{ inputs.library }}.md` - Library-specific rules - 5. `specs/${{ inputs.spec_id }}.md` - The specification + 5. `plots/${{ inputs.spec_id }}/spec.md` - The specification ### Step 2: Check for previous attempts ${{ steps.previous_attempts.outputs.has_history == 'true' && 'IMPORTANT: There are previous failed attempts. Read /tmp/previous_attempts.md to understand what went wrong and avoid repeating the same mistakes.' || 'This is the first attempt for this library.' }} ### Step 3: Generate implementation Create the implementation file at the correct path: - - `plots/${{ inputs.library }}/{plot_type}/${{ inputs.spec_id }}/default.py` - - Determine {plot_type} from the spec (e.g., scatter, bar, line, heatmap). 
+ - `plots/${{ inputs.spec_id }}/implementations/${{ inputs.library }}.py` ### Step 4: Test the implementation Run the implementation to verify it works: ```bash source .venv/bin/activate - MPLBACKEND=Agg python plots/${{ inputs.library }}/{plot_type}/${{ inputs.spec_id }}/default.py + MPLBACKEND=Agg python plots/${{ inputs.spec_id }}/implementations/${{ inputs.library }}.py ``` ### Step 5: Format the code IMPORTANT: Run ruff to format and lint the code before committing: ```bash - uv run ruff format plots/${{ inputs.library }}/{plot_type}/${{ inputs.spec_id }}/default.py - uv run ruff check --fix plots/${{ inputs.library }}/{plot_type}/${{ inputs.spec_id }}/default.py + uv run ruff format plots/${{ inputs.spec_id }}/implementations/${{ inputs.library }}.py + uv run ruff check --fix plots/${{ inputs.spec_id }}/implementations/${{ inputs.library }}.py ``` ### Step 6: Create PR (only if implementation is successful) @@ -200,7 +198,7 @@ jobs: **Attempt:** ${{ inputs.attempt }}/3 ## Implementation - - \`plots/${{ inputs.library }}/{plot_type}/${{ inputs.spec_id }}/default.py\` + - \`plots/${{ inputs.spec_id }}/implementations/${{ inputs.library }}.py\` Closes #${{ inputs.sub_issue_number }}" ``` @@ -236,8 +234,8 @@ jobs: ATTEMPT="${{ inputs.attempt }}" PR_NUMBER="${{ steps.pr.outputs.pr_number }}" - # Find the generated plot file - PLOT_FILE=$(find plots/$LIBRARY -name "default.py" -path "*/${SPEC_ID}/*" 2>/dev/null | head -1 || echo "") + # Get the implementation file path + PLOT_FILE="plots/${SPEC_ID}/implementations/${LIBRARY}.py" # Read the code if it exists if [ -f "$PLOT_FILE" ]; then diff --git a/.github/workflows/gen-new-plot.yml b/.github/workflows/gen-new-plot.yml index bcd164ad91..8dfe4c9ca1 100644 --- a/.github/workflows/gen-new-plot.yml +++ b/.github/workflows/gen-new-plot.yml @@ -65,7 +65,7 @@ jobs: - name: Verify spec file exists run: | - SPEC_FILE="specs/${{ inputs.spec_id }}.md" + SPEC_FILE="plots/${{ inputs.spec_id }}/spec.md" if [ ! 
-f "$SPEC_FILE" ]; then echo "::error::Spec file not found: $SPEC_FILE" echo "::error::The feature branch should contain the spec file" @@ -137,7 +137,7 @@ jobs: SUB_BODY="## [$SPEC_ID] $LIBRARY Implementation **Parent Issue:** #$PARENT_NUM - **Spec:** \`specs/$SPEC_ID.md\` + **Spec:** \`plots/$SPEC_ID/spec.md\` **Library:** $LIBRARY **Feature Branch:** \`$FEATURE_BRANCH\` diff --git a/.github/workflows/gen-update-plot.yml b/.github/workflows/gen-update-plot.yml index d87af2fa78..9f721cb1f7 100644 --- a/.github/workflows/gen-update-plot.yml +++ b/.github/workflows/gen-update-plot.yml @@ -238,8 +238,8 @@ jobs: SPEC_ID="${{ steps.extract_info.outputs.spec_id }}" LIBRARY="${{ steps.extract_info.outputs.library }}" - # Find plot file for this library and spec - PLOT_FILE=$(find plots/$LIBRARY -name "default.py" -path "*/${SPEC_ID}/*" 2>/dev/null | head -1 || echo "") + # Get plot file path + PLOT_FILE="plots/${SPEC_ID}/implementations/${LIBRARY}.py" if [ -n "$PLOT_FILE" ] && [ -f "$PLOT_FILE" ]; then echo "plot_file=$PLOT_FILE" >> $GITHUB_OUTPUT @@ -249,7 +249,7 @@ jobs: fi # Check spec file - if [ -f "specs/${SPEC_ID}.md" ]; then + if [ -f "plots/${SPEC_ID}/spec.md" ]; then echo "spec_exists=true" >> $GITHUB_OUTPUT else echo "spec_exists=false" >> $GITHUB_OUTPUT @@ -291,7 +291,7 @@ jobs: CURRENT_CODE=$(cat "$PLOT_FILE") # Read spec - SPEC_CONTENT=$(cat "specs/${SPEC_ID}.md") + SPEC_CONTENT=$(cat "plots/${SPEC_ID}/spec.md") # Read library-specific rules LIBRARY_RULES=$(cat "prompts/library/${LIBRARY}.md" 2>/dev/null || echo "No library-specific rules found") diff --git a/.github/workflows/plot-prepare.yml b/.github/workflows/plot-prepare.yml new file mode 100644 index 0000000000..13c68a67f6 --- /dev/null +++ b/.github/workflows/plot-prepare.yml @@ -0,0 +1,291 @@ +name: "Plot: Prepare" +run-name: "Prepare: ${{ github.event.issue.title }}" + +# Unified workflow for NEW and UPDATE requests +# Phase 1: Validate, assign spec-id, create branch, generate spec +# STOPS after 
spec creation - waits for `approved` label before code generation +# +# Triggers: +# - plot-request label added → NEW request flow +# - plot-request + update labels → UPDATE request flow + +on: + issues: + types: [labeled] + +concurrency: + group: plot-prepare-${{ github.event.issue.number }} + cancel-in-progress: false + +jobs: + prepare: + runs-on: ubuntu-latest + permissions: + contents: write + issues: write + id-token: write + + outputs: + spec_id: ${{ steps.process.outputs.spec_id }} + feature_branch: ${{ steps.process.outputs.feature_branch }} + is_update: ${{ steps.check.outputs.is_update }} + + steps: + # ======================================================================== + # Step 1: Check conditions + # ======================================================================== + - name: Check conditions + id: check + env: + ISSUE_TITLE: ${{ github.event.issue.title }} + LABEL_NAME: ${{ github.event.label.name }} + run: | + echo "=== Debug Info ===" + echo "Event: ${{ github.event_name }}" + echo "Label added: $LABEL_NAME" + echo "Issue labels: ${{ join(github.event.issue.labels.*.name, ', ') }}" + echo "Issue title: $ISSUE_TITLE" + echo "==================" + + # Only trigger when plot-request label is added + if [[ "$LABEL_NAME" != "plot-request" ]]; then + echo "::notice::Skipping: Not the 'plot-request' label" + echo "should_run=false" >> $GITHUB_OUTPUT + exit 0 + fi + + # Check if this is an update request + HAS_UPDATE="${{ contains(github.event.issue.labels.*.name, 'update') }}" + if [[ "$HAS_UPDATE" == "true" ]]; then + echo "is_update=true" >> $GITHUB_OUTPUT + echo "::notice::This is an UPDATE request" + else + echo "is_update=false" >> $GITHUB_OUTPUT + echo "::notice::This is a NEW request" + fi + + # Skip if already has spec-id in title (already processed) + if [[ "$ISSUE_TITLE" =~ ^\[[a-z0-9-]+\] ]]; then + EXISTING_ID=$(echo "$ISSUE_TITLE" | grep -oP '^\[\K[a-z0-9-]+(?=\])' || echo "") + # For updates, having spec-id is expected - continue + 
if [[ "$HAS_UPDATE" == "true" ]]; then + echo "::notice::Update request with existing spec ID: $EXISTING_ID" + else + echo "::notice::Skipping: Issue already has spec ID: $EXISTING_ID" + echo "should_run=false" >> $GITHUB_OUTPUT + exit 0 + fi + fi + + echo "should_run=true" >> $GITHUB_OUTPUT + + - name: Checkout repository + if: steps.check.outputs.should_run == 'true' + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: React with eyes emoji + if: steps.check.outputs.should_run == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/reactions \ + -f content=eyes + + # ======================================================================== + # Step 2: Process request (NEW or UPDATE) + # ======================================================================== + - name: Process with Claude + if: steps.check.outputs.should_run == 'true' + id: process + timeout-minutes: 30 + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + claude_args: "--model opus" + prompt: | + ## Task: Prepare Plot Request + + You are processing a plot request. Determine if this is a NEW or UPDATE request and handle accordingly. + + ### Issue Details + - **Title:** ${{ github.event.issue.title }} + - **Number:** #${{ github.event.issue.number }} + - **Is Update:** ${{ steps.check.outputs.is_update }} + - **Body:** + ``` + ${{ github.event.issue.body }} + ``` + + --- + + ## Instructions + + ### For NEW Requests (is_update=false): + + 1. **Read the rules:** `prompts/spec-id-generator.md` + + 2. **Check for duplicates:** + - List all existing specs: `ls plots/` + - Read existing spec files if titles seem similar + - If duplicate found: Post comment explaining which spec matches, then STOP + + 3. 
**Generate spec-id:** + - Format: `{type}-{variant}` or `{type}-{variant}-{modifier}` + - Examples: `scatter-basic`, `bar-grouped-horizontal`, `heatmap-correlation` + - All lowercase, hyphens only + + 4. **Create feature branch:** + ```bash + git checkout -b "plot/{spec-id}" + ``` + + 5. **Create spec files:** + - Read template: `prompts/templates/spec.md` + - Read metadata template: `prompts/templates/metadata.yaml` + - Create directory: `plots/{spec-id}/` + - Create: `plots/{spec-id}/spec.md` (follow template structure) + - Create: `plots/{spec-id}/metadata.yaml` (replace placeholders) + - Create empty folder: `plots/{spec-id}/implementations/` + + 6. **Commit and push:** + ```bash + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add plots/{spec-id}/ + git commit -m "spec: add {spec-id} specification + + Created from issue #${{ github.event.issue.number }}" + git push -u origin "plot/{spec-id}" + ``` + + 7. **Update issue title:** + ```bash + gh issue edit ${{ github.event.issue.number }} --title "[{spec-id}] {original title without 'Plot: ' prefix}" + ``` + + 8. **Post comment with spec content:** + Use this format (read the actual spec.md content you created): + ```bash + gh issue comment ${{ github.event.issue.number }} --body "## ✅ Spec Ready: \`{spec-id}\` + + **Branch:** \`plot/{spec-id}\` + + --- + + ### spec.md + + {paste full content of spec.md here} + + --- + + **Next:** Add \`approved\` label to start code generation. + + --- + :robot: *[plot-prepare workflow](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})*" + ``` + + 9. **Output for workflow:** + After completing, print these lines exactly: + ``` + SPEC_ID={spec-id} + FEATURE_BRANCH=plot/{spec-id} + ``` + + --- + + ### For UPDATE Requests (is_update=true): + + 1. 
**Extract spec-id from title:** + - Title format: `[spec-id] [update] ...` or `[spec-id] [update:library] ...` + - Extract the first bracket content as spec-id + + 2. **Validate spec exists:** + - Check if `plots/{spec-id}/spec.md` exists + - If not: Post error comment and STOP + + 3. **Extract target library (if specified):** + - `[update:seaborn]` → target only seaborn + - `[update]` → target all libraries + + 4. **Analyze the update request:** + - Read the current spec: `plots/{spec-id}/spec.md` + - Read existing implementations if relevant + - Evaluate if the update reason is sensible: + - Is this a valid improvement? + - Does it make sense for this plot type? + - Are there any concerns? + + 5. **Create feature branch:** + ```bash + git checkout -b "plot/{spec-id}" + git push -u origin "plot/{spec-id}" + ``` + + 6. **Post comment with analysis:** + ```bash + gh issue comment ${{ github.event.issue.number }} --body "## 🔄 Update Request: \`{spec-id}\` + + **Branch:** \`plot/{spec-id}\` + **Scope:** {library name or 'all 9 libraries'} + + --- + + ### Analysis + + {Your assessment of the update request: + - Is the reason valid? Why/why not? + - What changes would this involve? + - Any concerns or suggestions?} + + --- + + ### Current Spec + + \`\`\`markdown + {paste current spec.md content} + \`\`\` + + --- + + **Recommendation:** {Approve / Needs clarification / Not recommended} + + **Next:** Add \`approved\` label to start regeneration. + + --- + :robot: *[plot-prepare workflow](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})*" + ``` + + 7. 
**Output for workflow:** + ``` + SPEC_ID={spec-id} + FEATURE_BRANCH=plot/{spec-id} + ``` + + --- + + ## Important Rules + - Do NOT trigger any other workflows + - Do NOT add labels + - Do NOT close the issue + - STOP after posting the comment - the `approved` label triggers the next phase + + # ======================================================================== + # Step 3: Parse outputs + # ======================================================================== + - name: Parse Claude outputs + if: steps.check.outputs.should_run == 'true' + id: parse + run: | + # Claude should have printed SPEC_ID and FEATURE_BRANCH + # These are captured in the action output + echo "::notice::Prepare phase complete - waiting for 'approved' label" + + - name: Add rocket reaction on success + if: steps.check.outputs.should_run == 'true' && success() + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/reactions \ + -f content=rocket diff --git a/.github/workflows/sync-postgres.yml b/.github/workflows/sync-postgres.yml index fdc5036c0d..9b8b4eeb85 100644 --- a/.github/workflows/sync-postgres.yml +++ b/.github/workflows/sync-postgres.yml @@ -1,15 +1,19 @@ name: "Sync: PostgreSQL" -run-name: "Sync specs to DB: ${{ github.ref_name }}" +run-name: "Sync plots to DB: ${{ github.ref_name }}" -# Syncs specs and implementations from main branch to PostgreSQL. +# Syncs plots from main branch to PostgreSQL. # This ensures the database only contains data for code that is actually in main. 
+# +# New structure: plots/{spec_id}/ +# - spec.md: Spec description, data requirements, use cases +# - metadata.yaml: Tags, implementation metadata, generation history +# - implementations/{library}.py: Library-specific implementation code on: push: branches: - main paths: - - 'specs/**' - 'plots/**' workflow_dispatch: # Allow manual trigger @@ -41,9 +45,9 @@ jobs: env: DATABASE_URL: ${{ secrets.DATABASE_URL }} - - name: Sync specs and implementations + - name: Sync plots to database run: | - uv run python scripts/sync_to_postgres.py + uv run python automation/scripts/sync_to_postgres.py env: DATABASE_URL: ${{ secrets.DATABASE_URL }} GCS_BUCKET: ${{ vars.GCS_BUCKET || 'pyplots-images' }} @@ -52,7 +56,12 @@ jobs: run: | echo "### Sync Complete" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "Specs and implementations have been synced to PostgreSQL." >> $GITHUB_STEP_SUMMARY + echo "Plots have been synced to PostgreSQL." >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Structure synced:**" >> $GITHUB_STEP_SUMMARY + echo "- plots/{spec-id}/spec.md (descriptions, data requirements)" >> $GITHUB_STEP_SUMMARY + echo "- plots/{spec-id}/metadata.yaml (tags, generation info, quality scores)" >> $GITHUB_STEP_SUMMARY + echo "- plots/{spec-id}/implementations/*.py (code)" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "**Trigger:** \`${{ github.event_name }}\`" >> $GITHUB_STEP_SUMMARY echo "**Commit:** \`${{ github.sha }}\`" >> $GITHUB_STEP_SUMMARY diff --git a/.idea/data_source_mapping.xml b/.idea/data_source_mapping.xml deleted file mode 100644 index 869d1d8645..0000000000 --- a/.idea/data_source_mapping.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 8ebb6fa66c..d96e7d8fec 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -81,27 +81,27 @@ yarn build # Production build ## Architecture -### Specification-First Design +### Plot-Centric Design -Every plot follows this 
flow: -``` -specs/{spec-id}.md → plots/{library}/{plot-type}/{spec-id}/default.py -``` +Everything for one plot type lives in a single directory: -Example: ``` -specs/scatter-basic.md → plots/matplotlib/scatter/scatter-basic/default.py - → plots/seaborn/scatterplot/scatter-basic/default.py - → plots/plotly/scatter/scatter-basic/default.py - → plots/bokeh/scatter/scatter-basic/default.py - → plots/altair/scatter/scatter-basic/default.py - → plots/plotnine/scatter/scatter-basic/default.py - → plots/pygal/scatter/scatter-basic/default.py - → plots/highcharts/scatter/scatter-basic/default.py - → plots/letsplot/point/scatter-basic/default.py +plots/{spec-id}/ +├── spec.md # Description, Applications, Data, Notes +├── metadata.yaml # Tags, generation info, quality history +└── implementations/ # Library implementations + ├── matplotlib.py + ├── seaborn.py + ├── plotly.py + ├── bokeh.py + ├── altair.py + ├── plotnine.py + ├── pygal.py + ├── highcharts.py + └── letsplot.py ``` -The same spec ID links implementations across all 9 supported libraries. +Example: `plots/scatter-basic/` contains everything for the basic scatter plot. ### Spec ID Naming Convention @@ -123,8 +123,10 @@ The same spec ID links implementations across all 9 supported libraries. 
### Directory Structure -- **`specs/`**: Library-agnostic plot specifications (Markdown) -- **`plots/{library}/{plot_type}/{spec_id}/{variant}.py`**: Library-specific implementations +- **`plots/{spec-id}/`**: Plot-centric directories (spec, metadata, implementations together) + - `spec.md`: Library-agnostic specification (Description, Applications, Data, Notes) + - `metadata.yaml`: Tags, generation info, quality scores (synced to PostgreSQL) + - `implementations/{library}.py`: Library-specific implementations - **`core/`**: Shared business logic (database, repositories, config) - **`api/`**: FastAPI backend (routers, schemas, dependencies) - **`app/`**: React frontend (Vite + TypeScript + MUI) @@ -139,6 +141,88 @@ The same spec ID links implementations across all 9 supported libraries. 3. **Clean Repo**: Only production code in git. Quality reports → GitHub Issues. Preview images → GCS. 4. **Issue-Based Workflow**: GitHub Issues as state machine for plot lifecycle +### Metadata System + +Each plot directory contains a `metadata.yaml` file that is synced to PostgreSQL: + +**File location:** `plots/{spec-id}/metadata.yaml` + +```yaml +spec_id: scatter-basic +title: Basic Scatter Plot + +# Spec-level tracking +created: 2025-01-10T08:00:00Z +issue: 42 +suggested: CoolContributor123 +updates: + - date: 2025-01-15T10:30:00Z + issue: 58 + changes: "Added Notes section" + +# Spec-level tags (same for all library implementations) +tags: + plot_type: [scatter, point] + domain: [statistics, general] + features: [basic, 2d, correlation] + audience: [beginner] + data_type: [numeric, continuous] + +# Per-library implementation metadata +implementations: + matplotlib: + preview_url: https://storage.googleapis.com/pyplots-images/plots/scatter-basic/matplotlib/latest.png + current: + version: 2 + date: 2025-01-15T10:30:00Z + issue: 53 + generated_by: claude-opus-4-5-20251101 + quality_score: 92 + history: + - version: 0 + date: 2025-01-10T08:00:00Z + issue: 42 + generated_by: 
claude-sonnet-4-20250514 + quality_score: 65 + - version: 1 + date: 2025-01-12T14:20:00Z + issue: 42 + generated_by: claude-sonnet-4-20250514 + quality_score: 78 +``` + +**Key points:** +- Spec-level tracking: `created`, `issue`, `suggested`, `updates` +- Contributors credited via `suggested` field +- Tags are at spec level (same for all libraries) +- Generation info tracks which model created the code +- Version numbers (0, 1, 2...) match GCS history files (`v0.png`, `v1.png`, etc.) +- `sync-postgres.yml` workflow syncs to database on push to main +- Database stores full spec content (markdown) and implementation code (Python source) + +### GCS Storage Structure + +Preview images are stored in Google Cloud Storage (`pyplots-images` bucket): + +``` +gs://pyplots-images/ +├── plots/{spec-id}/{library}/ # Live images (after merge to main) +│ ├── latest.png # Current version +│ ├── latest_thumb.png # Thumbnail +│ ├── latest.html # Optional (interactive libraries) +│ └── history/ +│ ├── v0.png, v0.html # First version +│ ├── v1.png, v1.html # After improvement +│ └── ... 
+│ +└── staging/{spec-id}/{library}/ # Temp images (during review) + ├── preview.png # Overwritten each attempt + └── preview.html # Optional +``` + +**Interactive libraries** (generate `.html`): plotly, bokeh, altair, highcharts, pygal, letsplot +**PNG only**: matplotlib, seaborn, plotnine + ## Tech Stack - **Backend**: FastAPI, SQLAlchemy (async), PostgreSQL, Python 3.10+ @@ -193,10 +277,8 @@ def get_spec_by_id(spec_id: str, db: Session) -> Spec: - No numbers needed - descriptive names scale better - See "Spec ID Naming Convention" section above for details -**Implementation Variants**: -- `default.py`: Standard implementation (required) -- `{style}_style.py`: Style variants (e.g., `ggplot_style.py`) -- `py{version}.py`: Version-specific (only when necessary) +**Implementation Files** (in `plots/{spec-id}/implementations/`): +- `{library}.py`: One file per library (e.g., `matplotlib.py`, `seaborn.py`) ## Database @@ -218,16 +300,23 @@ DATABASE_URL=postgresql+asyncpg://user:pass@host:5432/pyplots uv run python -c "from core.database import is_db_configured; print(is_db_configured())" ``` -**What's Stored**: -- Spec metadata (title, description, tags) -- Implementation metadata (library, variant, quality score) +**What's Stored** (synced from `plots/{spec-id}/`): +- Spec content (full markdown from spec.md) +- Spec metadata (title, description, tags, structured_tags) +- Implementation code (full Python source) +- Implementation metadata (library, variant, quality score, generation info) - GCS URLs for preview images - Social media promotion queue -**What's NOT Stored**: -- Plot code (in repository) +**What's in Repository** (source of truth): +- Everything in `plots/{spec-id}/`: + - `spec.md` - specification description + - `metadata.yaml` - tags and generation history + - `implementations/*.py` - library implementations + +**What's NOT Stored in DB**: - Preview images (in GCS) -- Quality reports (in GitHub Issues) +- Detailed quality reports (in GitHub Issues, 
summary in metadata) **Migrations**: Managed with Alembic ```bash @@ -282,7 +371,7 @@ prompt: | $(cat prompts/library/matplotlib.md) ## Spec - $(cat specs/scatter-basic.md) + $(cat plots/scatter-basic/spec.md) ``` ## Implementation Guidelines @@ -362,7 +451,8 @@ Main Issue (plot-request + approved) │ └── gen-create-spec.yml ├── Creates: plot/{spec-id} branch - ├── Creates: specs/{spec-id}.md + ├── Creates: plots/{spec-id}/spec.md + ├── Creates: plots/{spec-id}/metadata.yaml └── Dispatches: gen-new-plot.yml │ ├── Sub-Issue: [spec-id] matplotlib implementation @@ -440,7 +530,8 @@ bash .github/scripts/setup-labels.sh 4. Maintainer reviews and adds `approved` label 5. **`gen-create-spec.yml` triggers:** - Creates feature branch: `plot/{spec-id}` - - Claude generates spec file: `specs/{spec-id}.md` + - Claude generates spec file: `plots/{spec-id}/spec.md` + - Creates metadata file: `plots/{spec-id}/metadata.yaml` - Dispatches `gen-new-plot.yml` 6. **`gen-new-plot.yml` orchestrator triggers:** - Creates **9 sub-issues** (one per library) @@ -474,7 +565,7 @@ To update an existing plot implementation: - Example: `[update] scatter-basic` - regenerate all 9 libraries - Example: `[update:seaborn] scatter-basic` - regenerate only seaborn 2. Add label: `plot-request` -3. Issue body can contain spec changes (Claude will update `specs/{spec-id}.md` first) +3. Issue body can contain spec changes (Claude will update `plots/{spec-id}/spec.md` first) 4. Maintainer adds `approved` label 5. Workflow regenerates specified implementations @@ -518,31 +609,24 @@ See `.env.example` for full list with comments. 2. Add label `plot-request` 3. `validate-plot-request.yml` automatically assigns a spec ID (e.g., `bar-grouped-errorbars`) 4. Maintainer reviews and adds `approved` label -5. AI automatically generates spec file in `specs/` and implementations +5. AI automatically generates `plots/{spec-id}/` directory with spec.md, metadata.yaml, and implementations 6. 
Multi-LLM quality check runs automatically on PR 7. Human reviews PR and merges ### Updating an Existing Implementation 1. Create GitHub Issue referencing original spec -2. Update implementation file -3. Run tests: `pytest tests/unit/plots/{library}/test_{spec_id}.py` +2. Update implementation file in `plots/{spec-id}/implementations/{library}.py` +3. Run tests: `uv run pytest tests/unit/plots/test_{spec_id}.py` 4. Generate preview by running implementation standalone 5. Create PR with new preview 6. Quality check runs automatically -### Adding a Style Variant - -1. Create new file: `plots/{library}/{plot_type}/{spec_id}/{style}_style.py` -2. Add tests -3. Update database metadata -4. Generate preview - ### Testing Plot Generation Locally ```bash # Run implementation file directly -python plots/matplotlib/scatter/scatter_basic_001/default.py +python plots/scatter-basic/implementations/matplotlib.py ``` ## Cloud Deployment diff --git a/alembic/versions/002_add_metadata_fields.py b/alembic/versions/002_add_metadata_fields.py new file mode 100644 index 0000000000..5a19f1862f --- /dev/null +++ b/alembic/versions/002_add_metadata_fields.py @@ -0,0 +1,86 @@ +"""Add metadata fields for generation tracking and quality details + +Revision ID: 002 +Revises: 001 +Create Date: 2025-12-08 + +Adds: +- specs.structured_tags (JSONB) - Structured tags from metadata/*.yaml +- implementations.generated_at (DateTime) - When code was generated +- implementations.generated_by (String) - Model ID that generated the code +- implementations.workflow_run (Integer) - GitHub Actions workflow run ID +- implementations.issue_number (Integer) - GitHub Issue number +- implementations.evaluator_scores (JSONB) - Individual evaluator scores +- implementations.quality_feedback (Text) - Quality evaluation feedback +- implementations.improvements_suggested (JSONB) - Suggested improvements +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + 
+from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = "002" +down_revision: Union[str, None] = "001" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add structured_tags to specs table + op.add_column( + "specs", + sa.Column("structured_tags", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + ) + + # Add generation metadata to implementations table + op.add_column( + "implementations", + sa.Column("generated_at", sa.DateTime(), nullable=True), + ) + op.add_column( + "implementations", + sa.Column("generated_by", sa.String(), nullable=True), + ) + op.add_column( + "implementations", + sa.Column("workflow_run", sa.Integer(), nullable=True), + ) + op.add_column( + "implementations", + sa.Column("issue_number", sa.Integer(), nullable=True), + ) + + # Add quality evaluation details to implementations table + op.add_column( + "implementations", + sa.Column("evaluator_scores", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + ) + op.add_column( + "implementations", + sa.Column("quality_feedback", sa.Text(), nullable=True), + ) + op.add_column( + "implementations", + sa.Column("improvements_suggested", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + ) + + +def downgrade() -> None: + # Remove quality evaluation details from implementations + op.drop_column("implementations", "improvements_suggested") + op.drop_column("implementations", "quality_feedback") + op.drop_column("implementations", "evaluator_scores") + + # Remove generation metadata from implementations + op.drop_column("implementations", "issue_number") + op.drop_column("implementations", "workflow_run") + op.drop_column("implementations", "generated_by") + op.drop_column("implementations", "generated_at") + + # Remove structured_tags from specs + op.drop_column("specs", "structured_tags") diff --git a/alembic/versions/003_add_content_and_code_fields.py 
b/alembic/versions/003_add_content_and_code_fields.py new file mode 100644 index 0000000000..1760392f8a --- /dev/null +++ b/alembic/versions/003_add_content_and_code_fields.py @@ -0,0 +1,57 @@ +"""Add content and code fields, remove plot_function + +Revision ID: 003 +Revises: 002 +Create Date: 2025-12-09 + +Adds: +- specs.content (Text) - Full markdown content from spec.md +- implementations.code (Text) - Python source code + +Removes: +- implementations.plot_function (no longer needed with new structure) +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = "003" +down_revision: Union[str, None] = "002" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add content to specs table + op.add_column( + "specs", + sa.Column("content", sa.Text(), nullable=True), + ) + + # Add code to implementations table + op.add_column( + "implementations", + sa.Column("code", sa.Text(), nullable=True), + ) + + # Remove plot_function from implementations (no longer needed) + op.drop_column("implementations", "plot_function") + + +def downgrade() -> None: + # Add plot_function back + op.add_column( + "implementations", + sa.Column("plot_function", sa.String(), nullable=False, server_default="unknown"), + ) + + # Remove code from implementations + op.drop_column("implementations", "code") + + # Remove content from specs + op.drop_column("specs", "content") diff --git a/alembic/versions/004_add_history_fields.py b/alembic/versions/004_add_history_fields.py new file mode 100644 index 0000000000..4bd2069211 --- /dev/null +++ b/alembic/versions/004_add_history_fields.py @@ -0,0 +1,46 @@ +"""Add history tracking fields + +Revision ID: 004 +Revises: 003 +Create Date: 2025-12-09 + +Adds: +- specs.updates (JSONB) - Spec modification history +- implementations.history (JSONB) - Implementation version 
history +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = "004" +down_revision: Union[str, None] = "003" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add updates history to specs table + op.add_column( + "specs", + sa.Column("updates", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + ) + + # Add version history to implementations table + op.add_column( + "implementations", + sa.Column("history", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + ) + + +def downgrade() -> None: + # Remove history from implementations + op.drop_column("implementations", "history") + + # Remove updates from specs + op.drop_column("specs", "updates") diff --git a/api/README.md b/api/README.md deleted file mode 100644 index 78561336ab..0000000000 --- a/api/README.md +++ /dev/null @@ -1,185 +0,0 @@ -# pyplots API - -FastAPI backend for the pyplots platform. - -## 🚀 Quick Start - -### Local Development - -```bash -# From project root -uv sync --all-extras - -# Start API server -uv run uvicorn api.main:app --reload --host 0.0.0.0 --port 8000 -``` - -Visit: -- **API Docs**: http://localhost:8000/docs -- **Health Check**: http://localhost:8000/health -- **Hello World**: http://localhost:8000/ - -### Endpoints - -- `GET /` - Root endpoint with API info -- `GET /health` - Health check for Cloud Run -- `GET /hello/{name}` - Simple hello endpoint - -## 🐳 Docker - -### Build locally - -```bash -# From project root -docker build -f api/Dockerfile -t pyplots-backend . -``` - -### Run locally - -```bash -docker run -p 8000:8000 pyplots-backend -``` - -Visit: http://localhost:8000/docs - -## ☁️ Cloud Run Deployment - -### Prerequisites - -1. Google Cloud Project -2. Cloud Build API enabled -3. Cloud Run API enabled -4. 
Appropriate IAM permissions - -### Deploy - -```bash -# From project root -gcloud builds submit --config=api/cloudbuild.yaml --project=YOUR_PROJECT_ID -``` - -### Configuration - -Edit `api/cloudbuild.yaml` substitutions: -- `_SERVICE_NAME`: Cloud Run service name (default: pyplots-backend) -- `_REGION`: Deployment region (default: europe-west4) -- `_MEMORY`: Container memory (default: 512Mi) -- `_MIN_INSTANCES`: Min instances (default: 0) -- `_MAX_INSTANCES`: Max instances (default: 3) - -### Environment Variables - -Production env vars are set in `cloudbuild.yaml`: -- `ENVIRONMENT=production` -- `GOOGLE_CLOUD_PROJECT=` - -Add more in the deploy step: -```yaml -- "--set-env-vars=MY_VAR=value" -``` - -## 🧪 Testing - -```bash -# Run tests -uv run pytest tests/unit/api/ - -# With coverage -uv run pytest tests/unit/api/ --cov=api -``` - -## 📝 API Development - -### Adding new endpoints - -1. Create router in `api/routers/`: -```python -# api/routers/plots.py -from fastapi import APIRouter - -router = APIRouter(prefix="/plots", tags=["plots"]) - -@router.get("/") -async def list_plots(): - return {"plots": []} -``` - -2. Include router in `api/main.py`: -```python -from api.routers import plots - -app.include_router(plots.router) -``` - -### Project Structure - -``` -api/ -├── main.py # FastAPI app -├── __init__.py -├── routers/ # API endpoints -│ ├── __init__.py -│ ├── plots.py -│ ├── specs.py -│ └── data.py -├── Dockerfile # Container image -├── cloudbuild.yaml # Cloud Build config -└── README.md # This file -``` - -## 🔐 Security - -- Non-root user in Docker container -- CORS configured for allowed origins -- Health check endpoint for Cloud Run -- Environment-based configuration - -## 📊 Monitoring - -### Health Check - -Cloud Run uses `/health` endpoint for liveness checks. 
- -Returns: -```json -{ - "status": "healthy", - "service": "pyplots-api", - "version": "0.1.0" -} -``` - -### Logs - -View Cloud Run logs: -```bash -gcloud run services logs read pyplots-backend --region=europe-west4 -``` - -## 🛠️ Troubleshooting - -### Common Issues - -**Port not binding:** -- Cloud Run sets `PORT` env var -- Dockerfile CMD uses `${PORT:-8000}` - -**CORS errors:** -- Add origin to `allow_origins` in `main.py` - -**Build fails:** -- Check `pyproject.toml` is in project root -- Verify Dockerfile paths are correct - -### Debug locally - -```bash -# Run with debug logging -uv run uvicorn api.main:app --reload --log-level debug -``` - -## 📚 Resources - -- [FastAPI Documentation](https://fastapi.tiangolo.com/) -- [Cloud Run Documentation](https://cloud.google.com/run/docs) -- [UV Package Manager](https://github.com/astral-sh/uv) diff --git a/api/main.py b/api/main.py index b50fa3c338..938ed4ba46 100644 --- a/api/main.py +++ b/api/main.py @@ -34,7 +34,7 @@ # Configuration GCS_BUCKET = os.getenv("GCS_BUCKET", "pyplots-images") BASE_DIR = Path(__file__).parent.parent -SPECS_DIR = BASE_DIR / "specs" +PLOTS_DIR = BASE_DIR / "plots" # Configure logging @@ -125,11 +125,11 @@ class ImplementationResponse(BaseModel): library_id: str library_name: str - plot_function: str variant: str file_path: str preview_url: Optional[str] = None quality_score: Optional[float] = None + code: Optional[str] = None class SpecDetailResponse(BaseModel): @@ -138,6 +138,7 @@ class SpecDetailResponse(BaseModel): id: str title: str description: Optional[str] = None + content: Optional[str] = None data_requirements: list[DataRequirement] = [] tags: list[str] = [] implementations: list[ImplementationResponse] = [] @@ -160,11 +161,11 @@ class SpecListItem(BaseModel): def list_spec_ids_from_filesystem() -> list[str]: """List spec IDs from filesystem (fallback when DB not configured).""" - if not SPECS_DIR.exists(): + if not PLOTS_DIR.exists(): return [] - excluded = {".template", 
"VERSIONING"} - return sorted([f.stem for f in SPECS_DIR.glob("*.md") if f.stem not in excluded]) + # Each subdirectory in plots/ is a spec (containing spec.md) + return sorted([d.name for d in PLOTS_DIR.iterdir() if d.is_dir() and (d / "spec.md").exists()]) def get_images_from_gcs(spec_id: str) -> list[dict]: @@ -254,11 +255,11 @@ async def get_spec(spec_id: str, db: AsyncSession = Depends(get_db)): ImplementationResponse( library_id=impl.library_id, library_name=impl.library.name if impl.library else impl.library_id, - plot_function=impl.plot_function, variant=impl.variant, file_path=impl.file_path, preview_url=impl.preview_url, quality_score=impl.quality_score, + code=impl.code, ) for impl in spec.implementations ] @@ -272,6 +273,7 @@ async def get_spec(spec_id: str, db: AsyncSession = Depends(get_db)): id=spec.id, title=spec.title, description=spec.description, + content=spec.content, data_requirements=data_reqs, tags=spec.tags or [], implementations=implementations, diff --git a/app/README.md b/app/README.md deleted file mode 100644 index d5a1f659c4..0000000000 --- a/app/README.md +++ /dev/null @@ -1,139 +0,0 @@ -

- pyplots.ai -

- -# pyplots Frontend - -Minimal React + TypeScript + Vite + MUI frontend for the pyplots platform. - -## Features - -- **Python-colored branding** - Logo with Python blue (#3776AB) and yellow (#FFD43B) -- **JetBrains Mono font** - Monospace font for the pythonic developer vibe -- **Spec search & filter** - Click on spec ID to search/filter all available specs -- **Keyboard shortcuts**: - - `Space` - Random shuffle to another spec - - `Enter` - Open spec selector with search - - `↑↓` - Navigate through specs in dropdown - - `Escape` - Close modal or dropdown -- **URL sharing** - Specs are shareable via `?spec=scatter-basic` URL parameter -- **Fullscreen modal** - Click any plot to view in fullscreen -- **Responsive grid** - 1/2/3 columns based on screen size - -## Tech Stack - -- **React 19** - UI framework -- **TypeScript** - Type safety -- **Vite 7** - Build tool -- **MUI 7** - Material UI components -- **JetBrains Mono** - Google Fonts -- **nginx** - Production web server - -## Development - -### Prerequisites - -- Node.js 20+ -- Yarn package manager - -### Local Development - -1. Install dependencies: - ```bash - cd app - yarn install - ``` - -2. Create `.env` file (optional): - ```bash - VITE_API_URL=http://localhost:8000 - ``` - -3. Start development server: - ```bash - yarn dev - ``` - - The app will be available at `http://localhost:3000` - -### Build - -```bash -yarn build -``` - -Built files will be in the `dist/` directory. - -## Docker - -### Build Docker Image - -```bash -docker build -t pyplots-frontend \ - --build-arg VITE_API_URL=https://your-backend-url.run.app \ - . 
-``` - -### Run Docker Container - -```bash -docker run -p 8080:8080 pyplots-frontend -``` - -## Cloud Run Deployment - -### Prerequisites - -- Google Cloud Project with Cloud Run enabled -- `gcloud` CLI configured - -### Deploy - -From the project root: - -```bash -gcloud builds submit \ - --config=app/cloudbuild.yaml \ - --substitutions=_VITE_API_URL=https://pyplots-backend-YOUR-PROJECT.run.app -``` - -Replace `YOUR-PROJECT` with your Google Cloud project ID. - -### Update Backend URL - -To update the backend URL after initial deployment: - -1. Edit `app/cloudbuild.yaml` -2. Update the `_VITE_API_URL` substitution -3. Redeploy with the command above - -## Environment Variables - -- `VITE_API_URL` - Backend API URL (default: `http://localhost:8000`) - -## Project Structure - -``` -app/ -├── src/ -│ ├── App.tsx # Main application component -│ ├── main.tsx # React entry point -│ └── vite-env.d.ts # Vite type definitions -├── public/ -│ ├── favicon.svg # Python-colored "pp" favicon -│ └── logo.svg # Python-colored "pyplots.ai" logo -├── index.html # HTML template (fonts, favicon) -├── vite.config.ts # Vite configuration -├── tsconfig.json # TypeScript configuration -├── Dockerfile # Multi-stage Docker build -├── nginx.conf # nginx configuration for production -├── cloudbuild.yaml # Google Cloud Build configuration -└── package.json # Dependencies and scripts -``` - -## Available Scripts - -- `yarn dev` - Start development server -- `yarn build` - Build for production -- `yarn preview` - Preview production build locally -- `yarn lint` - Run ESLint diff --git a/automation/scripts/sync_to_postgres.py b/automation/scripts/sync_to_postgres.py new file mode 100644 index 0000000000..07bfd5e3c8 --- /dev/null +++ b/automation/scripts/sync_to_postgres.py @@ -0,0 +1,377 @@ +#!/usr/bin/env python3 +""" +Sync plots from repository to PostgreSQL. + +This script is run by GitHub Actions on push to main branch. 
+It ensures the database only contains data for code that is actually in main. + +New structure (plots/{spec_id}/): +- spec.md: Spec description, data requirements, use cases +- metadata.yaml: Tags, implementation metadata, generation history +- implementations/{library}.py: Library-specific implementation code +""" + +import asyncio +import logging +import os +import re +import sys +from datetime import datetime +from pathlib import Path + +import yaml + + +# Add project root to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from dotenv import load_dotenv + + +load_dotenv() + +from sqlalchemy import delete, select # noqa: E402 +from sqlalchemy.dialects.postgresql import insert # noqa: E402 +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine # noqa: E402 + +from core.database import LIBRARIES_SEED, Implementation, Library, Spec # noqa: E402 + + +# Configuration +BASE_DIR = Path(__file__).parent.parent.parent +PLOTS_DIR = BASE_DIR / "plots" +GCS_BUCKET = os.getenv("GCS_BUCKET", "pyplots-images") +DATABASE_URL = os.getenv("DATABASE_URL", "") + +# Logging +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +def parse_spec_markdown(file_path: Path) -> dict: + """ + Parse a spec.md file and extract metadata. 
+ + Args: + file_path: Path to the spec.md file + + Returns: + Dict with title, description, content, data_requirements, tags + """ + content = file_path.read_text(encoding="utf-8") + spec_id = file_path.parent.name # Directory name is spec_id + + # Parse title from first heading: "# scatter-basic: Basic Scatter Plot" + title_match = re.search(r"^#\s+[\w-]+:\s*(.+)$", content, re.MULTILINE) + title = title_match.group(1).strip() if title_match else spec_id + + # Parse description section + description = "" + desc_match = re.search(r"## Description\s*\n(.+?)(?=\n##|\Z)", content, re.DOTALL) + if desc_match: + description = desc_match.group(1).strip() + + # Parse data requirements section + data_requirements = [] + data_match = re.search(r"## Data\s*\n(.+?)(?=\n##|\Z)", content, re.DOTALL) + if data_match: + data_section = data_match.group(1) + for match in re.finditer(r"-\s+`(\w+)`\s+\((\w+)\)\s*-?\s*(.+)?", data_section): + data_requirements.append( + {"name": match.group(1), "type": match.group(2), "description": (match.group(3) or "").strip()} + ) + + # Parse simple tags from Tags section (if present) + tags = [] + tags_match = re.search(r"## Tags\s*\n(.+?)(?=\n##|\Z)", content, re.DOTALL) + if tags_match: + tags_text = tags_match.group(1).strip() + tags = [t.strip() for t in tags_text.split(",") if t.strip()] + + return { + "id": spec_id, + "title": title, + "description": description, + "content": content, # Store full markdown + "data_requirements": data_requirements, + "tags": tags, + } + + +def parse_metadata_yaml(file_path: Path) -> dict | None: + """ + Parse a metadata.yaml file. 
+ + Args: + file_path: Path to the metadata.yaml file + + Returns: + Dict with spec_id, title, tags, implementations or None if invalid + """ + try: + content = file_path.read_text(encoding="utf-8") + data = yaml.safe_load(content) + if not data or "spec_id" not in data: + return None + return data + except Exception as e: + logger.error(f"Failed to parse metadata {file_path}: {e}") + return None + + +def scan_plot_directory(plot_dir: Path) -> dict | None: + """ + Scan a single plot directory and extract all data. + + Args: + plot_dir: Path to the plot directory (e.g., plots/scatter-basic/) + + Returns: + Dict with spec data, metadata, and implementations + """ + spec_id = plot_dir.name + spec_file = plot_dir / "spec.md" + metadata_file = plot_dir / "metadata.yaml" + implementations_dir = plot_dir / "implementations" + + if not spec_file.exists(): + logger.warning(f"No spec.md found in {plot_dir}") + return None + + # Parse spec + spec_data = parse_spec_markdown(spec_file) + + # Parse metadata (optional) + metadata = {} + if metadata_file.exists(): + metadata = parse_metadata_yaml(metadata_file) or {} + + # Merge title from metadata if available + if metadata.get("title"): + spec_data["title"] = metadata["title"] + + # Add structured tags from metadata + spec_data["structured_tags"] = metadata.get("tags") + + # Add spec update history + spec_data["updates"] = metadata.get("updates") + + # Scan implementations + implementations = [] + if implementations_dir.exists(): + for impl_file in implementations_dir.glob("*.py"): + if impl_file.name.startswith("_"): + continue + + library_id = impl_file.stem # e.g., "matplotlib", "seaborn" + code = impl_file.read_text(encoding="utf-8") + file_path = str(impl_file.relative_to(BASE_DIR)) + + # Get implementation metadata + impl_meta = metadata.get("implementations", {}).get(library_id, {}) + current = impl_meta.get("current") or {} + + # Parse generated_at + generated_at = current.get("generated_at") + if 
isinstance(generated_at, str): + try: + generated_at = datetime.fromisoformat(generated_at.replace("Z", "+00:00")) + except ValueError: + generated_at = None + + implementations.append({ + "spec_id": spec_id, + "library_id": library_id, + "variant": "default", + "file_path": file_path, + "code": code, + "preview_url": impl_meta.get("preview_url") or get_gcs_preview_url(spec_id, library_id), + "generated_at": generated_at, + "generated_by": current.get("generated_by"), + "workflow_run": current.get("workflow_run"), + "issue_number": current.get("issue"), + "quality_score": current.get("quality_score"), + # Quality evaluation details + "evaluator_scores": current.get("evaluator_scores"), + "quality_feedback": current.get("quality_feedback"), + "improvements_suggested": current.get("improvements_suggested"), + # Version history + "history": impl_meta.get("history"), + }) + + return { + "spec": spec_data, + "implementations": implementations, + } + + +def get_gcs_preview_url(spec_id: str, library: str) -> str: + """ + Get the GCS preview URL for an implementation. + + Args: + spec_id: The specification ID + library: The library name + + Returns: + Public GCS URL for latest.png + """ + return f"https://storage.googleapis.com/{GCS_BUCKET}/plots/{spec_id}/{library}/latest.png" + + +async def sync_to_database(session: AsyncSession, plots: list[dict]) -> dict: + """ + Sync plots to the database. 
+ + Args: + session: Database session + plots: List of plot data dictionaries + + Returns: + Dict with counts of synced/removed items + """ + stats = {"specs_synced": 0, "specs_removed": 0, "impls_synced": 0, "impls_removed": 0} + + # Ensure libraries exist + for lib_data in LIBRARIES_SEED: + stmt = insert(Library).values(**lib_data).on_conflict_do_nothing(index_elements=["id"]) + await session.execute(stmt) + + # Collect all spec IDs and implementation keys + spec_ids = set() + impl_keys = set() + + for plot_data in plots: + spec = plot_data["spec"] + spec_id = spec["id"] + spec_ids.add(spec_id) + + # Upsert spec + stmt = ( + insert(Spec) + .values(**spec) + .on_conflict_do_update( + index_elements=["id"], + set_={ + "title": spec["title"], + "description": spec["description"], + "content": spec["content"], + "data_requirements": spec["data_requirements"], + "tags": spec["tags"], + "structured_tags": spec.get("structured_tags"), + "updates": spec.get("updates"), + }, + ) + ) + await session.execute(stmt) + stats["specs_synced"] += 1 + + # Upsert implementations + for impl in plot_data["implementations"]: + key = (impl["spec_id"], impl["library_id"], impl["variant"]) + impl_keys.add(key) + + update_set = { + "file_path": impl["file_path"], + "code": impl["code"], + "preview_url": impl["preview_url"], + } + + # Add optional fields + optional_fields = [ + "generated_at", "generated_by", "workflow_run", "issue_number", "quality_score", + "evaluator_scores", "quality_feedback", "improvements_suggested", "history" + ] + for field in optional_fields: + if impl.get(field) is not None: + update_set[field] = impl[field] + + stmt = ( + insert(Implementation) + .values(**impl) + .on_conflict_do_update(constraint="uq_implementation", set_=update_set) + ) + await session.execute(stmt) + stats["impls_synced"] += 1 + + # Remove specs that no longer exist in repo + result = await session.execute(select(Spec.id).where(Spec.id.notin_(spec_ids))) + removed_spec_ids = [row[0] for row 
in result.fetchall()] + if removed_spec_ids: + await session.execute(delete(Spec).where(Spec.id.in_(removed_spec_ids))) + stats["specs_removed"] = len(removed_spec_ids) + logger.info(f"Removed {len(removed_spec_ids)} specs no longer in repo") + + # Remove implementations that no longer exist in repo + result = await session.execute(select(Implementation.spec_id, Implementation.library_id, Implementation.variant)) + existing_impls = [(row[0], row[1], row[2]) for row in result.fetchall()] + + removed_impls = [impl for impl in existing_impls if impl not in impl_keys] + if removed_impls: + for spec_id, library_id, variant in removed_impls: + await session.execute( + delete(Implementation).where( + Implementation.spec_id == spec_id, + Implementation.library_id == library_id, + Implementation.variant == variant, + ) + ) + stats["impls_removed"] = len(removed_impls) + logger.info(f"Removed {len(removed_impls)} implementations no longer in repo") + + await session.commit() + return stats + + +async def main() -> int: + """Main entry point for the sync script.""" + if not DATABASE_URL: + logger.error("DATABASE_URL environment variable not set") + return 1 + + logger.info("Starting sync to PostgreSQL...") + logger.info(f"Plots directory: {PLOTS_DIR}") + + # Scan all plot directories + plots = [] + if PLOTS_DIR.exists(): + for plot_dir in sorted(PLOTS_DIR.iterdir()): + if not plot_dir.is_dir(): + continue + if plot_dir.name.startswith("."): + continue + + plot_data = scan_plot_directory(plot_dir) + if plot_data: + plots.append(plot_data) + logger.debug(f"Scanned plot: {plot_dir.name}") + + logger.info(f"Found {len(plots)} plots") + + total_impls = sum(len(p["implementations"]) for p in plots) + logger.info(f"Found {total_impls} implementations") + + # Create database connection + engine = create_async_engine(DATABASE_URL, pool_pre_ping=True) + async_session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) + + try: + async with async_session() as 
session: + stats = await sync_to_database(session, plots) + + logger.info("Sync completed successfully!") + logger.info(f" Specs synced: {stats['specs_synced']}, removed: {stats['specs_removed']}") + logger.info(f" Implementations synced: {stats['impls_synced']}, removed: {stats['impls_removed']}") + return 0 + + except Exception as e: + logger.error(f"Sync failed: {e}") + return 1 + + finally: + await engine.dispose() + + +if __name__ == "__main__": + exit_code = asyncio.run(main()) + sys.exit(exit_code) diff --git a/core/database/models.py b/core/database/models.py index 50a3313027..2ae377ef65 100644 --- a/core/database/models.py +++ b/core/database/models.py @@ -8,7 +8,7 @@ from typing import Optional from uuid import uuid4 -from sqlalchemy import Boolean, DateTime, Float, ForeignKey, String, Text, UniqueConstraint +from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text, UniqueConstraint from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.sql import func @@ -21,13 +21,25 @@ class Spec(Base): __tablename__ = "specs" + # Identification id: Mapped[str] = mapped_column(String, primary_key=True) # e.g., "scatter-basic" title: Mapped[str] = mapped_column(String, nullable=False) - description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) - data_requirements: Mapped[dict] = mapped_column(JSONB, nullable=False, default=list) - optional_params: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True) - tags: Mapped[list[str]] = mapped_column(ARRAY(String), default=list) - created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now()) + + # From spec.md + description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) # Prose text + content: Mapped[Optional[str]] = mapped_column(Text, nullable=True) # Full markdown from spec.md + applications: Mapped[list[str]] = mapped_column(ARRAY(String), default=list) # Use cases + 
data: Mapped[list[str]] = mapped_column(ARRAY(String), default=list) # Data requirements + data_requirements: Mapped[list] = mapped_column(JSONB, nullable=False, default=list) # Parsed spec.md Data section — still written by sync_to_postgres.py and read by api/main.py + notes: Mapped[list[str]] = mapped_column(ARRAY(String), default=list) # Optional hints + + # From metadata.yaml + # NOTE(review): created/issue/suggested (and applications/data/notes above) have no Alembic migration yet — confirm schema before relying on them + created: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) # When spec was created + issue: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) # GitHub issue number + suggested: Mapped[Optional[str]] = mapped_column(String, nullable=True) # GitHub username + tags: Mapped[list[str]] = mapped_column(ARRAY(String), default=list) # Simple tag list — sync_to_postgres.py upserts a list here and api/main.py returns it + structured_tags: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True) # {plot_type, domain, features, audience, data_type} — column added by migration 002 + updates: Mapped[Optional[list]] = mapped_column(JSONB, nullable=True) # Spec update history + + # System updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), onupdate=func.now()) # Relationships @@ -43,10 +55,8 @@ class Library(Base): id: Mapped[str] = mapped_column(String, primary_key=True) # e.g., "matplotlib" name: Mapped[str] = mapped_column(String, nullable=False) # e.g., "Matplotlib" - version: Mapped[Optional[str]] = mapped_column(String, nullable=True) + version: Mapped[Optional[str]] = mapped_column(String, nullable=True) # Current version documentation_url: Mapped[Optional[str]] = mapped_column(String, nullable=True) - active: Mapped[bool] = mapped_column(Boolean, default=True) - created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now()) # Relationships implementations: Mapped[list["Implementation"]] = relationship( @@ -59,17 +69,42 @@ class Implementation(Base): __tablename__ = "implementations" + # Identification id: Mapped[str] = mapped_column(UUID(as_uuid=False), primary_key=True, default=lambda: str(uuid4())) spec_id: Mapped[str] = mapped_column(String, ForeignKey("specs.id", ondelete="CASCADE"), nullable=False) library_id: Mapped[str] = mapped_column(String, ForeignKey("libraries.id", ondelete="CASCADE"), nullable=False) - plot_function: Mapped[str] = mapped_column(String, nullable=False) # 
e.g., "scatter", "bar" - variant: Mapped[str] = mapped_column(String, nullable=False, default="default") - file_path: Mapped[str] = mapped_column(String, nullable=False) - preview_url: Mapped[Optional[str]] = mapped_column(String, nullable=True) # GCS URL - python_version: Mapped[str] = mapped_column(String, default="3.12+") - tested: Mapped[bool] = mapped_column(Boolean, default=False) + + # Code + code: Mapped[Optional[str]] = mapped_column(Text, nullable=True) # Python source + + # Previews + preview_url: Mapped[Optional[str]] = mapped_column(String, nullable=True) # Full PNG + preview_thumb: Mapped[Optional[str]] = mapped_column(String, nullable=True) # Thumbnail PNG + preview_html: Mapped[Optional[str]] = mapped_column(String, nullable=True) # Interactive HTML + + # Creation versions + python_version: Mapped[Optional[str]] = mapped_column(String, nullable=True) # e.g., "3.12" + library_version: Mapped[Optional[str]] = mapped_column(String, nullable=True) # e.g., "3.9.0" + + # Test matrix: [{"py": "3.11", "lib": "3.8.5", "ok": true}, ...] 
+ tested: Mapped[Optional[list]] = mapped_column(JSONB, nullable=True) + + # Quality & Generation quality_score: Mapped[Optional[float]] = mapped_column(Float, nullable=True) - created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now()) + generated_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) + generated_by: Mapped[Optional[str]] = mapped_column(String, nullable=True) # Model ID + issue: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) # GitHub Issue + workflow_run: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) # GitHub Actions run ID + + # Quality evaluation details + evaluator_scores: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True) # Per-LLM scores + quality_feedback: Mapped[Optional[str]] = mapped_column(Text, nullable=True) # Evaluation feedback + improvements_suggested: Mapped[Optional[list]] = mapped_column(JSONB, nullable=True) # Suggested fixes + + # Version history + history: Mapped[Optional[list]] = mapped_column(JSONB, nullable=True) # Previous versions + + # System updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), onupdate=func.now()) # Relationships @@ -77,7 +112,7 @@ class Implementation(Base): library: Mapped["Library"] = relationship("Library", back_populates="implementations") # Unique constraint - __table_args__ = (UniqueConstraint("spec_id", "library_id", "variant", name="uq_implementation"),) + __table_args__ = (UniqueConstraint("spec_id", "library_id", name="uq_spec_library"),) # Seed data for libraries diff --git a/docs/architecture/repository.md b/docs/architecture/repository.md index dad8b97116..d2b09605be 100644 --- a/docs/architecture/repository.md +++ b/docs/architecture/repository.md @@ -2,13 +2,19 @@ ## Overview -pyplots follows a **mono-repository pattern** with clear separation between: -- Generic, library-agnostic specifications (Markdown) -- Library-specific implementations (Python code) -- Shared business logic 
and API -- Frontend application +pyplots follows a **plot-centric repository pattern** where everything for one plot type lives in a single directory: -**Key Principle**: The repository contains **only production code and final specs**. Quality reports, feedback, and workflow state are managed in GitHub Issues. +``` +plots/{spec-id}/ +├── spec.md # Description, Applications, Data, Notes +├── metadata.yaml # Tags, generation info, quality history +└── implementations/ # Library implementations + ├── matplotlib.py + ├── seaborn.py + └── ... +``` + +**Key Principle**: The repository contains **only production code and final specs**. Quality reports and workflow state are managed in GitHub Issues. Preview images are stored in GCS. --- @@ -16,326 +22,292 @@ pyplots follows a **mono-repository pattern** with clear separation between: ``` pyplots/ -├── specs/ # Generic plot specifications (Markdown) -│ ├── scatter-basic-001.md # From GitHub Issue → Markdown Spec -│ ├── heatmap-corr-002.md -│ ├── timeseries-line-003.md -│ └── bar-grouped-004.md -│ -├── rules/ # Versioned rules for code generation and quality evaluation -│ ├── README.md # Rule system documentation -│ ├── versions.yaml # Index of all rule versions -│ ├── templates/ # Templates for creating new rules -│ │ ├── generation-rules-template.md -│ │ ├── quality-criteria-template.md -│ │ └── evaluation-prompt-template.md -│ └── generation/ # Code generation rules -│ └── v1.0.0-draft/ # Initial draft version -│ ├── metadata.yaml -│ ├── code-generation-rules.md -│ ├── quality-criteria.md -│ └── self-review-checklist.md -│ -├── plots/ # Library-specific implementations (9 libraries) -│ ├── matplotlib/ # The classic standard -│ │ ├── scatter/ -│ │ │ ├── scatter-basic-001/ # Implements specs/scatter-basic-001.md -│ │ │ │ ├── default.py # Standard implementation -│ │ │ │ ├── ggplot_style.py # Style variant -│ │ │ │ └── py311.py # Python 3.11 specific (only if needed) -│ │ │ └── scatter-advanced-005/ -│ │ │ └── default.py 
-│ │ ├── bar/ -│ │ │ └── bar-grouped-004/ -│ │ │ └── default.py -│ │ └── heatmap/ -│ │ └── heatmap-corr-002/ -│ │ └── default.py -│ │ -│ ├── seaborn/ # Statistical visualizations -│ │ ├── scatterplot/ -│ │ │ └── scatter-basic-001/ # Same spec-id! -│ │ │ ├── default.py -│ │ │ └── darkgrid_style.py -│ │ └── heatmap/ -│ │ └── heatmap-corr-002/ -│ │ └── default.py -│ │ -│ ├── plotly/ # Interactive web plots -│ │ └── scatter/ -│ │ └── scatter-basic-001/ # Same spec-id! -│ │ └── default.py -│ │ -│ ├── bokeh/ # Interactive, large datasets -│ │ └── scatter/ -│ │ └── scatter-basic-001/ -│ │ └── default.py -│ │ -│ ├── altair/ # Declarative/Vega-Lite -│ │ └── scatter/ -│ │ └── scatter-basic-001/ -│ │ └── default.py -│ │ -│ ├── plotnine/ # ggplot2 syntax -│ │ └── scatter/ -│ │ └── scatter-basic-001/ -│ │ └── default.py +├── plots/ # Plot-centric directories +│ ├── scatter-basic/ # Everything for basic scatter plot +│ │ ├── spec.md # Library-agnostic specification +│ │ ├── metadata.yaml # Tags, generation info, quality scores +│ │ └── implementations/ # Library-specific code +│ │ ├── matplotlib.py +│ │ ├── seaborn.py +│ │ ├── plotly.py +│ │ ├── bokeh.py +│ │ ├── altair.py +│ │ ├── plotnine.py +│ │ ├── pygal.py +│ │ ├── highcharts.py +│ │ └── letsplot.py │ │ -│ ├── pygal/ # SVG charts -│ │ └── scatter/ -│ │ └── scatter-basic-001/ -│ │ └── default.py +│ ├── bar-basic/ +│ │ ├── spec.md +│ │ ├── metadata.yaml +│ │ └── implementations/ +│ │ └── ... │ │ -│ ├── highcharts/ # Interactive web charts, stock charts -│ │ └── scatter/ -│ │ └── scatter-basic-001/ -│ │ └── default.py -│ │ -│ └── letsplot/ # ggplot2 grammar by JetBrains -│ └── point/ -│ └── scatter-basic-001/ -│ └── default.py +│ └── heatmap-correlation/ +│ ├── spec.md +│ ├── metadata.yaml +│ └── implementations/ +│ └── ... 
+│ +├── prompts/ # AI agent prompts +│ ├── plot-generator.md # Base rules for code generation +│ ├── quality-criteria.md # Quality evaluation criteria +│ ├── quality-evaluator.md # Multi-LLM evaluation prompt +│ ├── auto-tagger.md # Automatic tagging +│ ├── spec-validator.md # Validates plot requests +│ ├── spec-id-generator.md # Assigns spec IDs +│ └── library/ # Library-specific rules +│ ├── matplotlib.md +│ ├── seaborn.md +│ └── ... │ ├── core/ # Shared business logic │ ├── __init__.py -│ ├── database.py # Database connection management │ ├── config.py # Configuration (.env-based) -│ ├── cache.py # Caching utilities -│ ├── models/ # SQLAlchemy ORM models -│ │ ├── __init__.py -│ │ ├── spec.py -│ │ ├── implementation.py -│ │ └── library.py -│ └── repositories/ # Repository pattern +│ └── database/ # Database layer │ ├── __init__.py -│ ├── base.py -│ ├── spec_repo.py -│ └── implementation_repo.py +│ ├── connection.py # Async connection management +│ ├── models.py # SQLAlchemy ORM models +│ └── repositories.py # Repository pattern │ ├── api/ # FastAPI backend │ ├── __init__.py │ ├── main.py # Application entry point -│ ├── dependencies.py # Dependency injection -│ ├── schemas.py # Pydantic models -│ └── routers/ -│ ├── __init__.py -│ ├── plots.py # Plot endpoints -│ ├── specs.py # Spec endpoints -│ └── data.py # Data upload +│ └── Dockerfile # Cloud Run deployment │ ├── app/ # React frontend │ ├── src/ │ │ ├── components/ │ │ ├── pages/ │ │ └── lib/ -│ ├── public/ │ ├── package.json -│ └── next.config.js +│ └── Dockerfile │ -├── automation/ # AI tools for code generation -│ ├── __init__.py -│ ├── generators/ -│ │ ├── __init__.py -│ │ ├── claude_generator.py # Code generation -│ │ └── quality_checker.py # Multi-LLM quality check +├── automation/ # Automation scripts │ └── scripts/ -│ ├── create_spec_from_issue.py -│ └── update_plots.py +│ └── sync_to_postgres.py # Sync plots/ to database │ ├── tests/ # Test suite -│ ├── unit/ -│ │ ├── core/ -│ │ │ └── 
test_repositories.py -│ │ ├── api/ -│ │ │ └── test_routers.py -│ │ └── plots/ -│ │ ├── matplotlib/ -│ │ │ └── test_scatter_basic_001.py -│ │ └── seaborn/ -│ │ └── test_scatter_basic_001.py -│ └── integration/ -│ └── test_plot_pipeline.py +│ └── unit/ +│ ├── api/ +│ ├── core/ +│ ├── prompts/ +│ └── workflows/ │ ├── .github/ │ └── workflows/ # GitHub Actions CI/CD -│ ├── spec-to-code.yml -│ ├── test-and-preview.yml -│ ├── quality-check.yml -│ └── deploy.yml +│ ├── gen-create-spec.yml # Creates feature branch + spec +│ ├── gen-new-plot.yml # Orchestrator for parallel generation +│ ├── gen-library-impl.yml # Per-library implementation +│ ├── ci-plottest.yml # Multi-Python testing +│ ├── gen-preview.yml # Preview image generation +│ ├── bot-ai-review.yml # AI quality evaluation +│ ├── bot-auto-merge.yml # Auto-merge approved PRs +│ ├── sync-postgres.yml # Sync to database on push +│ └── ... +│ +├── alembic/ # Database migrations +│ └── versions/ │ ├── docs/ # Documentation -│ ├── vision.md -│ ├── workflow.md │ ├── architecture/ -│ ├── development.md -│ └── deployment.md -│ -├── scripts/ # Utility scripts -│ ├── init_database.py -│ └── migrate.py +│ ├── workflow.md +│ ├── specs-guide.md +│ └── development.md │ ├── pyproject.toml # Python project config (uv) ├── uv.lock # Dependency lock file ├── .env.example # Environment variables template -├── .gitignore -├── README.md -└── Dockerfile # For Cloud Run deployment +├── CLAUDE.md # AI assistant instructions +└── README.md ``` --- ## Key Directories Explained -### `specs/` +### `plots/{spec-id}/` -**Purpose**: Library-agnostic plot specifications in Markdown format +**Purpose**: Plot-centric directories containing everything for one plot type + +**Structure**: +``` +plots/{spec-id}/ +├── spec.md # Library-agnostic specification +├── metadata.yaml # Tags, generation info, quality history +└── implementations/ # Library-specific implementations + ├── matplotlib.py + ├── seaborn.py + └── ... 
+``` **Characteristics**: -- ✅ Created from approved GitHub Issues -- ✅ Markdown format (human and AI readable) -- ✅ Generic descriptions (no library-specific code) -- ✅ Versioned in git -- ❌ NO code implementations -- ❌ NO quality reports (those are in Issues) +- ✅ Self-contained (spec + metadata + code together) +- ✅ Easy to navigate (one folder = one plot type) +- ✅ Synced to PostgreSQL via `sync-postgres.yml` +- ❌ NO preview images (stored in GCS) +- ❌ NO quality reports (stored in GitHub Issues) -**Naming**: `{type}-{variant}-{number}.md` -- Examples: `scatter-basic-001.md`, `heatmap-corr-002.md` +**Example**: `plots/scatter-basic/` contains everything for the basic scatter plot. --- -### `rules/` +### `plots/{spec-id}/spec.md` -**Purpose**: Versioned rules for AI code generation and quality evaluation +**Purpose**: Library-agnostic plot specification -**Characteristics**: -- ✅ Markdown format (human and LLM readable) -- ✅ Semantic versioning (v1.0.0, v2.0.0, etc.) -- ✅ Separate generation and evaluation rules -- ✅ Templates for creating new versions -- ✅ Git-versioned for full audit trail - -**Structure**: `rules/{type}/{version}/{files}.md` -- `type`: generation or evaluation -- `version`: Semantic version (v1.0.0) -- `files`: Rule Markdown files + metadata.yaml - -**Versions**: -- **draft**: Work in progress (e.g., v1.0.0-draft) -- **active**: Production version -- **deprecated**: Superseded by newer version -- **archived**: Historical record - -**Why Versioned Rules?** -- **Test improvements**: A/B test rule versions before deploying -- **Rollback capability**: Return to previous version if issues arise -- **Audit trail**: Know which rules generated which plots -- **Scientific improvement**: Prove new rules are better - -**See Also**: [Rule Versioning Architecture](./rule-versioning.md) +**Contents**: +- Title and description +- Data requirements (columns, types) +- Use cases with domain context +- Visual requirements ---- +**Naming**: Always `spec.md` 
(consistent across all plots) -### `plots/` +--- -**Purpose**: Library-specific implementations organized by library and plot type +### `plots/{spec-id}/metadata.yaml` + +**Purpose**: Structured metadata synced to PostgreSQL + +**Contents**: +```yaml +spec_id: scatter-basic +title: Basic Scatter Plot + +tags: + plot_type: [scatter, point] + domain: [statistics, general] + features: [basic, 2d] + audience: [beginner] + data_type: [numeric] + +implementations: + matplotlib: + preview_url: https://storage.googleapis.com/pyplots-images/plots/scatter-basic/matplotlib/latest.png + current: + version: 2 + date: 2025-01-15T10:30:00Z + issue: 53 + generated_by: claude-opus-4-5-20251101 + quality_score: 92 + history: + - version: 0 + date: 2025-01-10T08:00:00Z + issue: 42 + generated_by: claude-sonnet-4-20250514 + quality_score: 65 +``` -**Structure**: `plots/{library}/{plot_type}/{spec_id}/{variant}.py` -- `library`: matplotlib, seaborn, plotly, etc. -- `plot_type`: scatter, bar, heatmap, etc. -- `spec_id`: References spec file (e.g., scatter-basic-001) -- `variant`: default, style name, or Python version +**Key Points**: +- Tags are at spec level (same for all libraries) +- Version numbers match GCS history files (`v0.png`, `v1.png`, etc.) 
+- Each version tracks date, issue, model, and quality score -**File Types**: -- `default.py` - Standard implementation (required) -- `{style}_style.py` - Style variants (e.g., `ggplot_style.py`, `darkgrid_style.py`) -- `py{version}.py` - Version-specific (only when necessary, e.g., `py310.py`) +### GCS Storage -**Important**: -- ❌ NO `preview.png` files (stored in GCS) -- ❌ NO `quality_report.json` (stored in GitHub Issues) -- ✅ Only Python code +Preview images are stored in Google Cloud Storage (not in repo): -**Cross-Library Linking**: Same `spec_id` across all 8 supported libraries ``` -matplotlib/scatter/scatter-basic-001/default.py -seaborn/scatterplot/scatter-basic-001/default.py -plotly/scatter/scatter-basic-001/default.py -bokeh/scatter/scatter-basic-001/default.py -altair/scatter/scatter-basic-001/default.py -plotnine/scatter/scatter-basic-001/default.py -pygal/scatter/scatter-basic-001/default.py -highcharts/scatter/scatter-basic-001/default.py -letsplot/point/scatter-basic-001/default.py +gs://pyplots-images/ +├── plots/{spec-id}/{library}/ # Live (after merge) +│ ├── latest.png, latest_thumb.png +│ ├── latest.html # Optional (interactive) +│ └── history/v0.png, v1.png, ... 
+│ +└── staging/{spec-id}/{library}/ # Temp (review) + └── preview.png, preview.html ``` -All implement the same spec: `specs/scatter-basic-001.md` + +**Interactive libraries** (`.html`): plotly, bokeh, altair, highcharts, pygal, letsplot --- -### `core/` +### `plots/{spec-id}/implementations/` -**Purpose**: Shared business logic used by API and automation +**Purpose**: Library-specific Python implementations -**Key Files**: -- `database.py` - Database connection, async session management -- `config.py` - Environment variables, settings -- `cache.py` - Caching layer (if needed) +**File Naming**: `{library}.py` +- `matplotlib.py` +- `seaborn.py` +- `plotly.py` +- `bokeh.py` +- `altair.py` +- `plotnine.py` +- `pygal.py` +- `highcharts.py` +- `letsplot.py` -**Subdirectories**: -- `models/` - SQLAlchemy ORM models (database tables) -- `repositories/` - Repository pattern for data access +**Code Style** (KISS): +```python +""" +scatter-basic: Basic Scatter Plot +Library: matplotlib +""" + +import matplotlib.pyplot as plt +import numpy as np + +# Data +np.random.seed(42) +x = np.random.randn(100) +y = x * 0.8 + np.random.randn(100) * 0.5 + +# Plot +fig, ax = plt.subplots(figsize=(16, 9)) +ax.scatter(x, y, alpha=0.7, s=50) +ax.set_title('Basic Scatter Plot') + +plt.tight_layout() +plt.savefig('plot.png', dpi=300) +``` -**Design Pattern**: Repository pattern separates data access from business logic +**Rules**: +- No functions, no classes +- No `if __name__ == '__main__':` +- Just: imports → data → plot → save --- -### `api/` +### `prompts/` -**Purpose**: FastAPI REST API serving frontend and automation +**Purpose**: AI agent prompts for code generation and quality evaluation -**Key Files**: -- `main.py` - FastAPI app initialization, CORS, middleware -- `dependencies.py` - Dependency injection (DB sessions, auth) -- `schemas.py` - Pydantic models for request/response validation +**Subdirectories**: +- `templates/` - Templates for new specs (`spec.md`, `metadata.yaml`) 
-**Routers**: -- `plots.py` - Plot-related endpoints -- `specs.py` - Spec-related endpoints -- `data.py` - User data upload and plot generation +**Files**: +- `plot-generator.md` - Base rules for all implementations +- `quality-criteria.md` - Definition of quality +- `quality-evaluator.md` - Multi-LLM evaluation +- `auto-tagger.md` - Automatic tagging +- `library/*.md` - Library-specific rules (9 files) --- -### `app/` +### `core/` -**Purpose**: Next.js frontend application +**Purpose**: Shared business logic used by API -**Structure**: Standard Next.js 14 App Router structure -- `src/components/` - Reusable React components -- `src/pages/` - Page components -- `src/lib/` - Utilities and API client +**Key Components**: +- `database/connection.py` - Async database connection +- `database/models.py` - SQLAlchemy ORM models +- `database/repositories.py` - Repository pattern for data access --- -### `automation/` +### `api/` -**Purpose**: AI-powered code generation and quality checking +**Purpose**: FastAPI REST API **Key Files**: -- `generators/claude_generator.py` - Generates plot code from specs -- `generators/quality_checker.py` - Multi-LLM quality evaluation - -**Usage**: Called by GitHub Actions, not part of production API +- `main.py` - FastAPI app with all endpoints --- -### `tests/` - -**Purpose**: Comprehensive test suite (target: 90%+ coverage) - -**Structure**: -- `unit/` - Unit tests for individual components -- `integration/` - End-to-end workflow tests +### `app/` -**Naming**: `test_{module_name}.py` +**Purpose**: React frontend (Vite + TypeScript + MUI) --- @@ -344,12 +316,13 @@ All implement the same spec: `specs/scatter-basic-001.md` **Purpose**: CI/CD automation via GitHub Actions **Key Workflows**: -- `spec-to-code.yml` - Generate code from approved issues -- `test-and-preview.yml` - Run tests and create previews -- `quality-check.yml` - Multi-LLM quality evaluation -- `deploy.yml` - Deploy to Cloud Run - -See 
[automation-workflows.md](./automation-workflows.md) for details. +- `gen-create-spec.yml` - Creates feature branch and spec file +- `gen-new-plot.yml` - Orchestrates parallel library generation +- `gen-library-impl.yml` - Generates one library implementation +- `ci-plottest.yml` - Multi-Python version testing +- `gen-preview.yml` - Generates preview images +- `bot-ai-review.yml` - AI quality evaluation +- `sync-postgres.yml` - Syncs plots/ to database --- @@ -357,132 +330,56 @@ See [automation-workflows.md](./automation-workflows.md) for details. ### Spec IDs -Format: `{type}-{variant}-{number}` +Format: `{type}-{variant}` or `{type}-{variant}-{modifier}` **Examples**: -- `scatter-basic-001` - Basic scatter plot -- `scatter-advanced-005` - Advanced scatter with multiple features -- `heatmap-corr-002` - Correlation heatmap -- `bar-grouped-004` - Grouped bar chart -- `timeseries-line-003` - Time series line plot +- `scatter-basic` - Basic scatter plot +- `scatter-color-groups` - Scatter with color-coded groups +- `bar-grouped-horizontal` - Horizontal grouped bars +- `heatmap-correlation` - Correlation matrix heatmap **Rules**: - All lowercase -- Words separated by hyphens -- Three-digit number suffix (001, 002, etc.) -- Unique across all specs - -### File Names - -**Specs**: `{spec-id}.md` -- Example: `scatter-basic-001.md` - -**Implementations**: -- Default: `default.py` -- Styles: `{style}_style.py` (e.g., `ggplot_style.py`) -- Version-specific: `py{version}.py` (e.g., `py310.py`, `py311.py`) - -**Why version-specific files?** -Only create when necessary: -- Breaking changes between Python versions -- Library compatibility issues -- Syntax differences - -Prefer: Single `default.py` that works across all versions (3.10-3.13) - ---- - -## Code Organization Principles - -### 1. Separation of Concerns - -``` -Specs (What) → plots/ (How) → tests/ (Verification) -Generic description Library-specific code Ensure correctness -``` - -### 2. 
DRY (Don't Repeat Yourself) - -Shared logic goes in `core/`: -```python -# ✅ Good -from core.repositories import SpecRepository - -# ❌ Bad -# Duplicate database queries in multiple routers -``` - -### 3. Dependency Flow - -``` -Frontend (app/) → API (api/) → Core (core/) → Database - ↓ - Plots (plots/) -``` +- Hyphens as separators +- Descriptive names (no numbers needed) -### 4. Testing Parallel to Code +### Implementation Files -``` -plots/matplotlib/scatter/scatter-basic-001/default.py -tests/unit/plots/matplotlib/test_scatter_basic_001.py -``` +Always named by library: `{library}.py` +- `matplotlib.py`, `seaborn.py`, `plotly.py`, etc. --- ## What's NOT in the Repository ### ❌ Preview Images -- **Where**: Google Cloud Storage (`gs://pyplots-images/previews/...`) +- **Where**: Google Cloud Storage (`gs://pyplots-images/plots/...`) - **Why**: Binary files bloat git history ### ❌ Quality Reports - **Where**: GitHub Issues (as bot comments) - **Why**: Keeps repo clean, increases transparency -### ❌ User Data -- **Where**: Processed in-memory, temporary files auto-deleted -- **Why**: Privacy and security - ### ❌ Secrets - **Where**: Environment variables, Cloud Secret Manager - **Why**: Security - **Note**: `.env.example` shows required variables without values -### ❌ n8n Workflows -- **Where**: n8n cloud/self-hosted instance -- **Why**: Visual workflows, not code-based -- **Note**: Can export JSON if needed for backup - ---- - -## File Size Guidelines - -### Specs -- Target: < 5 KB (readable Markdown) -- If larger: Consider splitting into multiple specs - -### Implementation Files -- Target: < 500 lines per file -- If larger: Refactor into helper functions in `core/` - -### Tests -- One test file per implementation -- Target: 100% coverage of plot generation logic - --- -## Migration from Old Structure +## Database Sync -If you have existing plots in a different structure: +The `sync-postgres.yml` workflow syncs `plots/` to PostgreSQL on push to main: -**Old**: 
`plots/scatter_basic.py` -**New**: `plots/matplotlib/scatter/scatter-basic-001/default.py` +**What's Synced**: +- Spec content (full markdown from spec.md) +- Spec metadata (title, description, tags) +- Implementation code (full Python source) +- Implementation metadata (quality score, generation info) +- Preview URLs from metadata.yaml -Run migration script: -```bash -python scripts/migrate_old_structure.py -``` +**Source of Truth**: The `plots/` directory is authoritative. Database is derived. --- -*For implementation details and code examples, see [specs-guide.md](./specs-guide.md) and [development.md](../development.md)* +*For implementation details, see [specs-guide.md](../specs-guide.md) and [development.md](../development.md)* diff --git a/docs/development.md b/docs/development.md index 60648a0d20..232beff2b8 100644 --- a/docs/development.md +++ b/docs/development.md @@ -214,14 +214,14 @@ See [CLAUDE.md](../CLAUDE.md) for: **Option 1: GitHub Issue (Recommended)** 1. Create issue using spec template -2. Fill in description, requirements, use cases +2. Fill in description, applications, data requirements 3. Add label `plot-idea` 4. Wait for review and approval 5. AI generates implementations automatically **Option 2: Pull Request (Advanced)** -1. Create spec file: `specs/{spec-id}.md` +1. Create spec directory: `plots/{spec-id}/` with spec.md 2. Implement for at least one library 3. Add tests 4. Create PR with previews @@ -267,8 +267,8 @@ Closes #123 See [CLAUDE.md](../CLAUDE.md) for: - Directory structure -- Implementation file naming (`plots/{library}/{plot_type}/{spec_id}/{variant}.py`) -- Test file naming (`tests/unit/plots/{library}/test_{spec_id}.py`) +- Implementation file naming (`plots/{spec-id}/implementations/{library}.py`) +- Test file naming (`tests/unit/plots/test_{spec_id}.py`) --- @@ -282,15 +282,10 @@ See [CLAUDE.md](../CLAUDE.md) for: ### Update an Existing Implementation 1. Create GitHub issue referencing original -2. 
Update implementation file -3. Run tests: `pytest tests/unit/plots/{library}/test_{spec_id}.py` +2. Update implementation file in `plots/{spec-id}/implementations/{library}.py` +3. Run tests: `uv run pytest tests/unit/plots/test_{spec_id}.py` 4. Create PR → Quality check runs automatically -### Add a Style Variant -1. Create new file: `plots/{library}/{plot_type}/{spec_id}/{style}_style.py` -2. Add test -3. Add to database - --- ## Debugging Tips @@ -320,7 +315,7 @@ uv sync --reinstall ```python # Run implementation standalone -python plots/matplotlib/scatter/scatter_basic_001/default.py +python plots/scatter-basic/implementations/matplotlib.py # Add debug prints print(f"Data shape: {data.shape}") @@ -354,7 +349,7 @@ pytest --pdb ### Q: How do I test plot generation locally? -**A**: Run implementation file directly: `python plots/matplotlib/scatter/scatter_basic_001/default.py` +**A**: Run implementation file directly: `python plots/scatter-basic/implementations/matplotlib.py` ### Q: Do I need to implement for all libraries? diff --git a/docs/plot-types-catalog.md b/docs/plot-types-catalog.md index 52bb89d4da..f0b8a2ac40 100644 --- a/docs/plot-types-catalog.md +++ b/docs/plot-types-catalog.md @@ -1154,7 +1154,7 @@ Each plot type in this catalog can become a spec file. Priority order suggestion 5. **SVG/Minimal** (svg-line, svg-radar, svg-worldmap) 6. **Scientific** (spectrum-plot, survival-curve, forest-plot) -To create a spec, use the format: `specs/{spec-id}.md` +To create a spec, use the format: `plots/{spec-id}/spec.md` --- diff --git a/docs/specs-guide.md b/docs/specs-guide.md index a2936f81de..36f0d0b77e 100644 --- a/docs/specs-guide.md +++ b/docs/specs-guide.md @@ -2,12 +2,24 @@ ## Overview -Plot specifications are **library-agnostic descriptions** of what a plot should show. They live in `specs/` as Markdown files. +Plot specifications are **library-agnostic descriptions** of what a plot should show. They live in `plots/{spec-id}/spec.md`. 
**Key Principle**: A spec describes **WHAT** to visualize, not **HOW** to implement it. --- +## File Location + +Each spec lives in its own directory: +``` +plots/{spec-id}/ +├── spec.md ← Specification file +├── metadata.yaml ← Tags, generation info +└── implementations/ ← Library code +``` + +--- + ## Spec Format ```markdown @@ -17,30 +29,22 @@ Plot specifications are **library-agnostic descriptions** of what a plot should {2-4 sentences: What does this plot show? When should you use it?} -## Data +## Applications -**Required columns:** -- `{column}` (numeric) - {what it represents} -- `{column}` (categorical) - {what it represents} - -**Example:** -```python -import pandas as pd -data = pd.DataFrame({ - 'x': [1, 2, 3, 4, 5], - 'y': [2, 4, 3, 5, 4] -}) -``` +- {Realistic scenario 1 with domain context} +- {Realistic scenario 2} +- {Realistic scenario 3} -## Tags +## Data -{type}, {purpose}, {complexity} +- `{column}` ({type}) - {what it represents} +- `{column}` ({type}) - {what it represents} +- Size: {recommended data size} +- Example: {dataset reference or description} -## Use Cases +## Notes -- {Realistic scenario 1 with domain context} -- {Realistic scenario 2} -- {Realistic scenario 3} +- {Optional implementation hints or special requirements} ``` --- @@ -53,23 +57,22 @@ Format: `# {spec-id}: {Human-Readable Title}` Example: `# scatter-basic: Basic Scatter Plot` ### Description -2-4 sentences explaining: +2-4 sentences (prose text) explaining: - What the plot visualizes - When to use it - What makes it useful -### Data -- **Required columns** with types (numeric, categorical, datetime) -- **Example** (optional): Inline data, dataset reference, or omit for AI to generate +### Applications +3-4 realistic scenarios with domain context (finance, science, marketing, etc.) 
-### Tags -Comma-separated keywords for discovery: -- Type: scatter, bar, line, pie, histogram, box, area -- Purpose: comparison, distribution, correlation, trend -- Complexity: basic, intermediate, advanced +### Data +Simple list format: +- Required columns with types (numeric, categorical, datetime) +- Recommended data size +- Example dataset reference -### Use Cases -3-4 realistic scenarios with domain context (finance, science, marketing, etc.) +### Notes (Optional) +Implementation hints, visual preferences, or special requirements. --- @@ -94,7 +97,7 @@ Rules: 1. **User creates GitHub Issue** with plot idea 2. **Bot assigns spec ID** and validates request 3. **Maintainer adds `approved` label** -4. **AI generates spec file** in `specs/` +4. **AI generates spec file** in `plots/{spec-id}/spec.md` 5. **AI generates implementations** for all 9 libraries 6. **Quality check** runs automatically 7. **Auto-merge** if quality passes @@ -105,7 +108,7 @@ Rules: ### DO - Be specific about data requirements -- Use realistic use cases with domain context +- Use realistic applications with domain context - Keep description concise (2-4 sentences) ### DON'T @@ -115,4 +118,4 @@ Rules: --- -*See `specs/.template.md` for the full template.* +*See `prompts/templates/spec.md` for the full template.* diff --git a/docs/workflow.md b/docs/workflow.md index 0b3a5d6606..fc058cdb24 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -93,8 +93,9 @@ Approved issue triggers **feature branch creation**: 1. 
**Spec Creator** (`gen-create-spec.yml`) runs when `approved` label is added: - Creates feature branch: `plot/{spec-id}` - - Claude generates specification file: `specs/{spec-id}.md` - - Commits spec to feature branch + - Claude generates specification file: `plots/{spec-id}/spec.md` + - Creates metadata file: `plots/{spec-id}/metadata.yaml` + - Commits to feature branch - Dispatches code generation workflow ``` @@ -102,7 +103,8 @@ Main Issue (#53) + [approved] label ↓ gen-create-spec.yml ├─ Creates branch: plot/scatter-basic - ├─ Creates: specs/scatter-basic.md + ├─ Creates: plots/scatter-basic/spec.md + ├─ Creates: plots/scatter-basic/metadata.yaml └─ Dispatches: gen-new-plot.yml ``` @@ -194,7 +196,7 @@ Each plot request spawns **9 parallel sub-issues** (one per library), enabling: ```mermaid graph LR A[Main Issue
plot-request + approved] --> A1[Create Feature Branch
plot/spec-id] - A1 --> A2[Generate Spec
specs/spec-id.md] + A1 --> A2[Generate Spec
plots/spec-id/spec.md] A2 --> B[Orchestrator] B --> C1[Sub-Issue
matplotlib] B --> C2[Sub-Issue
seaborn] @@ -243,7 +245,7 @@ Each attempt is documented in the sub-issue with: ### Status - **PR:** #123 -- **File:** `plots/seaborn/heatmap/heatmap-correlation/default.py` +- **File:** `plots/heatmap-correlation/implementations/seaborn.py` - **Workflow:** [link] ``` @@ -276,7 +278,7 @@ graph TD C -->|Rejected| Z[End] D0 -->|Create plot/spec-id branch| D0a[Generate Spec File] - D0a -->|specs/spec-id.md| D[Flow 3: Parallel Generation] + D0a -->|plots/spec-id/spec.md| D[Flow 3: Parallel Generation] D -->|Create 8 Sub-Issues| D1[Orchestrator] D1 --> D2[8 Parallel Jobs] diff --git a/plots/altair/area/area-basic/default.py b/plots/area-basic/implementations/altair.py similarity index 100% rename from plots/altair/area/area-basic/default.py rename to plots/area-basic/implementations/altair.py diff --git a/plots/bokeh/varea/area-basic/default.py b/plots/area-basic/implementations/bokeh.py similarity index 100% rename from plots/bokeh/varea/area-basic/default.py rename to plots/area-basic/implementations/bokeh.py diff --git a/plots/highcharts/area/area-basic/default.py b/plots/area-basic/implementations/highcharts.py similarity index 100% rename from plots/highcharts/area/area-basic/default.py rename to plots/area-basic/implementations/highcharts.py diff --git a/plots/letsplot/area/area-basic/default.py b/plots/area-basic/implementations/letsplot.py similarity index 100% rename from plots/letsplot/area/area-basic/default.py rename to plots/area-basic/implementations/letsplot.py diff --git a/plots/matplotlib/fill_between/area-basic/default.py b/plots/area-basic/implementations/matplotlib.py similarity index 100% rename from plots/matplotlib/fill_between/area-basic/default.py rename to plots/area-basic/implementations/matplotlib.py diff --git a/plots/plotly/scatter/area-basic/default.py b/plots/area-basic/implementations/plotly.py similarity index 100% rename from plots/plotly/scatter/area-basic/default.py rename to plots/area-basic/implementations/plotly.py diff --git 
a/plots/plotnine/area/area-basic/default.py b/plots/area-basic/implementations/plotnine.py similarity index 100% rename from plots/plotnine/area/area-basic/default.py rename to plots/area-basic/implementations/plotnine.py diff --git a/plots/pygal/area/area-basic/default.py b/plots/area-basic/implementations/pygal.py similarity index 100% rename from plots/pygal/area/area-basic/default.py rename to plots/area-basic/implementations/pygal.py diff --git a/plots/seaborn/fill_between/area-basic/default.py b/plots/area-basic/implementations/seaborn.py similarity index 100% rename from plots/seaborn/fill_between/area-basic/default.py rename to plots/area-basic/implementations/seaborn.py diff --git a/plots/area-basic/metadata.yaml b/plots/area-basic/metadata.yaml new file mode 100644 index 0000000000..f30a078070 --- /dev/null +++ b/plots/area-basic/metadata.yaml @@ -0,0 +1,66 @@ +# Metadata for area-basic +# Auto-synced to PostgreSQL on push to main + +spec_id: area-basic +title: Basic Area Chart + +# Spec-level tracking +created: null +issue: null +suggested: pyplots +updates: [] + +tags: + plot_type: + - area + - filled + domain: + - statistics + - general + - timeseries + features: + - basic + - continuous + - cumulative + audience: + - beginner + data_type: + - numeric + - temporal +implementations: + matplotlib: + preview_url: null + current: null + history: [] + seaborn: + preview_url: null + current: null + history: [] + plotly: + preview_url: null + current: null + history: [] + bokeh: + preview_url: null + current: null + history: [] + altair: + preview_url: null + current: null + history: [] + plotnine: + preview_url: null + current: null + history: [] + pygal: + preview_url: null + current: null + history: [] + highcharts: + preview_url: null + current: null + history: [] + letsplot: + preview_url: null + current: null + history: [] diff --git a/specs/area-basic.md b/plots/area-basic/spec.md similarity index 69% rename from specs/area-basic.md rename to 
plots/area-basic/spec.md index b89910dc82..dc991bbbbc 100644 --- a/specs/area-basic.md +++ b/plots/area-basic/spec.md @@ -4,28 +4,16 @@ An area chart displaying a single data series as a filled region under a line. The filled area emphasizes magnitude and cumulative values over a sequence, making trends more visually impactful than simple line plots. Best suited for time series data where you want to highlight volume or accumulated quantities. -## Data - -**Required columns:** -- `x` (numeric/datetime) - sequential values for the horizontal axis (typically time) -- `y` (numeric) - values for the vertical axis representing magnitude - -**Example:** -```python -import pandas as pd -data = pd.DataFrame({ - 'month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], - 'sales': [120, 135, 148, 162, 175, 195, 210, 198, 185, 170, 158, 190] -}) -``` - -## Tags - -area, trend, timeseries, basic, 2d - -## Use Cases +## Applications - Visualizing website traffic volume over time - Showing cumulative sales or revenue trends across quarters - Tracking system resource usage (CPU, memory) over time - Displaying temperature variations throughout a day + +## Data + +- `x` (numeric/datetime) - sequential values for the horizontal axis (typically time) +- `y` (numeric) - values for the vertical axis representing magnitude +- Size: 12-50 points +- Example: monthly sales data over a year diff --git a/plots/altair/bar/bar-basic/default.py b/plots/bar-basic/implementations/altair.py similarity index 100% rename from plots/altair/bar/bar-basic/default.py rename to plots/bar-basic/implementations/altair.py diff --git a/plots/bokeh/vbar/bar-basic/default.py b/plots/bar-basic/implementations/bokeh.py similarity index 100% rename from plots/bokeh/vbar/bar-basic/default.py rename to plots/bar-basic/implementations/bokeh.py diff --git a/plots/highcharts/bar/bar-basic/default.py b/plots/bar-basic/implementations/highcharts.py similarity index 100% rename from 
plots/highcharts/bar/bar-basic/default.py rename to plots/bar-basic/implementations/highcharts.py diff --git a/plots/letsplot/bar/bar-basic/default.py b/plots/bar-basic/implementations/letsplot.py similarity index 100% rename from plots/letsplot/bar/bar-basic/default.py rename to plots/bar-basic/implementations/letsplot.py diff --git a/plots/matplotlib/bar/bar-basic/default.py b/plots/bar-basic/implementations/matplotlib.py similarity index 100% rename from plots/matplotlib/bar/bar-basic/default.py rename to plots/bar-basic/implementations/matplotlib.py diff --git a/plots/plotly/bar/bar-basic/default.py b/plots/bar-basic/implementations/plotly.py similarity index 100% rename from plots/plotly/bar/bar-basic/default.py rename to plots/bar-basic/implementations/plotly.py diff --git a/plots/plotnine/bar/bar-basic/default.py b/plots/bar-basic/implementations/plotnine.py similarity index 100% rename from plots/plotnine/bar/bar-basic/default.py rename to plots/bar-basic/implementations/plotnine.py diff --git a/plots/pygal/bar/bar-basic/default.py b/plots/bar-basic/implementations/pygal.py similarity index 100% rename from plots/pygal/bar/bar-basic/default.py rename to plots/bar-basic/implementations/pygal.py diff --git a/plots/seaborn/barplot/bar-basic/default.py b/plots/bar-basic/implementations/seaborn.py similarity index 100% rename from plots/seaborn/barplot/bar-basic/default.py rename to plots/bar-basic/implementations/seaborn.py diff --git a/plots/bar-basic/metadata.yaml b/plots/bar-basic/metadata.yaml new file mode 100644 index 0000000000..d03c54cb81 --- /dev/null +++ b/plots/bar-basic/metadata.yaml @@ -0,0 +1,66 @@ +# Metadata for bar-basic +# Auto-synced to PostgreSQL on push to main + +spec_id: bar-basic +title: Basic Bar Chart + +# Spec-level tracking +created: null +issue: null +suggested: pyplots +updates: [] + +tags: + plot_type: + - bar + - column + domain: + - statistics + - general + - business + features: + - basic + - categorical + - comparison + 
audience: + - beginner + data_type: + - categorical + - numeric +implementations: + matplotlib: + preview_url: null + current: null + history: [] + seaborn: + preview_url: null + current: null + history: [] + plotly: + preview_url: null + current: null + history: [] + bokeh: + preview_url: null + current: null + history: [] + altair: + preview_url: null + current: null + history: [] + plotnine: + preview_url: null + current: null + history: [] + pygal: + preview_url: null + current: null + history: [] + highcharts: + preview_url: null + current: null + history: [] + letsplot: + preview_url: null + current: null + history: [] diff --git a/specs/bar-basic.md b/plots/bar-basic/spec.md similarity index 72% rename from specs/bar-basic.md rename to plots/bar-basic/spec.md index 0d620a161b..7ee2aef1a2 100644 --- a/specs/bar-basic.md +++ b/plots/bar-basic/spec.md @@ -4,28 +4,16 @@ A fundamental vertical bar chart visualizing categorical data with numeric values. Each category is represented by a rectangular bar with height proportional to its value, making it ideal for comparing quantities across discrete groups. One of the most widely used chart types for categorical comparisons. 
-## Data - -**Required columns:** -- `category` (categorical) - labels for each bar on the x-axis -- `value` (numeric) - values determining bar heights - -**Example:** -```python -import pandas as pd -data = pd.DataFrame({ - 'category': ['Product A', 'Product B', 'Product C', 'Product D', 'Product E'], - 'value': [45, 78, 52, 91, 63] -}) -``` - -## Tags - -bar, comparison, categorical, basic, 1d - -## Use Cases +## Applications - Comparing sales figures across different product categories - Displaying survey response counts for multiple-choice questions - Showing population counts across different regions - Visualizing budget allocation across departments + +## Data + +- `category` (categorical) - labels for each bar on the x-axis +- `value` (numeric) - values determining bar heights +- Size: 3-15 categories +- Example: product sales or survey results diff --git a/plots/altair/boxplot/box-basic/default.py b/plots/box-basic/implementations/altair.py similarity index 100% rename from plots/altair/boxplot/box-basic/default.py rename to plots/box-basic/implementations/altair.py diff --git a/plots/bokeh/custom/box-basic/default.py b/plots/box-basic/implementations/bokeh.py similarity index 100% rename from plots/bokeh/custom/box-basic/default.py rename to plots/box-basic/implementations/bokeh.py diff --git a/plots/highcharts/boxplot/box-basic/default.py b/plots/box-basic/implementations/highcharts.py similarity index 100% rename from plots/highcharts/boxplot/box-basic/default.py rename to plots/box-basic/implementations/highcharts.py diff --git a/plots/letsplot/boxplot/box-basic/default.py b/plots/box-basic/implementations/letsplot.py similarity index 100% rename from plots/letsplot/boxplot/box-basic/default.py rename to plots/box-basic/implementations/letsplot.py diff --git a/plots/matplotlib/boxplot/box-basic/default.py b/plots/box-basic/implementations/matplotlib.py similarity index 100% rename from plots/matplotlib/boxplot/box-basic/default.py rename to 
plots/box-basic/implementations/matplotlib.py diff --git a/plots/plotly/box/box-basic/default.py b/plots/box-basic/implementations/plotly.py similarity index 100% rename from plots/plotly/box/box-basic/default.py rename to plots/box-basic/implementations/plotly.py diff --git a/plots/plotnine/boxplot/box-basic/default.py b/plots/box-basic/implementations/plotnine.py similarity index 100% rename from plots/plotnine/boxplot/box-basic/default.py rename to plots/box-basic/implementations/plotnine.py diff --git a/plots/pygal/box/box-basic/default.py b/plots/box-basic/implementations/pygal.py similarity index 100% rename from plots/pygal/box/box-basic/default.py rename to plots/box-basic/implementations/pygal.py diff --git a/plots/seaborn/boxplot/box-basic/default.py b/plots/box-basic/implementations/seaborn.py similarity index 100% rename from plots/seaborn/boxplot/box-basic/default.py rename to plots/box-basic/implementations/seaborn.py diff --git a/plots/box-basic/metadata.yaml b/plots/box-basic/metadata.yaml new file mode 100644 index 0000000000..071cadd952 --- /dev/null +++ b/plots/box-basic/metadata.yaml @@ -0,0 +1,68 @@ +# Metadata for box-basic +# Auto-synced to PostgreSQL on push to main + +spec_id: box-basic +title: Basic Box Plot + +# Spec-level tracking +created: null +issue: null +suggested: pyplots +updates: [] + +tags: + plot_type: + - box + - boxplot + - whisker + domain: + - statistics + - general + features: + - basic + - distribution + - quartiles + - outliers + audience: + - beginner + - intermediate + data_type: + - numeric + - categorical +implementations: + matplotlib: + preview_url: null + current: null + history: [] + seaborn: + preview_url: null + current: null + history: [] + plotly: + preview_url: null + current: null + history: [] + bokeh: + preview_url: null + current: null + history: [] + altair: + preview_url: null + current: null + history: [] + plotnine: + preview_url: null + current: null + history: [] + pygal: + preview_url: null + 
current: null + history: [] + highcharts: + preview_url: null + current: null + history: [] + letsplot: + preview_url: null + current: null + history: [] diff --git a/specs/box-basic.md b/plots/box-basic/spec.md similarity index 61% rename from specs/box-basic.md rename to plots/box-basic/spec.md index aed0dc9ad7..ba6ec886df 100644 --- a/specs/box-basic.md +++ b/plots/box-basic/spec.md @@ -4,35 +4,16 @@ A box-and-whisker plot comparing statistical distributions across multiple groups. Each box displays quartiles (Q1, median, Q3), whiskers extend to show the data range within 1.5×IQR, and outliers appear as individual points. Excellent for comparing distributions and identifying outliers without assuming normality. -## Data - -**Required columns:** -- `group` (categorical) - category labels for each box -- `value` (numeric) - values for distribution analysis - -**Example:** -```python -import pandas as pd -import numpy as np -np.random.seed(42) -data = pd.DataFrame({ - 'group': ['A']*50 + ['B']*50 + ['C']*50 + ['D']*50, - 'value': np.concatenate([ - np.random.normal(50, 10, 50), - np.random.normal(60, 15, 50), - np.random.normal(45, 8, 50), - np.random.normal(70, 20, 50) - ]) -}) -``` - -## Tags - -box, distribution, comparison, statistical, categorical - -## Use Cases +## Applications - Comparing performance metrics across different teams or departments - Analyzing price distributions across product categories - Examining test scores across different classes or schools - Visualizing response times across server regions + +## Data + +- `group` (categorical) - category labels for each box +- `value` (numeric) - values for distribution analysis +- Size: 3-8 groups, 30-100 values per group +- Example: performance data grouped by team or region diff --git a/plots/altair/bar/histogram-basic/default.py b/plots/histogram-basic/implementations/altair.py similarity index 100% rename from plots/altair/bar/histogram-basic/default.py rename to 
plots/histogram-basic/implementations/altair.py diff --git a/plots/bokeh/quad/histogram-basic/default.py b/plots/histogram-basic/implementations/bokeh.py similarity index 100% rename from plots/bokeh/quad/histogram-basic/default.py rename to plots/histogram-basic/implementations/bokeh.py diff --git a/plots/highcharts/histogram/histogram-basic/default.py b/plots/histogram-basic/implementations/highcharts.py similarity index 100% rename from plots/highcharts/histogram/histogram-basic/default.py rename to plots/histogram-basic/implementations/highcharts.py diff --git a/plots/letsplot/histogram/histogram-basic/default.py b/plots/histogram-basic/implementations/letsplot.py similarity index 100% rename from plots/letsplot/histogram/histogram-basic/default.py rename to plots/histogram-basic/implementations/letsplot.py diff --git a/plots/matplotlib/hist/histogram-basic/default.py b/plots/histogram-basic/implementations/matplotlib.py similarity index 100% rename from plots/matplotlib/hist/histogram-basic/default.py rename to plots/histogram-basic/implementations/matplotlib.py diff --git a/plots/plotly/histogram/histogram-basic/default.py b/plots/histogram-basic/implementations/plotly.py similarity index 100% rename from plots/plotly/histogram/histogram-basic/default.py rename to plots/histogram-basic/implementations/plotly.py diff --git a/plots/plotnine/histogram/histogram-basic/default.py b/plots/histogram-basic/implementations/plotnine.py similarity index 100% rename from plots/plotnine/histogram/histogram-basic/default.py rename to plots/histogram-basic/implementations/plotnine.py diff --git a/plots/pygal/histogram/histogram-basic/default.py b/plots/histogram-basic/implementations/pygal.py similarity index 100% rename from plots/pygal/histogram/histogram-basic/default.py rename to plots/histogram-basic/implementations/pygal.py diff --git a/plots/seaborn/histplot/histogram-basic/default.py b/plots/histogram-basic/implementations/seaborn.py similarity index 100% rename 
from plots/seaborn/histplot/histogram-basic/default.py rename to plots/histogram-basic/implementations/seaborn.py diff --git a/plots/histogram-basic/metadata.yaml b/plots/histogram-basic/metadata.yaml new file mode 100644 index 0000000000..9198ca3c63 --- /dev/null +++ b/plots/histogram-basic/metadata.yaml @@ -0,0 +1,65 @@ +# Metadata for histogram-basic +# Auto-synced to PostgreSQL on push to main + +spec_id: histogram-basic +title: Basic Histogram + +# Spec-level tracking +created: null +issue: null +suggested: pyplots +updates: [] + +tags: + plot_type: + - histogram + - distribution + domain: + - statistics + - general + features: + - basic + - distribution + - frequency + audience: + - beginner + data_type: + - numeric + - continuous +implementations: + matplotlib: + preview_url: null + current: null + history: [] + seaborn: + preview_url: null + current: null + history: [] + plotly: + preview_url: null + current: null + history: [] + bokeh: + preview_url: null + current: null + history: [] + altair: + preview_url: null + current: null + history: [] + plotnine: + preview_url: null + current: null + history: [] + pygal: + preview_url: null + current: null + history: [] + highcharts: + preview_url: null + current: null + history: [] + letsplot: + preview_url: null + current: null + history: [] diff --git a/specs/histogram-basic.md b/plots/histogram-basic/spec.md similarity index 70% rename from specs/histogram-basic.md rename to plots/histogram-basic/spec.md index 9456969efc..9e966f621b 100644 --- a/specs/histogram-basic.md +++ b/plots/histogram-basic/spec.md @@ -4,28 +4,15 @@ A histogram showing the frequency distribution of numeric data across bins. Each bin represents a range of values, with bar height indicating how many data points fall within that range. Ideal for understanding data spread, identifying patterns, and detecting outliers in continuous distributions. 
-## Data - -**Required columns:** -- `value` (numeric) - continuous values for distribution analysis - -**Example:** -```python -import pandas as pd -import numpy as np -np.random.seed(42) -data = pd.DataFrame({ - 'value': np.random.normal(100, 15, 500) # 500 values, mean=100, std=15 -}) -``` - -## Tags - -histogram, distribution, univariate, basic, statistical - -## Use Cases +## Applications - Analyzing customer purchase amount distribution across transactions - Visualizing test score distribution in educational assessments - Understanding response time distribution in system performance data - Examining income or salary distribution in demographic studies + +## Data + +- `value` (numeric) - continuous values for distribution analysis +- Size: 100-1000 points +- Example: normally distributed data (mean=100, std=15) diff --git a/plots/altair/line/line-basic/default.py b/plots/line-basic/implementations/altair.py similarity index 100% rename from plots/altair/line/line-basic/default.py rename to plots/line-basic/implementations/altair.py diff --git a/plots/bokeh/line/line-basic/default.py b/plots/line-basic/implementations/bokeh.py similarity index 100% rename from plots/bokeh/line/line-basic/default.py rename to plots/line-basic/implementations/bokeh.py diff --git a/plots/highcharts/line/line-basic/default.py b/plots/line-basic/implementations/highcharts.py similarity index 100% rename from plots/highcharts/line/line-basic/default.py rename to plots/line-basic/implementations/highcharts.py diff --git a/plots/letsplot/line/line-basic/default.py b/plots/line-basic/implementations/letsplot.py similarity index 100% rename from plots/letsplot/line/line-basic/default.py rename to plots/line-basic/implementations/letsplot.py diff --git a/plots/matplotlib/plot/line-basic/default.py b/plots/line-basic/implementations/matplotlib.py similarity index 100% rename from plots/matplotlib/plot/line-basic/default.py rename to plots/line-basic/implementations/matplotlib.py diff 
--git a/plots/plotly/line/line-basic/default.py b/plots/line-basic/implementations/plotly.py similarity index 100% rename from plots/plotly/line/line-basic/default.py rename to plots/line-basic/implementations/plotly.py diff --git a/plots/plotnine/line/line-basic/default.py b/plots/line-basic/implementations/plotnine.py similarity index 100% rename from plots/plotnine/line/line-basic/default.py rename to plots/line-basic/implementations/plotnine.py diff --git a/plots/pygal/line/line-basic/default.py b/plots/line-basic/implementations/pygal.py similarity index 100% rename from plots/pygal/line/line-basic/default.py rename to plots/line-basic/implementations/pygal.py diff --git a/plots/seaborn/lineplot/line-basic/default.py b/plots/line-basic/implementations/seaborn.py similarity index 100% rename from plots/seaborn/lineplot/line-basic/default.py rename to plots/line-basic/implementations/seaborn.py diff --git a/plots/line-basic/metadata.yaml b/plots/line-basic/metadata.yaml new file mode 100644 index 0000000000..f4f15c6cad --- /dev/null +++ b/plots/line-basic/metadata.yaml @@ -0,0 +1,66 @@ +# Metadata for line-basic +# Auto-synced to PostgreSQL on push to main + +spec_id: line-basic +title: Basic Line Chart + +# Spec-level tracking +created: null +issue: null +suggested: pyplots +updates: [] + +tags: + plot_type: + - line + - trend + domain: + - statistics + - general + - timeseries + features: + - basic + - continuous + - trend + audience: + - beginner + data_type: + - numeric + - temporal +implementations: + matplotlib: + preview_url: null + current: null + history: [] + seaborn: + preview_url: null + current: null + history: [] + plotly: + preview_url: null + current: null + history: [] + bokeh: + preview_url: null + current: null + history: [] + altair: + preview_url: null + current: null + history: [] + plotnine: + preview_url: null + current: null + history: [] + pygal: + preview_url: null + current: null + history: [] + highcharts: + preview_url: null + 
current: null + history: [] + letsplot: + preview_url: null + current: null + history: [] diff --git a/specs/line-basic.md b/plots/line-basic/spec.md similarity index 74% rename from specs/line-basic.md rename to plots/line-basic/spec.md index 278ff308c8..41eff3f0c4 100644 --- a/specs/line-basic.md +++ b/plots/line-basic/spec.md @@ -4,28 +4,16 @@ A fundamental line plot visualizing trends and changes over a continuous or sequential axis. Data points are connected by lines, making it ideal for time series data and showing progression or trends. The continuous line helps viewers perceive patterns and changes over the ordered sequence. -## Data - -**Required columns:** -- `x` (numeric/datetime) - values for the horizontal axis (typically time or sequence) -- `y` (numeric) - values for the vertical axis - -**Example:** -```python -import pandas as pd -data = pd.DataFrame({ - 'time': [1, 2, 3, 4, 5, 6, 7], - 'value': [10, 15, 13, 18, 22, 19, 25] -}) -``` - -## Tags - -line, trend, timeseries, basic, 2d - -## Use Cases +## Applications - Time series visualization of stock prices over months - Tracking website traffic over time - Displaying temperature changes throughout a day - Monitoring system performance metrics over time + +## Data + +- `x` (numeric/datetime) - values for the horizontal axis (typically time or sequence) +- `y` (numeric) - values for the vertical axis +- Size: 7-100 points +- Example: daily/monthly time series data diff --git a/plots/matplotlib/histogram/histogram-basic/default.py b/plots/matplotlib/histogram/histogram-basic/default.py deleted file mode 100644 index 180d0bfb15..0000000000 --- a/plots/matplotlib/histogram/histogram-basic/default.py +++ /dev/null @@ -1,112 +0,0 @@ -""" -histogram-basic: Basic Histogram -Implementation for: matplotlib -Variant: default -Python: 3.10+ -""" - -from typing import TYPE_CHECKING, Optional - -import matplotlib.pyplot as plt -import pandas as pd - - -if TYPE_CHECKING: - from matplotlib.figure import Figure - - 
-def create_plot( - data: pd.DataFrame, - column: str, - bins: int = 30, - color: str = "steelblue", - alpha: float = 0.8, - edgecolor: str = "black", - title: Optional[str] = None, - xlabel: Optional[str] = None, - ylabel: Optional[str] = None, - figsize: tuple[float, float] = (16, 9), - **kwargs, -) -> "Figure": - """ - Create a basic histogram showing the distribution of numeric data. - - Args: - data: Input DataFrame with required column - column: Column name for numeric values to visualize - bins: Number of bins for histogram (default: 30) - color: Bar color (default: "steelblue") - alpha: Transparency level 0.0-1.0 (default: 0.8) - edgecolor: Border color for bins (default: "black") - title: Plot title (default: None) - xlabel: Custom x-axis label (default: column name) - ylabel: Custom y-axis label (default: "Frequency") - figsize: Figure size as (width, height) (default: (16, 9)) - **kwargs: Additional parameters passed to ax.hist() - - Returns: - Matplotlib Figure object - - Raises: - ValueError: If data is empty - KeyError: If required column not found - ValueError: If column contains no numeric values - - Example: - >>> data = pd.DataFrame({'Values': [1.5, 2.1, 2.3, 3.2, 3.5, 4.1, 4.5]}) - >>> fig = create_plot(data, column='Values', bins=20) - """ - # Input validation - if data.empty: - raise ValueError("Data cannot be empty") - - # Check required column - if column not in data.columns: - available = ", ".join(data.columns) - raise KeyError(f"Column '{column}' not found. 
Available: {available}") - - # Check if column has numeric data - if not pd.api.types.is_numeric_dtype(data[column]): - raise ValueError(f"Column '{column}' must contain numeric values") - - # Remove NaN values for histogram - values = data[column].dropna() - - if len(values) == 0: - raise ValueError(f"Column '{column}' contains no valid numeric values") - - # Create figure - fig, ax = plt.subplots(figsize=figsize) - - # Create histogram - ax.hist(values, bins=bins, color=color, alpha=alpha, edgecolor=edgecolor, **kwargs) - - # Apply styling - ax.set_xlabel(xlabel or column) - ax.set_ylabel(ylabel or "Frequency") - ax.grid(True, axis="y", alpha=0.3, linestyle="--") - - # Title - if title: - ax.set_title(title, fontsize=14, fontweight="bold") - - # Tight layout to avoid label clipping - plt.tight_layout() - - return fig - - -if __name__ == "__main__": - # Sample data for testing - import numpy as np - - np.random.seed(42) - data = pd.DataFrame({"Values": np.random.normal(loc=100, scale=15, size=1000)}) - - # Create plot - fig = create_plot(data, column="Values", bins=40, title="Distribution of Values") - - # Save for inspection - plt.savefig("plot.png", dpi=300, bbox_inches="tight") - print("Plot saved to plot.png") - plt.show() diff --git a/plots/altair/arc/pie-basic/default.py b/plots/pie-basic/implementations/altair.py similarity index 100% rename from plots/altair/arc/pie-basic/default.py rename to plots/pie-basic/implementations/altair.py diff --git a/plots/bokeh/custom/pie-basic/default.py b/plots/pie-basic/implementations/bokeh.py similarity index 100% rename from plots/bokeh/custom/pie-basic/default.py rename to plots/pie-basic/implementations/bokeh.py diff --git a/plots/highcharts/pie/pie-basic/default.py b/plots/pie-basic/implementations/highcharts.py similarity index 100% rename from plots/highcharts/pie/pie-basic/default.py rename to plots/pie-basic/implementations/highcharts.py diff --git a/plots/letsplot/pie/pie-basic/default.py 
b/plots/pie-basic/implementations/letsplot.py similarity index 100% rename from plots/letsplot/pie/pie-basic/default.py rename to plots/pie-basic/implementations/letsplot.py diff --git a/plots/matplotlib/pie/pie-basic/default.py b/plots/pie-basic/implementations/matplotlib.py similarity index 100% rename from plots/matplotlib/pie/pie-basic/default.py rename to plots/pie-basic/implementations/matplotlib.py diff --git a/plots/plotly/pie/pie-basic/default.py b/plots/pie-basic/implementations/plotly.py similarity index 100% rename from plots/plotly/pie/pie-basic/default.py rename to plots/pie-basic/implementations/plotly.py diff --git a/plots/plotnine/pie/pie-basic/default.py b/plots/pie-basic/implementations/plotnine.py similarity index 100% rename from plots/plotnine/pie/pie-basic/default.py rename to plots/pie-basic/implementations/plotnine.py diff --git a/plots/pygal/pie/pie-basic/default.py b/plots/pie-basic/implementations/pygal.py similarity index 100% rename from plots/pygal/pie/pie-basic/default.py rename to plots/pie-basic/implementations/pygal.py diff --git a/plots/seaborn/pie/pie-basic/default.py b/plots/pie-basic/implementations/seaborn.py similarity index 100% rename from plots/seaborn/pie/pie-basic/default.py rename to plots/pie-basic/implementations/seaborn.py diff --git a/plots/pie-basic/metadata.yaml b/plots/pie-basic/metadata.yaml new file mode 100644 index 0000000000..519746c415 --- /dev/null +++ b/plots/pie-basic/metadata.yaml @@ -0,0 +1,67 @@ +# Metadata for pie-basic +# Auto-synced to PostgreSQL on push to main + +spec_id: pie-basic +title: Basic Pie Chart + +# Spec-level tracking +created: null +issue: null +suggested: pyplots +updates: [] + +tags: + plot_type: + - pie + - donut + - proportion + domain: + - statistics + - general + - business + features: + - basic + - proportional + - percentage + audience: + - beginner + data_type: + - categorical + - numeric +implementations: + matplotlib: + preview_url: null + current: null + history: [] + 
seaborn: + preview_url: null + current: null + history: [] + plotly: + preview_url: null + current: null + history: [] + bokeh: + preview_url: null + current: null + history: [] + altair: + preview_url: null + current: null + history: [] + plotnine: + preview_url: null + current: null + history: [] + pygal: + preview_url: null + current: null + history: [] + highcharts: + preview_url: null + current: null + history: [] + letsplot: + preview_url: null + current: null + history: [] diff --git a/specs/pie-basic.md b/plots/pie-basic/spec.md similarity index 71% rename from specs/pie-basic.md rename to plots/pie-basic/spec.md index 6d9666ef5e..41d736ea1f 100644 --- a/specs/pie-basic.md +++ b/plots/pie-basic/spec.md @@ -4,28 +4,16 @@ A circular chart divided into slices showing proportions of a whole. Each slice represents a category's share relative to the total, making it ideal for displaying composition and percentage breakdowns. Best suited for a small number of categories (3-7) where the focus is on part-to-whole relationships. 
-## Data - -**Required columns:** -- `category` (categorical) - names for each slice -- `value` (numeric) - values representing each category's proportion - -**Example:** -```python -import pandas as pd -data = pd.DataFrame({ - 'category': ['Product A', 'Product B', 'Product C', 'Product D', 'Other'], - 'value': [35, 25, 20, 15, 5] -}) -``` - -## Tags - -pie, composition, proportions, categorical, basic - -## Use Cases +## Applications - Market share distribution showing company percentages - Budget allocation breakdown across departments - Survey response analysis showing answer percentages - Portfolio composition showing asset allocation + +## Data + +- `category` (categorical) - names for each slice +- `value` (numeric) - values representing each category's proportion +- Size: 3-7 categories +- Example: market share or budget allocation data diff --git a/plots/plotly/scatter/line-basic/default.py b/plots/plotly/scatter/line-basic/default.py deleted file mode 100644 index 9ad9dfcff4..0000000000 --- a/plots/plotly/scatter/line-basic/default.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -line-basic: Basic Line Plot -Library: plotly -""" - -import plotly.graph_objects as go - - -# Data -time = [1, 2, 3, 4, 5, 6, 7] -value = [10, 15, 13, 18, 22, 19, 25] - -# Create plot -fig = go.Figure() -fig.add_trace(go.Scatter(x=time, y=value, mode="lines", line={"color": "#306998", "width": 3})) - -# Layout -fig.update_layout( - title={"text": "Basic Line Plot", "font": {"size": 36}}, - xaxis_title="Time", - yaxis_title="Value", - template="plotly_white", - showlegend=False, - xaxis={"title_font": {"size": 28}, "tickfont": {"size": 22}, "showgrid": True, "gridcolor": "rgba(0,0,0,0.1)"}, - yaxis={"title_font": {"size": 28}, "tickfont": {"size": 22}, "showgrid": True, "gridcolor": "rgba(0,0,0,0.1)"}, - margin={"l": 80, "r": 40, "t": 100, "b": 80}, -) - -# Save -fig.write_image("plot.png", width=1600, height=900, scale=3) diff --git a/plots/pygal/line/area-basic/default.py 
b/plots/pygal/line/area-basic/default.py deleted file mode 100644 index 7d906914bf..0000000000 --- a/plots/pygal/line/area-basic/default.py +++ /dev/null @@ -1,55 +0,0 @@ -""" -area-basic: Basic Area Chart -Library: pygal -""" - -import pygal -from pygal.style import Style - - -# Data - monthly sales data -months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] -sales = [120, 135, 148, 162, 175, 195, 210, 198, 185, 170, 158, 190] - -# Custom style with PyPlots color palette -custom_style = Style( - background="white", - plot_background="white", - foreground="#333333", - foreground_strong="#333333", - foreground_subtle="#666666", - opacity=0.7, - opacity_hover=0.9, - colors=("#306998",), # Python Blue from style guide - font_family="Arial, sans-serif", - title_font_size=48, - label_font_size=36, - major_label_font_size=36, - legend_font_size=36, -) - -# Create area chart (Line chart with fill=True) -chart = pygal.Line( - width=4800, - height=2700, - title="Monthly Sales Performance", - x_title="Month", - y_title="Sales (Units)", - style=custom_style, - fill=True, - show_legend=True, - legend_at_bottom=True, - show_x_guides=False, - show_y_guides=True, - dots_size=6, - stroke_style={"width": 4}, -) - -# Set x-axis labels -chart.x_labels = months - -# Add data series -chart.add("Sales", sales) - -# Save as PNG -chart.render_to_png("plot.png") diff --git a/plots/altair/point/scatter-basic/default.py b/plots/scatter-basic/implementations/altair.py similarity index 100% rename from plots/altair/point/scatter-basic/default.py rename to plots/scatter-basic/implementations/altair.py diff --git a/plots/bokeh/scatter/scatter-basic/default.py b/plots/scatter-basic/implementations/bokeh.py similarity index 100% rename from plots/bokeh/scatter/scatter-basic/default.py rename to plots/scatter-basic/implementations/bokeh.py diff --git a/plots/highcharts/scatter/scatter-basic/default.py b/plots/scatter-basic/implementations/highcharts.py 
similarity index 100% rename from plots/highcharts/scatter/scatter-basic/default.py rename to plots/scatter-basic/implementations/highcharts.py diff --git a/plots/letsplot/point/scatter-basic/default.py b/plots/scatter-basic/implementations/letsplot.py similarity index 100% rename from plots/letsplot/point/scatter-basic/default.py rename to plots/scatter-basic/implementations/letsplot.py diff --git a/plots/matplotlib/scatter/scatter-basic/default.py b/plots/scatter-basic/implementations/matplotlib.py similarity index 100% rename from plots/matplotlib/scatter/scatter-basic/default.py rename to plots/scatter-basic/implementations/matplotlib.py diff --git a/plots/plotly/scatter/scatter-basic/default.py b/plots/scatter-basic/implementations/plotly.py similarity index 100% rename from plots/plotly/scatter/scatter-basic/default.py rename to plots/scatter-basic/implementations/plotly.py diff --git a/plots/plotnine/point/scatter-basic/default.py b/plots/scatter-basic/implementations/plotnine.py similarity index 100% rename from plots/plotnine/point/scatter-basic/default.py rename to plots/scatter-basic/implementations/plotnine.py diff --git a/plots/pygal/xy/scatter-basic/default.py b/plots/scatter-basic/implementations/pygal.py similarity index 100% rename from plots/pygal/xy/scatter-basic/default.py rename to plots/scatter-basic/implementations/pygal.py diff --git a/plots/seaborn/scatterplot/scatter-basic/default.py b/plots/scatter-basic/implementations/seaborn.py similarity index 100% rename from plots/seaborn/scatterplot/scatter-basic/default.py rename to plots/scatter-basic/implementations/seaborn.py diff --git a/plots/scatter-basic/metadata.yaml b/plots/scatter-basic/metadata.yaml new file mode 100644 index 0000000000..d3566694d1 --- /dev/null +++ b/plots/scatter-basic/metadata.yaml @@ -0,0 +1,65 @@ +# Metadata for scatter-basic +# Auto-synced to PostgreSQL on push to main + +spec_id: scatter-basic +title: Basic Scatter Plot + +# Spec-level tracking +created: null 
+issue: null +suggested: pyplots +updates: [] + +tags: + plot_type: + - scatter + - point + domain: + - statistics + - general + features: + - basic + - 2d + - correlation + audience: + - beginner + data_type: + - numeric + - continuous +implementations: + matplotlib: + preview_url: null + current: null + history: [] + seaborn: + preview_url: null + current: null + history: [] + plotly: + preview_url: null + current: null + history: [] + bokeh: + preview_url: null + current: null + history: [] + altair: + preview_url: null + current: null + history: [] + plotnine: + preview_url: null + current: null + history: [] + pygal: + preview_url: null + current: null + history: [] + highcharts: + preview_url: null + current: null + history: [] + letsplot: + preview_url: null + current: null + history: [] diff --git a/specs/scatter-basic.md b/plots/scatter-basic/spec.md similarity index 74% rename from specs/scatter-basic.md rename to plots/scatter-basic/spec.md index 941cf8e98b..49223ff1ba 100644 --- a/specs/scatter-basic.md +++ b/plots/scatter-basic/spec.md @@ -4,28 +4,21 @@ A fundamental scatter plot visualizing the relationship between two continuous variables. Each data point is represented as a marker at its (x, y) coordinate, making it ideal for identifying correlations, clusters, and outliers. Optimized for handling many data points with appropriate transparency. 
+## Applications + +- Correlation analysis between height and weight in healthcare data +- Exploring relationship between advertising spend and sales revenue +- Identifying outliers in financial transaction data +- Visualizing the relationship between study hours and test scores + ## Data -**Required columns:** - `x` (numeric) - values for the horizontal axis - `y` (numeric) - values for the vertical axis +- Size: 50-500 points +- Example: random correlated data or tips dataset (total_bill, tip) -**Example:** -```python -import pandas as pd -data = pd.DataFrame({ - 'x': [1, 2, 3, 4, 5, 6, 7, 8], - 'y': [2.1, 4.3, 3.2, 5.8, 4.9, 7.2, 6.1, 8.5] -}) -``` - -## Tags +## Notes -scatter, correlation, basic, 2d, exploratory - -## Use Cases - -- Correlation analysis between height and weight in healthcare data -- Exploring relationship between advertising spend and sales revenue -- Identifying outliers in financial transaction data -- Visualizing the relationship between study hours and test scores +- Use alpha=0.7 for overlapping points +- Grid should be subtle (alpha=0.3) diff --git a/plots/altair/point/scatter-color-groups/default.py b/plots/scatter-color-groups/implementations/altair.py similarity index 100% rename from plots/altair/point/scatter-color-groups/default.py rename to plots/scatter-color-groups/implementations/altair.py diff --git a/plots/bokeh/scatter/scatter-color-groups/default.py b/plots/scatter-color-groups/implementations/bokeh.py similarity index 100% rename from plots/bokeh/scatter/scatter-color-groups/default.py rename to plots/scatter-color-groups/implementations/bokeh.py diff --git a/plots/highcharts/scatter/scatter-color-groups/default.py b/plots/scatter-color-groups/implementations/highcharts.py similarity index 100% rename from plots/highcharts/scatter/scatter-color-groups/default.py rename to plots/scatter-color-groups/implementations/highcharts.py diff --git a/plots/letsplot/point/scatter-color-groups/default.py 
b/plots/scatter-color-groups/implementations/letsplot.py similarity index 100% rename from plots/letsplot/point/scatter-color-groups/default.py rename to plots/scatter-color-groups/implementations/letsplot.py diff --git a/plots/matplotlib/scatter/scatter-color-groups/default.py b/plots/scatter-color-groups/implementations/matplotlib.py similarity index 100% rename from plots/matplotlib/scatter/scatter-color-groups/default.py rename to plots/scatter-color-groups/implementations/matplotlib.py diff --git a/plots/plotly/scatter/scatter-color-groups/default.py b/plots/scatter-color-groups/implementations/plotly.py similarity index 100% rename from plots/plotly/scatter/scatter-color-groups/default.py rename to plots/scatter-color-groups/implementations/plotly.py diff --git a/plots/plotnine/point/scatter-color-groups/default.py b/plots/scatter-color-groups/implementations/plotnine.py similarity index 100% rename from plots/plotnine/point/scatter-color-groups/default.py rename to plots/scatter-color-groups/implementations/plotnine.py diff --git a/plots/pygal/xy/scatter-color-groups/default.py b/plots/scatter-color-groups/implementations/pygal.py similarity index 100% rename from plots/pygal/xy/scatter-color-groups/default.py rename to plots/scatter-color-groups/implementations/pygal.py diff --git a/plots/seaborn/scatterplot/scatter-color-groups/default.py b/plots/scatter-color-groups/implementations/seaborn.py similarity index 100% rename from plots/seaborn/scatterplot/scatter-color-groups/default.py rename to plots/scatter-color-groups/implementations/seaborn.py diff --git a/plots/scatter-color-groups/metadata.yaml b/plots/scatter-color-groups/metadata.yaml new file mode 100644 index 0000000000..60b4581a6a --- /dev/null +++ b/plots/scatter-color-groups/metadata.yaml @@ -0,0 +1,67 @@ +# Metadata for scatter-color-groups +# Auto-synced to PostgreSQL on push to main + +spec_id: scatter-color-groups +title: Scatter Plot with Color Groups + +# Spec-level tracking +created: 
null +issue: null +suggested: pyplots +updates: [] + +tags: + plot_type: + - scatter + - point + domain: + - statistics + - general + features: + - grouped + - color-coded + - categorical + - legend + audience: + - beginner + - intermediate + data_type: + - numeric + - categorical +implementations: + matplotlib: + preview_url: null + current: null + history: [] + seaborn: + preview_url: null + current: null + history: [] + plotly: + preview_url: null + current: null + history: [] + bokeh: + preview_url: null + current: null + history: [] + altair: + preview_url: null + current: null + history: [] + plotnine: + preview_url: null + current: null + history: [] + pygal: + preview_url: null + current: null + history: [] + highcharts: + preview_url: null + current: null + history: [] + letsplot: + preview_url: null + current: null + history: [] diff --git a/specs/scatter-color-groups.md b/plots/scatter-color-groups/spec.md similarity index 77% rename from specs/scatter-color-groups.md rename to plots/scatter-color-groups/spec.md index 1451ddd518..580312a596 100644 --- a/specs/scatter-color-groups.md +++ b/plots/scatter-color-groups/spec.md @@ -4,27 +4,17 @@ A scatter plot where data points are colored by categorical groups, creating distinct "color clouds" for different categories. This visualization reveals both the relationship between two numeric variables and how that relationship differs across groups. Essential for comparing patterns and identifying group-specific clusters or trends. 
-## Data - -**Required columns:** -- `x` (numeric) - values for the horizontal axis -- `y` (numeric) - values for the vertical axis -- `group` (categorical) - category defining the color for each point - -**Example:** -```python -import seaborn as sns -data = sns.load_dataset('iris') -# Use: x='sepal_length', y='sepal_width', group='species' -``` - -## Tags - -scatter, groups, categorical, comparison, 2d, exploratory - -## Use Cases +## Applications - Comparing customer segments by spending behavior and visit frequency - Analyzing species characteristics in biological datasets - Visualizing regional differences in economic indicators - Exploring product performance across different market categories + +## Data + +- `x` (numeric) - values for the horizontal axis +- `y` (numeric) - values for the vertical axis +- `group` (categorical) - category defining the color for each point +- Size: 50-500 points, 2-6 groups +- Example: iris dataset (sepal_length, sepal_width, species) diff --git a/prompts/README.md b/prompts/README.md index 6889442c85..f85be07129 100644 --- a/prompts/README.md +++ b/prompts/README.md @@ -46,7 +46,7 @@ See `workflow-prompts/README.md` for variable reference and usage. 
${PROMPT_LIB} ## Spec - $(cat specs/${{ inputs.spec_id }}.md)" + $(cat plots/${{ inputs.spec_id }}/spec.md)" ``` ## Prompt Structure diff --git a/prompts/library/altair.md b/prompts/library/altair.md index 81f09d9af9..7e94368641 100644 --- a/prompts/library/altair.md +++ b/prompts/library/altair.md @@ -66,15 +66,7 @@ chart = chart.interactive() .encode(tooltip=['col_x', 'col_y']) ``` -## Folder-Name +## Output File -`plots/altair/{mark_type}/` - -| Mark | Folder | -|------|--------| -| `mark_point()` | `point/` | -| `mark_line()` | `line/` | -| `mark_bar()` | `bar/` | -| `mark_boxplot()` | `boxplot/` | -| `mark_rect()` | `rect/` | +`plots/{spec-id}/implementations/altair.py` diff --git a/prompts/library/bokeh.md b/prompts/library/bokeh.md index 0cc3385606..0d6b7ae37e 100644 --- a/prompts/library/bokeh.md +++ b/prompts/library/bokeh.md @@ -63,15 +63,7 @@ p.yaxis.axis_label = y_label # Grid: AI discretion ``` -## Folder-Name +## Output File -`plots/bokeh/{glyph_method}/` - -| Method | Folder | -|--------|--------| -| `p.scatter()` | `scatter/` | -| `p.line()` | `line/` | -| `p.vbar()` | `vbar/` | -| `p.hbar()` | `hbar/` | -| Custom (no native) | `custom/` | +`plots/{spec-id}/implementations/bokeh.py` diff --git a/prompts/library/highcharts.md b/prompts/library/highcharts.md index acd306953c..60ecc7aa10 100644 --- a/prompts/library/highcharts.md +++ b/prompts/library/highcharts.md @@ -123,17 +123,9 @@ chart.options.chart = { } ``` -## Folder-Name +## Output File -`plots/highcharts/{series_type}/` - -| Series | Folder | -|--------|--------| -| `ScatterSeries` | `scatter/` | -| `LineSeries` | `line/` | -| `ColumnSeries` | `bar/` | -| `BarSeries` | `bar/` | -| `BoxPlotSeries` | `boxplot/` | +`plots/{spec-id}/implementations/highcharts.py` ## Common Pitfalls diff --git a/prompts/library/letsplot.md b/prompts/library/letsplot.md index ff10ac2e26..5d5352e3b1 100644 --- a/prompts/library/letsplot.md +++ b/prompts/library/letsplot.md @@ -95,18 +95,9 @@ geom_density() # 
Density + facet_grid(x='row_var', y='col_var') ``` -## Folder Name - -`plots/letsplot/{geom}/` - -| Geom | Folder | -|------|--------| -| `geom_point()` | `point/` | -| `geom_line()` | `line/` | -| `geom_bar()` | `bar/` | -| `geom_boxplot()` | `boxplot/` | -| `geom_histogram()` | `histogram/` | -| `geom_tile()` | `tile/` | +## Output File + +`plots/{spec-id}/implementations/letsplot.py` ## Key Differences from plotnine diff --git a/prompts/library/matplotlib.md b/prompts/library/matplotlib.md index b62c7b191e..cc2ee6a52d 100644 --- a/prompts/library/matplotlib.md +++ b/prompts/library/matplotlib.md @@ -54,17 +54,7 @@ ax.boxplot(data, labels=group_names) # Wrong ax.boxplot(data, tick_labels=group_names) # Right ``` -## Folder Name - -`plots/matplotlib/{plot_function}/` - -| Function | Folder | -|----------|--------| -| `ax.scatter()` | `scatter/` | -| `ax.plot()` | `plot/` | -| `ax.bar()` | `bar/` | -| `ax.boxplot()` | `boxplot/` | -| `ax.hist()` | `hist/` | -| `ax.imshow()` | `imshow/` | -| `ax.pie()` | `pie/` | +## Output File + +`plots/{spec-id}/implementations/matplotlib.py` diff --git a/prompts/library/plotly.md b/prompts/library/plotly.md index 5905026aa4..56acdd3900 100644 --- a/prompts/library/plotly.md +++ b/prompts/library/plotly.md @@ -44,16 +44,7 @@ fig.write_image('plot.png', width=1600, height=900, scale=3) Plotly is interactive by default (hover, zoom, pan). For static outputs → `write_image()`. 
-## Folder-Name - -`plots/plotly/{trace_type}/` - -| Trace | Folder | -|-------|--------| -| `go.Scatter` | `scatter/` | -| `go.Bar` | `bar/` | -| `go.Box` | `box/` | -| `go.Heatmap` | `heatmap/` | -| `go.Scatter3d` | `scatter3d/` | -| `go.Candlestick` | `candlestick/` | +## Output File + +`plots/{spec-id}/implementations/plotly.py` diff --git a/prompts/library/plotnine.md b/prompts/library/plotnine.md index 0a87091d75..1a2ae33ad2 100644 --- a/prompts/library/plotnine.md +++ b/prompts/library/plotnine.md @@ -72,14 +72,7 @@ geom_histogram() # Histogram geom_tile() # Heatmap ``` -## Folder-Name +## Output File -`plots/plotnine/{geom}/` - -| Geom | Folder | -|------|--------| -| `geom_point()` | `point/` | -| `geom_line()` | `line/` | -| `geom_bar()` | `bar/` | -| `geom_boxplot()` | `boxplot/` | +`plots/{spec-id}/implementations/plotnine.py` diff --git a/prompts/library/pygal.md b/prompts/library/pygal.md index da92387daf..2f00566979 100644 --- a/prompts/library/pygal.md +++ b/prompts/library/pygal.md @@ -76,15 +76,7 @@ chart = pygal.Bar( ) ``` -## Folder-Name +## Output File -`plots/pygal/{chart_type}/` - -| Type | Folder | -|-----|--------| -| `pygal.Bar()` | `bar/` | -| `pygal.Line()` | `line/` | -| `pygal.XY()` | `xy/` | -| `pygal.Pie()` | `pie/` | -| `pygal.Box()` | `box/` | +`plots/{spec-id}/implementations/pygal.py` diff --git a/prompts/library/seaborn.md b/prompts/library/seaborn.md index 3f1fc48a34..220e94e395 100644 --- a/prompts/library/seaborn.md +++ b/prompts/library/seaborn.md @@ -57,16 +57,7 @@ palette='Blues' palette='RdBu' ``` -## Folder Name - -`plots/seaborn/{seaborn_function}/` - -| Function | Folder | -|----------|--------| -| `sns.scatterplot()` | `scatterplot/` | -| `sns.lineplot()` | `lineplot/` | -| `sns.barplot()` | `barplot/` | -| `sns.boxplot()` | `boxplot/` | -| `sns.heatmap()` | `heatmap/` | -| `sns.violinplot()` | `violinplot/` | +## Output File + +`plots/{spec-id}/implementations/seaborn.py` diff --git a/prompts/plot-generator.md 
b/prompts/plot-generator.md index fdaa5770c7..d836d621d0 100644 --- a/prompts/plot-generator.md +++ b/prompts/plot-generator.md @@ -10,7 +10,7 @@ Create a Python script for the specified plot type and library. The code should ## Input -1. **Spec**: Markdown specification from `specs/{spec-id}.md` +1. **Spec**: Markdown specification from `plots/{spec-id}/spec.md` 2. **Library**: matplotlib, seaborn, plotly, bokeh, altair, plotnine, pygal, highcharts, or letsplot 3. **Library Rules**: Specific rules from `prompts/library/{library}.md` diff --git a/prompts/spec-id-generator.md b/prompts/spec-id-generator.md index 5de5628698..457c33f64b 100644 --- a/prompts/spec-id-generator.md +++ b/prompts/spec-id-generator.md @@ -14,7 +14,7 @@ You analyze plot requests from GitHub Issues and assign meaningful, unique spec 1. **Issue Title**: The title of the GitHub issue 2. **Issue Body**: The description/request from the issue -3. **Existing Specs**: All files in `specs/` directory +3. **Existing Specs**: All directories in `plots/` (each has a spec.md) ## Output @@ -28,7 +28,7 @@ Post a comment on the issue with ONE of these formats: This plot request appears to be a duplicate of an existing spec: **Existing Spec:** `{existing-spec-id}` -**File:** `specs/{existing-spec-id}.md` +**Directory:** `plots/{existing-spec-id}/` **Similarity:** {brief explanation of why it's a duplicate} @@ -43,7 +43,7 @@ Please review the existing spec. If you believe this is different, please update This request is very similar to an existing spec: **Existing Spec:** `{existing-spec-id}` -**File:** `specs/{existing-spec-id}.md` +**Directory:** `plots/{existing-spec-id}/` **Differences:** {brief explanation} @@ -68,14 +68,14 @@ Add label `approved` to begin generation. Update requests have `[update]` or `[update:library]` in the title (e.g., `[update:highcharts] line-basic`). 1. Extract the spec-id from the title (the part after the update marker) -2. Verify the spec exists in `specs/` +2. 
Verify the spec exists in `plots/` 3. Post comment and update title: ```markdown ## 🔄 Update Request: `{spec-id}` **Existing Spec:** `{spec-id}` -**File:** `specs/{spec-id}.md` +**Directory:** `plots/{spec-id}/` **Scope:** {all libraries OR specific library from [update:library]} Add label `approved` to trigger regeneration. @@ -148,8 +148,8 @@ New plot type → New spec OK - If no: continue with new spec analysis 2. **Read all existing specs** - - List files in `specs/` (excluding templates) - - Read each to understand what exists + - List directories in `plots/` (each has a spec.md) + - Read each spec.md to understand what exists 3. **Analyze the request** - What plot type? (scatter, bar, line, etc.) diff --git a/prompts/spec-validator.md b/prompts/spec-validator.md index 7f4e3aed06..29670a99c4 100644 --- a/prompts/spec-validator.md +++ b/prompts/spec-validator.md @@ -10,7 +10,7 @@ Validate the spec file and provide feedback on missing or problematic elements. ## Input -Markdown specification from `specs/{spec-id}.md` +Markdown specification from `plots/{spec-id}/spec.md` ## Output diff --git a/prompts/templates/metadata.yaml b/prompts/templates/metadata.yaml new file mode 100644 index 0000000000..3d24201101 --- /dev/null +++ b/prompts/templates/metadata.yaml @@ -0,0 +1,109 @@ +# Metadata for {spec-id} +# Auto-synced to PostgreSQL on push to main + +spec_id: {spec-id} +title: {Title} + +# Spec-level tracking +created: null # ISO 8601 timestamp +issue: null # GitHub issue number +suggested: null # GitHub username or 'pyplots' for seed plots +updates: [] # List of spec updates [{date, issue, changes}] + +tags: + plot_type: + - {type} # scatter, bar, line, pie, histogram, box, area, etc. + domain: + - general # statistics, science, business, finance, etc. + features: + - basic # basic, grouped, stacked, animated, interactive, etc. 
+ audience: + - beginner # beginner, intermediate, advanced + data_type: + - numeric # numeric, categorical, datetime, continuous + +# Implementation metadata per library +# GCS URLs: https://storage.googleapis.com/pyplots-images/plots/{spec-id}/{library}/latest.png +# Interactive libraries also have .html: plotly, bokeh, altair, highcharts, pygal, letsplot +implementations: + matplotlib: + preview_url: null # Set after first successful merge to main + current: # Current live version + version: null # Integer: 0, 1, 2, ... + date: null # ISO 8601 timestamp + issue: null # GitHub issue number + generated_by: null # Model ID (e.g., claude-opus-4-5-20251101) + quality_score: null # 0-100 + history: [] # Previous versions (same fields as current) + seaborn: + preview_url: null + current: + version: null + date: null + issue: null + generated_by: null + quality_score: null + history: [] + plotly: + preview_url: null + current: + version: null + date: null + issue: null + generated_by: null + quality_score: null + history: [] + bokeh: + preview_url: null + current: + version: null + date: null + issue: null + generated_by: null + quality_score: null + history: [] + altair: + preview_url: null + current: + version: null + date: null + issue: null + generated_by: null + quality_score: null + history: [] + plotnine: + preview_url: null + current: + version: null + date: null + issue: null + generated_by: null + quality_score: null + history: [] + pygal: + preview_url: null + current: + version: null + date: null + issue: null + generated_by: null + quality_score: null + history: [] + highcharts: + preview_url: null + current: + version: null + date: null + issue: null + generated_by: null + quality_score: null + history: [] + letsplot: + preview_url: null + current: + version: null + date: null + issue: null + generated_by: null + quality_score: null + history: [] diff --git a/prompts/templates/spec.md b/prompts/templates/spec.md new file mode 100644 index 
0000000000..0ef9016219 --- /dev/null +++ b/prompts/templates/spec.md @@ -0,0 +1,22 @@ +# {spec-id}: {Title} + +## Description + +{2-4 sentences describing what this plot visualizes and when to use it. What makes it useful? What insights does it reveal?} + +## Applications + +- {Realistic scenario with domain context} +- {Another use case} +- {Third use case} + +## Data + +- `{column_name}` ({type}) - {what this column represents} +- `{column_name}` ({type}) - {what this column represents} +- Size: {recommended data size, e.g., 50-500 points} +- Example: {dataset reference or inline data description} + +## Notes + +- {Optional implementation hints, special requirements, or visual preferences} diff --git a/prompts/workflow-prompts/README.md b/prompts/workflow-prompts/README.md index 6eba32aa92..8424f3ec62 100644 --- a/prompts/workflow-prompts/README.md +++ b/prompts/workflow-prompts/README.md @@ -34,7 +34,7 @@ These prompts use `${VARIABLE}` placeholders that must be substituted before use | `${PR_NUMBER}` | Pull request number | `42` | | `${SUB_ISSUE_NUMBER}` | Library-specific sub-issue | `100` | | `${MAIN_ISSUE_NUMBER}` | Parent issue number | `99` | -| `${PLOT_FILE}` | Path to implementation file | `plots/matplotlib/scatter/scatter-basic/default.py` | +| `${PLOT_FILE}` | Path to implementation file | `plots/scatter-basic/implementations/matplotlib.py` | | `${CURRENT_CODE}` | Current implementation code | Python source | | `${SPEC_CONTENT}` | Spec file contents | Markdown | | `${LIBRARY_RULES}` | Library-specific rules | Markdown | diff --git a/prompts/workflow-prompts/ai-quality-review.md b/prompts/workflow-prompts/ai-quality-review.md index d7927fa76f..65bd0232e3 100644 --- a/prompts/workflow-prompts/ai-quality-review.md +++ b/prompts/workflow-prompts/ai-quality-review.md @@ -13,12 +13,12 @@ Evaluate if the **${LIBRARY}** implementation matches the specification for `${S ## Your Task ### 1. 
Read the Spec File -`specs/${SPEC_ID}.md` +`plots/${SPEC_ID}/spec.md` - Note all quality criteria listed - Understand the expected visual output ### 2. Read the Implementation -`plots/${LIBRARY}/*/${SPEC_ID}/default.py` +`plots/${SPEC_ID}/implementations/${LIBRARY}.py` ### 3. Read Library-Specific Rules `prompts/library/${LIBRARY}.md` diff --git a/prompts/workflow-prompts/generate-implementation.md b/prompts/workflow-prompts/generate-implementation.md index 11cbed244f..2c6e5aceb0 100644 --- a/prompts/workflow-prompts/generate-implementation.md +++ b/prompts/workflow-prompts/generate-implementation.md @@ -11,7 +11,7 @@ You are generating ONLY the **${LIBRARY}** implementation. Focus exclusively on 1. `prompts/plot-generator.md` - Base generation rules 2. `prompts/quality-criteria.md` - Quality requirements 3. `prompts/library/${LIBRARY}.md` - Library-specific rules -4. `specs/${SPEC_ID}.md` - The specification +4. `plots/${SPEC_ID}/spec.md` - The specification ### Step 2: Check for Previous Attempts @@ -21,17 +21,15 @@ ${PREVIOUS_ATTEMPTS_CONTEXT} Create the implementation file at the correct path: ``` -plots/${LIBRARY}/{plot_type}/${SPEC_ID}/default.py +plots/${SPEC_ID}/implementations/${LIBRARY}.py ``` -Determine `{plot_type}` from the spec (e.g., scatter, bar, line, heatmap). - ### Step 4: Test the Implementation Run the implementation to verify it works: ```bash source .venv/bin/activate -MPLBACKEND=Agg python plots/${LIBRARY}/{plot_type}/${SPEC_ID}/default.py +MPLBACKEND=Agg python plots/${SPEC_ID}/implementations/${LIBRARY}.py ``` ### Step 5: Create PR @@ -50,7 +48,7 @@ Implements `${SPEC_ID}` for **${LIBRARY}** library. 
**Attempt:** ${ATTEMPT}/3 ## Implementation -- `plots/${LIBRARY}/{plot_type}/${SPEC_ID}/default.py` +- `plots/${SPEC_ID}/implementations/${LIBRARY}.py` ``` ## Important Notes diff --git a/scripts/sync_to_postgres.py b/scripts/sync_to_postgres.py deleted file mode 100644 index 633806d35b..0000000000 --- a/scripts/sync_to_postgres.py +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/env python3 -""" -Sync specs and implementations from repository to PostgreSQL. - -This script is run by GitHub Actions on push to main branch. -It ensures the database only contains data for code that is actually in main. -""" - -import asyncio -import logging -import os -import re -import sys -from pathlib import Path - - -# Add project root to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from dotenv import load_dotenv - - -load_dotenv() - -from sqlalchemy import delete, select # noqa: E402 -from sqlalchemy.dialects.postgresql import insert # noqa: E402 -from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine # noqa: E402 - -from core.database import LIBRARIES_SEED, Implementation, Library, Spec # noqa: E402 - - -# Configuration -BASE_DIR = Path(__file__).parent.parent -SPECS_DIR = BASE_DIR / "specs" -PLOTS_DIR = BASE_DIR / "plots" -GCS_BUCKET = os.getenv("GCS_BUCKET", "pyplots-images") -DATABASE_URL = os.getenv("DATABASE_URL", "") - -# Logging -logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") -logger = logging.getLogger(__name__) - - -def parse_spec_markdown(file_path: Path) -> dict: - """ - Parse a spec markdown file and extract metadata. 
- - Args: - file_path: Path to the .md file - - Returns: - Dict with id, title, description, data_requirements, tags - """ - content = file_path.read_text(encoding="utf-8") - spec_id = file_path.stem - - # Parse title from first heading: "# scatter-basic: Basic Scatter Plot" - title_match = re.search(r"^#\s+[\w-]+:\s*(.+)$", content, re.MULTILINE) - title = title_match.group(1).strip() if title_match else spec_id - - # Parse description section - description = "" - desc_match = re.search(r"## Description\s*\n(.+?)(?=\n##|\Z)", content, re.DOTALL) - if desc_match: - description = desc_match.group(1).strip() - - # Parse data requirements section - data_requirements = [] - data_match = re.search(r"## Data\s*\n(.+?)(?=\n##|\Z)", content, re.DOTALL) - if data_match: - data_section = data_match.group(1) - # Extract required columns: - `x` (numeric) - description - for match in re.finditer(r"-\s+`(\w+)`\s+\((\w+)\)\s*-?\s*(.+)?", data_section): - data_requirements.append( - {"name": match.group(1), "type": match.group(2), "description": (match.group(3) or "").strip()} - ) - - # Parse tags section - tags = [] - tags_match = re.search(r"## Tags\s*\n(.+?)(?=\n##|\Z)", content, re.DOTALL) - if tags_match: - tags_text = tags_match.group(1).strip() - tags = [t.strip() for t in tags_text.split(",") if t.strip()] - - return { - "id": spec_id, - "title": title, - "description": description, - "data_requirements": data_requirements, - "tags": tags, - } - - -def scan_implementations() -> list[dict]: - """ - Scan the plots directory for all implementations. 
- - Returns: - List of dicts with spec_id, library_id, plot_function, variant, file_path - """ - implementations = [] - excluded_specs = {".template", "VERSIONING"} - - if not PLOTS_DIR.exists(): - logger.warning(f"Plots directory not found: {PLOTS_DIR}") - return implementations - - # Pattern: plots/{library}/{plot_function}/{spec_id}/{variant}.py - for py_file in PLOTS_DIR.rglob("*.py"): - # Skip __pycache__ and other non-implementation files - if "__pycache__" in str(py_file) or py_file.name.startswith("_"): - continue - - parts = py_file.relative_to(PLOTS_DIR).parts - if len(parts) >= 4: - library = parts[0] - plot_function = parts[1] - spec_id = parts[2] - variant = py_file.stem # "default" or "ggplot_style" - - if spec_id in excluded_specs: - continue - - # Verify the spec exists - spec_file = SPECS_DIR / f"{spec_id}.md" - if not spec_file.exists(): - logger.debug(f"Spec not found for implementation: {spec_id}") - continue - - file_path = f"plots/{library}/{plot_function}/{spec_id}/{variant}.py" - implementations.append( - { - "spec_id": spec_id, - "library_id": library, - "plot_function": plot_function, - "variant": variant, - "file_path": file_path, - } - ) - - return implementations - - -def get_gcs_preview_url(spec_id: str, library: str, variant: str = "default") -> str | None: - """ - Get the GCS preview URL for an implementation. - - The URL pattern is: gs://{bucket}/plots/{spec_id}/{library}/{variant}/v{timestamp}.png - We return the public URL format. - - Args: - spec_id: The specification ID - library: The library name - variant: The variant name (default: "default") - - Returns: - Public GCS URL or None if not available - """ - # We construct the base path - the actual latest file will be determined - # by the API when serving images. For now, we store the base pattern. - # The gen-preview workflow uploads with timestamps, so we use the pattern. 
- base_url = f"https://storage.googleapis.com/{GCS_BUCKET}/plots/{spec_id}/{library}/{variant}/" - return base_url - - -async def sync_to_database(session: AsyncSession, specs: list[dict], implementations: list[dict]) -> dict: - """ - Sync specs and implementations to the database. - - Performs upserts and removes entries that no longer exist in the repo. - - Args: - session: Database session - specs: List of spec dictionaries - implementations: List of implementation dictionaries - - Returns: - Dict with counts of synced/removed items - """ - stats = {"specs_synced": 0, "specs_removed": 0, "impls_synced": 0, "impls_removed": 0} - - # Ensure libraries exist - for lib_data in LIBRARIES_SEED: - stmt = insert(Library).values(**lib_data).on_conflict_do_nothing(index_elements=["id"]) - await session.execute(stmt) - - # Upsert specs - spec_ids = set() - for spec_data in specs: - spec_ids.add(spec_data["id"]) - stmt = ( - insert(Spec) - .values(**spec_data) - .on_conflict_do_update( - index_elements=["id"], - set_={ - "title": spec_data["title"], - "description": spec_data["description"], - "data_requirements": spec_data["data_requirements"], - "tags": spec_data["tags"], - }, - ) - ) - await session.execute(stmt) - stats["specs_synced"] += 1 - - # Remove specs that no longer exist in repo - result = await session.execute(select(Spec.id).where(Spec.id.notin_(spec_ids))) - removed_spec_ids = [row[0] for row in result.fetchall()] - if removed_spec_ids: - await session.execute(delete(Spec).where(Spec.id.in_(removed_spec_ids))) - stats["specs_removed"] = len(removed_spec_ids) - logger.info(f"Removed {len(removed_spec_ids)} specs no longer in repo") - - # Upsert implementations - impl_keys = set() - for impl_data in implementations: - key = (impl_data["spec_id"], impl_data["library_id"], impl_data["variant"]) - impl_keys.add(key) - - # Add preview URL - impl_data["preview_url"] = get_gcs_preview_url( - impl_data["spec_id"], impl_data["library_id"], impl_data["variant"] - ) - - 
stmt = ( - insert(Implementation) - .values(**impl_data) - .on_conflict_do_update( - constraint="uq_implementation", - set_={ - "plot_function": impl_data["plot_function"], - "file_path": impl_data["file_path"], - "preview_url": impl_data["preview_url"], - }, - ) - ) - await session.execute(stmt) - stats["impls_synced"] += 1 - - # Remove implementations that no longer exist in repo - result = await session.execute(select(Implementation.spec_id, Implementation.library_id, Implementation.variant)) - existing_impls = [(row[0], row[1], row[2]) for row in result.fetchall()] - - removed_impls = [impl for impl in existing_impls if impl not in impl_keys] - if removed_impls: - for spec_id, library_id, variant in removed_impls: - await session.execute( - delete(Implementation).where( - Implementation.spec_id == spec_id, - Implementation.library_id == library_id, - Implementation.variant == variant, - ) - ) - stats["impls_removed"] = len(removed_impls) - logger.info(f"Removed {len(removed_impls)} implementations no longer in repo") - - await session.commit() - return stats - - -async def main() -> int: - """Main entry point for the sync script.""" - if not DATABASE_URL: - logger.error("DATABASE_URL environment variable not set") - return 1 - - logger.info("Starting sync to PostgreSQL...") - logger.info(f"Specs directory: {SPECS_DIR}") - logger.info(f"Plots directory: {PLOTS_DIR}") - - # Parse all specs - specs = [] - excluded = {".template", "VERSIONING"} - for spec_file in SPECS_DIR.glob("*.md"): - if spec_file.stem in excluded: - continue - try: - spec_data = parse_spec_markdown(spec_file) - specs.append(spec_data) - logger.debug(f"Parsed spec: {spec_data['id']}") - except Exception as e: - logger.error(f"Failed to parse {spec_file}: {e}") - - logger.info(f"Found {len(specs)} specs") - - # Scan implementations - implementations = scan_implementations() - logger.info(f"Found {len(implementations)} implementations") - - # Create database connection - engine = 
create_async_engine(DATABASE_URL, pool_pre_ping=True) - async_session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) - - try: - async with async_session() as session: - stats = await sync_to_database(session, specs, implementations) - - logger.info("Sync completed successfully!") - logger.info(f" Specs synced: {stats['specs_synced']}, removed: {stats['specs_removed']}") - logger.info(f" Implementations synced: {stats['impls_synced']}, removed: {stats['impls_removed']}") - return 0 - - except Exception as e: - logger.error(f"Sync failed: {e}") - return 1 - - finally: - await engine.dispose() - - -if __name__ == "__main__": - exit_code = asyncio.run(main()) - sys.exit(exit_code) diff --git a/specs/.template.md b/specs/.template.md deleted file mode 100644 index e2869d129d..0000000000 --- a/specs/.template.md +++ /dev/null @@ -1,37 +0,0 @@ -# {spec-id}: {Title} - -## Description - -{2-4 sentences describing what this plot visualizes and when to use it. -What makes it useful? What insights does it reveal?} - -## Data - -**Required columns:** -- `{column_name}` (numeric) - {what this column represents} -- `{column_name}` (categorical) - {what this column represents} - -**Example:** *(optional - provide inline data, dataset reference, or omit for AI to generate)* -```python -# Option A: Inline data for simple plots -data = pd.DataFrame({ - "category": ["A", "B", "C", "D"], - "value": [25, 40, 30, 35] -}) - -# Option B: Standard dataset reference -import seaborn as sns -data = sns.load_dataset('tips') - -# Option C: Omit this section - AI generates appropriate sample data -``` - -## Tags - -{type}, {purpose}, {complexity} - -## Use Cases - -- {Realistic scenario with domain context} -- {Another use case} -- {Third use case}