In [None]:
import polars as pl
import sys
import os

# os.path.abspath('') resolves to the current working directory, so this adds
# the *parent* of the working directory to the import search path.
# NOTE(review): presumably that parent is the project root containing the
# `libraries` package imported below — confirm against the repo layout.
sys.path.append(os.path.dirname(os.path.abspath('')))

from libraries.client_stashapp import get_stashapp_client, StashAppClient

# Two client handles are used in this notebook:
#   - `stash`        : used for the scene-marker query (`find_scene_markers`)
#   - `stash_client` : used for tag lookups by name (`get_tags_by_names`)
stash = get_stashapp_client()
stash_client = StashAppClient()

In [None]:
# Look up the "Video Cut" tag by name. The marker query in the next cell reads
# its id through `stash_tags.video_cut["id"]`.
# NOTE(review): the shape of the returned object is defined in
# libraries.client_stashapp — confirm how names map to attributes.
stash_tags = stash_client.get_tags_by_names(["Video Cut"])

In [None]:
# Page through the scene markers returned by a tag filter that lists the
# "Video Cut" tag id under `excludes`, accumulating every page until the
# server hands back an empty one.
filtered_markers = []
page = 1

excluded_tag_id = stash_tags.video_cut["id"]
marker_fragment = "id primary_tag { id name } tags { id name } scene { id title }"

while len(
    markers := stash.find_scene_markers(
        {
            "tags": {
                "value": [],
                "modifier": "INCLUDES_ALL",
                "excludes": [excluded_tag_id],
            }
        },
        fragment=marker_fragment,
        filter={"per_page": 10000, "page": page},
    )
) != 0:
    # Non-empty page: keep its markers and move on to the next page.
    filtered_markers += markers
    page += 1

print(len(filtered_markers))

In [None]:
# Distinct primary-tag names across the collected markers, in sorted order,
# then resolved through the tag-lookup client.
primary_tag_names = sorted({marker["primary_tag"]["name"] for marker in filtered_markers})
all_marker_tags = stash_client.get_tags_by_names(primary_tag_names)

In [None]:
# Analyze marker statistics - PRIMARY TAGS ONLY
print("üìä Analyzing marker statistics (PRIMARY TAGS ONLY)...")

# One flat record per marker; only the primary tag is kept (secondary tags
# fetched in the query are intentionally ignored here).
marker_data = [
    {
        "marker_id": marker["id"],
        "scene_id": marker["scene"]["id"],
        "scene_title": marker["scene"]["title"],
        "tag_name": marker["primary_tag"]["name"],
    }
    for marker in filtered_markers
]

# Build the DataFrame in one shot from the list of records.
df_markers = pl.DataFrame(marker_data)

print(f"Total markers analyzed: {df_markers.height}")
print(f"Unique markers: {df_markers.get_column('marker_id').n_unique()}")
print(f"Unique scenes: {df_markers.get_column('scene_id').n_unique()}")
print(f"Unique primary tags: {df_markers.get_column('tag_name').n_unique()}")


In [None]:
# Per primary tag: number of distinct markers and number of distinct scenes,
# ordered from the most-used tag downwards.
markers_per_tag = (
    df_markers
    .group_by("tag_name")
    .agg(
        marker_count=pl.col("marker_id").n_unique(),
        scene_count=pl.col("scene_id").n_unique(),
    )
    .sort("marker_count", descending=True)
)

print("üè∑Ô∏è TOP 20 PRIMARY TAGS BY MARKER COUNT:")
print("=" * 60)
top_markers = markers_per_tag.head(20)
# Select the columns explicitly so the tuple unpacking below does not depend
# on the frame's column order.
for tag_name, marker_count, scene_count in top_markers.select(
    "tag_name", "marker_count", "scene_count"
).iter_rows():
    print(f"üéØ {tag_name:<30} | {marker_count:>6} markers | {scene_count:>6} scenes")

# Verification step: the cell's last expression renders the first rows of the
# aggregated frame for visual inspection.
print(f"\n{'=' * 60}")
print("üìã VERIFICATION - Sample of markers_per_tag DataFrame:")
print("=" * 60)
markers_per_tag.head(10)


In [None]:
# Additional analysis - the 20 primary tags that appear in the most scenes
print("üé¨ TOP 20 PRIMARY TAGS BY SCENE COVERAGE:")
print("=" * 60)
top_scenes = markers_per_tag.sort("scene_count", descending=True).head(20)
# Select the columns in print order so the tuple unpacking is explicit.
for tag_name, scene_count, marker_count in top_scenes.select(
    "tag_name", "scene_count", "marker_count"
).iter_rows():
    print(f"üéØ {tag_name:<30} | {scene_count:>6} scenes | {marker_count:>6} markers")

# Summary statistics
print("\n" + "="*60)
print("üìà SUMMARY STATISTICS:")
print("="*60)

# Overall summary: distinct counts across the whole marker table.
total_markers = df_markers['marker_id'].n_unique()
total_scenes = df_markers['scene_id'].n_unique()
total_tags = df_markers['tag_name'].n_unique()

# Per-tag averages are taken over the per-tag counts in `markers_per_tag`.
# A scene can hold markers with several different primary tags, so dividing
# the number of distinct scenes by the number of tags under-reports how many
# scenes a tag covers on average; the mean of `scene_count` is the real
# per-tag figure. Guard the zero-tag case so the formatting below never sees
# None or divides by zero.
if total_tags:
    avg_markers_per_tag = markers_per_tag["marker_count"].mean()
    avg_scenes_per_tag = markers_per_tag["scene_count"].mean()
else:
    avg_markers_per_tag = 0.0
    avg_scenes_per_tag = 0.0

print(f"TOTAL MARKERS:     {total_markers:>6}")
print(f"TOTAL SCENES:      {total_scenes:>6}")
print(f"TOTAL PRIMARY TAGS: {total_tags:>6}")
print(f"AVERAGE MARKERS PER TAG: {avg_markers_per_tag:>6.2f}")
print(f"AVERAGE SCENES PER TAG:  {avg_scenes_per_tag:>6.2f}")

# Top 10 most marker-dense primary tags (high marker count per scene)
print("\nüî• TOP 10 MOST MARKER-DENSE PRIMARY TAGS (markers per scene):")
print("=" * 60)

# Ratio of distinct markers to distinct scenes for each tag.
density_expr = pl.col("marker_count") / pl.col("scene_count")

marker_density = (
    markers_per_tag
    .with_columns(markers_per_scene=density_expr)
    # Only tags with at least 5 scenes, so tiny samples do not dominate.
    .filter(pl.col("scene_count") >= 5)
    .sort("markers_per_scene", descending=True)
    .head(10)
)

for row in marker_density.to_dicts():
    ratio = row["markers_per_scene"]
    print(f"üéØ {row['tag_name']:<30} | {ratio:>5.2f} markers/scene | {row['marker_count']:>4} markers | {row['scene_count']:>4} scenes")

# Verification step: the cell's last expression renders the aggregated frame.
print(f"\n{'=' * 60}")
print("üìã VERIFICATION - Complete markers_per_tag DataFrame:")
print("=" * 60)
markers_per_tag
