# Pinit Recommendation Sandbox
Interactive notebook to experiment with the Pinit recommendation pipeline using Supabase.

**Key Features:**
- Loads tags directly from Supabase database
- Builds location tags using the refactored tagging system
- Generates user recommendations
- Formats data for Supabase upload

## Workflow Overview
1. Configure paths and helpers.
2. Load the canonical location inventory from CSVs.
3. Derive taxonomy + location tags (deterministic + reviews).
4. Build/synthesize user actions and compute taste profiles.
5. Generate recommendation scores + inspect outputs.
6. Persist artifacts for downstream experiments.


In [18]:
from pathlib import Path
import json
import pandas as pd

from config import PipelineConfig, PipelinePaths, ReviewTagConfig
from recommendation.tag_taxonomy import get_tags_dataframe, get_tags_by_category, get_tag_id_lookup
from recommendation.static_tagging import load_locations, load_reviews, build_location_tags
from recommendation.user_profiles import ensure_user_actions, build_user_tag_affinities
from recommendation.recommendation import build_recommendations
from supabase_client.supabase_service import get_supabase_service


### Configure paths + output folders


In [19]:
DATA_DIR = Path("/Users/sriharshavitta/Projects/pinit-recommendations/data/raw")
CITY_NAME = "london"
OUTPUT_DIR = Path("../../output/pinit_notebook")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

paths = PipelinePaths(data_dir=DATA_DIR, city_name=CITY_NAME, output_dir=OUTPUT_DIR)
review_cfg = ReviewTagConfig(min_unique_authors=2, min_mentions=3)
config = PipelineConfig(paths=paths, review_tagging=review_cfg, top_k_per_user=25)
config


PipelineConfig(paths=PipelinePaths(data_dir=PosixPath('/Users/sriharshavitta/Projects/pinit-recommendations/data/raw'), city_name='london', output_dir=PosixPath('../../output/pinit_notebook'), user_actions_csv=None, user_friends_csv=None, bubble_locations_csv=None), review_tagging=ReviewTagConfig(min_unique_authors=2, min_mentions=3, english_only=True, score_floor=20.0, score_cap=100.0), recommendation_weights=RecommendationWeights(taste=0.5, trend_app=0.15, hidden_gems=0.2, quality=0.15, friend=0.0, bubble=0.0), synthetic_users=True, top_k_per_user=25)

## 1. Load canonical location inventory


In [20]:
locations = load_locations(paths)
print(f"Loaded {len(locations):,} locations")
locations.head()


Loaded 1,608 locations


Unnamed: 0,place_id,name,types,rating,user_ratings_total,price_level,lat,lon,vicinity,business_status,...,price_bucket,log_reviews,popularity_score,expected_popularity,residual_popularity,quality_score,hidden_gem_score,hidden_gem_source,expected_rating,hype_residual
0,ChIJNSXdCgBT30cRJCJlCeqYZq4,Piccolo's Pizza,"establishment,food,point_of_interest,restaurant",,0.0,,51.28347,0.16984,"Riverhead, Sevenoaks",OPERATIONAL,...,unknown,0.0,0.0,4.028667,-4.028667,0.811178,0.734039,popularity_residual,,
1,ChIJB7xIORNS30cRSpYmpEaUvvM,Miller & Carter Sevenoaks,"establishment,food,point_of_interest,restaurant",4.5,1895.0,2.0,51.283125,0.170598,"Amherst Hill, Riverhead, Sevenoaks",OPERATIONAL,...,mid,7.547502,0.842022,5.480125,2.067377,0.875,0.0,popularity_residual,,
2,ChIJZa00u2xS30cRYBr8E5bIYdM,Sun On,"establishment,food,meal_takeaway,point_of_inte...",3.6,40.0,,51.284885,0.169742,"27 London Rd, Riverhead, Sevenoaks",OPERATIONAL,...,unknown,3.713572,0.414297,4.329621,-0.616049,0.65,0.112247,popularity_residual,,
3,ChIJTW54IChT30cR_lIlYS9uZZI,Linden Catering,"establishment,food,point_of_interest,restaurant",,0.0,,51.28908,0.168472,"Riverpoint house, London Rd, Sevenoaks",OPERATIONAL,...,unknown,0.0,0.0,2.186523,-2.186523,0.811178,0.398393,popularity_residual,,
4,ChIJ7_uwBeRT30cRt_w4ylhWfF4,Kanosh Lebanese Street Food,"establishment,food,meal_takeaway,point_of_inte...",5.0,4.0,,51.289309,0.168325,"London Rd, Sevenoaks",CLOSED_TEMPORARILY,...,unknown,1.609438,0.179554,4.053022,-2.443584,1.0,0.445231,popularity_residual,,


In [21]:
locations[['location_id','name','cuisine_primary','price_level','rating','user_ratings_total','is_open_late','is_open_early']].head(10)


Unnamed: 0,location_id,name,cuisine_primary,price_level,rating,user_ratings_total,is_open_late,is_open_early
0,1,Piccolo's Pizza,italian,,,0.0,False,False
1,2,Miller & Carter Sevenoaks,unknown,2.0,4.5,1895.0,True,False
2,3,Sun On,indian,,3.6,40.0,False,False
3,4,Linden Catering,unknown,,,0.0,False,False
4,5,Kanosh Lebanese Street Food,middle_eastern,,5.0,4.0,False,False
5,6,E K B GOURMET BURGER,american,,3.0,3.0,True,False
6,7,M&S Food To Go,unknown,2.0,2.8,98.0,False,True
7,8,The Bell,pub,2.0,4.4,877.0,True,False
8,9,Trattoria Da Carlo,italian,,4.5,69.0,False,False
9,10,Zen Garden Chinese Restaurant,chinese,2.0,4.2,260.0,False,False


In [22]:
place_to_location = locations.set_index('google_place_id')['location_id'].to_dict()

reviews = load_reviews(paths, place_to_location)
print(f"Loaded {len(reviews):,} reviews")

place_to_location

Loaded 7,240 reviews


{'ChIJNSXdCgBT30cRJCJlCeqYZq4': 1,
 'ChIJB7xIORNS30cRSpYmpEaUvvM': 2,
 'ChIJZa00u2xS30cRYBr8E5bIYdM': 3,
 'ChIJTW54IChT30cR_lIlYS9uZZI': 4,
 'ChIJ7_uwBeRT30cRt_w4ylhWfF4': 5,
 'ChIJHbX02-1T30cRDO1efNmH4nU': 6,
 'ChIJAcqe0zzZdUgR3pDe79_xkh0': 7,
 'ChIJ_bgO35PgdUgR3QVkvi-fwnc': 8,
 'ChIJMYLkfI7gdUgRFesJVMX5PPo': 9,
 'ChIJcz21cuXgdUgROfpWnFBuIoA': 10,
 'ChIJ1R6NmV__dUgROJgVsZpFaR4': 11,
 'ChIJj-4uTIxN30cRNFTyf69vF2c': 12,
 'ChIJCzhCNNJN30cR4kIUhhYJCvA': 13,
 'ChIJg55-TZnhdUgRB2wihE4l7lE': 14,
 'ChIJi1GLWQXhdUgRdZKBQ0UZ4oQ': 15,
 'ChIJ9Zh7jtdT30cRlLD0oyKtUTQ': 16,
 'ChIJxV42umdS30cRnImpcY0Qacc': 17,
 'ChIJ0ZC0oit2dkgRv9sDyGigQ1Y': 18,
 'ChIJy0c3bIXYdUgRojTzdhXE09Y': 19,
 'ChIJiTJbt8Wq2EcRXgu3uGInfTM': 20,
 'ChIJm2WXSsGq2EcRGlY-fs6mu7s': 21,
 'ChIJB1SV1rh1dkgR0uwwdRa90KA': 22,
 'ChIJl0L28AB1dkgRdwkc7DrdUjE': 23,
 'ChIJ2V0PyugKdkgRdgvkKB8LRo0': 24,
 'ChIJf7oNL-EKdkgRZScn5_R4pUg': 25,
 'ChIJS9H60PEKdkgRfXwMTP7hkTo': 26,
 'ChIJuSEJusoLdkgRKhVUhl5TOrg': 27,
 'ChIJabRCmzgKdkgRHP9oehOYowE': 28,
 

## 2. Build taxonomy + location tags


In [23]:
# Load tags from Supabase
tags_df = get_tags_dataframe()
print(f"✓ Loaded {len(tags_df)} tags from Supabase")

# Display tags organized by category
tags_by_category = get_tags_by_category()
print(f"\nTag categories: {list(tags_by_category.keys())}")
for cat, cat_df in tags_by_category.items():
    print(f"  {cat}: {len(cat_df)} tags")

# Build location tags
reviews_df = load_reviews(paths, locations.set_index('google_place_id')['location_id'].to_dict())
location_tags = build_location_tags(locations, reviews_df, config.review_tagging)
print(f"\nTagged pairs: {len(location_tags):,}")

✓ Loaded 104 tags from Supabase

Tag categories: ['vibe', 'cuisine', 'dietary_requirement', 'occasion', 'drinks', 'schedule', 'value', 'category']
  vibe: 51 tags
  cuisine: 26 tags
  dietary_requirement: 14 tags
  occasion: 4 tags
  drinks: 2 tags
  schedule: 3 tags
  value: 2 tags
  category: 2 tags

Tagged pairs: 6,368


In [24]:
tags_df
# write tags_df into a csv file
tags_df.to_csv(OUTPUT_DIR / "tags.csv", index=False)

In [25]:
location_tags_with_names = location_tags.merge(locations[["location_id", "name"]], on="location_id")
location_tags_with_names

Unnamed: 0,location_id,tag_text,score,source,metadata,tag_id,name
0,1,italian,92.000000,cuisine_detected,"{""field"": ""cuisine_primary""}",d0ae158d-cedd-4f29-9c03-6ebfe2c0a76c,Piccolo's Pizza
1,1,restaurant,75.000000,google_types,"{""type"": ""restaurant""}",51eec92e-5a7d-4b2f-a96c-dc364b1b253a,Piccolo's Pizza
2,2,restaurant,75.000000,google_types,"{""type"": ""restaurant""}",51eec92e-5a7d-4b2f-a96c-dc364b1b253a,Miller & Carter Sevenoaks
3,2,open_late,70.000000,opening_hours,{},eaa9d86e-1f30-4198-b4c8-bed603ce5d3a,Miller & Carter Sevenoaks
4,2,sunday_open,65.000000,opening_hours,{},ec1bf490-469c-410c-97e7-6034a75ce9e2,Miller & Carter Sevenoaks
...,...,...,...,...,...,...,...
6363,1561,lively,60.235948,reviews,"{""mentions"": 4, ""unique_authors"": 4}",c85dbbf9-abba-46e4-a1bc-d30a20ba660a,Mooro's Events
6364,1569,quiet,47.465307,reviews,"{""mentions"": 2, ""unique_authors"": 2}",0cc10eb4-6935-425e-b177-b528c669b18f,Hopefield Animal Sanctuary & Visitor Centre
6365,1600,quiet,47.465307,reviews,"{""mentions"": 2, ""unique_authors"": 2}",0cc10eb4-6935-425e-b177-b528c669b18f,The Bank Chorleywood
6366,1604,lively,47.465307,reviews,"{""mentions"": 2, ""unique_authors"": 2}",c85dbbf9-abba-46e4-a1bc-d30a20ba660a,Nonno's Pizza (Chorleywood)


## 3. Build/simulate user actions + taste profiles


In [26]:
user_actions, synthetic = ensure_user_actions(paths, locations, location_tags, allow_synthetic=config.synthetic_users)
print(f"Loaded {len(user_actions)} user actions | synthetic={synthetic}")
user_actions


Loaded 36 user actions | synthetic=True


Unnamed: 0,user_id,place_id,action,created_at
0,demo_date_night,ChIJNSXdCgBT30cRJCJlCeqYZq4,like,2025-10-25T17:13:37.430094+00:00
1,demo_date_night,ChIJMYLkfI7gdUgRFesJVMX5PPo,detail_view,2025-11-13T17:13:37.430208+00:00
2,demo_date_night,ChIJj-4uTIxN30cRNFTyf69vF2c,like,2025-12-04T17:13:37.430253+00:00
3,demo_date_night,ChIJi1GLWQXhdUgRdZKBQ0UZ4oQ,detail_view,2025-12-14T17:13:37.430293+00:00
4,demo_date_night,ChIJy0c3bIXYdUgRojTzdhXE09Y,like,2025-10-19T17:13:37.430328+00:00
5,demo_date_night,ChIJDziemhAHdkgRPgRx1c7zC-8,save,2025-10-13T17:13:37.430360+00:00
6,demo_date_night,ChIJleBWrQULdkgRstOAcogkSFc,like,2025-11-07T17:13:37.430392+00:00
7,demo_date_night,ChIJ0X6uEBELdkgRlWlZh5uIRX8,detail_view,2025-11-19T17:13:37.430422+00:00
8,demo_date_night,ChIJ5ekJCS5xdkgRUbU1cErJ11g,like,2025-11-16T17:13:37.430452+00:00
9,demo_date_night,ChIJUyVgzv1zdkgR08_8R-kLGJ0,like,2025-10-09T17:13:37.430482+00:00


In [27]:
user_tags, user_history = build_user_tag_affinities(user_actions, location_tags, locations)
print(f"Computed {len(user_tags)} user-tag affinities across {user_tags['user_id'].nunique()} users")
user_tags.head(10)


Computed 38 user-tag affinities across 3 users


  normalized = agg.groupby("user_id", group_keys=False).apply(_normalize)


Unnamed: 0,user_id,tag_id,tag_text,score,metadata
0,demo_date_night,d0ae158d-cedd-4f29-9c03-6ebfe2c0a76c,italian,100.0,"{""raw_score"": 6.166226187950931}"
1,demo_date_night,51eec92e-5a7d-4b2f-a96c-dc364b1b253a,restaurant,81.521739,"{""raw_score"": 5.0268148271339115}"
2,demo_date_night,62e37b09-992b-4efc-86e6-5dad073a1b6e,takeaway,79.523404,"{""raw_score"": 4.9035929937585205}"
3,demo_date_night,ec1bf490-469c-410c-97e7-6034a75ce9e2,sunday_open,66.165839,"{""raw_score"": 4.079935289115969}"
4,demo_date_night,eaa9d86e-1f30-4198-b4c8-bed603ce5d3a,open_late,57.568438,"{""raw_score"": 3.54980010807192}"
5,demo_date_night,0fb97c6b-cbc5-48fc-8540-2562e1f78219,bar,8.309101,"{""raw_score"": 0.5123579813829471}"
6,demo_date_night,cca8f6d9-43ab-43ef-a99a-bed1518afc16,great_value,7.600534,"{""raw_score"": 0.4686661486771203}"
7,demo_date_night,71cb7a58-2981-4cf1-bd83-6dfed1c50826,open_early,6.650468,"{""raw_score"": 0.41008288009248023}"
8,demo_group_hang,51eec92e-5a7d-4b2f-a96c-dc364b1b253a,restaurant,100.0,"{""raw_score"": 5.18338986804027}"
9,demo_group_hang,ec1bf490-469c-410c-97e7-6034a75ce9e2,sunday_open,83.138164,"{""raw_score"": 4.3093751648727}"


## 4. Generate recommendation scores


In [28]:
recs = build_recommendations(locations, user_tags, location_tags, user_history, user_actions, config)
print(f"Generated {len(recs)} rec rows")


Generated 75 rec rows


In [29]:
def show_user_recs(user_id, n=10):
    subset = recs[recs['user_id'] == user_id].merge(
        locations[['location_id','name','vicinity','cuisine_primary','rating','user_ratings_total']],
        on='location_id', how='left'
    ).head(n)
    return subset[['rank','score','name','cuisine_primary','rating','user_ratings_total','reason']]

unique_users = recs['user_id'].unique().tolist()
unique_users[:3], len(unique_users)


(['demo_date_night', 'demo_group_hang', 'demo_vegan'], 3)

In [30]:
# Example: inspect top recommendations for the first user
if len(recs):
    example_user = recs['user_id'].iloc[0]
show_user_recs(example_user, n=10)


Unnamed: 0,rank,score,name,cuisine_primary,rating,user_ratings_total,reason
0,1,2.041038,Nonno's Pizza (Chorleywood),italian,4.5,392.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
1,2,2.038603,Domino's Pizza - Chigwell,italian,4.3,57.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
2,3,2.024309,Just Pizza Rickmansworth,italian,4.0,1.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
3,4,2.024218,Pizza Go Go,italian,4.1,96.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
4,5,2.021643,Pizza King Kebab House,italian,3.9,494.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
5,6,2.020989,Mascalzone,italian,4.3,1194.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
6,7,2.019286,County Fried Chicken & pizza & Burgers,italian,4.0,102.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
7,8,2.017219,Domino's Pizza - Brentwood,italian,4.0,96.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
8,9,2.016788,Tops Pizza,italian,4.0,295.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
9,10,2.014908,Farmhouse pizza Edgware,italian,3.9,330.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."


In [40]:
recs

Unnamed: 0,user_id,location_id,rank,score,taste_score,trend_score,hidden_gem_score,quality_score,reason
0,demo_date_night,1604,1,2.041038,3.619639,0.666456,0.000000,0.875,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
1,demo_date_night,1015,2,2.038603,3.557321,0.452995,0.341217,0.825,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
2,demo_date_night,1134,3,2.024309,3.557321,0.077330,0.607745,0.750,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
3,demo_date_night,1075,4,2.024218,3.618125,0.510369,0.111752,0.775,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
4,demo_date_night,815,5,2.021643,3.618125,0.692199,0.000000,0.725,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
...,...,...,...,...,...,...,...,...,...
70,demo_vegan,553,21,1.669639,2.904970,0.697696,0.000000,0.750,"{""taste_tags"": [{""tag"": ""restaurant"", ""score"":..."
71,demo_vegan,1021,22,1.668020,2.904970,0.547247,0.104740,0.750,"{""taste_tags"": [{""tag"": ""cafe"", ""score"": 0.75}..."
72,demo_vegan,512,23,1.666720,2.904970,0.628235,0.000000,0.800,"{""taste_tags"": [{""tag"": ""restaurant"", ""score"":..."
73,demo_vegan,688,24,1.666282,2.956448,0.578720,0.000000,0.675,"{""taste_tags"": [{""tag"": ""restaurant"", ""score"":..."


## 5. Persist notebook artifacts


In [31]:
# locations.to_csv(OUTPUT_DIR / 'locations.csv', index=False)
# location_tags.to_csv(OUTPUT_DIR / 'location_tags.csv', index=False)
# user_tags.to_csv(OUTPUT_DIR / 'user_tag_affinities.csv', index=False)
# recs.to_csv(OUTPUT_DIR / 'user_recommendations.csv', index=False)
# metadata = {
#     'city': CITY_NAME,
#     'n_locations': int(len(locations)),
#     'n_tags': int(len(tags_df)),
#     'n_location_tags': int(len(location_tags)),
#     'n_users': int(user_tags['user_id'].nunique()) if not user_tags.empty else 0,
#     'n_recommendations': int(len(recs)),
#     'synthetic_user_actions': bool(synthetic),
# }
# (OUTPUT_DIR / 'metadata.json').write_text(json.dumps(metadata, indent=2))
# metadata


## Tag Statistics from Supabase

In [32]:
# Display tag statistics from Supabase
print(f"Total tags in database: {len(tags_df)}")
print(f"\nTags by type:")
print(tags_df['tag_type'].value_counts())

# Show sample tags
print(f"\nSample tags:")
tags_df.head(10)

Total tags in database: 104

Tags by type:
tag_type
vibe                   51
cuisine                26
dietary_requirement    14
occasion                4
schedule                3
drinks                  2
value                   2
category                2
Name: count, dtype: int64

Sample tags:


Unnamed: 0,tag_id,text,prompt_description,tag_type,Colour
0,7f789b3e-ae55-4ac1-83fb-5ba3c785d793,casual,"Restaurant has relaxed, informal atmosphere. C...",vibe,
1,0d07b094-0ffd-4213-a4a8-057106511d14,indian,"Restaurant serves Indian cuisine with curries,...",cuisine,#FF6B35
2,1530fc9e-61cb-4fbc-add5-e84ac1d295eb,southern,Restaurant serves Southern American cuisine li...,cuisine,#D4A574
3,1afd3985-40d9-473d-9dc4-cf4e5c2c814f,mediterranean,Restaurant serves Mediterranean cuisine from r...,cuisine,#2D5016
4,229bd2f8-07d3-4058-a897-3cd0757e16a5,middle-eastern,Restaurant serves Middle Eastern cuisine like ...,cuisine,#D4A574
5,80b705b8-c158-4703-b775-b5c00f4fd622,small-plates,Restaurant focuses on small portions meant for...,vibe,
6,15392ae4-7633-476b-a511-ae23f8dc68c3,gastropub,Restaurant combines pub atmosphere with elevat...,cuisine,#8B4513
7,0a8f1a21-14e7-4081-b05a-11ae1cde349a,wine-bar,Restaurant emphasizes wine selection and wine ...,vibe,#9B59B6
8,0cc10eb4-6935-425e-b177-b528c669b18f,quiet,"Restaurant provides peaceful, calm dining envi...",vibe,#34495E
9,06167977-6dff-4001-b28d-0e2ad024acdc,buffet,Restaurant offers buffet-style service. All-yo...,vibe,#F39C12


In [33]:
# Save all tags to CSV for reference
tags_df.to_csv(OUTPUT_DIR / "tags.csv", index=False)
print(f"✓ Saved {len(tags_df)} tags to {OUTPUT_DIR / 'tags.csv'}")

✓ Saved 104 tags to ../../output/pinit_notebook/tags.csv


## Supabase Connection Test

In [34]:
# Test Supabase connection
db = get_supabase_service()

print("Testing Supabase connection...")
try:
    test_tags = db.get_all_tags(limit=5)
    print(f"✓ Connected to Supabase successfully")
    print(f"✓ Retrieved {len(test_tags)} sample tags")
    
    # Show sample tag
    if test_tags:
        sample = test_tags[0]
        print(f"\nSample tag:")
        print(f"  Text: {sample.get('text')}")
        print(f"  ID: {sample.get('tag_id')}")
        print(f"  Type: {sample.get('tag_type')}")
except Exception as e:
    print(f"✗ Connection failed: {e}")


Testing Supabase connection...
✓ Connected to Supabase successfully
✓ Retrieved 5 sample tags

Sample tag:
  Text: casual
  ID: 7f789b3e-ae55-4ac1-83fb-5ba3c785d793
  Type: vibe


## Format location_tags for Supabase upload

In [35]:
# Format location_tags for Supabase upload
print(f"Original location_tags shape: {location_tags.shape}")
print(f"Columns: {location_tags.columns.tolist()}")

# The location_tags from build_location_tags() already has tag_id from Supabase
# We just need to format it properly
location_tags_supabase = location_tags.copy()

# Ensure all required columns exist
required_cols = ['location_id', 'tag_id', 'score', 'source', 'metadata']
missing = [col for col in required_cols if col not in location_tags_supabase.columns]

if missing:
    print(f"Missing columns: {missing}")
else:
    print("✓ All required columns present")

# Check for any null tag_ids
null_tags = location_tags_supabase[location_tags_supabase['tag_id'].isnull()]
if len(null_tags) > 0:
    print(f"\n⚠ Warning: {len(null_tags)} location_tags have null tag_id")
    if 'tag_text' in location_tags_supabase.columns:
        print(f"Unmapped tags: {sorted(null_tags['tag_text'].unique()[:10])}")
    # Remove null tag_ids
    location_tags_supabase = location_tags_supabase[location_tags_supabase['tag_id'].notna()]
else:
    print("✓ All tags have valid tag_id")

# Prepare final format
final_columns = ['location_id', 'tag_id', 'score', 'source', 'metadata']
location_tags_final = location_tags_supabase[final_columns].copy()

print(f"\nFinal location_tags shape: {location_tags_final.shape}")
print(f"  Unique locations: {location_tags_final['location_id'].nunique():,}")
print(f"  Unique tags: {location_tags_final['tag_id'].nunique():,}")
print(f"\nSample rows:")
location_tags_final.head(10)

Original location_tags shape: (6368, 6)
Columns: ['location_id', 'tag_text', 'score', 'source', 'metadata', 'tag_id']
✓ All required columns present
✓ All tags have valid tag_id

Final location_tags shape: (6368, 5)
  Unique locations: 1,608
  Unique tags: 33

Sample rows:


Unnamed: 0,location_id,tag_id,score,source,metadata
0,1,d0ae158d-cedd-4f29-9c03-6ebfe2c0a76c,92.0,cuisine_detected,"{""field"": ""cuisine_primary""}"
1,1,51eec92e-5a7d-4b2f-a96c-dc364b1b253a,75.0,google_types,"{""type"": ""restaurant""}"
2,2,51eec92e-5a7d-4b2f-a96c-dc364b1b253a,75.0,google_types,"{""type"": ""restaurant""}"
3,2,eaa9d86e-1f30-4198-b4c8-bed603ce5d3a,70.0,opening_hours,{}
4,2,ec1bf490-469c-410c-97e7-6034a75ce9e2,65.0,opening_hours,{}
5,3,0d07b094-0ffd-4213-a4a8-057106511d14,92.0,cuisine_detected,"{""field"": ""cuisine_primary""}"
6,3,62e37b09-992b-4efc-86e6-5dad073a1b6e,75.0,google_types,"{""type"": ""meal_takeaway""}"
7,3,51eec92e-5a7d-4b2f-a96c-dc364b1b253a,75.0,google_types,"{""type"": ""restaurant""}"
8,3,ec1bf490-469c-410c-97e7-6034a75ce9e2,65.0,opening_hours,{}
9,4,51eec92e-5a7d-4b2f-a96c-dc364b1b253a,75.0,google_types,"{""type"": ""restaurant""}"


In [None]:
# # Save to CSV for Supabase upload
# output_path = OUTPUT_DIR / "location_tags.csv"
# location_tags_final.to_csv(output_path, index=False)
# print(f"✓ Saved location_tags to: {output_path}")
# print(f"  Total records: {len(location_tags_final):,}")
# print(f"  Unique locations: {location_tags_final['location_id'].nunique():,}")
# print(f"  Unique tags: {location_tags_final['tag_id'].nunique():,}")

✓ Saved location_tags to: ../../output/pinit_notebook/location_tags.csv
  Total records: 6,362
  Unique locations: 1,608
  Unique tags: 31


## Optional: Upload location_tags directly to Supabase

In [None]:
# # Upload location_tags to Supabase
# # WARNING: This will insert location_tags into Supabase

# UPLOAD_TO_SUPABASE = False  # Set to True to enable upload

# if UPLOAD_TO_SUPABASE:
#     db = get_supabase_service()
    
#     # Prepare records for upload
#     records = location_tags_final.to_dict('records')
    
#     # Parse metadata JSON strings back to dicts for JSONB storage
#     for record in records:
#         if isinstance(record.get('metadata'), str):
#             try:
#                 record['metadata'] = json.loads(record['metadata'])
#             except:
#                 record['metadata'] = {}
    
#     print(f"Uploading {len(records):,} location_tags to Supabase...")
    
#     # Upload in batches
#     batch_size = 500
#     total_inserted = 0
    
#     for i in range(0, len(records), batch_size):
#         batch = records[i:i + batch_size]
#         try:
#             result = db.client.table("location_tags").insert(batch).execute()
#             total_inserted += len(result.data)
#             print(f"  Batch {i//batch_size + 1}: Inserted {len(result.data)} records")
#         except Exception as e:
#             print(f"  Batch {i//batch_size + 1}: Error - {e}")
#             break
    
#     print(f"\n✓ Total inserted: {total_inserted:,} location_tags")
# else:
#     print("Upload disabled. Set UPLOAD_TO_SUPABASE = True to upload.")
#     print(f"CSV file ready at: {OUTPUT_DIR / 'location_tags.csv'}")

Uploading 6,362 location_tags to Supabase...
  Batch 1: Inserted 1000 records
  Batch 2: Inserted 1000 records
  Batch 3: Inserted 1000 records
  Batch 4: Inserted 1000 records
  Batch 5: Inserted 1000 records
  Batch 6: Inserted 1000 records
  Batch 7: Inserted 362 records

✓ Total inserted: 6,362 location_tags


In [41]:
# list out locations around (51.513, -0.124) within 2km radius
from recommendation.proximal_recommendation import build_proximal_recommendations, ProximalConfig

config = ProximalConfig()
config.radius_km = 2.0

proximal_recs = build_proximal_recommendations(
    "demo_date_night",
    51.513,
    -0.124,
    locations,
    user_tags, 
    location_tags
)

Found 1 locations within 2.0 km radius.
Found 4 locations within 6.0 km radius.
Found 4 locations within 6.0 km radius.
Found 1038 locations within 18.0 km radius.
Found 1038 locations within 18.0 km radius.
