# Pinit Recommendation Sandbox
Interactive notebook to experiment with each stage of the local Pinit recommendation pipeline without Supabase.


## Workflow Overview
1. Configure paths and helpers.
2. Load the canonical location inventory from CSVs.
3. Derive taxonomy + location tags (deterministic + reviews).
4. Build/synthesize user actions and compute taste profiles.
5. Generate recommendation scores + inspect outputs.
6. Persist artifacts for downstream experiments.


In [1]:
from pathlib import Path
import json
import pandas as pd

from config import PipelineConfig, PipelinePaths, ReviewTagConfig
from tag_taxonomy import tag_dataframe
from tagging import load_locations, load_reviews, build_location_tags
from user_profiles import ensure_user_actions, build_user_tag_affinities
from recommendation import build_recommendations


### Configure paths + output folders


In [2]:
DATA_DIR = Path("../../data/raw")
CITY_NAME = "london"
OUTPUT_DIR = Path("../../output/pinit_notebook")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

paths = PipelinePaths(data_dir=DATA_DIR, city_name=CITY_NAME, output_dir=OUTPUT_DIR)
review_cfg = ReviewTagConfig(min_unique_authors=2, min_mentions=3)
config = PipelineConfig(paths=paths, review_tagging=review_cfg, top_k_per_user=25)
config


PipelineConfig(paths=PipelinePaths(data_dir=PosixPath('../../data/raw'), city_name='london', output_dir=PosixPath('../../output/pinit_notebook'), user_actions_csv=None, user_friends_csv=None, bubble_locations_csv=None), review_tagging=ReviewTagConfig(min_unique_authors=2, min_mentions=3, english_only=True, score_floor=20.0, score_cap=100.0), recommendation_weights=RecommendationWeights(taste=0.5, trend_app=0.15, hidden_gems=0.2, quality=0.15, friend=0.0, bubble=0.0), synthetic_users=True, top_k_per_user=25)

## 1. Load canonical location inventory


In [3]:
locations = load_locations(paths)
print(f"Loaded {len(locations):,} locations")
locations.head()


Loaded 1,608 locations


Unnamed: 0,place_id,name,types,rating,user_ratings_total,price_level,lat,lon,vicinity,business_status,...,is_open_late,is_open_early,is_sunday_open,price_bucket,log_reviews,popularity_score,expected_popularity,residual_popularity,hidden_gem_score,quality_score
0,ChIJNSXdCgBT30cRJCJlCeqYZq4,Piccolo's Pizza,"establishment,food,point_of_interest,restaurant",,0.0,,51.28347,0.16984,"Riverhead, Sevenoaks",OPERATIONAL,...,False,False,False,unknown,0.0,0.0,4.028667,-4.028667,0.734039,0.811178
1,ChIJB7xIORNS30cRSpYmpEaUvvM,Miller & Carter Sevenoaks,"establishment,food,point_of_interest,restaurant",4.5,1895.0,2.0,51.283125,0.170598,"Amherst Hill, Riverhead, Sevenoaks",OPERATIONAL,...,True,False,True,mid,7.547502,0.842022,5.480125,2.067377,0.0,0.875
2,ChIJZa00u2xS30cRYBr8E5bIYdM,Sun On,"establishment,food,meal_takeaway,point_of_inte...",3.6,40.0,,51.284885,0.169742,"27 London Rd, Riverhead, Sevenoaks",OPERATIONAL,...,False,False,True,unknown,3.713572,0.414297,4.329621,-0.616049,0.112247,0.65
3,ChIJTW54IChT30cR_lIlYS9uZZI,Linden Catering,"establishment,food,point_of_interest,restaurant",,0.0,,51.28908,0.168472,"Riverpoint house, London Rd, Sevenoaks",OPERATIONAL,...,False,False,False,unknown,0.0,0.0,2.186523,-2.186523,0.398393,0.811178
4,ChIJ7_uwBeRT30cRt_w4ylhWfF4,Kanosh Lebanese Street Food,"establishment,food,meal_takeaway,point_of_inte...",5.0,4.0,,51.289309,0.168325,"London Rd, Sevenoaks",CLOSED_TEMPORARILY,...,False,False,False,unknown,1.609438,0.179554,4.053022,-2.443584,0.445231,1.0


In [4]:
locations[['location_id','name','cuisine_primary','price_level','rating','user_ratings_total','is_open_late','is_open_early']].head(10)


Unnamed: 0,location_id,name,cuisine_primary,price_level,rating,user_ratings_total,is_open_late,is_open_early
0,1,Piccolo's Pizza,italian,,,0.0,False,False
1,2,Miller & Carter Sevenoaks,unknown,2.0,4.5,1895.0,True,False
2,3,Sun On,indian,,3.6,40.0,False,False
3,4,Linden Catering,unknown,,,0.0,False,False
4,5,Kanosh Lebanese Street Food,middle_eastern,,5.0,4.0,False,False
5,6,E K B GOURMET BURGER,american,,3.0,3.0,True,False
6,7,M&S Food To Go,unknown,2.0,2.8,98.0,False,True
7,8,The Bell,pub,2.0,4.4,877.0,True,False
8,9,Trattoria Da Carlo,italian,,4.5,69.0,False,False
9,10,Zen Garden Chinese Restaurant,chinese,2.0,4.2,260.0,False,False


In [28]:
place_to_location = locations.set_index('google_place_id')['location_id'].to_dict()

reviews = load_reviews(paths, place_to_location)
print(f"Loaded {len(reviews):,} reviews")

place_to_location

Loaded 7,240 reviews


{'ChIJNSXdCgBT30cRJCJlCeqYZq4': 1,
 'ChIJB7xIORNS30cRSpYmpEaUvvM': 2,
 'ChIJZa00u2xS30cRYBr8E5bIYdM': 3,
 'ChIJTW54IChT30cR_lIlYS9uZZI': 4,
 'ChIJ7_uwBeRT30cRt_w4ylhWfF4': 5,
 'ChIJHbX02-1T30cRDO1efNmH4nU': 6,
 'ChIJAcqe0zzZdUgR3pDe79_xkh0': 7,
 'ChIJ_bgO35PgdUgR3QVkvi-fwnc': 8,
 'ChIJMYLkfI7gdUgRFesJVMX5PPo': 9,
 'ChIJcz21cuXgdUgROfpWnFBuIoA': 10,
 'ChIJ1R6NmV__dUgROJgVsZpFaR4': 11,
 'ChIJj-4uTIxN30cRNFTyf69vF2c': 12,
 'ChIJCzhCNNJN30cR4kIUhhYJCvA': 13,
 'ChIJg55-TZnhdUgRB2wihE4l7lE': 14,
 'ChIJi1GLWQXhdUgRdZKBQ0UZ4oQ': 15,
 'ChIJ9Zh7jtdT30cRlLD0oyKtUTQ': 16,
 'ChIJxV42umdS30cRnImpcY0Qacc': 17,
 'ChIJ0ZC0oit2dkgRv9sDyGigQ1Y': 18,
 'ChIJy0c3bIXYdUgRojTzdhXE09Y': 19,
 'ChIJiTJbt8Wq2EcRXgu3uGInfTM': 20,
 'ChIJm2WXSsGq2EcRGlY-fs6mu7s': 21,
 'ChIJB1SV1rh1dkgR0uwwdRa90KA': 22,
 'ChIJl0L28AB1dkgRdwkc7DrdUjE': 23,
 'ChIJ2V0PyugKdkgRdgvkKB8LRo0': 24,
 'ChIJf7oNL-EKdkgRZScn5_R4pUg': 25,
 'ChIJS9H60PEKdkgRfXwMTP7hkTo': 26,
 'ChIJuSEJusoLdkgRKhVUhl5TOrg': 27,
 'ChIJabRCmzgKdkgRHP9oehOYowE': 28,
 

## 2. Build taxonomy + location tags


In [5]:
tags_df = tag_dataframe()
reviews_df = load_reviews(paths, locations.set_index('google_place_id')['location_id'].to_dict())
location_tags = build_location_tags(locations, reviews_df, config.review_tagging)
print(f"Tags defined: {len(tags_df)} | Tagged pairs: {len(location_tags):,}")


Tags defined: 46 | Tagged pairs: 6,465


In [None]:
location_tags_with_names = location_tags.merge(locations[["location_id", "name"]], on="location_id")




Unnamed: 0,location_id,tag_text,score,source,metadata,tag_id,name
6212,1608,takeaway,75.0,google_types,"{""type"": ""meal_delivery""}",46,PAYA Cuisine (Rickmansworth)
6217,1608,sunday_open,65.0,opening_hours,{},40,PAYA Cuisine (Rickmansworth)
6216,1608,open_late,70.0,opening_hours,{},38,PAYA Cuisine (Rickmansworth)
6215,1608,great_value,80.0,price_level,"{""price_level"": 1.0}",41,PAYA Cuisine (Rickmansworth)
6214,1608,restaurant,75.0,google_types,"{""type"": ""restaurant""}",43,PAYA Cuisine (Rickmansworth)
...,...,...,...,...,...,...,...
2,2,restaurant,75.0,google_types,"{""type"": ""restaurant""}",43,Miller & Carter Sevenoaks
4,2,sunday_open,65.0,opening_hours,{},40,Miller & Carter Sevenoaks
3,2,open_late,70.0,opening_hours,{},38,Miller & Carter Sevenoaks
1,1,restaurant,75.0,google_types,"{""type"": ""restaurant""}",43,Piccolo's Pizza


## 3. Build/simulate user actions + taste profiles


In [None]:
user_actions, synthetic = ensure_user_actions(paths, locations, location_tags, allow_synthetic=config.synthetic_users)
print(f"Loaded {len(user_actions)} user actions | synthetic={synthetic}")
user_actions


Loaded 36 user actions | synthetic=True


Unnamed: 0,user_id,place_id,action,created_at
0,demo_date_night,ChIJNSXdCgBT30cRJCJlCeqYZq4,like,2025-10-20T13:35:53.608975+00:00
1,demo_date_night,ChIJMYLkfI7gdUgRFesJVMX5PPo,detail_view,2025-11-08T13:35:53.609283+00:00
2,demo_date_night,ChIJj-4uTIxN30cRNFTyf69vF2c,like,2025-11-29T13:35:53.609388+00:00
3,demo_date_night,ChIJi1GLWQXhdUgRdZKBQ0UZ4oQ,detail_view,2025-12-09T13:35:53.609459+00:00
4,demo_date_night,ChIJy0c3bIXYdUgRojTzdhXE09Y,like,2025-10-14T13:35:53.609512+00:00


In [8]:
user_tags, user_history = build_user_tag_affinities(user_actions, location_tags, locations)
print(f"Computed {len(user_tags)} user-tag affinities across {user_tags['user_id'].nunique()} users")
user_tags.head(10)


Computed 39 user-tag affinities across 3 users


  normalized = agg.groupby("user_id", group_keys=False).apply(_normalize)


Unnamed: 0,user_id,tag_id,tag_text,score,metadata
0,demo_date_night,1,italian,100.0,"{""raw_score"": 6.166221891842733}"
1,demo_date_night,43,restaurant,81.521739,"{""raw_score"": 5.026811324871793}"
2,demo_date_night,46,takeaway,79.523404,"{""raw_score"": 4.903589577348086}"
3,demo_date_night,40,sunday_open,66.165839,"{""raw_score"": 4.0799324465882725}"
4,demo_date_night,38,open_late,57.568438,"{""raw_score"": 3.5497976348940954}"
5,demo_date_night,45,bar,8.309101,"{""raw_score"": 0.5123576244141039}"
6,demo_date_night,41,great_value,7.600534,"{""raw_score"": 0.4686658221635675}"
7,demo_date_night,39,open_early,6.650468,"{""raw_score"": 0.4100825943931215}"
8,demo_group_hang,43,restaurant,100.0,"{""raw_score"": 5.183386258798874}"
9,demo_group_hang,40,sunday_open,83.138164,"{""raw_score"": 4.309372164212092}"


## 4. Generate recommendation scores


In [9]:
recs = build_recommendations(locations, user_tags, location_tags, user_history, user_actions, config)
print(f"Generated {len(recs)} rec rows")


Generated 75 rec rows


In [10]:
def show_user_recs(user_id, n=10):
    subset = recs[recs['user_id'] == user_id].merge(
        locations[['location_id','name','vicinity','cuisine_primary','rating','user_ratings_total']],
        on='location_id', how='left'
    ).head(n)
    return subset[['rank','score','name','cuisine_primary','rating','user_ratings_total','reason']]

unique_users = recs['user_id'].unique().tolist()
unique_users[:3], len(unique_users)


(['demo_date_night', 'demo_group_hang', 'demo_vegan'], 3)

In [19]:
# Example: inspect top recommendations for the first user
if len(recs):
    example_user = recs['user_id'].iloc[0]
show_user_recs(example_user, n=10)


Unnamed: 0,rank,score,name,cuisine_primary,rating,user_ratings_total,reason
0,1,2.041038,Nonno's Pizza (Chorleywood),italian,4.5,392.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
1,2,2.038603,Domino's Pizza - Chigwell,italian,4.3,57.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
2,3,2.024309,Just Pizza Rickmansworth,italian,4.0,1.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
3,4,2.024218,Pizza Go Go,italian,4.1,96.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
4,5,2.021643,Pizza King Kebab House,italian,3.9,494.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
5,6,2.020989,Mascalzone,italian,4.3,1194.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
6,7,2.019286,County Fried Chicken & pizza & Burgers,italian,4.0,102.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
7,8,2.017219,Domino's Pizza - Brentwood,italian,4.0,96.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
8,9,2.016788,Tops Pizza,italian,4.0,295.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."
9,10,2.014908,Farmhouse pizza Edgware,italian,3.9,330.0,"{""taste_tags"": [{""tag"": ""italian"", ""score"": 0...."


## 5. Persist notebook artifacts


In [None]:
locations.to_csv(OUTPUT_DIR / 'locations.csv', index=False)
location_tags.to_csv(OUTPUT_DIR / 'location_tags.csv', index=False)
user_tags.to_csv(OUTPUT_DIR / 'user_tag_affinities.csv', index=False)
recs.to_csv(OUTPUT_DIR / 'user_recommendations.csv', index=False)
metadata = {
    'city': CITY_NAME,
    'n_locations': int(len(locations)),
    'n_tags': int(len(tags_df)),
    'n_location_tags': int(len(location_tags)),
    'n_users': int(user_tags['user_id'].nunique()) if not user_tags.empty else 0,
    'n_recommendations': int(len(recs)),
    'synthetic_user_actions': bool(synthetic),
}
(OUTPUT_DIR / 'metadata.json').write_text(json.dumps(metadata, indent=2))
metadata
