<a href="https://colab.research.google.com/github/EladMoshe98/testrepo/blob/main/AmenityRecognition_AirBNB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Amenity Recognition**


**main**

In [None]:
#topN = top_visual_amenities_from_inside_airbnb_csv("https://data.insideairbnb.com/united-kingdom/england/greater-manchester/2025-06-24/data/listings.csv.gz")
#for i, (amenity, count) in enumerate(topN, 1):
#  print(f"{i:2d}. {amenity} ({count})")

 1. wifi router (7059)
 2. smoke alarm (6913)
 3. Kitchen (6679)
 4. parking (6303)
 5. TV (6107)
 6. washer (6052)
 7. heating (6018)
 8. hot water (5592)
 9. refrigerator (5425)
10. ironing board (5250)
11. hangers (5199)
12. dishes and silverware (5140)
13. dryer (5120)
14. bed linens (5110)
15. microwave (4959)
16. kettle (4810)
17. oven (4718)
18. bathroom utensils (4588)
19. toaster (4267)
20. stove (4196)
21. dedicated workspace (4066)
22. freezer (3989)
23. dining table (3749)
24. cleaning products (3536)
25. first aid kit (3508)
26. wine glasses (3451)
27. clothing storage (3351)
28. bathtub (3350)
29. fire extinguisher (3120)
30. coffee (2907)
31. lockbox (2849)
32. dishwasher (2394)
33. room-darkening shades (2135)
34. entrance (2115)
35. coffee maker (1942)
36. balcony (1416)
37. crib (1371)
38. elevator (1226)
39. backyard (1212)
40. outdoor dining area (1151)
41. laundromat (1126)
42. outdoor furniture (1094)
43. ethernet connection (903)
44. portable fans (808)
45. firep


**function 1**

input: "inside airbnb" CSV

output: Airbnb top N household Ameneties by hosts

This function cleans manually entered amenity data, converting free-form text into a standardized list of tangible, observable amenities (e.g., “oven,” “air conditioning,” “TV”). It also filters out invalid or non-physical entries provided by hosts, such as rules or descriptions like “lake view,” “free parking,” or other irrelevant phrases.






In [3]:
import ast
import re
from collections import Counter
from typing import Iterable, List, Tuple

import pandas as pd


def top_visual_amenities_from_inside_airbnb_csv(
    csv_path: str,
    top_n: int = 50,
    amenities_column: str = "amenities",
) -> List[Tuple[str, int]]:
    """
    Compute the top-N most common *photo-recognizable* amenities from an Inside Airbnb listings CSV.

    Parameters
    ----------
    csv_path : str
        Path to the Inside Airbnb listings CSV file.
    top_n : int, default=100
        Number of amenities to return.
    amenities_column : str, default="amenities"
        Column name containing stringified lists of amenities.

    Returns
    -------
    List[Tuple[str, int]]
        A list of (amenity, count) sorted by descending frequency. Amenity names are normalized.

    Design goals
    ------------
    1) Keep only tangible, visible amenities (things you can see in a photo).
       Examples kept: tv, kitchen, oven, stove, microwave, refrigerator, dishwasher, washer, dryer,
       air conditioning unit, radiator/heater, fireplace, pool, hot tub, sauna, balcony, patio/deck,
       garden, bbq grill, outdoor furniture, hammock, gym equipment, piano, pool table, ping pong table,
       crib, high chair, lockbox, parking, bike rack, smoke alarm, carbon monoxide alarm, fire extinguisher.

    2) Remove *qualifiers* that you cannot reliably infer from photos (free/paid/street/on-premises/etc.).
       Example: "Free street parking" -> "parking". "Paid parking off premises" -> "parking".

    3) Exclude non-amenities (rules/services/permissions/conditions):
       Examples: "the host will be present", "luggage dropoff allowed", "pets allowed",
       "long-term stays allowed", "events allowed", "smoking allowed", "no smoking",
       "breakfast included", "daily cleaning", "host greets you", etc.

    Notes about Inside Airbnb CSVs
    ------------------------------
    - The 'amenities' field typically looks like a Python list in string form:
      '["Wifi", "TV", "Kitchen", "Washer", "Paid parking off premises", "Host will be present"]'
    - We parse it with ast.literal_eval safely.
    """

    # ---------- 1) Load only the amenities column ----------
    df = pd.read_csv(csv_path, usecols=[amenities_column])

    # ---------- 2) Helpers: cleaning & classification rules ----------

    # (a) Words/phrases that indicate *rules/services/conditions* (non-amenities).
    # Keep this conservative; we only exclude items clearly not a physical amenity.
    NON_AMENITY_PATTERNS: Iterable[re.Pattern] = [
        re.compile(r"\b(host|hostess)\b.*\b(present|greets|greeter)\b", re.I),
        re.compile(r"\b(luggage|baggage)\b.*\b(drop[- ]?off|storage|allowed)\b", re.I),
        re.compile(r"\b(pets?|animals?)\b.*\b(allowed|not allowed|no)\b", re.I),
        re.compile(r"\b(long[- ]?term|extended)\b.*\b(stays?)\b", re.I),
        re.compile(r"\b(events?|parties?)\b.*\b(allowed|permitted|no|not allowed)\b", re.I),
        re.compile(r"\b(smoking)\b.*\b(allowed|permitted|no|not allowed)\b", re.I),
        re.compile(r"\b(breakfast|meals?|meal plan|room service)\b", re.I),
        re.compile(r"\b(daily|weekly|monthly)\b.*\b(cleaning|housekeeping)\b", re.I),
        re.compile(r"\b(check[- ]?in)\b.*\b(assistance|time|anytime)\b", re.I),

        # child-safety guards / covers
        re.compile(r"\btable\s+corner\s+guards?\b", re.I),
        re.compile(r"\boutlet\s+covers?\b", re.I),
        re.compile(r"\bwindow\s+guards?\b", re.I),

        # services / recommendations / access
        re.compile(r"\bbabysitter\s+recommendations?\b", re.I),
        re.compile(r"\bresort\s+access\b", re.I),
        re.compile(r"\bbuilding\s+staff\b", re.I),
        re.compile(r"\bski[-\s]?in\s*/\s*ski[-\s]?out\b", re.I),   # “ski-in/ski-out”, “ski in / ski out”
        re.compile(r"\bski[-\s]?in\s+ski[-\s]?out\b", re.I),       # “ski in ski out”

        # generic “X view” location attributes (e.g., lake view / mountain view / canal view)
        # space before 'view' avoids matching "overview/review"
        re.compile(r"\b[a-z][\w\s-]*\s+view\b", re.I),

        # location attribute
        re.compile(r"\bwaterfront\b", re.I),

        # access / entry, not a tangible amenity
        re.compile(r"\bkeypad\b", re.I),
        re.compile(r"\bself[-\s]?check[-\s]?in\b", re.I),

        # property/structure descriptors
        re.compile(r"\bsingle[-\s]level\s+home\b", re.I),

        # kitchen odds & ends / catch-alls (treat as too generic per your policy)
        re.compile(r"\bbaking\s+sheet\b", re.I),
        re.compile(r"\bessentials\b", re.I),
        re.compile(r"\bcooking\s+basics\b", re.I),

        # playground outside the unit (treat as non-unit amenity per your rule)
        re.compile(r"\boutdoor\s+playground\b", re.I),

        # We *do not* exclude "lockbox" (it is tangible). Only general check-in permissions/rules.
    ]

    # (b) Normalization: remove qualifiers we cannot trust from photos
    #    (kept generic so it works for many amenities, especially parking/pool/shared/etc).
    QUALIFIER_CLEANUPS: Iterable[re.Pattern] = [
        re.compile(r"\b(free|paid|complimentary|extra fee|fee|24/7|twenty[- ]four[- ]seven)\b", re.I),
        re.compile(r"\b(on[- ]?premises|off[- ]?premises|on[- ]?site|off[- ]?site|nearby|street|public|private|shared)\b", re.I),
        re.compile(r"\b(in[- ]?building|in the building|in unit|on floor|ground floor|rooftop)\b", re.I),
    ]

    # (c) Simple negative statements to *exclude* outright (e.g., "No parking").
    NEGATIVE_PATTERNS: Iterable[re.Pattern] = [
        re.compile(r"\b(no|not|without)\s+parking\b", re.I),
    ]

    # (d) Mapping from many raw variants -> a single, visual amenity label.
    #     Order matters: first match wins.
    NORMALIZATION_RULES: List[Tuple[re.Pattern, str]] = [
        # Media & living
        (re.compile(r"\b(smart\s*tv|television|tv)\b", re.I), "TV"),
        (re.compile(r"\b(projector)\b", re.I), "projector"),
        (re.compile(r"\b(sound\s*system|speakers?)\b", re.I), "sound system"),
        # Collapse all game console variants to a single label
        (re.compile(
            r"\b(?:"
            r"game\s*console(?:\s*:\s*\w+)?|"    # "game console", "game console: ps4"
            r"playstation(?:\s*\d+)?|"           # "playstation", "playstation 5"
            r"ps[1-5]|"                          # "ps2", "ps5"
            r"xbox(?:\s*(?:one|360|series\s*(?:x|s))?)|"  # "xbox", "xbox one", "xbox series x"
            r"nintendo\s*switch|"
            r"wii(?:\s*u)?|"
            r"gamecube"
            r")\b",
            re.I,),"game console",
        ),

        # Kitchen core
        (re.compile(r"\b(kitchen)\b", re.I), "Kitchen"),
        (re.compile(r"\b(refrigerator|fridge)\b", re.I), "refrigerator"),
        (re.compile(r"\b(dishwasher)\b", re.I), "dishwasher"),
        (re.compile(r"\b(oven)\b", re.I), "oven"),
        (re.compile(r"\b(stove|cooktop|range)\b", re.I), "stove"),
        (re.compile(r"\b(microwave)\b", re.I), "microwave"),
        (re.compile(r"\b(coffee\s*maker|espresso|nespresso|keurig)\b", re.I), "coffee maker"),
        (re.compile(r"\b(kettle|electric\s*kettle)\b", re.I), "kettle"),
        (re.compile(r"\b(toaster)\b", re.I), "toaster"),

        # Laundry
        (re.compile(r"\b(washer|washing\s*machine)\b", re.I), "washer"),
        (re.compile(r"\b(dryer|tumble\s*dryer)\b", re.I), "dryer"),
        (re.compile(r"\b(iron|ironing\s*board)\b", re.I), "ironing board"),

        # Climate & safety (tangible devices)
        (re.compile(r"\b(air\s*conditioning|a/c|ac)\b", re.I), "air conditioning unit"),
        (re.compile(r"\b(radiator|space\s*heater|heater)\b", re.I), "heater"),
        (re.compile(r"\b(fireplace)\b", re.I), "fireplace"),
        (re.compile(r"\b(smoke\s*alarm|smoke\s*detector)\b", re.I), "smoke alarm"),
        (re.compile(r"\b(carbon\s*monoxide\s*alarm|carbon\s*monoxide\s*detector)\b", re.I), "smoke alarm"),
        (re.compile(r"\b(fire\s*extinguisher)\b", re.I), "fire extinguisher"),
        (re.compile(r"\b(first\s*aid\s*kit)\b", re.I), "first aid kit"),

        # Outdoors & recreation
        (re.compile(r"\b(pool|swimming\s*pool)\b", re.I), "pool"),
        (re.compile(r"\b(hot\s*tub|jacuzzi)\b", re.I), "hot tub"),
        (re.compile(r"\b(sauna)\b", re.I), "sauna"),
        (re.compile(r"\b(balcony)\b", re.I), "balcony"),
        (re.compile(r"\b(terrace|patio|deck)\b", re.I), "patio"),
        (re.compile(r"\b(garden|yard)\b", re.I), "garden"),
        (re.compile(r"\b(bbq|barbe?cue|grill)\b", re.I), "bbq grill"),
        (re.compile(r"\b(outdoor\s*furniture|sun\s*lounger|loungers?)\b", re.I), "outdoor furniture"),
        (re.compile(r"\b(hammock)\b", re.I), "hammock"),
        (re.compile(r"\b(bike\s*rack)\b", re.I), "bike rack"),

        # Games / extras
        (re.compile(r"\b(pool\s*table|billiards?)\b", re.I), "pool table"),
        (re.compile(r"\b(table\s*tennis|ping\s*pong)\b", re.I), "ping pong table"),
        (re.compile(r"\b(arcade\s*machine|arcade)\b", re.I), "arcade machine"),
        (re.compile(r"\b(board\s*games?)\b", re.I), "board games"),
        (re.compile(r"\b(piano)\b", re.I), "piano"),
        (re.compile(r"\b(gym|fitness(\s*center|\s*room)?|fitness\s*equipment)\b", re.I), "gym equipment"),

        # Family
        (re.compile(r"\b(crib|cot|travel\s*cot)\b", re.I), "crib"),
        (re.compile(r"\b(high\s*chair)\b", re.I), "high chair"),

        # Access / check-in devices (tangible)
        (re.compile(r"\b(lockbox)\b", re.I), "lockbox"),
        (re.compile(r"\b(keypad)\b", re.I), "keypad"),

        # Parking (normalize ANY parking variant -> 'parking' UNLESS explicitly negated)
        (re.compile(r"\b(parking|garage|carport)\b", re.I), "parking"),

        # Bathroom items → "bathroom utensils"
        (re.compile(r"\b(dove|pantene)\s+(conditioner|shampoo)\b|\b(shower\s+gel|body\s+soap|conditioner|shampoo|soap)\b",re.I),"bathroom utensils",),

        # Game console → "playstation"
        (re.compile(r"\b(game\s*console:\s*)?ps[45]\b", re.I),"playstation",),

        # HDTV variants (with Netflix, sized, plain) → "TV"
        (re.compile(r"\b(?:\d{2,3}\s*(?:\"|inch|in)\s*)?h?d?tv(?:\s+with\s+netflix)?\b",re.I,),"TV",),

        # Kitchenette → Kitchen
        (re.compile(r"\bkitchenette\b", re.I), "Kitchen"),

        # Wi-Fi / Pocket Wi-Fi → wifi router
        (re.compile(r"\bpocket\s*wi[- ]?fi\b", re.I), "wifi router"),
        (re.compile(r"\bwi[- ]?fi\b", re.I), "wifi router"),

        # Clothing storage variants → clothing storage
        (re.compile(r"\bclothing\s+storage:\s*(dresser|wardrobe)\b", re.I),"clothing storage",),

        # Radiant heating → heating
        (re.compile(r"\bradiant\s+heating\b", re.I), "heating"),

        # kitchen variants
        (re.compile(r"^kitchen$", re.I), "Kitchen"),
        (re.compile(r"^Kitchen$", re.I), "Kitchen"),

        # tv variants
        (re.compile(r"^tv$", re.I), "TV"),
        (re.compile(r"^TV$", re.I), "TV"),

        # heating variants
        (re.compile(r"^(heating|central heating|heater)$", re.I), "heating"),

        # clothing storage variants
        (re.compile(r"^clothing storage(:\s*closet)?$", re.I), "clothing storage"),

        # exercise equipment variants
        (re.compile(r"^(exercise equipment|gym equipment|exercise equipment:\s*(weights|treadmill))$",re.I,),"exercise equipment",),

        # bikes variants
        (re.compile(r"^(bikes|children’s bikes)$", re.I), "bikes"),
    ]

    def looks_like_non_amenity(s: str) -> bool:
        return any(p.search(s) for p in NON_AMENITY_PATTERNS)

    def explicitly_negated(s: str) -> bool:
        return any(p.search(s) for p in NEGATIVE_PATTERNS)

    def strip_qualifiers(s: str) -> str:
        """Remove adjectives/qualifiers we cannot infer from photos."""
        cleaned = s
        # Remove parentheticals and brackets: "(...)" or "[...]"
        cleaned = re.sub(r"[\(\[\{].*?[\)\]\}]", "", cleaned)
        for pat in QUALIFIER_CLEANUPS:
            cleaned = pat.sub(" ", cleaned)
        # Collapse whitespace, strip quotes and punctuation at ends
        cleaned = re.sub(r"\s+", " ", cleaned).strip(" '\".,;:! ").strip()
        return cleaned

    def normalize_one(raw: str) -> str:
        """
        Normalize a raw amenity string to a single visual amenity label or '' (exclude).
        """
        if not raw or not isinstance(raw, str):
            return ""
        s = raw.strip()
        if not s:
            return ""

        # Quick drop if it's clearly a non-amenity rule/permission
        if looks_like_non_amenity(s):
            return ""

        # Handle explicit negatives like "No parking"
        if explicitly_negated(s):
            return ""

        # Remove qualifiers we can't trust visually
        s_clean = strip_qualifiers(s)

        # Try to map to a normalized amenity label (first match wins)
        for pat, label in NORMALIZATION_RULES:
            if pat.search(s_clean):
                # Special case: if the match is parking but the original string says "no parking", we already filtered above
                return label

        # If we didn't match any mapping, decide whether to keep or drop:
        # Keep only if the token looks like a tangible object (single nouns often are),
        # otherwise drop to stay conservative.
        # Heuristic: drop things that contain words like "allowed", "included", "provided", "upon", "request".
        if re.search(r"\b(allowed|included|provided|upon|request|policy|rules?)\b", s_clean, re.I):
            return ""

        # Fall back: if it's a short nouny token (e.g., "desk", "safe"), keep as is.
        # This helps capture tangible items not in our mapping.
        # Reject tokens with many spaces (likely descriptive sentences).
        if len(s_clean.split()) <= 3 and re.search(r"[A-Za-z]", s_clean):
            return s_clean.lower()

        return ""

    # ---------- 3) Parse the CSV amenities and build counts ----------
    counts = Counter()

    for raw in df[amenities_column].dropna():
        # Parse: Inside Airbnb amenities is a stringified Python list.
        try:
            items = ast.literal_eval(raw)
            if not isinstance(items, list):
                continue
        except Exception:
            # If parsing fails, try a very loose split as last resort
            items = re.split(r",\s*", raw.strip("[] "))

        seen_this_listing = set()
        for item in items:
            label = normalize_one(item)
            if not label:
                continue
            # De-duplicate within a single listing to avoid double-counting
            seen_this_listing.add(label)

        counts.update(seen_this_listing)

    # ---------- 4) Return the top-N ----------
    return counts.most_common(top_n)



**function 2**

input: photo (jpg), ameneties list

output: list of AirBnb ameneties recognized in the photo

optional: a jpg with the ameneties shown on the picture itself



TypeError: `source` must be a pandas.DataFrame or a CSV file path (str).

In [None]:
"""
amenity_detector.py

Detect user-specified amenities in a JPG image using a pretrained object
detection model (Faster R-CNN) and visualize detections with bounding boxes.

Requirements:
    pip install torch torchvision pillow
"""

import torch
from torchvision import models, transforms
from PIL import Image, ImageDraw, ImageFont
import os
import math
from typing import List, Dict, Tuple

# -------------------------------------------------------------------
# 1. Load pretrained model (COCO)
# -------------------------------------------------------------------

_device = "cuda" if torch.cuda.is_available() else "cpu"

def load_model():
    """Load a pretrained Faster R-CNN model."""
    model = models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    model.to(_device).eval()
    return model

# COCO labels (index-aligned with model output)
COCO_CLASSES = [
    "__background__","person","bicycle","car","motorcycle","airplane","bus","train",
    "truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench",
    "bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe",
    "backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard",
    "sports ball","kite","baseball bat","baseball glove","skateboard","surfboard",
    "tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana",
    "apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake",
    "chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse",
    "remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator",
    "book","clock","vase","scissors","teddy bear","hair drier","toothbrush"
]

# -------------------------------------------------------------------
# 2. Detect amenities
# -------------------------------------------------------------------

def detect_amenities(
    image_path: str,
    amenity_list: List[str],
    score_threshold: float = 0.6
) -> List[Dict]:
    """
    Detect amenities from a photo using the given list of amenity names.

    Parameters
    ----------
    image_path : str
        Path to the JPG image of the apartment/home.
    amenity_list : List[str]
        List of amenity keywords to look for (e.g. ["tv", "oven", "bed"]).
    score_threshold : float
        Minimum confidence required for a detection to be kept.

    Returns
    -------
    detections : List[Dict]
        List of detected amenities in the format:
        {
            "amenity": str,
            "score": float,
            "box": (x1, y1, x2, y2)
        }
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(image_path)

    # Load and preprocess
    img = Image.open(image_path).convert("RGB")
    transform = transforms.Compose([transforms.ToTensor()])
    x = transform(img).to(_device)

    # Model inference
    model = load_model()
    with torch.no_grad():
        preds = model([x])[0]

    # Filter by score and match to requested amenities
    results = []
    for label, box, score in zip(preds["labels"], preds["boxes"], preds["scores"]):
        if score < score_threshold:
            continue
        cls_name = COCO_CLASSES[label]
        # if user amenity matches (case-insensitive substring)
        if any(a.lower() in cls_name.lower() or cls_name.lower() in a.lower() for a in amenity_list):
            results.append({
                "amenity": cls_name,
                "score": float(score),
                "box": tuple(map(float, box))
            })

    return results

# -------------------------------------------------------------------
# 3. Draw detections on image
# -------------------------------------------------------------------

def draw_amenities(
    image_path: str,
    detections: List[Dict],
    output_path: str = "annotated.jpg"
) -> str:
    """
    Draw detected amenities on the image and save it.

    Parameters
    ----------
    image_path : str
        Input image path.
    detections : List[Dict]
        Output from detect_amenities().
    output_path : str
        Where to save the annotated image.

    Returns
    -------
    str
        Path to the annotated image.
    """
    img = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(img)
    font = ImageFont.load_default()

    for det in detections:
        x1, y1, x2, y2 = det["box"]
        label = f"{det['amenity']} ({det['score']:.2f})"

        # Draw rectangle and text label
        draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
        tw, th = draw.textsize(label, font)
        draw.rectangle([x1, y1 - th - 4, x1 + tw + 4, y1], fill="red")
        draw.text((x1 + 2, y1 - th - 2), label, fill="white", font=font)

    img.save(output_path)
    return output_path

# -------------------------------------------------------------------
# 4. Example usage
# -------------------------------------------------------------------

if __name__ == "__main__":
    # Example: specify the amenities you want to look for
    my_amenities = ["tv", "oven", "bed", "toaster", "refrigerator"]

    # Path to your JPG photo
    img_path = "sample_apartment.jpg"

    # Detect amenities
    detections = detect_amenities(img_path, my_amenities, score_threshold=0.6)
    print("Detected amenities:")
    for d in detections:
        print(f" - {d['amenity']} (confidence={d['score']:.2f})")

    # Annotate and save
    annotated = draw_amenities(img_path, detections, "sample_annotated.jpg")
    print(f"Annotated image saved at: {annotated}")


function 3
input: "inside airbnb CSV
output: function 2 on a randomly selected photo