In [9]:
import os
import csv
import json
import ast
import pandas as pd
from collections import Counter


def _parse_locations_cell(cell):
    """
    Devuelve una lista de ubicaciones desde una celda del CSV.
    Acepta: JSON (["A","B"]), repr de lista Python (['A','B']), o string simple ("A").
    """
    if cell is None or (isinstance(cell, float) and pd.isna(cell)):
        return []
    s = str(cell).strip()
    if not s:
        return []
    # Normalizar valores tipo "None"
    if s.lower() in {"none", "null"}:
        return []
    # Caso JSON válido
    try:
        val = json.loads(s)
        if isinstance(val, list):
            return [str(x).strip() for x in val if str(x).strip()]
        if isinstance(val, str) and val.strip():
            return [val.strip()]
    except Exception:
        pass
    # Caso repr de lista Python
    try:
        val = ast.literal_eval(s)
        if isinstance(val, list):
            return [str(x).strip() for x in val if str(x).strip()]
        if isinstance(val, tuple):
            return [str(x).strip() for x in list(val) if str(x).strip()]
        if isinstance(val, str) and val.strip():
            return [val.strip()]
    except Exception:
        # Fallback: tratar como string plano
        return [s]
    # Fallback final
    return []


def list_unique_locations(parsed_csv_path, col="locations", sort_by="freq_desc"):
    """
    Count the distinct locations found in one column of a parsed CSV file.

    Each cell of ``col`` is expanded with ``_parse_locations_cell``; internal
    whitespace of every location is collapsed to single spaces before
    counting.

    Args:
        parsed_csv_path: Path of the CSV file to read (UTF-8, BOM tolerated).
        col: Name of the column holding the locations.
        sort_by: ``'alpha'`` sorts case-insensitively by name; any other
            value sorts by descending count, then case-insensitively by name.

    Returns:
        A DataFrame with columns ``[location, count]``. The summary is also
        printed. On any failure an error dict is printed and an empty
        DataFrame with the same columns is returned.
    """
    try:
        if not os.path.exists(parsed_csv_path):
            raise FileNotFoundError("File not found.")
        # "utf-8-sig" strips a leading BOM if the file has one.
        frame = pd.read_csv(parsed_csv_path, encoding="utf-8-sig", keep_default_na=True)
        if col not in frame.columns:
            raise ValueError(f"Column '{col}' not found.")

        tally = Counter()
        for raw_cell in frame[col].tolist():
            # Collapse runs of whitespace so spacing variants count together.
            collapsed = (
                " ".join(str(entry).split())
                for entry in _parse_locations_cell(raw_cell)
            )
            tally.update(name for name in collapsed if name)

        if sort_by == "alpha":
            ranked = sorted(tally.items(), key=lambda pair: pair[0].lower())
        else:
            ranked = sorted(
                tally.items(), key=lambda pair: (-pair[1], pair[0].lower())
            )

        # Compact report on stdout.
        print(f"Unique locations: {len(ranked)}")
        for name, occurrences in ranked:
            print(f"{occurrences:4d}  {name}")

        return pd.DataFrame(ranked, columns=["location", "count"])
    except Exception as exc:
        failure = {
            "error": True,
            "message": "Failed to list unique locations. Check CSV path and column.",
            "detail": str(exc),
        }
        print(failure)
        return pd.DataFrame(columns=["location", "count"])



# Absolute path of the parsed contacts CSV to analyse (machine-specific).
PATH = "/Users/emin/InmobiliariaJBC/Scrappers/Ego/Data/contacts_today_parsed.csv"
# Print the location counts, most frequent first. The returned DataFrame is
# left as the trailing expression; NOTE(review): presumably so the notebook
# displays it as the cell result - confirm.
list_unique_locations(PATH, col="locations", sort_by="freq_desc")

Unique locations: 65
 292  Eixample
 267  Centre - Zona Alta
 244  Santa Rosa
 229  Centre - Zona Alta (Alcoy / Alcoi)
 208  Zona Nord
  78  Alcoy / Alcoi (Alicante)
  68  Eixample (Alcoy / Alcoi)
  64  Batoi
  53  Cocentaina
  45  Alicante (Spain)
  35  Muro de Alcoy
  28  Santa Rosa (Alcoy / Alcoi)
  28  Zona Nord (Alcoy / Alcoi)
  18  Benimarfull
  16  Gaianes
  12  Alcocer de Planes
  12  L'Alqueria d'Asnar
  11  Batoi (Alcoy / Alcoi)
  11  Cocentaina (Alicante)
   9  Benilloba
   9  Gorga
   8  Benillup
   8  España
   7  Agres
   6  Planes
   5  Beniarrés
   5  Gaianes (Alicante)
   5  Muro de Alcoy (Alicante)
   3  Quatretondeta
   2  Alcoleja
   2  Almudaina
   2  Benifallim
   2  Benimassot
   2  Facheca
   2  Famorca
   2  Tollos
   1  Alcocer de Planes (Alicante)
   1  Alfafara (Alicante)
   1  Banyeres de Mariola
   1  Benasau
   1  Beniarrés (Alicante)
   1  Benilloba (Alicante)
   1  Benimantell (Alicante)
   1  Benimarfull (Alicante)
   1  Cabanes y Las Fuentes (Villena)

Unnamed: 0,location,count
0,Eixample,292
1,Centre - Zona Alta,267
2,Santa Rosa,244
3,Centre - Zona Alta (Alcoy / Alcoi),229
4,Zona Nord,208
...,...,...
60,Penàguila (Alicante),1
61,Qatar,1
62,San Francisco (Villena),1
63,Sol y Luz (San Vicente del Raspeig / Sant Vice...,1
