--------------------------------------------------------------------------------------------------------------
### This notebook extracts and organizes the different kinds of features that contribute to understanding the vibe of a place
--------------------------------------------------------------------------------------------------------------

- Distance to Park (Distance from the current Lat, Lon to the nearest park) --> Source: OSM Library
- Road Density (Road infrastructure in an area, proxy for urbanization, kms/sq.km or meters/hectare) --> Source: OSM Library
- Road Length (The total length of all roads in a buffer zone, here 500m radius) --> Source: OSM Library
- Traffic level (Density of traffic at the corresponding time of day. Categorical - High, Low, Medium) --> Source: Tom Tom API
- Time of day (What time of day the location is popular) --> Source: Popular Times API
- IMD based average season temperature of the location (deg C) --> Source: IMD Python library
- OSM Image - a raster (.png file) for the location captured by Google using regular RGB cameras --> Source: Google Places API
- Sentinel image - an RGB satellite raster (.png ) derived from GEE at 10m for (Lat, Lon) (separate .ipynb notebook) --> Source: Copernicus Data Space Ecosystem
------------------------------------------------------------------------------------------------------------------

In [24]:
import pandas as pd
import requests
import time
import os
import geopandas as gpd
import osmnx as ox
import osmnx.features as oxf
from shapely.geometry import Point
from tqdm import tqdm
import random

In [1]:
# === CONFIG ===
import os  # imported here as well so this cell can run on its own

# API keys are secrets: read them from the environment instead of committing
# them to the notebook. Set GOOGLE_API_KEY and TOMTOM_API_KEY before running.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
TOMTOM_API_KEY = os.environ.get("TOMTOM_API_KEY", "")
for _key_name, _key_value in (("GOOGLE_API_KEY", GOOGLE_API_KEY),
                              ("TOMTOM_API_KEY", TOMTOM_API_KEY)):
    if not _key_value:
        print(f"[WARN] Environment variable {_key_name} is not set; "
              "the corresponding API calls will fail.")

DATA_CSV = "vibe_search_queries_extended.csv"   # input: one search query + vibe_class per row
OUTPUT_CSV = "vibe_full_features.csv"           # output: combined feature table
IMAGE_FOLDER = "datasets/new_images"            # root folder; one sub-folder per vibe class
MAX_PHOTO_WIDTH = 400                           # maxwidth sent to the Places Photo API (pixels)
DELAY_SEC = 1.5                                 # pause between rows in the main loop


In [None]:
# === Download Google Place Photo ===
def download_place_photo(photo_ref, folder, filename, timeout=30):
    """Download a Google Place photo by its photo reference and save it locally.

    Args:
        photo_ref (str): Google Places API photo reference string.
            If None or empty, the function returns None immediately.
        folder (str): Directory the photo is saved into. Created (including
            parents) if it does not exist.
        filename (str): File name for the downloaded image, including the
            extension (e.g. 'photo.jpg').
        timeout (float): Seconds to wait for the HTTP request before giving
            up, so one stalled request cannot hang the whole run.

    Returns:
        str or None: Full path of the saved file, or None if photo_ref was
        empty, the server did not answer with HTTP 200, or the download or
        file write failed.

    Notes:
        - Errors are printed, not raised (best-effort download).
        - Reads the module-level GOOGLE_API_KEY and MAX_PHOTO_WIDTH.
        - The body is streamed in 1024-byte chunks.
    """
    if not photo_ref:
        return None
    url = "https://maps.googleapis.com/maps/api/place/photo"
    params = {"photoreference": photo_ref, "maxwidth": MAX_PHOTO_WIDTH, "key": GOOGLE_API_KEY}
    try:
        # The target folder may not exist yet when this is called on its own.
        os.makedirs(folder, exist_ok=True)
        # The context manager releases the streamed connection on every path.
        with requests.get(url, params=params, stream=True, timeout=timeout) as r:
            if r.status_code != 200:
                print(f"[ERROR] Downloading photo: HTTP {r.status_code}")
                return None
            filepath = os.path.join(folder, filename)
            with open(filepath, "wb") as f:
                for chunk in r.iter_content(1024):
                    f.write(chunk)
            return filepath
    except Exception as e:
        print(f"[ERROR] Downloading photo: {e}")
    return None

In [None]:
# === Fetch Google Place Info ===
def get_place_info(query, timeout=30):
    """Look up a place with the Google Places text-search endpoint.

    Args:
        query (str): Free-text search string (e.g. business name or address).
        timeout (float): Seconds to wait for the HTTP request before giving up.

    Returns:
        dict or None: Details of the FIRST search result, or None if there
        were no results, the request failed, or the response was malformed.

        Dictionary structure:
        {
            "place_name": str,          # Name of the place (may be None)
            "formatted_address": str,   # Full formatted address (may be None)
            "lat": float,               # Latitude coordinate
            "lon": float,               # Longitude coordinate
            "place_id": str,            # Google's place identifier (may be None)
            "photo_reference": str,     # First photo's reference, None if no photos
            "types": str                # Comma-separated list of place types
        }

    Notes:
        - Errors are printed, not raised (best-effort lookup).
        - Reads the module-level GOOGLE_API_KEY.
        - A result without geometry/location is treated as malformed and
          yields None.
    """
    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    params = {"query": query, "key": GOOGLE_API_KEY}
    try:
        response = requests.get(url, params=params, timeout=timeout)
        data = response.json()

        # Surface API-level failures (e.g. REQUEST_DENIED, OVER_QUERY_LIMIT)
        # instead of silently treating them as "no results".
        status = data.get("status")
        if status not in (None, "OK", "ZERO_RESULTS"):
            print(f"[ERROR] Place info for '{query}': status={status} "
                  f"{data.get('error_message', '')}")

        results = data.get("results") or []
        if results:
            result = results[0]
            location = result["geometry"]["location"]
            # "photos" can be absent OR an empty list; neither should discard
            # the place, so only index into it when it has an entry.
            photos = result.get("photos") or []
            photo_reference = photos[0].get("photo_reference") if photos else None
            return {
                "place_name": result.get("name"),
                "formatted_address": result.get("formatted_address"),
                "lat": location["lat"],
                "lon": location["lng"],
                "place_id": result.get("place_id"),
                "photo_reference": photo_reference,
                "types": ", ".join(result.get("types", []))
            }
    except Exception as e:
        print(f"[ERROR] Place info for '{query}': {e}")
    return None

In [None]:
# === TomTom traffic level ===
def get_tomtom_traffic(lat, lon, timeout=30):
    """Classify the current traffic level at a point using TomTom flow data.

    Args:
        lat (float): Latitude coordinate
        lon (float): Longitude coordinate
        timeout (float): Seconds to wait for the HTTP request before giving up.

    Returns:
        str: Traffic level, one of:
            - 'high'    (current speed < 40% of free-flow speed)
            - 'medium'  (40% <= current speed < 70% of free-flow speed)
            - 'low'     (current speed >= 70% of free-flow speed)
            - 'unknown' if the request fails or the speeds are missing

    Note:
        Reads the module-level TOMTOM_API_KEY. Errors are printed, not raised.
    """
    try:
        url = "https://api.tomtom.com/traffic/services/4/flowSegmentData/relative0/10/json"
        params = {'point': f"{lat},{lon}", 'key': TOMTOM_API_KEY}
        response = requests.get(url, params=params, timeout=timeout)
        data = response.json().get('flowSegmentData', {})
        speed = data.get('currentSpeed')
        free_flow = data.get('freeFlowSpeed')
        # A current speed of 0 is a legitimate reading (standstill traffic),
        # so test it against None rather than truthiness. A missing or zero
        # free-flow speed gives no usable reference and stays 'unknown'.
        if speed is not None and free_flow:
            if speed < 0.4 * free_flow:
                return 'high'
            elif speed < 0.7 * free_flow:
                return 'medium'
            else:
                return 'low'
    except Exception as e:
        print(f"[ERROR] TomTom for ({lat},{lon}): {e}")
    return "unknown"

In [None]:
# === Google Popular Times ===
def get_populartimes_from_google(lat, lon, category="cafe", radius=0.01):
    """Fetch popular-times data for a nearby place of the given category.

    Args:
        lat (float): Center point latitude
        lon (float): Center point longitude
        category (str): Place type to search for (default: "cafe")
        radius (float): Half-size of the search box in degrees
            (default: 0.01, roughly 1 km)

    Returns:
        list or None: The 'populartimes' entry of the first returned place
        that has one, or None if no place has it or the lookup failed.

    Note:
        Requires the `populartimes` package and reads the module-level
        GOOGLE_API_KEY. Errors are printed, not raised.
    """
    # populartimes.get expects the two corners of the search box as separate
    # (lat, lng) tuples, not a single 4-tuple.
    lower_left = (lat - radius, lon - radius)
    upper_right = (lat + radius, lon + radius)
    try:
        import populartimes
        results = populartimes.get(
            GOOGLE_API_KEY, [category], lower_left, upper_right, n_threads=3
        )
        for r in results:
            if 'populartimes' in r:
                return r['populartimes']
    except Exception as e:
        print(f"[ERROR] Populartimes for ({lat},{lon}): {e}")
    return None

In [None]:

# === Create vibe folders ===
df = pd.read_csv(DATA_CSV)
vibe_classes = df["vibe_class"].unique()
for vibe in vibe_classes:
    os.makedirs(os.path.join(IMAGE_FOLDER, vibe), exist_ok=True)

# === OSM Features for Bangalore ===
city = "Bangalore, India"
proj_crs = "EPSG:32643"  # projected CRS used for all distance/length/area calculations
gdf_parks = oxf.features_from_place(city, tags={'leisure': 'park'}).to_crs(epsg=4326)
G = ox.graph_from_place(city, network_type='drive')
# graph_to_gdfs returns (nodes, edges); the road segments are the EDGES.
gdf_roads = ox.graph_to_gdfs(G, nodes=False, edges=True).to_crs(epsg=4326)
gdf_parks_proj = gdf_parks.to_crs(proj_crs)
gdf_roads_proj = gdf_roads.to_crs(proj_crs)

BUFFER_RADIUS_M = 500  # radius of the neighbourhood used for the road features

# === Main loop ===
features = []

for idx, row in tqdm(df.iterrows(), total=len(df)):
    query, vibe = row["query"], row["vibe_class"]
    info = get_place_info(query)
    if not info:
        continue

    lat, lon = info["lat"], info["lon"]
    folder = os.path.join(IMAGE_FOLDER, vibe)
    filename = f"{vibe}_{idx}.jpg"
    image_path = download_place_photo(info["photo_reference"], folder, filename)

    # Project the point once; every spatial feature below is computed in the
    # projected CRS so results are in the CRS's linear units, not degrees.
    point = Point(lon, lat)
    point_proj = gpd.GeoSeries([point], crs="EPSG:4326").to_crs(proj_crs).iloc[0]

    # Distance to nearest park
    dist_to_park = gdf_parks_proj.distance(point_proj).min() if len(gdf_parks_proj) > 0 else None

    # Road length and density inside the buffer
    buffer = point_proj.buffer(BUFFER_RADIUS_M)
    roads_near = gdf_roads_proj[gdf_roads_proj.intersects(buffer)]
    # Clip each segment to the buffer so only the part inside it is counted.
    road_length = roads_near.geometry.intersection(buffer).length.sum()
    # Density as km of road per sq. km of buffer area.
    road_density = (road_length / 1000.0) / (buffer.area / 1e6)

    # Traffic & popular times
    traffic_level = get_tomtom_traffic(lat, lon)
    pop_data = get_populartimes_from_google(lat, lon)
    # Average of the daily peak popularity values. Left as None when no data
    # is available: filling in a random label would fabricate data and mix
    # strings into a numeric column.
    peak_avg = None
    if pop_data:
        daily_peaks = [max(d['data']) for d in pop_data if d.get('data')]
        if daily_peaks:
            peak_avg = sum(daily_peaks) / len(daily_peaks)

    features.append({
        "query": query,
        "place_name": info["place_name"],
        "formatted_address": info["formatted_address"],
        "lat": lat,
        "lon": lon,
        "vibe_class": vibe,
        "place_id": info["place_id"],
        "photo_reference": info["photo_reference"],
        "types": info["types"],
        "image_path": image_path if image_path else "NA",
        "dist_to_park": dist_to_park,
        "road_density": road_density,
        "road_length": road_length,
        "traffic_level": traffic_level,
        "populartimes_peak_avg": peak_avg
    })

    # Pause between rows to stay within API rate limits.
    time.sleep(DELAY_SEC)

# === Save all combined features ===
pd.DataFrame(features).to_csv(OUTPUT_CSV, index=False)
print(f"All features saved to: {OUTPUT_CSV}")


-----------------------------------------------------------------------------------------------------
### Download IMD weather data (temperature) for each Lat, Lon
-----------------------------------------------------------------------------------------------------
- Hit IMD server using imdlib pip library 
- Needs start and end date and the Lat, Lon
- Obtains the temporal composite, avg seasonal temperature at that location
------------------------------------------------------------------------------------------------------

In [None]:
import pandas as pd
import imdlib as imd
import xarray as xr

# Step 1: Read CSV
IMD_INPUT_CSV = r"D:\vibe_place_mapper\vibe_full_features_with_scraped_new_updated.csv"
IMD_OUTPUT_CSV = r"D:\vibe_place_mapper\vibe_full_features_with_scraped_new_updated_temp.csv"
# Sentinel marking missing cells in the gridded data (the original code
# masked the float32 rendering of this value, 99.90000153).
IMD_MISSING_VALUE = 99.9

df = pd.read_csv(IMD_INPUT_CSV)  # must contain 'lat' and 'lon' columns
missing_cols = {"lat", "lon"} - set(df.columns)
if missing_cols:
    # Fail before the (slow) IMD download rather than after it.
    raise ValueError(f"Input CSV is missing required column(s): {sorted(missing_cols)}")

# Step 2: Get IMD data for tmin and tmax
start_dy = '2024-03-01'
end_dy = '2024-06-01'
file_dir = '../data'

var_data = {}
for var in ['tmin', 'tmax']:
    data = imd.get_real_data(var, start_dy, end_dy, file_dir)
    ds = data.get_xarray()
    # Mask the missing-value sentinel with a tolerance: exact float equality
    # depends on the array's dtype and silently stops matching if it changes.
    ds = ds.where(abs(ds[var] - IMD_MISSING_VALUE) > 1e-3)
    var_data[var] = ds[var]  # Save cleaned DataArray

# Step 3: Loop through CSV and compute tmean
avg_temps = []
for lat, lon in zip(df['lat'], df['lon']):
    # A row without coordinates cannot be located on the grid.
    if pd.isna(lat) or pd.isna(lon):
        avg_temps.append(float('nan'))
        continue

    # Get tmin and tmax time series at the grid cell nearest to this location
    tmin_point = var_data['tmin'].sel(lat=lat, lon=lon, method='nearest')
    tmax_point = var_data['tmax'].sel(lat=lat, lon=lon, method='nearest')

    # Daily mean temperature, then its average over the whole period
    tmean = (tmin_point + tmax_point) / 2
    mean_temp = tmean.mean().item()  # Convert to Python float
    avg_temps.append(mean_temp)

# Step 4: Save back to CSV
df['avg_temperature'] = avg_temps
df.to_csv(IMD_OUTPUT_CSV, index=False)
