In [14]:
# %pip install rapidfuzz
# %pip install fuzzywuzzy

In [15]:
import requests
import pandas as pd
import time
from fuzzywuzzy import fuzz, process
import openai
from openai import OpenAI
import os

In [None]:

# --- Configuration ---------------------------------------------------------
# Credentials are taken from the environment when available so that real keys
# never need to be saved inside the notebook. The placeholder strings are kept
# as fallbacks (also used when the variable is set but empty).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or "Place openAi key here"
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or "Place google places api key here"

# Every (location, business type) pair is searched as "<type> in <location>".
LOCATIONS = ["Los Angeles"]
BUSINESS_TYPES = ["Restaurant"]

# Google Places web-service endpoints used by the search and details helpers.
TEXT_SEARCH_URL = "https://maps.googleapis.com/maps/api/place/textsearch/json"
PLACE_DETAILS_URL = "https://maps.googleapis.com/maps/api/place/details/json"


In [17]:
# Read the master catalogue of business types from disk. Rows with a missing
# "business_type" value are dropped before the column is turned into a list.
master_df = pd.read_csv("food_services_business_types.csv")
type_column = master_df["business_type"].dropna()
master_types = type_column.tolist()

# One "- <type>" bullet per line; this text is interpolated into the prompt
# built by classify_with_openai.
allowed_types_str = "\n".join(["- {}".format(t) for t in master_types])

In [18]:
def match_predicted_business_type(types_list, allowed_list=None):
    """Return the first allowed business type that exactly matches a given type.

    Matching ignores letter case and surrounding whitespace on the candidate
    types. Candidates are tried in order, so the first entry of ``types_list``
    that matches anything wins.

    Args:
        types_list: Iterable of type strings to look up. ``None`` or an empty
            iterable yields no match.
        allowed_list: Canonical business types to match against. Defaults to
            the module-level ``master_types`` when omitted.

    Returns:
        The canonical spelling from ``allowed_list`` of the matched type, or
        ``""`` when nothing matches.
    """
    if allowed_list is None:
        allowed_list = master_types

    # Build a lowercase -> canonical lookup once. setdefault keeps the first
    # spelling when two allowed entries differ only by case, which is what a
    # front-to-back linear scan would return.
    canonical_by_lower = {}
    for allowed in allowed_list:
        canonical_by_lower.setdefault(allowed.lower(), allowed)

    for candidate in types_list or []:
        match = canonical_by_lower.get(candidate.strip().lower())
        if match is not None:
            return match
    return ""

In [19]:
from rapidfuzz import process, fuzz

def fuzzy_match_business_type(types_list, allowed_list, threshold=70):
    """Pick the allowed business type that best fuzzy-matches any given type.

    Each allowed type is scored against every candidate type with
    ``fuzz.partial_ratio`` (lower-cased on both sides); the allowed type with
    the highest score wins, with ties going to the earliest entry of
    ``allowed_list``.

    Args:
        types_list: Iterable of type strings. ``None``, an empty iterable, or
            entries that are blank after stripping contribute nothing.
        allowed_list: Canonical business types to choose from.
        threshold: Minimum score (same scale as ``fuzz.partial_ratio``) the
            best match must reach to be accepted. Defaults to 70.

    Returns:
        The best-matching entry of ``allowed_list``, or ``""`` when there is
        nothing to compare or the best score is below ``threshold``.
    """
    # Blank candidates carry no information and must never produce a match.
    candidates = [t.strip().lower() for t in (types_list or []) if t and t.strip()]
    if not candidates:
        # Without this guard max() below would raise ValueError on an empty sequence.
        return ""

    best_match = ""
    best_score = 0
    for allowed in allowed_list:
        needle = allowed.lower()
        score = max(fuzz.partial_ratio(needle, candidate) for candidate in candidates)
        if score > best_score:
            best_match, best_score = allowed, score
    return best_match if best_score >= threshold else ""

In [20]:
# Module-level OpenAI client, authenticated with OPENAI_API_KEY; used by
# classify_with_openai for the chat-completion request.
client = OpenAI(api_key=OPENAI_API_KEY)

def _parse_classification_reply(content):
    """Extract ``(business_type, confidence)`` from a "Label: value" style reply.

    Labels are matched case-insensitively and surrounding markdown decoration
    (``*``, ``#``, ``-``) is ignored. If a label occurs on several lines the
    last occurrence wins. Missing fields, or ``None`` content, yield ``""``.
    """
    predicted = ""
    confidence = ""
    for line in (content or "").splitlines():
        label, sep, value = line.partition(":")
        if not sep:
            continue
        label = label.strip().strip("*#- ").lower()
        value = value.strip().strip("*").strip()
        if label.endswith("business type"):
            predicted = value
        elif label.endswith("confidence"):
            confidence = value
    return predicted, confidence


def classify_with_openai(business_name, api_types, model="gpt-4", temperature=0.2):
    """Ask an OpenAI chat model to classify a business into an allowed type.

    The prompt lists the business name, its Google Places types and the
    allowed types from ``allowed_types_str``, and asks for a single best match
    plus a confidence between 0 and 1.

    Args:
        business_name: Display name of the business.
        api_types: Iterable of type strings reported for the business.
        model: Chat model name passed to the API. Defaults to ``"gpt-4"``.
        temperature: Sampling temperature passed to the API. Defaults to 0.2.

    Returns:
        ``(predicted, confidence)`` as strings exactly as parsed from the
        reply; either may be ``""`` if the reply lacks that field. Returns
        ``("", "")`` if the request or response handling raises.
    """
    joined_types = ", ".join(api_types)
    prompt = f"""
You are a business classification assistant.

Here is a business:
- Name: {business_name}
- Google Places API Types: {joined_types}

You must classify it into one of the following known business types ONLY:
{allowed_types_str}

Choose the single most appropriate business type.
Also return your confidence score (from 0 to 1).

Format:
Business Type: <best match>
Confidence: <score>
"""
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature
        )
        # message.content may be None; the parser treats that as an empty reply.
        predicted, confidence = _parse_classification_reply(
            response.choices[0].message.content
        )

        print(f"Business :  {business_name}")
        print(f"Business types from api :  {joined_types}")
        print(f"✅ OpenAI Predicted: {predicted} ({confidence})")

        return predicted, confidence
    except Exception as e:
        # Best-effort fallback stage: report the failure and leave the row unclassified.
        print("❌ OpenAI Error:\n", str(e))
        return "", ""


In [21]:
def get_places_for_type_and_location(business_type, location, api_key, max_results=5):
    """Collect up to ``max_results`` Places Text Search hits for a query.

    The query sent is ``"<business_type> in <location>"``. Further pages are
    followed through ``next_page_token`` until enough results are collected or
    no more pages are offered.

    Args:
        business_type: Kind of business to search for.
        location: Free-text place name appended to the query.
        api_key: Google API key sent with each request.
        max_results: Upper bound on returned results. Values <= 0 return ``[]``
            without issuing any request.

    Returns:
        List of raw result dicts from the API, truncated to ``max_results``.

    Raises:
        Network errors and JSON-decoding errors raised by ``requests`` are not
        caught here and propagate to the caller.
    """
    max_token_retries = 3  # attempts to wait for a page token to become active
    token_delay_seconds = 2.1
    request_timeout_seconds = 10  # never hang the notebook on a stalled connection

    query = f"{business_type} in {location}"
    all_results = []
    params = {"query": query, "key": api_key}
    token_retries = 0

    while len(all_results) < max_results:
        response = requests.get(TEXT_SEARCH_URL, params=params, timeout=request_timeout_seconds)
        data = response.json()
        status = data.get("status")

        # Per the Places API docs a freshly issued page token answers
        # INVALID_REQUEST until it becomes active, so wait and retry a few
        # times instead of silently ending pagination.
        if (
            status == "INVALID_REQUEST"
            and "pagetoken" in params
            and token_retries < max_token_retries
        ):
            token_retries += 1
            time.sleep(token_delay_seconds)
            continue

        # Surface API-level failures (e.g. a denied key) instead of reporting
        # them as "0 results"; the payload carries the error details.
        if "results" not in data or status not in (None, "OK", "ZERO_RESULTS"):
            print("⚠️ No results found:", data)
            break

        token_retries = 0
        all_results.extend(data["results"])
        print(f"✅ Collected {len(all_results)} results so far...")

        next_page_token = data.get("next_page_token")
        if not next_page_token or len(all_results) >= max_results:
            break

        time.sleep(token_delay_seconds)  # Ensure token is active
        params = {"pagetoken": next_page_token, "key": api_key}

    return all_results[:max_results]


In [22]:
def get_place_details(place_id, api_key):
    """Look up the website and phone number of a place (best effort).

    Args:
        place_id: Google place identifier. An empty value short-circuits to
            ``("", "")`` without calling the API.
        api_key: Google API key sent with the request.

    Returns:
        ``(website, formatted_phone_number)``; each is ``""`` when the field
        is absent from the response or when the lookup fails for any reason.
    """
    if not place_id:
        # Nothing to look up; avoid spending a request on it.
        return "", ""

    params = {
        "place_id": place_id,
        "fields": "website,formatted_phone_number",
        "key": api_key
    }
    try:
        response = requests.get(PLACE_DETAILS_URL, params=params, timeout=10)
        result = response.json().get("result", {})
        return result.get("website", ""), result.get("formatted_phone_number", "")
    except Exception as e:
        # Enrichment is optional, so failures must not abort the run. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, and the cause is reported rather than hidden.
        print(f"⚠️ Place details lookup failed for {place_id}: {e}")
        return "", ""


In [23]:
def extract_info(results, business_type, location, api_key):
    """Flatten raw place results into one record dict per place.

    For every place the website and phone number are fetched through
    ``get_place_details``. ``business_type`` and ``location`` are accepted but
    not used by this function.

    Returns:
        A list of dicts with keys ``place_id``, ``name``, ``lat_lng``
        (``"<lat>, <lng>"``), ``phone_number``, ``website`` and ``types``
        (the place's types joined with ``", "``). Missing fields become ``""``.
    """
    def build_record(place):
        identifier = place.get("place_id", "")
        coords = place.get("geometry", {}).get("location", {})
        latitude = coords.get("lat", "")
        longitude = coords.get("lng", "")
        joined_types = ", ".join(place.get("types", []))

        site, phone_number = get_place_details(identifier, api_key)

        return {
            "place_id": identifier,
            "name": place.get("name", ""),
            "lat_lng": f"{latitude}, {longitude}",
            "phone_number": phone_number,
            "website": site,
            "types": joined_types
        }

    return [build_record(place) for place in results]


In [24]:
def _classify_place(name, types):
    """Classify one place via exact match, then fuzzy match, then OpenAI.

    Returns ``(predicted_type, confidence)``; confidence is ``"1"`` for an
    exact match, ``"0.7"`` for a fuzzy match, and whatever
    ``classify_with_openai`` reports otherwise.
    """
    # Stage 1: Exact match
    predicted = match_predicted_business_type(types)
    if predicted:
        return predicted, "1"

    # Stage 2: Fuzzy match (skipped when there are no types to compare)
    if types:
        predicted = fuzzy_match_business_type(types, master_types)
        if predicted:
            return predicted, "0.7"

    # Stage 3: OpenAI fallback
    return classify_with_openai(name, types)


def main():
    """Search, enrich, classify and export places for every configured query.

    Iterates over ``LOCATIONS`` x ``BUSINESS_TYPES``, gathers up to 20 places
    per pair, classifies each one, and writes the table to
    ``results_food_services.csv``. If no places were found, nothing is written.
    """
    output_path = "results_food_services.csv"
    final_data = []

    for location in LOCATIONS:
        for business_type in BUSINESS_TYPES:
            print(f"🔍 Searching for '{business_type}' in {location}")
            places = get_places_for_type_and_location(business_type, location, GOOGLE_API_KEY, max_results=20)
            final_data.extend(extract_info(places, business_type, location, GOOGLE_API_KEY))

    if not final_data:
        # An empty frame has no "types" column; bail out instead of raising KeyError.
        print("⚠️ No places found; nothing to classify or save.")
        return

    df = pd.DataFrame(final_data)
    # "types" arrives as a comma-joined string; split it back into clean
    # tokens (no stray leading spaces, no empty entries).
    df["types"] = df["types"].apply(
        lambda x: x if isinstance(x, list) else [t.strip() for t in str(x).split(",") if t.strip()]
    )

    classified = [_classify_place(row["name"], row["types"]) for _, row in df.iterrows()]
    df["predicted_business_type"] = [predicted for predicted, _ in classified]
    df["confidence"] = [confidence for _, confidence in classified]

    df.to_csv(output_path, index=False)
    # Report the file that was actually written.
    print(f"✅ Saved: {output_path}")

In [25]:
# Run the pipeline: search places, fetch details, classify, and write the CSV.
main()

🔍 Searching for 'Restaurant' in Los Angeles
✅ Collected 20 results so far...
✅ Saved: results.csv
