In [49]:
import pandas as pd
import json
import os

from dotenv import load_dotenv
load_dotenv()

from openai import AzureOpenAI
from pydantic import BaseModel, HttpUrl


In [None]:
df = pd.read_csv("raw_data/Business_Licenses_20250621.csv") 
df.head()

Unnamed: 0,APN,RecordID,BusDesc,B1_PER_SUB_TYPE,DBA,NAICS,Tax_Code,Employee_Num,Bus_Own_Type,B1_BUSINESS_NAME,...,B1_ADDRESS2,B1_CITY,B1_STATE,B1_ZIP,B1_CONTACT_TYPE,B1_FULL_ADDRESS,B1_SITUS_CITY,B1_SITUS_STATE,B1_SITUS_ZIP,Business_Location
0,055 187800400,BL-032324,RESTAURANT,Retail Trade,KIPS RESTAURANT,722110,R,14,Corporation,KIPS RESTAURANT,...,,BERKELEY,CA,947041610,Business Owner,2439 DURANT AVE,BERKELEY,CA,94704.0,
1,057 205900600,BL-018844,JAPANESE FOOD & DRINKS,Retail Trade,KINDA SAKE BAR,722513 - Limited-Service Restaurants,R,16,Corporation,KINDA SAKE BAR,...,,EL CERRITO,CA,94530-2811,Business Owner,1941 UNIVERSITY Ave,BERKELEY,CA,94704.0,
2,057 205300302,BL-021115,RESTAURANT,Retail Trade,VIETNAM HOUSE,722513 - Limited-Service Restaurants,R,2,LLC,VIETNAM HOUSE,...,,ALAMEDA,CA,94501-4410,Business Owner,1986 SHATTUCK Ave,BERKELEY,CA,94708.0,
3,056 194600604,BL-048399,RESTAURANT,Retail Trade,"VIK'S CHAAT & MARKET, LLC",722211,R,26,LLC,"VIK'S CHAAT & MARKET, LLC",...,,BERKELEY,CA,94710-2402,Business Owner,2390 FOURTH ST,BERKELEY,CA,94710.0,
4,052 153100900,BL-054096,ARCHITECT,Professional SemiProfessional,DONAHUE RICHARD S,541310 - Architectural Services,P,1,Sole Ownership,DONAHUE RICHARD S,...,,BERKELEY,CA,94703,Business Owner,1743 ALCATRAZ AVE,BERKELEY,CA,94703.0,


In [52]:
# build the schema for businesses
from pydantic import BaseModel, HttpUrl, Field
from typing import List, Dict, Optional


class Hours(BaseModel):
    mon_sun: str = Field(..., min_length=1)


class Vibe(BaseModel):
    crowd: str
    atmosphere: str
    events: List[str]


class Reviews(BaseModel):
    yelp_rating: float
    restaurantguru_rating: float
    birdeye_rating: float
    common_feedback: List[str]
    sample_quotes: List[str]


class SocialMedia(BaseModel):
    instagram: str
    latest_event_post: str


class Business(BaseModel):
    name: str
    address: str
    phone: str
    website: HttpUrl
    hours: Hours
    established: Optional[str]
    type: str
    menu_highlights: List[str]
    vibe: Vibe
    reviews: Reviews
    parking: str
    payment: List[str]
    wifi: str
    delivery: str
    social_media: SocialMedia


business_info_schema = {
    "type": "function",
    "function": {
        "name": "get_business_info",
        "description": "Structured business metadata for businesses in Berkeley, CA.",
        "parameters": Business.model_json_schema()
    }
}

In [None]:
shortened_df = df[["BusDesc", "B1_BUSINESS_NAME", "B1_FULL_ADDRESS"]]

endpoint = os.getenv("ENDPOINT_URL", "https://2025-ai-hackberkeley.openai.azure.com/")
deployment = os.getenv("DEPLOYMENT_NAME", "o4-mini")
subscription_key = os.getenv("AZURE_OPENAI_API_KEY")

client = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=subscription_key,
    api_version="2024-12-01-preview",  # Use a valid API version
)

def get_business_info(business_name):
    response = client.chat.completions.create(
                model=deployment,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a search agent, and your task is to collect more information based on a given information of a business. use web search. access reviews about the business if you can. access big chunk of texts regarding what they serve or who they serve for. include things such as vibes about the place that cannot be easily found through traditional searh engines. The user will provide the business information, and you will limit your answer to a json data format with clear data output regarding any information you can find.",
                    },
                    {
                        "role": "user",
                        "content": f"{business_name}",
                    },
                ],
                tools=[business_info_schema]
            )
    
    raw_output = response.choices[0].message.content
    cleaned = raw_output.strip().removeprefix("```json").removesuffix("```").strip()

    return cleaned

In [53]:
for index, row in shortened_df[:5].iterrows():
    business_query = row['B1_BUSINESS_NAME'] + " " + row['B1_FULL_ADDRESS']
    print(f"Processing {business_query}...")
    try:
        response = get_business_info(business_query)
        print(f"Response for {business_query}: {response}")
    except Exception as e:
        print(f"Error processing {business_query}: {e}")

    shortened_df.at[index, 'Business Detailed Data'] = response

Processing KIPS RESTAURANT 2439 DURANT AVE...
Response for KIPS RESTAURANT 2439 DURANT AVE: {
  "name": "Kip’s Market & Café",
  "address": {
    "street": "2439 Durant Ave",
    "city": "Berkeley",
    "state": "CA",
    "zip": "94704"
  },
  "contact": {
    "phone": "(510) 883-3355",
    "website": "https://www.kipsmarket.com",
    "email": "info@kipsmarket.com"
  },
  "categories": [
    "Café",
    "Delicatessen",
    "Coffee & Tea",
    "Sandwiches",
    "Salads",
    "Beer/Wine"
  ],
  "hours": {
    "monday":    "08:00–21:00",
    "tuesday":   "08:00–21:00",
    "wednesday": "08:00–21:00",
    "thursday":  "08:00–21:00",
    "friday":    "08:00–22:00",
    "saturday":  "09:00–22:00",
    "sunday":    "09:00–20:00"
  },
  "price_range": "$",
  "ratings": {
    "google": {
      "rating": 4.3,
      "review_count": 245
    },
    "yelp": {
      "rating": 4.0,
      "review_count":  ninety
    }
  },
  "menu_highlights": [
    {
      "item": "Breakfast Burrito",
      "descripti

In [54]:
shortened_df

Unnamed: 0,BusDesc,B1_BUSINESS_NAME,B1_FULL_ADDRESS,Business Detailed Data
0,RESTAURANT,KIPS RESTAURANT,2439 DURANT AVE,"{\n ""name"": ""Kip’s Market & Café"",\n ""addres..."
1,JAPANESE FOOD & DRINKS,KINDA SAKE BAR,1941 UNIVERSITY Ave,"{\n ""name"": ""KINDA SAKE BAR"",\n ""address"": {..."
2,RESTAURANT,VIETNAM HOUSE,1986 SHATTUCK Ave,"{\n ""business_name"": ""Vietnam House"",\n ""add..."
3,RESTAURANT,"VIK'S CHAAT & MARKET, LLC",2390 FOURTH ST,"{\n ""business"": {\n ""name"": ""Vik’s Chaat &..."
4,ARCHITECT,DONAHUE RICHARD S,1743 ALCATRAZ AVE,"{\n ""business_name"": ""Donahue Richard S"",\n ..."
...,...,...,...,...
12330,WOMENS CLOTHING,C P SHADES,1842 FOURTH St,
12331,RES. RENTAL - 12 UNITS,ANDERSON PAULA G,2515 PIEDMONT AVE,
12332,ROOFING CONTRACTOR,K REED ROOFING,0 VARIOUS,
12333,RENTAL,AONIA HOLDINGS LLC,2517 REGENT St,
