In [14]:
import json
import os
import random
import re

import requests

In [15]:
# Use regular expression to find the text between "Phường " and the next comma
def getWardFromAddress(address_string):
    match = re.search(r'Phường ([^,]*)(?=,)', address_string)
    match2 = re.search(r'Ward ([^,]*)(?=,)', address_string)
    match3 = re.search(r', (.*), Quận', address_string)

    if match:
        ward_info = match.group(1)
    elif match2:
        ward_info = match2.group(1)
    elif match3:
        ward_info = match3.group(1)
    else:
        ward_info = ''
        print("[Regex] Fail to find ward in ", address_string)
    return ward_info

# Use regular expression to find the text between "Quận " and the next comma
def getDistrictFromAddress(address_string):
    match = re.search(r'Quận ([^,]*)(?=,)', address_string)
    match2 = re.search(r'District ([^,]*)(?=,)', address_string)
    match3 = re.search(r'Phường [^,]*, ([^,]*)', address_string)

    if match:
        district = match.group(1)
    elif match2:
        district = match2.group(1)
    elif match3:
        district = match3.group(1)
    else:
        district = ''
        print("[Regex] Fail to find district in ", address_string)
    return district

In [16]:
adsFormat = ['Cổ động chính trị', 'Quảng cáo thương mại', 'Xã hội hoá']

typeOfLocation = [
                'Đất công/Công viên/Hành lang an toàn giao thông',
                'Đất tư nhân/Nhà ở riêng lẻ',
                'Trung tâm thương mại',
                'Chợ',
                'Cây xăng',
                'Nhà chờ xe buýt'
]

In [17]:
def getLocation(address_string):
    url = "https://places.googleapis.com/v1/places:searchText"
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": "AIzaSyAZP9odw7JOw7LqqIJXcfNxZIh4qxpEK6I",
        "X-Goog-FieldMask": "places.displayName,places.formattedAddress,places.location,places.addressComponents"
    }
    data = {
        "textQuery": address_string,
        "locationBias": {
            "circle": {
                "center": {
                    "latitude": 10.774669,
                    "longitude": 106.69247
                },
                "radius": 5000
            }
        },
        "languageCode": "en"
    }

    response = requests.post(url, headers=headers, data=json.dumps(data))
    response_json = json.loads(response.text)

    place = response_json['places'][0]
    print(place)
    output = {
        'title': place['displayName']['text'],        
        'lat': place['location']['latitude'],
        'lng': place['location']['longitude'],
        'address': place['formattedAddress'],
        'typeOfLocation': random.choice(typeOfLocation),
        'adsFormat': random.choice(adsFormat),
        'isPlanned': random.random() > 0.7,
        'image': '',
        'ward': '',
        'district': ''
    }

    for addr_component in place['addressComponents']:
        # Get the ward
        if 'administrative_area_level_3' in addr_component['types']:
            ward = addr_component['longText']
        else:
            print("[Google Maps API] Fail to find ward in ", place['formattedAddress'])
            ward = getWardFromAddress(place['formattedAddress'])
            
        # Get the district
        if 'administrative_area_level_2' in addr_component['types']:
            district = addr_component['longText']
        else:
            print("[Google Maps API] Fail to find district in ", place['formattedAddress'])
            district = getDistrictFromAddress(place['formattedAddress'])
    
    if (ward == '' or district == ''):
        return None

    output['ward'] = ward
    output['district'] = district

    return output

In [18]:
import csv

# Specify the file path
file_path = "dataMA.csv"

# Read the CSV data from the file
with open(file_path, "r", encoding='utf-8') as file:
    json_input = file.read().splitlines()

In [19]:


# Create an empty JSON array
json_output = []

# Iterate through the JSON array
for item in json_input:
    # Get the location
    location = getLocation(item)
    # Add the location to the JSON
    if location == None:
        print("Fail to get location of ", item)
        continue
    json_output.append(location)


{'formattedAddress': '231, 233 Đ. Lê Hồng Phong, Phường 4, Quận 5, Thành phố Hồ Chí Minh, Vietnam', 'addressComponents': [{'longText': '231', 'shortText': '231', 'types': ['subpremise'], 'languageCode': 'en'}, {'longText': '233', 'shortText': '233', 'types': ['street_number'], 'languageCode': 'en'}, {'longText': 'Đường Lê Hồng Phong', 'shortText': 'Đ. Lê Hồng Phong', 'types': ['route'], 'languageCode': 'vi'}, {'longText': 'Quận 5', 'shortText': 'Quận 5', 'types': ['administrative_area_level_2', 'political'], 'languageCode': 'vi'}, {'longText': 'Thành phố Hồ Chí Minh', 'shortText': 'Thành phố Hồ Chí Minh', 'types': ['administrative_area_level_1', 'political'], 'languageCode': 'vi'}, {'longText': 'Vietnam', 'shortText': 'VN', 'types': ['country', 'political'], 'languageCode': 'en'}], 'location': {'latitude': 10.7597268, 'longitude': 106.677128}, 'displayName': {'text': 'Công Ty CP xe khách Phương Trang', 'languageCode': 'vi'}}
[Google Maps API] Fail to find ward in  231, 233 Đ. Lê Hồng P

In [20]:
# Write the JSON data to the file
output_path = "full3.json"
with open(output_path, "w", encoding='utf-8') as file:
    json.dump(json_output, file, ensure_ascii=False)