In [2]:
import pandas as pd
import numpy as np
import requests
import re
import osmnx as ox
import geopandas as gpd

# Load the mall records; each mall's polygon is stored as text in 'polygon_string'.
# NOTE(review): the 'Unnamed: 0' column in the displayed frame suggests the CSV was
# written with its index -- consider index_col=0 if that column is not needed.
df = pd.read_csv('data/duped_malls.csv')
df

Unnamed: 0,city_name,mall_name,developer_name,is_major_corp,polygon_string
0,Cotabato City,CityMall Cotabato,DoubleDragon Corporation,False,"POLYGON ((124.8350816 6.505341, 124.8354792 6...."
1,Guiguinto,Walter Mart Guiguinto,Walter Mart Inc.,False,"POLYGON ((120.8656377 14.8810593, 120.8656461 ..."
2,Plaridel,Walter Mart Plaridel,Walter Mart Inc.,False,"POLYGON ((120.8656377 14.8810593, 120.8656461 ..."
3,Catarman,Gaisano Grand Catarman,Gaisano Grand Malls,False,"POLYGON ((124.6387314 12.4884816, 124.6395283 ..."
4,Aringay,Primark Town Center Aringay,Primark Philippine Properties,False,"POLYGON ((120.3559675 16.3982112, 120.3559863 ..."
5,Iligan,Gaisano Mall Iligan,Gaisano,False,"POLYGON ((124.2408555 8.2304044, 124.2409586 8..."
6,Iligan,Gaisano Super Citi Mall Iligan,,False,"POLYGON ((124.2408555 8.2304044, 124.2409586 8..."
7,Cagayan de Oro,Puregold Cagayan de Oro,Puregold Price Club,False,"POLYGON ((124.6577807 8.4836775, 124.6581106 8..."
8,Iligan,Puregold Iligan,,False,"POLYGON ((124.6577807 8.4836775, 124.6581106 8..."
9,Cagayan de Oro,Ororama Supercenter Cogon,Ororama Supercenters Inc.,False,"POLYGON ((124.6486268 8.4774937, 124.6487484 8..."


In [3]:
# Report groups of malls that share exactly the same polygon string

# keep=False flags every member of a duplicated group, not just the repeats
shares_polygon = df.duplicated(subset='polygon_string', keep=False)
duplicates = df.loc[shares_polygon]

for _, paired_rows in duplicates.groupby('polygon_string'):
    paired_names = ', '.join(paired_rows['mall_name'].tolist())
    print(f"Paired: {paired_names}")


Paired: Primark Town Center Aringay, Primark Town Center Rosario
Paired: Walter Mart Guiguinto, Walter Mart Plaridel
Paired: Festive Walk Mall, Street of Festive Walk
Paired: Gaisano Grand Arcade Roxas, Gaisano Grand Roxas
Paired: Gaisano Mall Iligan, Gaisano Super Citi Mall Iligan
Paired: Gaisano Grand Catarman, Gaisano Grand Mall Ipil
Paired: Ororama Supercenter Cogon, Ororama Supercenter Carmen
Paired: Puregold Cagayan de Oro, Puregold Iligan
Paired: CityMall Cotabato, CityMall Koronadal
Paired: Gaisano Grand Mall Koronadal, Gaisano Grand Mall Polomolok


In [26]:
# Compare the two lookups on one mall: the osmnx geocoder vs. the raw Overpass query.
# NOTE(review): both functions are defined in cells further down this notebook, so this
# cell fails on a fresh kernel until those cells have been run.
print(get_polygon_points2('Walter Mart Guiguinto Bulacan'))
print()
print(get_polygon_points('Walter Mart Guiguinto Bulacan'))

POLYGON ((120.8656377 14.8810593, 120.8656461 14.8806136, 120.8662544 14.8806602, 120.8661548 14.8811077, 120.8656377 14.8810593))

None


In [None]:
# for empty polygons
# The malls CSV loaded at the top of this notebook names the column 'polygon_string',
# while frames written out by this notebook use 'polygon'. Indexing a column that is
# not there raises KeyError, so pick whichever one the current frame actually has.
polygon_col = 'polygon_string' if 'polygon_string' in df.columns else 'polygon'
filtered_df = df[df[polygon_col].isnull() | (df[polygon_col] == '')]

# Extract mall_name from the filtered DataFrame
emp_df = filtered_df['mall_name']

emp_df

In [None]:
# Copy the names of malls that still lack a polygon into a plain Python list
mall_names = [name for name in emp_df]

mall_names

In [4]:
# Build a "POLYGON ((...))" string from (lat, lon) pairs
def format_polygon_points(polygon_points):
    """Turn an iterable of ``(lat, lon)`` pairs into a ``POLYGON ((...))`` string.

    Each pair is written longitude first, then latitude, separated by a space;
    pairs are joined with ", " in the order they were given.
    """
    vertices = []
    for latitude, longitude in polygon_points:
        # Swap the order: the input is (lat, lon), the output is "lon lat"
        vertices.append(f"{longitude} {latitude}")
    ring = ", ".join(vertices)
    return "POLYGON ((" + ring + "))"
    
# Function to get polygon points from OpenStreetMap within the Philippines boundary
def get_polygon_points(place_name):
    """Look up a named OSM way inside the Philippines and return it as a polygon string.

    Parameters
    ----------
    place_name : str
        Value of the OSM ``name`` tag to search for (exact match).

    Returns
    -------
    str or None
        ``"POLYGON ((lon lat, ...))"`` built from the first returned way that
        carries geometry, or ``None`` when no such way is returned.

    Raises
    ------
    requests.RequestException
        On network failure, timeout, or a non-2xx response from Overpass.
    """
    overpass_url = "https://overpass-api.de/api/interpreter"
    # Escape backslashes and double quotes so the name cannot terminate the
    # quoted string inside the Overpass QL query.
    safe_name = place_name.replace("\\", "\\\\").replace('"', '\\"')
    # The previous query selected `area` elements, which never have type 'way',
    # so the function always returned None. Select ways inside the country area.
    overpass_query = f"""
    [out:json][timeout:60];
    // Define the boundary for the Philippines
    area["name"="Philippines"]->.ph;
    // Query for ways carrying the place name within the Philippines boundary
    way["name"="{safe_name}"](area.ph);
    // Output the geometry of the matching ways
    out geom;
    """
    # Without a client-side timeout, requests can wait indefinitely on a stalled server.
    response = requests.post(overpass_url, data={"data": overpass_query}, timeout=90)
    # Surface rate-limit / gateway errors as HTTP errors instead of JSON decode failures.
    response.raise_for_status()
    data = response.json()
    for element in data.get('elements', []):
        geometry = element.get('geometry')
        if element.get('type') == 'way' and geometry:
            # Extract polygon points
            polygon_points = [(node['lat'], node['lon']) for node in geometry]
            return format_polygon_points(polygon_points)
    return None

In [5]:
# Alternative functions to get polygons
def format_polygon_points2(polygon_points):
    """Render coordinate pairs as a ``POLYGON ((...))`` string without reordering.

    Each pair is written as "<first> <second>" and pairs are joined with ", "
    in the order given.
    """
    # NOTE(review): the caller in this notebook passes geometry.exterior.coords,
    # presumably (x, y) = (lon, lat) pairs -- confirm before feeding (lat, lon) data.
    pieces = []
    for first, second in polygon_points:
        pieces.append(f"{first} {second}")
    ring = ", ".join(pieces)
    return "POLYGON ((" + ring + "))"

def get_polygon_points2(mall_name):
    """Geocode ``mall_name`` with osmnx and return its outline as a polygon string.

    Returns the string produced by ``format_polygon_points2`` from the exterior
    coordinates of every geometry in the geocoding result. Returns an empty list
    (and prints a message) when the result is empty or when one of the handled
    errors is raised during the lookup.
    """
    try:
        gdf = ox.geocode_to_gdf(mall_name)
        if gdf.empty:
            print(f"Mall Name: {mall_name} Polygon_points: not found")
            return []
        # Geometries without an .exterior raise AttributeError, handled below
        ring_coords = [
            coord
            for geom in gdf.geometry
            for coord in geom.exterior.coords
        ]
        return format_polygon_points2(ring_coords)
    except (ox.geocoder.InsufficientResponseError, TypeError, AttributeError):
        print(f"Mall Name: {mall_name} Polygon_points: Polygon points not found again")
        return []

In [None]:
remaining = [] # Paste remaining values from mall_name list here

new = [] # list for Data Frame

for mall in remaining:
    # Look each mall up exactly once and reuse the result for both the record and
    # the printout; a failed lookup returns an empty list, stored here as None.
    poly = get_polygon_points2(mall) or None
    new.append([mall, poly])
    if poly:
        print(f"Mall Name: {mall}")
        print()
        print(poly)
        print()
        print()
    else:
        print(f"Mall Name: {mall} Polygon_points: not found")

# One row per requested mall; 'polygon' is None where no outline was found
new_df = pd.DataFrame(new, columns=['mall_name', 'polygon'])


In [None]:
# Display the lookup results (mall_name, polygon) built by the loop cell
new_df 

In [None]:
# Write the lookup results to a comma-separated UTF-8 file without the index column
new_df.to_csv(
    'left-over-calabarzon.csv',
    encoding='utf-8',
    index=False,
)