# In [6]:
import csv
from math import radians, sin, cos, sqrt, atan2

def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    The distance is computed with the haversine formula on a sphere of
    radius 6371 km.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance between the two points in kilometers, as a float.
    """
    R = 6371.0  # Earth radius in kilometers
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)

    a = sin(dlat / 2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2)**2
    # Mathematically 0 <= a <= 1, but rounding can push it a hair outside that
    # range for (near-)antipodal points, which would make sqrt(1 - a) raise
    # "ValueError: math domain error". Clamp before taking the roots.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c

def safe_int(val, default=0):
    """Convert *val* to an int, returning *default* when that is impossible.

    Besides plain integer strings, float-formatted values such as
    ``"37977000.0"`` are accepted and truncated toward zero.

    Args:
        val: The value to convert (typically a string read from a CSV cell).
        default: Value returned when *val* cannot be interpreted as a number.

    Returns:
        The integer value of *val*, or *default* for missing (``None``),
        empty, non-numeric, or non-finite input.
    """
    try:
        return int(val)
    except (TypeError, ValueError):
        # TypeError covers None; ValueError covers text int() cannot parse.
        # Fall through and retry as a float below.
        pass

    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")); ValueError: "nan" or junk text.
        return default

# Read every data row of the source CSV into memory; each row becomes a dict
# keyed by the column names taken from the file's header line.
cities = []

with open('Data/Cities/worldcities.csv', 'r', encoding='utf-8') as file:
    cities.extend(csv.DictReader(file))

# Greedy spatial thinning: visit cities from most to least populous and keep a
# city only if no previously kept city lies within 100 km of it.
filtered_cities = []
checked_cities = set()  # ids that were either kept or suppressed by a kept neighbour

# Parse every coordinate pair once up front instead of on each inner-loop pass.
city_coords = [(c['id'], float(c['lat']), float(c['lng'])) for c in cities]

for city in sorted(cities, key=lambda x: safe_int(x['population']), reverse=True):
    if city['id'] in checked_cities:
        continue

    checked_cities.add(city['id'])
    filtered_cities.append(city)

    city_lat = float(city['lat'])
    city_lng = float(city['lng'])

    for other_id, other_lat, other_lng in city_coords:
        # Already kept or suppressed (this also covers `city` itself, whose id
        # was added just above), so its distance cannot change the outcome.
        if other_id in checked_cities:
            continue

        distance = haversine_distance(city_lat, city_lng, other_lat, other_lng)

        if distance < 100:  # 100 km threshold
            checked_cities.add(other_id)

# Save the surviving cities to a new CSV, with a header row followed by one
# line per city, using the column order listed in `fieldnames`.
with open('filtered_cities.csv', 'w', newline='', encoding='utf-8') as file:
    fieldnames = [
        "city", "city_ascii", "lat", "lng", "country", "iso2", "iso3",
        "admin_name", "capital", "population", "id",
    ]
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(filtered_cities)
