# Imports

In [None]:
from geopy.geocoders import Nominatim
import json
import pandas as pd
from geopy.geocoders import Nominatim
import time
import numpy as np

## Testing the Nominatim Geocoder

In [2]:
# Create the Nominatim geocoder client used by every geocoding call in this notebook.
# The user_agent string (with a contact address) is what identifies this client to the service.
locator = Nominatim(user_agent="Test Geocoder, contact:ivanangelov05@gmail.com")

In [3]:
# Geocode one known sample address to check that the locator works.
location = locator.geocode("Rasho Dimitrov, Pernik, Bulgaria")

In [4]:
# Print the coordinate fields of the geocoded sample location.
# NOTE(review): the "Section Lon" label is misleading - `location.point` is the whole
# point (the recorded output shows "42 36m 50.337s N, 23 5m 38.936s E"), not a longitude,
# and `location.point[0]` prints the same value as `location.latitude`.
print(f"""
      Latitude: {location.latitude}
      Longitude: {location.longitude}
      Altitude: {location.altitude}
      Section Lon: {location.point}
      Section Lat: {location.point[0]}
      """)


      Latitude: 42.6139825
      Longitude: 23.0941489
      Altitude: 0.0
      Section Lon: 42 36m 50.337s N, 23 5m 38.936s E
      Section Lat: 42.6139825
      


# Load Data & Fetch Latitude and Longitude

In [5]:
# Read the UTF-8 encoded offices file and parse its JSON content into `data`.
with open("dsk_offices_parsed.json", "r", encoding="utf-8") as offices_file:
    data = json.loads(offices_file.read())

In [None]:
# Fetch lat and lon for every office entry.
# Geocoding is best-effort: an entry that cannot be resolved keeps lat/lon = None,
# but every failure is reported so that gaps in the data do not go unnoticed.
results = []
for entry in data["entries"]:
    # "city, street" form gives the geocoder the city context for the street address.
    full_address = f'{entry["city"]}, {entry["address_line"]}'
    lat, lon = None, None
    try:
        location = locator.geocode(full_address)
    except Exception as e:
        # A single failed request must not abort the whole run, but it must be visible.
        print(f'Geocoding failed for otp_id={entry["otp_id"]} ({full_address}): {e!r}')
    else:
        if location:
            lat, lon = location.latitude, location.longitude
        else:
            # geocode() returned nothing usable for this address.
            print(f'No geocoding match for otp_id={entry["otp_id"]} ({full_address})')

    results.append({
        "otp_id": entry["otp_id"],
        "office_name": entry["office_name"],
        "city": entry["city"],
        "address": entry["address_line"],
        "lat": lat,
        "lon": lon
    })

    time.sleep(1)  # avoid rate limits

In [20]:
# Build a DataFrame from the geocoded office records and preview the first rows.
df = pd.DataFrame(results)
display(df.head())

Unnamed: 0,otp_id,office_name,city,address,lat,lon
0,315,ВЛАДИСЛАВ,Варна,Варненчик 92,43.212944,27.905547
1,337,ПРЕСЛАВ,Варна,Преслав 69,43.203059,27.913274
2,2404,КАВАРНА,Каварна,Добротица 25,43.431666,28.337607
3,704,СЕВЛИЕВО,Севлиево,Свобода 9,43.056967,24.972563
4,401,Търново ВЕЛИКО ТЪРНОВО,Велико,Цар Освободител 3,43.125522,25.689763


# Get User location

In [None]:
# Free-text address of the user, written in the same "city, street number" form
# that is used for the office addresses.
user_loc = "София, Александър Стамболийски 101" # Example

In [None]:
# Geocode the user's address with the same Nominatim locator used for the offices.
user_loc_coordinates = locator.geocode(user_loc) # Get Lat and Lon for the user's address

In [None]:
# Fail early with a clear message when the geocoder found no match for the user's
# address (geocode() returns None in that case) instead of an opaque AttributeError.
if user_loc_coordinates is None:
    raise ValueError(f"Could not geocode the user address: {user_loc!r}")

# Reference point (in degrees) that every office is compared against.
lat_u, lon_u = user_loc_coordinates.latitude, user_loc_coordinates.longitude

print(f"""
        User Coordinates:
        Latitude: {lat_u}
        Longitude: {lon_u}
""")


        User Coordinates:
        Latitude: 42.6981898
        Longitude: 23.308529



In [None]:
# Number of rows per street address (city not taken into account).
counts = df["address"].value_counts()
# Keep the rows whose address count differs from one, i.e. addresses shared by several offices.
shared_address_mask = df["address"].map(counts).ne(1)
df.loc[shared_address_mask]

Unnamed: 0,otp_id,office_name,city,address,lat,lon
3,704,СЕВЛИЕВО,Севлиево,Свобода 9,43.056967,24.972563
28,2601,ХАСКОВО,Хасково,Свобода 9,41.933275,25.557991


In [None]:
# Combine city and street into one "city, street" string: Nominatim resolves the
# correct place more reliably when the city is part of the query.
city_and_street = df["city"] + ", " + df["address"]
df["full_address"] = city_and_street
df.head(2)

Unnamed: 0,otp_id,office_name,city,address,lat,lon,full_address
0,315,ВЛАДИСЛАВ,Варна,Варненчик 92,43.212944,27.905547,"Варна, Варненчик 92"
1,337,ПРЕСЛАВ,Варна,Преслав 69,43.203059,27.913274,"Варна, Преслав 69"


# Find Nearest ATM
1. Write algorithm for nearest lat/lon

### Schema:
1. Compute deltas:
dlat = lat_i - lat_u
dlon = lon_i - lon_u

2. Haversine core (all angles must be converted from degrees to radians first):
a = sin²(dlat/2) + cos(lat_u) * cos(lat_i) * sin²(dlon/2)

3. Central angle:
c = 2 * atan2(√a, √(1−a))
4. Distance:
d = R * c where R ≈ 6_371_000 meters (or 6371 km).

In [None]:
# These two lists are created here but never filled in this cell.
delta_lat_list = []
delta_lon_list = []

# Maps each DataFrame index label to (lat, lon, lat - lat_u, lon - lon_u):
# the candidate's coordinates plus its offset from the user, all in degrees.
id_lat_lon_dict = {
    row_label: (office_lat, office_lon, office_lat - lat_u, office_lon - lon_u)
    for row_label, office_lat, office_lon in zip(df.index, df["lat"], df["lon"])
}

In [14]:
# Sanity check: latitude (tuple position 0) stored for the row with index 0.
id_lat_lon_dict[0][0]

43.2129444

### Haversine score

In [15]:
# User position converted from degrees to radians once, outside the loop.
lat_u_rad = np.deg2rad(lat_u)
lon_u_rad = np.deg2rad(lon_u)

# For every candidate compute the haversine term
#   a = sin^2(dlat / 2) + cos(lat_u) * cos(lat_i) * sin^2(dlon / 2)
# Only the first two tuple fields (lat, lon in degrees) are needed here.
haversine_scores = []
for office_lat, office_lon, *_ in id_lat_lon_dict.values():
    office_lat_rad = np.deg2rad(office_lat)
    office_lon_rad = np.deg2rad(office_lon)
    half_dlat = (office_lat_rad - lat_u_rad) / 2
    half_dlon = (office_lon_rad - lon_u_rad) / 2
    lat_term = np.sin(half_dlat) ** 2
    lon_term = np.cos(lat_u_rad) * np.cos(office_lat_rad) * np.sin(half_dlon) ** 2
    haversine_scores.append(lat_term + lon_term)
    


In [16]:
# Inspect the haversine term computed for the first candidate.
haversine_scores[0]

np.float64(0.0008817282914375221)

### Central Angle

In [17]:
# Convert each haversine term `a` into the central angle (radians) between the user
# and the candidate: c = 2 * atan2(sqrt(a), sqrt(1 - a)).
# np.arctan2 is used because the np.atan2 alias only exists in NumPy >= 2.0.
central_angle_scores = []

for haversine_score in haversine_scores:
    # Clip to [0, 1] so floating-point rounding cannot make sqrt(1 - a) NaN.
    a = np.clip(haversine_score, 0.0, 1.0)
    cntrl_ang = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    central_angle_scores.append(cntrl_ang)

In [18]:
# One score is produced per DataFrame row, so compare against the row count.
# Counting unique addresses would fail spuriously when two offices share an
# address or when an address is missing (nunique() ignores NaN).
assert len(central_angle_scores) == len(df), \
    "There must be a score for each candidate!"

In [19]:
# Earth radius in meters; radius * central angle (radians) gives the distance in meters.
R = 6_371_000

# Pair every central angle with the DataFrame index label it was computed for
# (the keys of id_lat_lon_dict, in the same order the scores were produced) rather than
# with a positional counter, so closest_idx stays valid for a non-default index.
# Candidates without coordinates yield NaN and are skipped: NaN compares false with
# everything and would make the sort order unreliable.
distances = [
    (idx, R * score)
    for idx, score in zip(id_lat_lon_dict, central_angle_scores)
    if not np.isnan(score)
]
if not distances:
    raise ValueError("No candidate has valid coordinates; cannot determine the nearest one.")

# Ascending by distance, so the first element is the nearest candidate.
distances.sort(key=lambda x: x[1])
closest_idx, closest_distance = distances[0]

print(closest_idx, closest_distance)


36 73.3660482270946
