In [1]:
from src.utils.coordinates_util import get_coordinates_for_city, haversine_distance
from src.scrapers.scrapers import BasicScraper
from src.types.types import Coordinates, URL, CSSSelector
from typing import Optional, List


import folium
import pandas as pd

In [2]:
def assert_gps_coordinates_equal(
    coord1: Optional[Coordinates],
    coord2: Optional[Coordinates],
    tolerance: float = 1e-2,
) -> None:
    """Assert that two GPS coordinates agree within ``tolerance``.

    Latitude and longitude are compared independently through their absolute
    difference.

    Args:
        coord1: First coordinate; must expose ``latitude`` and ``longitude``.
        coord2: Second coordinate, same requirements as ``coord1``.
        tolerance: Largest accepted absolute difference per axis, in the same
            unit as the coordinate values. The bound is inclusive so that
            ``tolerance=0`` accepts identical coordinates.

    Raises:
        AssertionError: If either coordinate is ``None`` or if an axis
            differs by more than ``tolerance``.
    """
    # A lookup that found nothing hands back None; report that explicitly
    # instead of failing below with an unrelated AttributeError.
    assert coord1 is not None, "First coordinate is missing (None)"
    assert coord2 is not None, "Second coordinate is missing (None)"

    assert (
        abs(coord1.latitude - coord2.latitude) <= tolerance
    ), f"Latitude difference exceeds tolerance: {coord1.latitude} vs {coord2.latitude}"
    assert (
        abs(coord1.longitude - coord2.longitude) <= tolerance
    ), f"Longitude difference exceeds tolerance: {coord1.longitude} vs {coord2.longitude}"


# Smoke test for the geocoder: the position looked up for Trenčín has to
# land within the default tolerance of a hard-coded reference point.
actual_coordinates: Coordinates = get_coordinates_for_city("Trenčín")
expected_coordinates: Coordinates = Coordinates(48.89452, 18.04436)

assert_gps_coordinates_equal(
    coord1=actual_coordinates, coord2=expected_coordinates
)

In [3]:
# Full postal address including the leading "Ul." abbreviation: for this
# spelling the lookup reports "Coordinates not found" and displays no value.
get_coordinates_for_city("Ul. gen. M. R. Štefánika 426, 911 01 Trenčín")

Coordinates not found for Ul. gen. M. R. Štefánika 426, 911 01 Trenčín


In [4]:
# Same address as in the previous cell but without the leading "Ul.":
# this spelling resolves to a coordinate pair.
get_coordinates_for_city("gen. M. R. Štefánika 426, 911 01 Trenčín")

(48.8908655, 18.05589508548387)

In [5]:
# Haversine distance between two cities that are looked up by name.
city1: str = "Bratislava"
city2: str = "Košice"

# The lookup hands back None when a place cannot be resolved, so the
# annotations are Optional and the guard tests for None explicitly instead
# of relying on the truthiness of a coordinate object.
coord1: Optional[Coordinates] = get_coordinates_for_city(city1)
coord2: Optional[Coordinates] = get_coordinates_for_city(city2)

if coord1 is None or coord2 is None:
    print("Unable to get coordinates for one or both cities.")
else:
    distance = haversine_distance(coord1, coord2)
    print(f"The distance between {city1} and {city2} is {distance:.2f} kilometers.")

The distance between Bratislava and Košice is 309.44 kilometers.


In [6]:
# Load the per-municipality population table; the expected row count acts
# as a completeness check on the CSV file.
df_cities: pd.DataFrame = pd.read_csv("data/obyvatelia_na_obec.csv")

assert len(df_cities) == 2927, "Number of municipalities doesn't match"
df_cities.head(n=7)

Unnamed: 0,Kód,Územná jednotka,Spolu,muži (abs.),muži (%),ženy (abs.),ženy (%)
0,SK0101528595,Bratislava - mestská časť Staré Mesto,46080,22210,48.2,23870,51.8
1,SK0102529311,Bratislava - mestská časť Podunajské Biskupice,23464,11193,47.7,12271,52.3
2,SK0102529320,Bratislava - mestská časť Ružinov,81004,37575,46.39,43429,53.61
3,SK0102529338,Bratislava - mestská časť Vrakuňa,20711,9945,48.02,10766,51.98
4,SK0103529346,Bratislava - mestská časť Nové Mesto,44458,20981,47.19,23477,52.81
5,SK0103529354,Bratislava - mestská časť Rača,25733,12717,49.42,13016,50.58
6,SK0103529362,Bratislava - mestská časť Vajnory,6079,3056,50.27,3023,49.73


In [7]:
# Scrape the Martinus shop list. One selector picks the <h3> heading of each
# card (the town), the other the first <p> of each card (the address line).
martinus_website: URL = "https://www.martinus.sk/knihkupectva"

martinus_cities_path: CSSSelector = "div.card__content.mt-none h3"
martinus_streets_path: CSSSelector = "div.card__content.mt-none p:first-of-type"

martinus = BasicScraper(martinus_website)

martinus_cities: List[str] = martinus.scrape(martinus_cities_path)
martinus_streets: List[str] = martinus.scrape(martinus_streets_path)

# Town name: the text before the first "-" when the heading has one,
# otherwise the text before the first "(".
martinus_cities_clean: List[str] = [
    heading.split("-" if "-" in heading else "(")[0].strip()
    for heading in martinus_cities
]

# Street: the second comma-separated field when the paragraph contains a
# comma, otherwise the whole paragraph.
martinus_streets_clean: List[str] = [
    (paragraph.split(",")[1] if "," in paragraph else paragraph).strip()
    for paragraph in martinus_streets
]

# Both selectors must have matched the same number of cards.
assert len(martinus_streets_clean) == len(
    martinus_cities_clean
), "Number of streets does not match number of cities"

# Hard-coded shop count; has to be revisited whenever the page changes.
assert (
    31 == len(martinus_streets_clean)
), "Scraped number of shops does not match the real number as of 7.1.2024"

In [8]:
# Pair every town with its street as (city, street) tuples, then list them.
martinus_adresses = list(zip(martinus_cities_clean, martinus_streets_clean))

for adress in martinus_adresses:
    print(adress)

('Bratislava', 'Obchodná 26')
('Bratislava', 'Staré Grunty 24')
('Bratislava', 'Autobusová stanica Mlynské Nivy')
('Banská Bystrica', 'Ul. 29 augusta 37')
('Lučenec', 'Námestie republiky 5994/32')
('Zvolen', 'OC Klokan - Obchodná 10325/21')
('Banská Štiavnica', 'Radničné námestie 19')
('Revúca', 'Muránska ulica 1337/16')
('Rimavská Sobota', 'Francisciho 2')
('Košice', 'Toryská 5')
('Košice', 'Hlavná 111')
('Trebišov', 'M. R. Štefánika 2329')
('Levice', 'Ľ. Štúra 1A')
('Nitra', 'Akademická 1/A')
('Nitra', 'Napervillská 4837/5')
('Poprad', 'Námestie svätého Egídia 3290/124')
('Prešov', 'Námestie legionárov 1')
('Humenné', 'Nám. slobody 62')
('Kežmarok', 'Hviezdoslavova 10')
('Sabinov', 'Námestie slobody 32')
('Svidník', 'Sovietskych hrdinov 165/62')
('Považská Bystrica', 'Centrum 8')
('Ilava', 'Moyzesova 1871/123')
('Myjava', 'M.R.Štefánika 929')
('Nové Mesto nad Váhom', 'Weisseho 15/2615')
('Trnava', 'Dolné Bašty 8833/14')
('Liptovský Mikuláš', 'Kamenné pole 4449/3')
('Martin', 'Námesti

In [9]:
# Build the frame and name its columns in one step. Passing ``columns=`` to
# the constructor also works when the address list is empty, whereas
# assigning two names to a zero-column frame afterwards raises a
# length-mismatch ValueError.
df_cities_w_libraries = pd.DataFrame(martinus_adresses, columns=["City", "Street"])
df_cities_w_libraries.head()

Unnamed: 0,City,Street
0,Bratislava,Obchodná 26
1,Bratislava,Staré Grunty 24
2,Bratislava,Autobusová stanica Mlynské Nivy
3,Banská Bystrica,Ul. 29 augusta 37
4,Lučenec,Námestie republiky 5994/32


In [10]:
def get_coordinates_for_dataframe(df: pd.DataFrame):
    """Geocode every row of ``df`` and store the result in ``"Coordinates"``.

    Each row is looked up with the query string ``"<Street> <City>"``.

    Note:
        ``df`` is modified in place (the ``"Coordinates"`` column is added or
        overwritten) and the same object is returned.

    Args:
        df: Frame with ``"Street"`` and ``"City"`` columns.

    Returns:
        A ``(df, missing_coordinates)`` tuple where ``missing_coordinates``
        is the number of rows whose lookup produced no coordinates, as a
        plain ``int``.
    """
    addresses = [
        f"{street} {city}" for street, city in zip(df["Street"], df["City"])
    ]
    coordinates = [get_coordinates_for_city(address) for address in addresses]

    # Build the column explicitly: a row-wise ``df.apply`` on an empty frame
    # returns a DataFrame, which cannot be assigned to a single column.
    # ``dtype=object`` keeps every lookup result as one cell value.
    df["Coordinates"] = pd.Series(coordinates, index=df.index, dtype=object)

    missing_coordinates = int(df["Coordinates"].isna().sum())

    return df, missing_coordinates

In [11]:
# First geocoding pass over the scraped addresses. The helper hands back the
# frame together with the number of rows that got no coordinates.
df_cities_w_libraries, missing_coordinates = get_coordinates_for_dataframe(
    df=df_cities_w_libraries
)

print(f"Number of missing coordinates: {missing_coordinates}")

df_cities_w_libraries.head(n=5)

Coordinates not found for Ul. 29 augusta 37 Banská Bystrica
Coordinates not found for OC Klokan - Obchodná 10325/21 Zvolen
Coordinates not found for Muránska ulica 1337/16 Revúca
Coordinates not found for Stred 157- Srdce Turzovky Turzovka
Number of missing coordinates: 4


Unnamed: 0,City,Street,Coordinates
0,Bratislava,Obchodná 26,"(48.14718, 17.11001)"
1,Bratislava,Staré Grunty 24,"(48.1581106, 17.069674)"
2,Bratislava,Autobusová stanica Mlynské Nivy,"(48.1468327, 17.128482)"
3,Banská Bystrica,Ul. 29 augusta 37,
4,Lučenec,Námestie republiky 5994/32,"(48.3307838, 19.6611366)"


In [12]:
# Strip the address fragments of the four rows the first geocoding pass
# could not resolve. The substitutions run in the listed order.
# NOTE(review): dropping "ulica" leaves a doubled space in the street text.
df_cities_w_libraries["Street"] = (
    df_cities_w_libraries["Street"]
    .str.replace(r"\bUl\.\s", "", regex=True)  # leading "Ul. " abbreviation
    .str.replace(r"\bOC\sKlokan\s-\s", "", regex=True)  # "OC Klokan - " prefix
    .str.replace(r"\bulica\b", "", regex=True)  # the standalone word "ulica"
    .str.replace(r"-\sSrdce Turzovky", "", regex=True)  # "- Srdce Turzovky" suffix
)

In [13]:
# Second geocoding pass, run after the street names were cleaned up in the
# previous cell; every row is looked up again.
df_cities_w_libraries, missing_coordinates = get_coordinates_for_dataframe(
    df=df_cities_w_libraries
)

print(f"Number of missing coordinates: {missing_coordinates}")

df_cities_w_libraries.head(n=5)

Number of missing coordinates: 0


Unnamed: 0,City,Street,Coordinates
0,Bratislava,Obchodná 26,"(48.14718, 17.11001)"
1,Bratislava,Staré Grunty 24,"(48.1581106, 17.069674)"
2,Bratislava,Autobusová stanica Mlynské Nivy,"(48.1468327, 17.128482)"
3,Banská Bystrica,29 augusta 37,"(48.7380316, 19.1619753)"
4,Lučenec,Námestie republiky 5994/32,"(48.3307838, 19.6611366)"


In [14]:
def add_marker_to_map(coordinates_list, color, map_obj):
    """Place one ``folium.Marker`` on ``map_obj`` for every coordinate.

    Args:
        coordinates_list: Iterable of objects exposing ``latitude`` and
            ``longitude`` attributes.
        color: Colour name handed to ``folium.Icon``.
        map_obj: Object the markers are attached to via ``Marker.add_to``.

    Returns:
        The last marker that was created, or ``None`` when
        ``coordinates_list`` is empty.

    Raises:
        ValueError: If an entry is ``None`` or a marker cannot be created.
            Markers added before the failing entry stay on the map.
    """
    # Initialised up front so that an empty input returns None instead of
    # tripping over an unbound local after the loop.
    last_marker = None

    try:
        for coordinates in coordinates_list:
            if coordinates is None:
                raise ValueError("Coordinates are None. Marker not added.")

            last_marker = folium.Marker(
                location=[coordinates.latitude, coordinates.longitude],
                icon=folium.Icon(color=color),
            )
            last_marker.add_to(map_obj)
    except Exception as e:
        # Chain explicitly so the root cause stays attached to the error.
        raise ValueError(f"Error adding marker: {e}") from e

    return last_marker

In [15]:
# Base map, then one red pin per geocoded shop address.
map_slovakia = folium.Map(zoom_start=8, location=[48, 20])

add_marker_to_map(
    coordinates_list=df_cities_w_libraries["Coordinates"],
    color="red",
    map_obj=map_slovakia,
)

# Inline display of the map is left disabled:
# map_slovakia

<folium.map.Marker at 0x1e82747a0d0>

![Slovakia Map](data/martinus.png)