In [None]:
import unicodedata
from typing import List, Set

import pandas as pd

def norm_city(s: str) -> str:
    """Build a comparison key for a city name.

    ``None`` yields the empty string. Any other value is converted with
    ``str()``, has surrounding whitespace removed, is NFKC-normalized and is
    finally case-folded, in that order.
    """
    if s is None:
        return ""
    trimmed = str(s).strip()
    # NFKC composes decomposed sequences (e.g. "S" + combining caron) so that
    # visually identical spellings end up with the same key.
    return unicodedata.normalize("NFKC", trimmed).casefold()

# Location of the full travel-time matrix, relative to the notebook.
input_csv = "../data/macedonia_travel_time_matrix.csv"

# The first CSV column supplies the row labels.
df_raw = pd.read_csv(input_csv, index_col=0)
df_raw.index.name = None  # clear any name attached to the row index

print(f"Shape: {df_raw.shape}")
df_raw.head()


Shape: (75, 75)


Unnamed: 0,Skopje,Kumanovo,Bitola,Prilep,Tetovo,Štip,Veles,Ohrid,Strumica,Gostivar,...,Zelenikovo,Petrovec,Želino,Star Dojran,Studeničani,Bogovinje,Ilinden,Lipkovo,Makedonska Kamenica,Resen
Skopje,0.0,37.7,167.5,120.0,41.3,61.4,58.8,180.4,125.8,63.5,...,36.5,22.2,36.7,150.4,26.9,52.6,20.1,41.6,102.3,196.1
Kumanovo,36.7,0.0,164.0,116.5,64.4,54.9,55.3,203.5,119.3,86.6,...,36.2,20.6,59.8,146.9,38.9,75.7,24.0,16.3,95.7,192.6
Bitola,157.2,152.9,0.0,52.9,175.9,152.4,122.5,80.9,189.2,153.6,...,144.5,136.5,179.1,180.8,149.9,168.6,146.6,165.7,193.2,36.8
Prilep,108.9,104.6,53.0,0.0,135.4,104.1,74.2,125.7,140.9,137.2,...,96.2,88.2,130.8,132.5,101.6,146.8,98.3,117.5,144.9,81.6
Tetovo,43.2,66.5,176.2,148.4,0.0,90.2,87.2,145.3,154.6,28.4,...,66.7,50.6,11.2,178.8,63.6,14.7,48.9,66.3,131.1,187.8


In [12]:
def get_all_nm_cities() -> List[str]:
    """Return the 34 city names used to filter the travel-time matrix.

    The spellings include diacritics (e.g. "Štip", "Kičevo"). A new list is
    built on every call, so callers may modify the result freely.

    Note: the ``List`` annotation is evaluated when this ``def`` runs, so
    ``typing.List`` must already be imported or the cell fails with
    ``NameError`` on a fresh kernel.
    """
    return [
        "Berovo", "Bitola", "Bogdanci", "Debar", "Delčevo", "Demir Hisar", "Demir Kapija",
        "Gevgelija", "Gostivar", "Kavadarci", "Kičevo", "Kočani", "Kratovo", "Kriva Palanka",
        "Kruševo", "Kumanovo", "Makedonska Kamenica", "Makedonski Brod", "Negotino", "Ohrid",
        "Pehčevo", "Prilep", "Probištip", "Radoviš", "Resen", "Skopje", "Struga", "Strumica",
        "Sveti Nikole", "Štip", "Tetovo", "Valandovo", "Veles", "Vinica",
    ]

cities = get_all_nm_cities()

# Normalized keys used for membership tests against the matrix labels.
# The ``Set`` annotation is evaluated at run time here, so ``typing.Set`` must
# be imported for this cell to run on a fresh kernel.
cities_norm: Set[str] = {norm_city(c) for c in cities}

# Normalization must not merge two distinct city names into a single key;
# otherwise the filter below could not tell them apart.
assert len(cities_norm) == len(cities), "normalized city names collide"

print("Cities loaded:", len(cities))
print(cities[:10], "...")


Cities loaded: 34
['Berovo', 'Bitola', 'Bogdanci', 'Debar', 'Delčevo', 'Demir Hisar', 'Demir Kapija', 'Gevgelija', 'Gostivar', 'Kavadarci'] ...


In [None]:
# Work on a copy so the loaded frame stays untouched when this cell is re-run.
df = df_raw.copy()

# Labels are compared as stripped strings on both axes.
df.index = df.index.astype(str).str.strip()
df.columns = df.columns.astype(str).str.strip()

# Keep only labels whose normalized form is one of the requested cities,
# preserving the order in which they appear in the matrix.
row_keep = [r for r in df.index if norm_city(r) in cities_norm]
col_keep = [c for c in df.columns if norm_city(c) in cities_norm]

# Report requested cities that the matrix does not contain instead of
# dropping them silently; the filter itself stays best-effort.
missing_rows = sorted(cities_norm - {norm_city(r) for r in row_keep})
missing_cols = sorted(cities_norm - {norm_city(c) for c in col_keep})
if missing_rows:
    print("WARNING: requested cities missing from rows:", missing_rows)
if missing_cols:
    print("WARNING: requested cities missing from columns:", missing_cols)

# Rows and columns are selected independently, so flag any mismatch between
# the two label sequences before the result is used as a matrix.
if row_keep != col_keep:
    print("WARNING: kept row labels and column labels differ")

df_filtered = df.loc[row_keep, col_keep].copy()

df_filtered.index.name = None

print("Filtered shape:", df_filtered.shape)
df_filtered.head()


Filtered shape: (34, 34)


Unnamed: 0,Skopje,Kumanovo,Bitola,Prilep,Tetovo,Štip,Veles,Ohrid,Strumica,Gostivar,...,Radoviš,Kruševo,Makedonski Brod,Demir Kapija,Bogdanci,Demir Hisar,Valandovo,Pehčevo,Makedonska Kamenica,Resen
Skopje,0.0,37.7,167.5,120.0,41.3,61.4,58.8,180.4,125.8,63.5,...,91.4,169.8,152.8,92.9,128.0,174.0,122.7,137.4,102.3,196.1
Kumanovo,36.7,0.0,164.0,116.5,64.4,54.9,55.3,203.5,119.3,86.6,...,84.9,166.3,160.8,89.5,124.5,170.5,119.2,130.9,95.7,192.6
Bitola,157.2,152.9,0.0,52.9,175.9,152.4,122.5,80.9,189.2,153.6,...,176.4,69.5,95.9,123.3,158.4,32.3,153.1,228.4,193.2,36.8
Prilep,108.9,104.6,53.0,0.0,135.4,104.1,74.2,125.7,140.9,137.2,...,128.1,50.9,45.4,75.0,110.1,59.5,104.8,180.1,144.9,81.6
Tetovo,43.2,66.5,176.2,148.4,0.0,90.2,87.2,145.3,154.6,28.4,...,120.2,158.6,117.7,121.3,156.4,144.1,151.1,166.2,131.1,187.8


In [None]:
# Destination file for the filtered matrix, relative to the notebook.
output_csv = "../data/macedonia_travel_time_matrix_all_cities_only.csv"

# Write the row labels as the first column, with an empty header cell above it.
df_filtered.to_csv(
    output_csv,
    index=True,
    index_label="",
)

# Echo the path as the cell result.
output_csv


'../data/macedonia_travel_time_matrix_all_cities_only.csv'