# Create one folder per city name inside data/processed

In [70]:
import os
import re
from typing import List, Optional

In [71]:
# Relative locations of the raw input directory and of the processed-data
# root. Both are resolved against the current working directory further down.
RAW_DATA_DIR = "../../data/raw/instances"
PROCESSED_DATA_DIR = "../../data/processed"

In [72]:
# Current working directory of the process, used as the base for the relative
# data paths (presumably the folder the notebook lives in -- confirm).
notebook_dir = os.getcwd()

In [73]:
# Absolute, normalised versions of the two data directories; abspath collapses
# the ".." segments of the relative constants.
raw_data_path = os.path.abspath(os.path.join(notebook_dir, RAW_DATA_DIR))
processed_data_path = os.path.abspath(os.path.join(notebook_dir, PROCESSED_DATA_DIR))

In [74]:
# Matches names of the form <digits><letters><digits>.txt; group 1 is the
# run of ASCII letters between the two digit runs.
_CITY_FILE_PATTERN = re.compile(r"^\d+([A-Za-z]+)\d+\.txt$")


def extract_city_name(file_name: str) -> Optional[str]:
    """Return the city name embedded in a file name, or None.

    A file name is accepted only when it consists of one or more digits,
    one or more ASCII letters, one or more digits and the ``.txt``
    extension, with nothing before or after.

    Args:
        file_name: Bare file name (no directory part).

    Returns:
        The letter run between the two digit runs, or ``None`` when the
        name does not have the expected shape.
    """
    # fullmatch rather than match: with match(), the trailing "$" would
    # still accept a name that ends in a newline character.
    match = _CITY_FILE_PATTERN.fullmatch(file_name)
    if match is None:
        return None
    return match.group(1)

In [75]:
def export_city_names(path: str) -> List[str]:
    """Collect the city names found in the ``.txt`` file names of a directory.

    Args:
        path: Directory whose entries are inspected (not recursive).

    Returns:
        One city name per entry that ends in ``.txt`` and for which
        ``extract_city_name`` returns a non-empty value, in the order
        ``os.listdir`` yields the entries. Duplicates are kept.
    """
    txt_entries = (entry for entry in os.listdir(path) if entry.endswith(".txt"))
    extracted = (extract_city_name(file_name=entry) for entry in txt_entries)
    # Drop entries whose name did not yield a city (None or empty string).
    return [name for name in extracted if name]

In [76]:
def return_unique_cities(cities: List[str]) -> List[str]:
    """Return the distinct city names, keeping first-seen order.

    Args:
        cities: City names, possibly containing duplicates.

    Returns:
        A new list with every name from ``cities`` exactly once, ordered by
        first occurrence. An empty input gives an empty list.
    """
    # dict keys are unique and keep insertion order, so the result is the
    # same on every run; the order of list(set(...)) over strings is not.
    return list(dict.fromkeys(cities))

In [77]:
def create_folders_for_cities(
    cities: List[str], base_dir: Optional[str] = None
) -> None:
    """Create one directory per distinct city name under a base directory.

    Args:
        cities: City names; duplicates are collapsed with
            ``return_unique_cities`` before any directory is created.
        base_dir: Directory in which the city folders are created. When
            omitted, the module-level ``processed_data_path`` is used.

    Raises:
        ValueError: If ``cities`` is empty.
    """
    if not cities:
        raise ValueError("Cities list cannot be empty!")
    # Look the default up at call time so the global is only required when
    # the caller does not supply an explicit base directory.
    target_root = processed_data_path if base_dir is None else base_dir
    for city in return_unique_cities(cities):
        # exist_ok=True: an already existing folder is not an error, so the
        # function can be run again over the same data.
        os.makedirs(os.path.join(target_root, city), exist_ok=True)

In [78]:
# Collect the city names from the raw file names, then create one folder per
# city. create_folders_for_cities raises ValueError if no city name was found.
cities = export_city_names(raw_data_path)
create_folders_for_cities(cities)