In [1]:
import json
from pathlib import Path

import pandas as pd
from numpy import nan

In [2]:
# Directory name of every rail line to process.
lines = [
    "cape_flats_line",
    "central_line",
    "combined_northern_line",
    "northern_line",
    "shuttle_fish_hoek_simons_town",
    "southern_line",
]

# Schedule file names that are looked up for each entry in `lines`.
files = [f"{day_type}.txt" for day_type in ("Mon-Fri", "Saturday")]

In [3]:
def get_movement(departure_station: str, arrival_station: str, outbound_ordered_stations: list[str]) -> str:
    """Classify a trip by the relative position of its two stations.

    Station names are compared case-insensitively against
    ``outbound_ordered_stations``.

    Args:
        departure_station: Name of the station the trip leaves from.
        arrival_station: Name of the station the trip arrives at.
        outbound_ordered_stations: Station names in outbound order.

    Returns:
        ``"outbound"`` when the arrival station comes later in the list than
        the departure station, ``"inbound"`` when it comes earlier, and
        ``"stationary"`` when both resolve to the same position.

    Raises:
        ValueError: If either station name is not in the list.
    """
    lowered = [name.lower() for name in outbound_ordered_stations]
    start = lowered.index(departure_station.lower())
    end = lowered.index(arrival_station.lower())

    if start == end:
        return "stationary"
    return "outbound" if start < end else "inbound"

In [4]:
# For every line and schedule file, build per-station arrival tables for the
# inbound and outbound directions and write each table as JSON and CSV.
for line_dir in lines:
    # The station list depends only on the line, so read it once per line and
    # close the file promptly. It is passed to get_movement as the
    # outbound-ordered station list.
    with open(Path(line_dir, "stations.txt"), "r") as f:
        # Blank entries (e.g. from a trailing newline) are not stations.
        stations = [name for name in f.read().split("\n") if name]

    for file in files:
        with open(Path(line_dir, file), "r") as f:
            data = [line.rstrip("\n") for line in f]

        # Parse the raw text into one record per "Train No: <n>" header.
        # The two lines after the header are the departure and the arrival;
        # on each, the last five characters are the time (presumably HH:MM)
        # and everything before the single separator character is the station.
        clean = []
        for i, line in enumerate(data):
            if not line.startswith("Train"):
                continue
            departure, arrival = data[i + 1], data[i + 2]
            clean.append({
                "train": int(line.replace("Train No: ", "")),
                "departure_station": departure[:-6],
                "departure_time": departure[-5:],
                "arrival_station": arrival[:-6],
                "arrival_time": arrival[-5:],
            })

        # Unique train numbers in order of first appearance.
        trains = list(dict.fromkeys(item["train"] for item in clean))

        # Classify every record once. For each (arrival station, train) pair
        # the first non-stationary record wins, so later duplicates are ignored.
        first_arrival = {}
        for item in clean:
            movement = get_movement(item["departure_station"], item["arrival_station"], stations)
            if movement == "stationary":
                continue
            first_arrival.setdefault(
                (item["arrival_station"], item["train"]),
                (movement, item["arrival_time"]),
            )

        # Prepare output: station -> {train number -> arrival time}.
        inbound = {station: {} for station in stations}
        outbound = {station: {} for station in stations}
        tables = {"inbound": inbound, "outbound": outbound}

        for station in stations:
            for train_no in trains:
                hit = first_arrival.get((station, train_no))
                if hit is None:
                    # The train never arrives here: blank in both directions.
                    inbound[station][train_no] = ""
                    outbound[station][train_no] = ""
                else:
                    movement, arrival_time = hit
                    tables[movement][station][train_no] = arrival_time

        # Path.stem removes the extension; str.rstrip(".txt") would strip any
        # trailing '.', 't' or 'x' characters instead of the suffix.
        schedule = Path(file).stem

        for direction, table in tables.items():
            # Write real JSON (train numbers become string keys).
            with open(Path(line_dir, f"{schedule}_{direction}.json"), "w") as f:
                json.dump(table, f)

            # Trains with no arrival at any station are dropped; after the
            # transpose, rows are stations and columns are train numbers.
            csv_out = pd.DataFrame(table).replace("", nan).dropna(how="all").transpose()
            if direction == "inbound":
                # Only the inbound table has its station rows reversed.
                csv_out = csv_out.iloc[::-1]
            csv_out.to_csv(Path(f"{line_dir}_{schedule}_{direction}.csv"))

  csv_out = pd.DataFrame(inbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(outbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(inbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(outbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(inbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(outbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(inbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(outbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(inbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(outbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(inbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFrame(outbound).replace("", nan).dropna(how='all').transpose()
  csv_out = pd.DataFra