In [10]:
import numpy as np
import pandas as pd
import os

In [11]:
# Every input CSV is expected in the `data` folder under the current working directory.
data_dir = os.path.join(os.getcwd(), 'data')

# NOTE: despite the `_dir` suffix, each of these names holds the path of a CSV file.
fl_costs_dir, tr_costs_dir, fl_time_dir, tr_time_dir = (
    os.path.join(data_dir, csv_name)
    for csv_name in (
        'flight_costs.csv',
        'train_costs.csv',
        'flight_time.csv',
        'train_time.csv',
    )
)

In [12]:
def get_lower_half_matrix(file_dir, fill_value: float = 1e9) -> np.ndarray:
    """Load a header-less CSV as a NumPy array, replacing missing cells.

    No triangle extraction happens here: the whole table is returned, and
    every cell pandas parsed as missing (NaN) is replaced by ``fill_value``.

    Args:
        file_dir: Anything ``pandas.read_csv`` accepts as its first argument.
            Despite the name, the callers in this notebook pass the path of
            a CSV *file*, not a directory.
        fill_value: Value substituted for every missing cell. Defaults to
            ``1e9``, the sentinel this function has always used.

    Returns:
        The table's values as a NumPy array with missing cells replaced.
    """
    table = pd.read_csv(file_dir, header=None)
    # Non-inplace fillna: builds the filled copy in one expression instead of
    # mutating the freshly read frame.
    return table.fillna(fill_value).to_numpy()


In [13]:
# Load the flight-cost table; blank CSV cells come back as the 1e9 sentinel.
flight_costs_lower_half = get_lower_half_matrix(fl_costs_dir)

In [14]:
# Inspect the loaded matrix (cell output shown below).
flight_costs_lower_half

array([[0.00e+00, 1.00e+09, 1.00e+09, 1.00e+09],
       [1.23e+02, 0.00e+00, 1.00e+09, 1.00e+09],
       [1.00e+09, 3.21e+02, 0.00e+00, 1.00e+09],
       [2.40e+01, 1.00e+09, 3.10e+01, 0.00e+00]])

In [15]:
def complete_symmetric_matrix(lower_half: np.ndarray, fill_value: float = 0) -> np.ndarray:
    """Build a full symmetric matrix from the lower triangle of ``lower_half``.

    Entries on and below the main diagonal are taken from ``lower_half``
    unchanged; every entry above the diagonal is set to its mirror image
    below the diagonal. Whatever ``lower_half`` holds above the diagonal is
    ignored.

    Args:
        lower_half: Square 2-D array whose lower triangle (diagonal included)
            holds the data.
        fill_value: Accepted for backward compatibility only. Every cell of
            the result is determined by the lower triangle, so this value
            has no effect. (Previously a non-zero value was added to every
            off-diagonal entry, corrupting the data.)

    Returns:
        A new array with the same shape and dtype as ``lower_half``. The
        input is not modified.

    Raises:
        ValueError: If ``lower_half`` is not a square 2-D array.
    """
    lower_half = np.asarray(lower_half)
    if lower_half.ndim != 2 or lower_half.shape[0] != lower_half.shape[1]:
        raise ValueError(
            f"lower_half must be a square 2-D array, got shape {lower_half.shape}"
        )

    # Start from a copy so the diagonal and lower triangle are already in place.
    symmetric_matrix = lower_half.copy()

    # Indices strictly below the diagonal (k=-1); swapping row/column addresses
    # the mirrored cell above the diagonal. Pure assignment, no arithmetic, so
    # sentinels such as 1e9 or inf are carried over exactly.
    rows, cols = np.tril_indices_from(lower_half, k=-1)
    symmetric_matrix[cols, rows] = lower_half[rows, cols]

    return symmetric_matrix


def preprocess_data(
    cities: list,
    flight_costs_lower_half: np.ndarray,
    train_costs_lower_half: np.ndarray,
    flight_time_lower_half: np.ndarray,
    train_time_lower_half: np.ndarray,
    fill_value: float = 1e9,
) -> tuple:
    """Symmetrise the four lower-half matrices and wrap each in a DataFrame.

    Args:
        cities: Labels used as both the row index and the column names of
            every returned frame.
        flight_costs_lower_half: Lower-half matrix of flight costs.
        train_costs_lower_half: Lower-half matrix of train costs.
        flight_time_lower_half: Lower-half matrix of flight times.
        train_time_lower_half: Lower-half matrix of train times.
        fill_value: Forwarded unchanged to ``complete_symmetric_matrix`` for
            each of the four matrices.

    Returns:
        A 4-tuple of DataFrames in this order: flight costs, train costs,
        flight times, train times.
    """
    lower_halves = (
        flight_costs_lower_half,
        train_costs_lower_half,
        flight_time_lower_half,
        train_time_lower_half,
    )

    # First pass: complete every matrix; second pass: label them. Keeping the
    # passes separate preserves the order in which any failure would surface.
    full_matrices = [
        complete_symmetric_matrix(half, fill_value) for half in lower_halves
    ]
    labelled = [
        pd.DataFrame(matrix, columns=cities, index=cities)
        for matrix in full_matrices
    ]

    return tuple(labelled)


In [16]:
# Mirror the lower triangle across the diagonal, using the default fill_value (0).
complete_symmetric_matrix(flight_costs_lower_half)

array([[0.00e+00, 1.23e+02, 1.00e+09, 2.40e+01],
       [1.23e+02, 0.00e+00, 3.21e+02, 1.00e+09],
       [1.00e+09, 3.21e+02, 0.00e+00, 3.10e+01],
       [2.40e+01, 1.00e+09, 3.10e+01, 0.00e+00]])