In [None]:
import sys
import os

from joblib import load
import numpy as np

sys.path.append(r'/mnt/c/users/ty90rize/Sumo/tools') # path to sumolib
sys.path.append(r'/mnt/c/users/ty90rize/repos/roadmatching/labelling_tool') # path to labelling tool
import sumolib
import pandas as pd

In [None]:
# SUMO network files built from the OSM data and from the TomTom data.
path_tomtom_network = 'oktober_data/big_map/tomtom_sumo_data/tomtom_sumo.net.xml'
path_osm_network = 'oktober_data/big_map/osm_sumo_data/2023-11-27-15-41-55/osm.net.xml/osm_final_2024-02-25.net.xml'

# Directory holding the TomTom DBF files, and the one DBF file that is read.
tomtom_toi = 'fr7_0_11_30-12_30_6.dbf'
path_tomtom_db_files = 'oktober_data/big_map/tomtom_raw_data/jobs_3537419_results_FO-10-2023.shapefile'

# Full path of the DBF file that provides the traffic counts.
path_traffic_count = os.path.join(path_tomtom_db_files, tomtom_toi)

### Custom file

In [None]:
# Directory where the file with the similarity calculation is located.
folder_path = '/mnt/c/users/ty90rize/Repos/roadmatching/out_dir'
preprocessed_json = os.path.join(folder_path, 'preprocessed_df_bigmap_2023-12-13.json')
bigmap_df = pd.read_json(preprocessed_json)

In [None]:
# These four columns are ignored when deciding whether two rows are duplicates.
excluded_cols = {'coords_ls1', 'coords_ls2', 'ls1_orig', 'ls2_orig'}
subset_cols = [col for col in bigmap_df.columns if col not in excluded_cols]

# The first row of every duplicate group is kept (pandas' default keep='first').
bigmap_df = bigmap_df.drop_duplicates(subset=subset_cols)

In [None]:
# Inspect the frame after duplicate rows were dropped.
bigmap_df

## Apply roadmatching ML algorithm

In [None]:
# Path of the serialized model file that is passed to joblib's load() below.
model_filepath = 'DecisionTreeModel/dt_model_wo_strokes_231213_200151.joblib'

In [None]:
# Columns handed to the classifier's predict(), in this order.
feature_cols = ['sinuosity_sim', 'cosine_sim', 'hausdorff_mod']
X = bigmap_df.loc[:, feature_cols]

In [None]:
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code -- only load model files that come from a trusted source.
clf = load(model_filepath)

# bigmap_df is the result of a boolean selection, so writing a column into it
# in place triggers pandas' SettingWithCopyWarning. assign() returns a new
# frame carrying the extra 'predicted' column instead.
bigmap_df = bigmap_df.assign(predicted=clf.predict(X))
del X  # release the feature frame

In [None]:
# Reduce the frame to the id pair and its predicted label, then display it.
kept_cols = ['OSM', 'Tomtom', 'predicted']
bigmap_df = bigmap_df.loc[:, kept_cols]
bigmap_df

## If applicable: Overwrite manual corrections

In [None]:
# Applicable only if manually labelled instances exist that should overwrite potentially different predictions.

In [None]:
def build_ground_truth_df(relative_filedir):
    """Build one ground-truth frame from the CSV files in a directory.

    Every regular file located directly in ``relative_filedir`` is read with
    ``pd.read_csv`` and the frames are concatenated in sorted file-name order.
    Each row is marked as a match (``matches = 1``) and the ``OSM`` and
    ``Tomtom`` columns are cast to strings.

    Parameters
    ----------
    relative_filedir : str
        Directory containing the labelled CSV files.

    Returns
    -------
    pandas.DataFrame
        Concatenated frame with a fresh 0..n-1 index and a ``matches`` column.

    Raises
    ------
    ValueError
        If the directory contains no file to read.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the result reproducible between runs and machines.
    candidates = [
        os.path.join(relative_filedir, name)
        for name in sorted(os.listdir(relative_filedir))
    ]
    # Sub-directories (e.g. Jupyter's .ipynb_checkpoints) cannot be parsed by
    # read_csv, so only regular files are loaded.
    frames = [pd.read_csv(path) for path in candidates if os.path.isfile(path)]
    if not frames:
        raise ValueError('No files to read in {!r}'.format(relative_filedir))

    df = pd.concat(frames, ignore_index=True)
    df['matches'] = 1
    df['OSM'] = df['OSM'].astype('str')
    df['Tomtom'] = df['Tomtom'].astype('str')
    return df

In [None]:
# Manual labels; 'matches' is renamed to 'predicted', the label column name used in bigmap_df.
eval_gt = (
    build_ground_truth_df('labelling_tool/data_fin')  # correct path if necessary
    .rename(columns={'matches': 'predicted'})
)

In [None]:
# Remove every row whose OSM id also occurs in the manual labels.
# NOTE(review): eval_gt['OSM'] holds strings; assumes bigmap_df['OSM'] does too -- confirm.
has_manual_label = bigmap_df['OSM'].isin(eval_gt['OSM'])
bigmap_df = bigmap_df.loc[~has_manual_label]

In [None]:
# Inspect the predictions left after removing OSM ids that have a manual label.
bigmap_df

In [None]:
# Stack the manual labels under the predictions and renumber the rows 0..n-1.
bigmap_df = pd.concat([bigmap_df, eval_gt], ignore_index=True)
bigmap_df

## Continuing: keep only the matched pairs

In [None]:
# Keep only the pairs whose label is 1.
is_match = bigmap_df['predicted'].eq(1)
bigmap_df = bigmap_df.loc[is_match]

In [None]:
# Inspect the rows kept by the predicted == 1 filter.
bigmap_df

## Get road counts

In [None]:
%load_ext autoreload
%autoreload 2

from dbfread import DBF

In [None]:
# Read the DBF file once, then split every record into two parallel lists.
# NOTE(review): the list names say 'cs10' while the fields read are 'CS7_*'.
dbf_rows = [(rec['CS7_Id'], rec['CS7_Hits']) for rec in DBF(path_traffic_count)]
cs10_id = [row_id for row_id, _ in dbf_rows]
cs10_hits = [hits for _, hits in dbf_rows]

In [None]:
# One row per DBF record. The ids are shifted down by one and stored as strings
# (presumably to line up with the ids in bigmap_df['Tomtom'] -- confirm).
count_df = pd.DataFrame({'Tomtom_id': cs10_id, 'samples': cs10_hits}).assign(
    Tomtom_id=lambda frame: (frame['Tomtom_id'] - 1).astype('str')
)
count_df

In [None]:
# bigmap_df is a filtered selection at this point, so assigning a column into
# it in place triggers pandas' SettingWithCopyWarning. astype() with a column
# mapping returns a new frame instead. The ids are cast to strings so they can
# be merged with the string-typed count_df['Tomtom_id'].
bigmap_df = bigmap_df.astype({'Tomtom': 'str'})

In [None]:
# Left join: every matched pair is kept, with its sample count where the
# TomTom id is found in the DBF data (NaN otherwise).
count_df = bigmap_df.merge(
    count_df,
    how='left',
    left_on='Tomtom',
    right_on='Tomtom_id',
)
count_df

In [None]:
# Discard the pairs that have no sample count.
count_df = count_df.dropna(subset=['samples'])

### Remove inconsistencies

In [None]:
# Mean and median sample count per OSM id.
# The aggregations are given as an ordered list of names: this yields a
# deterministic column order ('mean', 'median') and avoids handing np.median to
# agg() as a callable, which recent pandas versions warn about. Aggregating the
# selected 'samples' column directly produces single-level column labels.
consistency_check = count_df.groupby('OSM')['samples'].agg(['mean', 'median'])

In [None]:
# Relative deviation of the median from the mean: |median - mean| / mean.
mean_samples = consistency_check['mean']
median_samples = consistency_check['median']
consistency_check['rel_diff'] = ((median_samples - mean_samples) / mean_samples).abs()

In [None]:
# Where mean and median are both 0 the relative difference is 0/0 (NaN);
# define it as 0 there. A vectorised mask replaces the row-wise apply and
# leaves every other value untouched.
both_zero = consistency_check['mean'].eq(0) & consistency_check['median'].eq(0)
consistency_check['rel_diff'] = consistency_check['rel_diff'].mask(both_zero, 0)

## Create xml and write to file

In [None]:
# xml.etree.cElementTree was removed in Python 3.9; xml.etree.ElementTree is
# the supported module and is also the one that provides ET.indent(), which is
# used when the tree is written to file.
import xml.etree.ElementTree as ET

ids_of_interest = [1]

# Root <data> element with one <interval> child per id. Interval number idx
# spans the seconds idx*3600 .. idx*3600+3599.
root = ET.Element("data")
intervals = dict()
for idx, elem in enumerate(ids_of_interest):
    interval = ET.SubElement(root, "interval", id=str(elem), begin=str(idx*3600), end=str(idx*3600+3599))
    print(interval)
    intervals[str(elem)] = interval  # looked up by the stringified id later on

In [None]:
# Expose the frame's index as a regular 'OSM_id' column.
consistency_check = consistency_check.assign(OSM_id=consistency_check.index)

In [None]:
# Inspect the per-OSM statistics, now including the 'OSM_id' column.
consistency_check

In [None]:
# Multiplier applied to every median sample count before it is written to the
# XML file; its integer part also appears in the output file name.
# An assignment without a value is a SyntaxError and stops "Run All", so a
# neutral default is provided. TODO: replace it with your own scale factor.
scale_factor = 1.0  # put your scale factor here

In [None]:
# Every edge is attached to the interval of the first id of interest; the
# lookup does not depend on the row, so it is done once before the loop.
interval = intervals[str(ids_of_interest[0])]
for idx, row in consistency_check.iterrows():
    # ElementTree can only serialise string attribute values, so the edge id is
    # converted explicitly (a no-op when it already is a string). The scaled
    # median is truncated to an integer.
    ET.SubElement(interval, "edge", id = str(row['OSM_id']), entered = str(int(row['median']*scale_factor)))

# Pretty-print with tab indentation and write the document; the file name
# carries the integer part of the scale factor.
tree = ET.ElementTree(root)
ET.indent(tree, space = "\t", level = 0)
tree.write("map_{}_manually_modified.xml".format(int(scale_factor)))