In [1]:
import os
from glob import glob

from typing import List, Tuple

import numpy as np
import pandas as pd
from pandas import DataFrame

from bs4 import BeautifulSoup as bs

In [2]:
STRATEGIES = ["greedy", "random", "rl_based"]
EXT = "*.deepscenario"

# DATASET_PATH = f'../../DeepScenario/deepscenario-dataset/'
DATASET_PATH = f'C:/Users/Yohannes/Documents/School/Master/Thesis/DeepScenario/deepscenario-dataset/'

## Testing

In [133]:
soup = bs('../data/test.deepscenario', 'xml')



In [3]:
content = None
with open('../data/test.deepscenario', "r") as file:
    content = file.readlines()
content = "".join(content)
bs_content = bs(content, features="xml")

In [11]:
cols = ["speed1", "speed2", "speed3", "speed4", "speed5", "speed6", "av1", "av2", "av3", "av4", "av5", "av6"]
speed = list(map(lambda x: float(x.attrs["speed"]), bs_content.DeepScenario.StoryBoard.Story.ObjectAction.findAll("Speed")))
angular_velocity = list(map(lambda x: [float(x.attrs["angularVelocityX"]), float(x.attrs["angularVelocityY"]), float(x.attrs["angularVelocityZ"])], 
                        bs_content.DeepScenario.StoryBoard.Story.ObjectAction.findAll("AngularVelocity")))
print(speed)
print(angular_velocity)
a = {"0_scenario_0": dict(zip(cols, speed + angular_velocity)) }
a["0_scenario_0"]["road"] = "road4"
a["0_scenario_1"] = a["0_scenario_0"]
pd.DataFrame.from_dict(a, orient="index")

[8.981, 9.099, 7.517, 5.192, 6.303, 7.341]
[[0.004, -0.017, 0.001], [0.0, -0.004, 0.001], [0.005, 0.008, 0.001], [0.003, -0.0, -0.0], [-0.005, -0.528, -0.014], [0.007, 0.327, -0.005]]


Unnamed: 0,speed1,speed2,speed3,speed4,speed5,speed6,av1,av2,av3,av4,av5,av6,road
0_scenario_0,8.981,9.099,7.517,5.192,6.303,7.341,"[0.004, -0.017, 0.001]","[0.0, -0.004, 0.001]","[0.005, 0.008, 0.001]","[0.003, -0.0, -0.0]","[-0.005, -0.528, -0.014]","[0.007, 0.327, -0.005]",road4
0_scenario_1,8.981,9.099,7.517,5.192,6.303,7.341,"[0.004, -0.017, 0.001]","[0.0, -0.004, 0.001]","[0.005, 0.008, 0.001]","[0.003, -0.0, -0.0]","[-0.005, -0.528, -0.014]","[0.007, 0.327, -0.005]",road4


## Reading XML files

In [136]:
# Need : [path, file, reward_type, road, scenario, strategy]
def Load_Files(strategy: str = "greedy") -> List[Tuple[str, str, List[str]]]:
    return [( path, file, 
              file.split("\\")[-1].split(".")[0], # scenario_id
              path.split("/")[-1].split("-")[1].split("\\")[0], path.split("/")[-1].split("-")[1].split("\\")[1], # reward_type, road
              path.split("/")[-1].split("-")[2],
              strategy
            ) 
            for path, subdir, files in os.walk(DATASET_PATH + f'{strategy}-strategy/')
            for file in glob(os.path.join(path, EXT))]

In [137]:
Load_Files()[:3]

[('../../DeepScenario/deepscenario-dataset/greedy-strategy/reward-dto\\road1-rain_day-scenarios',
  '../../DeepScenario/deepscenario-dataset/greedy-strategy/reward-dto\\road1-rain_day-scenarios\\0_scenario_0.deepscenario',
  '0_scenario_0',
  'dto',
  'road1',
  'rain_day',
  'greedy'),
 ('../../DeepScenario/deepscenario-dataset/greedy-strategy/reward-dto\\road1-rain_day-scenarios',
  '../../DeepScenario/deepscenario-dataset/greedy-strategy/reward-dto\\road1-rain_day-scenarios\\0_scenario_1.deepscenario',
  '0_scenario_1',
  'dto',
  'road1',
  'rain_day',
  'greedy'),
 ('../../DeepScenario/deepscenario-dataset/greedy-strategy/reward-dto\\road1-rain_day-scenarios',
  '../../DeepScenario/deepscenario-dataset/greedy-strategy/reward-dto\\road1-rain_day-scenarios\\0_scenario_10.deepscenario',
  '0_scenario_10',
  'dto',
  'road1',
  'rain_day',
  'greedy')]

In [153]:
def Load_Data(strategy: str = "all") -> DataFrame:
    d = {}
    speeds = ["speed1", "speed2", "speed3", "speed4", "speed5", "speed6"]

    try:
        if strategy == "all":
            datafiles = []
            for s in STRATEGIES:
                _files = Load_Files(s)
                print(f"Loaded {len(_files)} files for {s} strategy")
                datafiles.extend(_files) 
        else:
            if strategy not in STRATEGIES:
                raise Exception("Strategy not found")
            datafiles = Load_Files(strategy)
        
        amount = len(datafiles)
        counter = 0
        printed = False
        for path, file, scenario_id, reward, road, scenario, strat in datafiles:
            counter += 1
            with open(file, "r") as f:
                bs_content = bs("".join(f.readlines()), features="xml")
            scenario_speeds = bs_content.DeepScenario.StoryBoard.Story.ObjectAction.findAll("Speed")

            unique_scenario_id = f"{scenario_id}|{road}|{reward}|{scenario}|{strat}"
            d[unique_scenario_id] = dict(zip(speeds, map(lambda x: float(x.attrs["speed"]), scenario_speeds)))
            d[unique_scenario_id]["ScenarioID"] = scenario_id
            d[unique_scenario_id]["road"] = road
            d[unique_scenario_id]["reward"] = reward
            d[unique_scenario_id]["scenario"] = scenario
            d[unique_scenario_id]["strategy"] = strat
            
            check = round((counter/amount), 3) * 100 % 5 == 0
            if not printed and check:
                printed = True
                print(f"Loaded {round((counter/amount)*100)}% of the data.")
            elif not check:
                printed = False
    except Exception as e:
        print(e)
    finally:
        return pd.DataFrame.from_dict(d, orient="index")

def saveDf(df: DataFrame, filename: str, path: str) -> None:
    df.to_csv(f"{path}/{filename}.csv")    

In [154]:
df = Load_Data() # STRATEGIES[0]) # this took about 6 min for only one strategy... 6m50s for everything :)
print(df.shape)
df.head()

Loaded 13251 files for greedy strategy
Loaded 13565 files for random strategy
Loaded 6701 files for rl_based strategy
Loaded 0% of the data.
(1530, 11)


Unnamed: 0,speed1,speed2,speed3,speed4,speed5,speed6,scenario_id,road,reward,scenario,strategy
0_scenario_0|road1|dto|rain_day|greedy,0.25,0.049,1.932,3.753,5.989,7.349,0_scenario_0,road1,dto,rain_day,greedy
0_scenario_1|road1|dto|rain_day|greedy,8.287,8.41,8.767,8.871,8.998,9.143,0_scenario_1,road1,dto,rain_day,greedy
0_scenario_10|road1|dto|rain_day|greedy,4.096,3.067,4.165,3.018,1.954,1.452,0_scenario_10,road1,dto,rain_day,greedy
0_scenario_11|road1|dto|rain_day|greedy,3.469,4.427,5.288,6.688,6.846,6.573,0_scenario_11,road1,dto,rain_day,greedy
0_scenario_12|road1|dto|rain_day|greedy,5.888,4.864,4.589,3.943,4.199,3.441,0_scenario_12,road1,dto,rain_day,greedy


In [None]:
saveDf(df, "dataFromXML", "../data")

Next need to run this for all the strategies ```df = Load_Data()```. <br>
Then save the data as a csv file, so we then can join the data with the other dataframes.
