In [1]:
#
# Copyright (C) 2024 by Sonja Filiposka <sonja.filiposka@finki.ukim.mk>
#
# This code is licensed under a Creative Commons Attribution 4.0 International License. (see LICENSE.txt for details)
#
# [TODO]General Description - this notebook is used to extract the delay and the handover information from OMNET output vector file
# It creates two types of output files: 
#    - mobile network communication delay as reported by OMNET
#      - a csv file with the delay for each communication exchange
#    - initial positioning and migration files that are used as input for CloudSim 
#       - initial positioning file defines the start and end time for each car/service and the initial community 
#         based on location of tha car and the location of the base stations (see comments in code)
#       - migration file defines the time stamp when a car moves from one community to another based on relative location to the nearest base station
#

In [2]:
def dataset_sumo(vector_sumo, output):

    # parameters
    # 1 = $vector_sumo - sumo input.file name
    # 2 = $output      - output.file name

    # sumo log format:
    #   <timestep time="0.00">
    #     <vehicle id="0" x="-0.482437" y="38.344131" angle="339.66" type="DEFAULT_VEHTYPE" speed="5.10" pos="5.10" lane="23036317#1_0" slope="0.00" signals="0"/>
    !grep -n 'time=' $vector_sumo | tr -d \" > sumo_times.txt
    !grep -n 'vehicle' $vector_sumo | tr -d \" > sumo_veh.txt

    cols=['row','veh_id','x','y','angle','speed','pos','lane','slope','signals']
    data = vx.read_csv("sumo_veh.txt", sep=' ', header=None,
            names=['row','1','2','3','4','5','6','7','8','veh_id','x','y','angle','type','speed','pos','lane','slope','signals'], 
            usecols=cols,
            convert=True, chunk_size=150_000_000)    
    #print(data.shape, data)

    # clean vehicle's file
    !rm sumo_veh.txt 
    
    data['veh_id'] = data['veh_id'].str.replace('id=', '').str.replace('"', '')
    data['x'] = data["x"].str.replace('x=', '')
    data['y'] = data["y"].str.replace('y=', '')
    data['angle'] = data["angle"].str.replace('angle=', '')
    data['speed'] = data["speed"].str.replace('speed=', '')
    data['pos'] = data["pos"].str.replace('pos=', '')
    data['lane'] = data["lane"].str.replace('lane=', '')
    data['slope'] = data["slope"].str.replace('slope=', '')
    data['signals'] = data["signals"].str.replace('signals=', '').str.replace('/>', '') 
    data['row'] = data["row"].str.replace(':', '').astype('int')
           
    data_t = pd.read_csv("sumo_times.txt", sep=' ', header=None,
            names=['row','1','2','3','4','time'], 
            usecols=['row','time'])
    data_t['time'] = data_t['time'].str.replace('time=', '').str.replace('>', '').astype('float')
    data_t['row'] = data_t['row'].str.replace(':', '').astype('int')  
    # check
    #print(data_t,data_t.shape,data_t.row.dtype, data_t.time.dtype)
    data_du = data["veh_id"].nunique()
    print('\n(1) unique:',data_du)

    # clean times file
    !rm sumo_times.txt

    # difference of <timestep time=... rows
    # 3:  1 row of <vehicle id= ...
    # 4:  2 rows of <vehicle id= ...
    # ....
    # 90: 88 rows of <vehicle id= ...
    data_t['row_diff'] = data_t.row.diff()
    print(data_t, data.shape)    
    
    # build the dictionary
    d = {}
     
    # get times
    for idx, row in data_t.iterrows():
        i = row.row_diff - 2
        row_ant = data_t.iloc[idx-1].row; time_ant = data_t.iloc[idx-1].time
        row_this = data_t.iloc[idx].row;  time = data_t.iloc[idx].time
        while (i > 0):
            d[int(row_ant + i)] = time_ant
            i = i - 1
        # if last row
        if idx == (data_t.shape[0]-1):
            i = row.row_diff - 2
            while (i > 0):
                d[int(row_this + i)] = time
                i = i - 1
        # a way to save memory, otherwise it overflows
        gap = 50000
        if (idx % gap == gap-1):
            label = str(int(idx/gap))
            data['t' + label] = data.row.map(d, default_value = 0.0)    
            d = {}
        
    # get the rest of 'time' values
    label = str(int(idx/gap))
    data['t' + label]  = data.row.map(d, default_value = 0.0)      

    # merge time columns (max 4 columns for 1800s)
    data['t'] = data.t0 + data.t1 + data.t2 + data.t3        
    
    data = data[['t','veh_id','x','y','angle','speed','pos','lane','slope','signals']]
    data_du = data["veh_id"].nunique()
    print(data, '\n(2) unique:',data_du)

    data.export_csv(output, index=False, sep='\t')
    
    del data_t, data
    
    return 1 

In [3]:
def dataset_omnet(vector_omnet, output):

    # parameters
    # 1 = $vector_omnet - omnet input.file name
    # 2 = $output       - output.file name
        
    # First prepare the output file from OMNET, vector-0.vec to suit version 2.0
    !split -d -b 500m $vector_omnet
    !sed -i 's/version 3/version 2/' x00
    !cat x* > vector-1.vec
    # clean
    !rm x*
    # requires to have netperfmeter installed: sudo apt-get install netperfmeter
    !extractvectors vector-1.vec results.bz2 "distance" \
        "measuredSinrDl" "measuredSinrUl" "rcvdSinrDl" "averageCqiDl" \
        "servingCell" "rlcDelayDl" "rlcPacketLossTotal" "rlcPduDelayDl" \
        "rlcPduPacketLossDl" "rlcPduThroughputDl" "rlcThroughputDl"\
        "receivedPacketFromLowerLayer" 

    # get data file: results (it deletes results.bz2)
    if os.path.isfile('results'):
        !rm results
    !bzip2 -d results.bz2
    
    # removing unnecessary information
    !grep 'car' results > r_cars
    !sed -i -e 's/NRSeveralBSALC.//' -e 's/:vector//'  -e 's/ ETV//' \
        -e 's/(packetBytes)//' r_cars
    
    # clean
    !rm vector-1.vec 
    
    cols=['Time','Event','Object','Vector','Split','Value']
    data = vx.read_csv("r_cars", sep='\t', header=None,
        #names=['#','Time','Event','Object','Vector','Split','Value'],
        names=['#','Time','Event','Object','Vector','Value'],
        usecols=['Time','Object','Vector','Value'],
        convert=True, chunk_size=150_000_000) 
    print(data)

    #data = data['#'].drop
    data.Object = data.Object.str.replace('"', '')
    data.Vector = data.Vector.str.replace('"', '')    

    data.export_csv(output, index=False, sep='\t')
    
    # clean
    !rm results r_cars* 
    del data
    
    return 1 


In [4]:
#with pandas: for files smaller than 1 GB (and faster than vaex)
def process_dataset_sumo_future(s_input, output):
    df_pd = pd.read_csv(s_input, sep='\t', header=None,
            names=['t','veh_id','x','y','angle','speed','pos','lane','slope','signals']) 
            #header=1)
    print(df_pd.head())
    print(df_pd['veh_id'])
    df_pd=df_pd.tail(-1)

    
    # with pandas: it is ok if input file is smaller than 1 GB
    df_pd['x1'] = df_pd.groupby('veh_id')['x'].shift(-100).fillna(999999)
    df_pd['y1'] = df_pd.groupby('veh_id')['y'].shift(-100).fillna(999999)
    df_pd['x2'] = df_pd.groupby('veh_id')['x'].shift(-200).fillna(999999)
    df_pd['y2'] = df_pd.groupby('veh_id')['y'].shift(-200).fillna(999999)
    df_pd['x3'] = df_pd.groupby('veh_id')['x'].shift(-300).fillna(999999)
    df_pd['y3'] = df_pd.groupby('veh_id')['y'].shift(-300).fillna(999999)
    df_pd['x4'] = df_pd.groupby('veh_id')['x'].shift(-400).fillna(999999)
    df_pd['y4'] = df_pd.groupby('veh_id')['y'].shift(-400).fillna(999999)
    df_pd['x5'] = df_pd.groupby('veh_id')['x'].shift(-500).fillna(999999)
    df_pd['y5'] = df_pd.groupby('veh_id')['y'].shift(-500).fillna(999999)
    df_pd['x6'] = df_pd.groupby('veh_id')['x'].shift(-600).fillna(999999)
    df_pd['y6'] = df_pd.groupby('veh_id')['y'].shift(-600).fillna(999999)
    df_pd['x7'] = df_pd.groupby('veh_id')['x'].shift(-700).fillna(999999)
    df_pd['y7'] = df_pd.groupby('veh_id')['y'].shift(-700).fillna(999999)
    print(df_pd.shape, df_pd)

    df_pd.to_csv(output, index=False, sep='\t')

    return 1 


In [5]:
#with pandas: for files smaller than 1 GB (and faster than vaex)
def process_dataset_sumo_past(s_input, output):
    df_pd = pd.read_csv(s_input, sep='\t', header=None,
            names=['t','veh_id','x','y','angle','speed','pos','lane','slope','signals']) 
            #header=1)
    print(df_pd.shape)
    print(df_pd['veh_id'])
    df_pd=df_pd.tail(-1)
    print(df_pd.head())
    
    # with pandas: it is ok if input file is smaller than 1 GB
    df_pd['x-1'] = df_pd.groupby('veh_id')['x'].shift(100).fillna(999999)
    df_pd['y-1'] = df_pd.groupby('veh_id')['y'].shift(100).fillna(999999)
    df_pd['x-2'] = df_pd.groupby('veh_id')['x'].shift(200).fillna(999999)
    df_pd['y-2'] = df_pd.groupby('veh_id')['y'].shift(200).fillna(999999)
    df_pd['x-3'] = df_pd.groupby('veh_id')['x'].shift(300).fillna(999999)
    df_pd['y-3'] = df_pd.groupby('veh_id')['y'].shift(300).fillna(999999)
    df_pd['x-4'] = df_pd.groupby('veh_id')['x'].shift(400).fillna(999999)
    df_pd['y-4'] = df_pd.groupby('veh_id')['y'].shift(400).fillna(999999)
    df_pd['x-5'] = df_pd.groupby('veh_id')['x'].shift(500).fillna(999999)
    df_pd['y-5'] = df_pd.groupby('veh_id')['y'].shift(500).fillna(999999)
    df_pd['x-6'] = df_pd.groupby('veh_id')['x'].shift(600).fillna(999999)
    df_pd['y-6'] = df_pd.groupby('veh_id')['y'].shift(600).fillna(999999)
    df_pd['x-7'] = df_pd.groupby('veh_id')['x'].shift(700).fillna(999999)
    df_pd['y-7'] = df_pd.groupby('veh_id')['y'].shift(700).fillna(999999)
    print(df_pd.shape, df_pd)

    df_pd.to_csv(output, index=False, sep='\t')

    return 1 


In [None]:
# measuring execution time
%load_ext autotime

# extract the delay and the handover information from omnet output vector file
maxTime = 1800
communities = 9

#parametrised calls of notebook
import numpy as np
import pandas as pd
import vaex as vx
import os.path

path="/home/jupyter/notebook/OMNET6.0/"
sumo_files = "dataset_AI_input/fdc_signals_"
omnet_files = "VoipDl-Urban-xxxx/VoipDl-Urban-"
#cars = np.array([4928, 4951, 4955, 5712, 5734, 5749, 6900, 6908, 6923, 8589, 8619, 8620])
cars = np.array([8589, 8619, 8620])
out = "dataset_AI_output/"
outFile = "_AI.csv"
# initialPositioning-xxxx.txt and migrations-xxxx.txt will also be.ipynb_checkpoints/created

for i in cars:
    # sumo datasets are already created (based on fdc files)
    # info about cars positioning and parameters over time
    # obtained from sumo with commands:
    # sumo -c Alicante_8620.sumo.cfg --fcd-output.geo true --fcd-output.signals true --fcd-output ../fdc_signals_8620.xml --end 1800
    v_sumo = path + sumo_files + str(i) +  ".xml"
    output = out + str(i) + "_sumo" + outFile
    #print("STEP 1) sumo input : ", v_sumo)
    #print("STEP 1) sumo output : ", output)
    # get sumo dataset
    #df_sumo_exit_code = dataset_sumo(v_sumo, output)

    # process sumo dataset to get future positions
    #v_sumo = output
    #output = out + str(i) + "_sumo_1" + outFile
    #print("STEP 2) sumo input : ", v_sumo)
    #print("STEP 2) sumo output : ", output)
    #df_sumo_exit_code = process_dataset_sumo_future(v_sumo, output)
    # check
    #print(df_sumo_exit_code)
    
    # process sumo dataset to get past positions
    v_sumo = output
    output = out + str(i) + "_sumo_2" + outFile
    print("STEP 2) sumo input : ", v_sumo)
    print("STEP 2) sumo output : ", output)
    df_sumo_exit_code = process_dataset_sumo_past(v_sumo, output)
    # check
    #print(df_sumo_exit_code)

STEP 2) sumo input :  dataset_AI_output/8589_sumo_AI.csv
STEP 2) sumo output :  dataset_AI_output/8589_sumo_2_AI.csv


  df_pd = pd.read_csv(s_input, sep='\t', header=None,


(20679146, 10)
0           veh_id
1                0
2                0
3                0
4                0
             ...  
20679141       888
20679142       962
20679143       972
20679144       975
20679145       999
Name: veh_id, Length: 20679146, dtype: object
      t veh_id          x          y  angle speed   pos        lane slope  \
1   0.0      0  -0.485544  38.341737  39.92  0.00  5.10  33199229_0  0.00   
2  0.01      0  -0.485544  38.341737  39.92  0.03  5.10  33199229_0  0.00   
3  0.02      0  -0.485544  38.341737  39.92  0.05  5.10  33199229_0  0.00   
4  0.03      0  -0.485544  38.341737  39.92  0.08  5.10  33199229_0  0.00   
5  0.04      0  -0.485544  38.341737  39.92  0.10  5.10  33199229_0  0.00   

  signals  
1       0  
2       8  
3       8  
4       8  
5       0  
(20679145, 24) 