# Doc1_Data_Processing

In [24]:
import os
import pandas as pd
import numpy as np
from trajtool import TrajTool
tt = TrajTool()

## 1 Data Preparation

### Vehicle specification

In [25]:
# Load the vehicle specification spreadsheet and display it.
veh_info_path = 'data/veh_info.xlsx'
veh_info = pd.read_excel(veh_info_path)
veh_info

Unnamed: 0,model,brand,class,fuel,shape,weight/kg,width/mm,height/mm,coef_roll,coef_aero,delta_aero_window_open
0,Sylphy,Nissan,A,ICEV,sedan,1210,1760,1503,0.0105,0.32,0.05
1,Corolla,Toyota,A,ICEV,sedan,1335,1780,1455,0.0105,0.32,0.05
2,Avalon,Toyota,B,ICEV,sedan,1570,1850,1450,0.0105,0.32,0.05
3,TiguanL,Volkswagen,SUV,ICEV,SUV,1620,1842,1682,0.0105,0.45,0.06
4,Ei5,Roewe,A,EV,wagon,1560,1818,1543,0.0105,0.3,0.04
5,ModelY,Tesla,SUV,EV,SUV,1911,1921,1624,0.0105,0.23,0.06


### Test records

In [26]:
# One record folder per (vehicle model, driver) pair. The model order is kept
# exactly as listed because it fixes the processing order of the cells below.
_models = ['Sylphy', 'Corolla', 'Ei5', 'Avalon', 'TiguanL', 'ModelY']
_drivers = ['DRIVER1', 'DRIVER2']
read_paths = [
    'data/0-records/%s/%s/' % (model_name, driver_id)
    for model_name in _models
    for driver_id in _drivers
]

In [27]:
# Load the three raw tables (calibration, meteorology, trajectory record) of every
# test folder into data_dict, keyed by "<model>-<driver>".
data_dict = {}
for read_path in read_paths:
    # os.listdir() returns entries in arbitrary order, so sort the names to make the
    # positional unpacking below deterministic. Hidden files (e.g. .DS_Store) are
    # skipped so that they cannot break the 3-way unpacking.
    # NOTE(review): assumes the file names sort alphabetically as
    # calibration < meteorology < record -- confirm against the data folders.
    file_names = sorted(f for f in os.listdir(read_path) if not f.startswith('.'))
    if len(file_names) != 3:
        raise ValueError(
            'expected 3 files (calibration, meteorology, record) in %r, found %d: %r'
            % (read_path, len(file_names), file_names)
        )
    cali_file, meteoro_file, record_file = file_names
    # read_path looks like 'data/0-records/<model>/<driver>/'
    model, driver = read_path.split('/')[2:4]
    key = model + '-' + driver  # model-driverID
    data_dict[key] = {
        'cali': pd.read_csv(os.path.join(read_path, cali_file), sep=" "),
        'record': pd.read_csv(os.path.join(read_path, record_file), sep=" "),
        'meteo': pd.read_csv(os.path.join(read_path, meteoro_file), sep=" "),
        'model': model,
        'driver': driver
    }

## 2 Processing and Calculation

### Meteorology

In [28]:
def meteo_process(meteo:pd.DataFrame):
    """
    Prepare a raw meteorology table for merging with the trajectory records.

    The input is copied, its 'UTC' column is encoded into 'sec of day' by
    tt.encode_time, and the table is re-indexed to every integer 'sec of day'
    between the first and last observation (gaps take the previous observation).
    Five columns are then picked by position, relabelled, and 'humd' is divided
    by 100.

    meteo: raw meteorology table with a 'UTC' column
    ----------
    output: DataFrame indexed by 'sec of day' with columns
            'temp[C]', 'humd', 'pressure[hPa]', 'v_wind[km/h]', 'd_wind[D]'
    """
    table = tt.encode_time(meteo.copy(), 'UTC', 'sec of day', '%H%M%S', 'sec of day')
    # densify: one row per integer 'sec of day', forward-filled
    first_sec = table['sec of day'].min()
    last_sec = table['sec of day'].max()
    full_range = np.arange(first_sec, last_sec + 1)
    table = table.set_index('sec of day').reindex(full_range, method='pad')
    # select columns by position (counted after 'sec of day' became the index)
    kept_columns = table.columns[[1, 2, 3, 4, 6]]
    table = table[kept_columns]
    table.columns = ['temp[C]', 'humd', 'pressure[hPa]', 'v_wind[km/h]', 'd_wind[D]']
    # scale humidity down by 100
    table['humd'] = table['humd'] / 100
    return table

In [29]:
# Replace every raw meteorology table with its processed version.
for key, entry in data_dict.items():
    entry['meteo'] = meteo_process(entry['meteo'])

### Records

In [30]:
def cal_roll_force(mass, theta, coef_roll=0.0105):
    """
    Rolling resistance force on a road inclined by theta.
    mass: vehicle mass (kg)
    theta: road elevation angle (degree)
    coef_roll: rolling resistance coefficient
    ----------
    output: coef_roll * mass * g * cos(theta), with g = 9.81 m/s2
    """
    gravity = 9.81  # m/s2
    slope_rad = np.radians(theta)
    return coef_roll * mass * gravity * np.cos(slope_rad)

def cal_aero_force(p, t, h, v, v_wind, d_wind, tn_v, width, height, coef_aero):
    """
    Calculate the aerodynamic drag.
    p: ambient air pressure (hPa)
    t: ambient air temperature (℃)
    h: relative humidity (0.0-1.0)
    v: vehicle speed (km/h)
    v_wind: wind speed (km/h)
    d_wind: direction from which the wind originates (degree)
    tn_v: true north of vehicle direction (degree)
    width: width of the frontal area of the vehicle (mm)
    height: height of the frontal area of the vehicle (mm)
    coef_aero: coefficient of aerodynamic drag
    ----------
    output: drag force along the direction of travel (N); positive when the air
            opposes the motion, negative when a tailwind faster than the vehicle
            pushes it forward
    """
    p = p * 100 # hPa to Pa
    R_d = 287.058  # specific gas constant of dry air, J/kg/K
    R_v = 461.495  # specific gas constant of water vapor, J/kg/K
    t_kelvin = t + 273.15
    # Tetens equation gives the saturation vapor pressure in kPa (constant 237.3 ℃);
    # scale by relative humidity and convert to Pa.
    p_v = h * (0.61078 * np.exp(17.27 * t / (t + 237.3))) * 1000 # water vapor pressure (Pa)
    # humid air density = vapor part + dry-air part, kg/m3
    air_density = p_v / (R_v * t_kelvin) + (p - p_v) / (R_d * t_kelvin)
    # wind component along the heading; equals +v_wind for a pure headwind
    v_w = v_wind * np.cos(np.radians(tn_v) - np.radians(d_wind)) # km/h
    v_rel = (v + v_w) * 5 / 18 # relative air speed, km/h to m/s
    frontal_area = (width/1000) * (height/1000) # mm to m, m2
    # v_rel * |v_rel| instead of v_rel**2 keeps the sign of the relative air flow
    return 0.5 * air_density * coef_aero * frontal_area * v_rel * np.abs(v_rel)

def cal_grad_force(mass, theta):
    """
    Calculate the grade (slope) resistance.
    mass: vehicle mass (kg)
    theta: road elevation angle (degree)
    ----------
    output: mass * g * sin(theta), with g = 9.81 m/s2
    """
    gravity = 9.81  # m/s2
    weight = mass * gravity
    return weight * np.sin(np.radians(theta))

def cal_acce_force(mass, a):
    """
    Acceleration (inertial) resistance: mass times acceleration.
    mass: vehicle mass (kg)
    a: acceleration (m/s2)
    """
    inertial_force = mass * a
    return inertial_force

def cal_trac_power(mass, width, height, theta, v, a, tn_v, p, t, h, v_wind, d_wind, coef_aero, coef_roll=0.0105):
    """
    Tractive power: the sum of the four resistance forces times the vehicle speed.
    ----------
    mass: vehicle mass (kg)
    width: width of the frontal area of the vehicle (mm, forwarded to cal_aero_force)
    height: height of the frontal area of the vehicle (mm, forwarded to cal_aero_force)
    theta: road elevation angle (degree)
    v: vehicle speed (km/h)
    a: acceleration (m/s2)
    tn_v: true north of vehicle direction (degree)
    p: ambient air pressure (hPa)
    t: ambient air temperature (℃)
    h: relative humidity (0.0-1.0)
    v_wind: wind speed (km/h)
    d_wind: direction from which the wind originates (degree)
    coef_aero: coefficient of aerodynamic drag
    coef_roll: rolling resistance coefficient
    ----------
    output: tractive power [kW]
    """
    f_roll = cal_roll_force(mass, theta, coef_roll)
    f_aero = cal_aero_force(p, t, h, v, v_wind, d_wind, tn_v, width, height, coef_aero)
    f_grad = cal_grad_force(mass, theta)
    f_acce = cal_acce_force(mass, a)
    trac_force = f_roll + f_aero + f_grad + f_acce
    # v * 5 / 18 converts km/h to m/s; / 1000 converts W to kW
    return trac_force * v * 5 / 18 / 1000

In [31]:
def record_process(record:pd.DataFrame, meteo:pd.DataFrame, veh:pd.DataFrame, payload_kg:float=60*2):
    """
    Processing trajectory data.
    ----------
    record: raw trajectory table; the columns 'UTC', 'Lon', 'Lat', 'Speed', 'Alt',
            'TNH' and 'Digi_output' are read
    meteo: meteorology table indexed by 'sec of day'; the columns 'pressure[hPa]',
           'temp[C]', 'humd', 'v_wind[km/h]' and 'd_wind[D]' are read
    veh: vehicle specification, either a one-row DataFrame or a single row (Series)
         providing 'weight/kg', 'width/mm', 'height/mm', 'coef_aero',
         'delta_aero_window_open' and 'coef_roll'
    payload_kg: mass added to the vehicle weight (kg); the default 60*2 is
                presumably two occupants of 60 kg each -- TODO confirm
    ----------
    output: DataFrame with one row per processed trajectory sample, joined with the
            meteorology columns and extended by 'trac_power[kW]'
    raises: ValueError if veh is a DataFrame that does not hold exactly one row
    """
    # Reduce the vehicle specification to scalars. Passing one-row Series into the
    # force functions would make every per-row result a length-1 Series instead of
    # a number.
    if isinstance(veh, pd.DataFrame):
        if len(veh) != 1:
            raise ValueError('veh must describe exactly one vehicle, got %d rows' % len(veh))
        veh = veh.iloc[0]
    mass = float(veh['weight/kg']) + payload_kg
    width = float(veh['width/mm'])
    height = float(veh['height/mm'])
    coef_aero = float(veh['coef_aero']) + float(veh['delta_aero_window_open'])
    coef_roll = float(veh['coef_roll'])

    record = record.copy()
    # clean: delay 'Digi_output' by 4 rows, then drop every row that holds a NaN
    record['Digi_output'] = record['Digi_output'].shift(4)
    record = record.dropna().reset_index(drop=True)
    # encode time
    # NOTE(review): the int cast drops leading zeros (e.g. 012345 -> '12345');
    # confirm that tt.encode_time copes with hours < 10.
    record['UTC'] = record['UTC'].astype('int').astype('str')
    record = tt.encode_time(record, 'UTC', 'sec of day', '%H%M%S', 'sec of day')
    # elapsed time since the first sample (the minimum is computed once, not per row)
    record['sec'] = record['sec of day'] - record['sec of day'].min()
    # densify
    record = tt.densify(record, 'Lon', 'Lat', 'sec')
    # smooth
    record = tt.smooth_traj(record, 'Lon', 'Lat', 'sec', 'lon', 'lat', smoothFunc='kal2D', measure_var = 0.000001, process_var=0.000001)
    record = tt.smooth(record, 'Speed', 'speed', smoothFunc='exp')
    record = tt.smooth(record, 'Alt', 'alt', smoothFunc='exp')
    record = tt.smooth(record, 'TNH', 'heading', smoothFunc='exp')
    record = tt.smooth(record, 'Digi_output', 'pad_pressure', smoothFunc='moving')
    record['pad_pressure'] = np.floor(record['pad_pressure'])
    # .copy() so the dtype changes below act on an independent frame, not a slice
    record = record[['sec','sec of day','lon','lat','speed','alt','heading','pad_pressure']].copy()
    # data type transform
    # NOTE(review): 'alt' is stored as float16 before tt.cal_grade reads it --
    # confirm that this precision is sufficient for the grade calculation.
    record = record.astype({
        'sec': 'int16',
        'speed': 'float16',
        'alt': 'float16',
        'heading': 'float16',
        'pad_pressure': 'int16',
    })
    # calculate
    record = tt.cal_interval(record, 'lon', 'lat', 'sec', 'd_gap[m]', 't_gap[s]')
    record = tt.cal_grade(record, 'lon', 'lat', 'd_gap[m]', eleCol='alt')
    record = tt.cal_acc(record, 't_gap[s]', 'speed', 'acc[m/s2]')
    record = tt.cal_VSP(record, 'speed', 'acc[m/s2]', 'VSP[kW/t]', 'grade[D]')
    record = record.dropna().reset_index(drop=True)
    # data type transform
    record = record.astype({
        'acc[m/s2]': 'float16',
        'VSP[kW/t]': 'float16',
        'grade[D]': 'float16',
    })
    # merge data: attach the meteorology row of the same 'sec of day' (left join)
    data = record.set_index('sec of day').join(meteo)
    data = data.reset_index(names=['sec of day'])
    data = data.rename(
        columns={
            'speed': 'speed[km/h]',
            'alt': 'alt[m]',
            'heading': 'true_north[D]'
        })
    # tractive power, evaluated row by row
    trac_power = data.apply(lambda x:
        cal_trac_power(
            mass, width, height,
            theta=x['grade[D]'], v=x['speed[km/h]'], a=x['acc[m/s2]'], tn_v=x['true_north[D]'],
            p=x['pressure[hPa]'], t=x['temp[C]'], h=x['humd'], v_wind=x['v_wind[km/h]'], d_wind=x['d_wind[D]'],
            coef_aero=coef_aero, coef_roll=coef_roll
        ),
        axis=1
    )
    data['trac_power[kW]'] = trac_power.astype('float16')
    return data

In [32]:
# Process the trajectory record of every (model, driver) entry in place.
for key, entry in data_dict.items():
    # meteorology data
    meteo = entry['meteo']
    # vehicle information: the specification row(s) of this entry's model
    is_this_model = veh_info['model'] == entry['model']
    veh = veh_info[is_this_model]
    # trajectory records
    entry['record'] = record_process(entry['record'], meteo, veh)

- densified length: 3479;
- densified ratio: 112.84%.
- densified length: 3285;
- densified ratio: 114.42%.
- densified length: 3498;
- densified ratio: 114.35%.
- densified length: 3588;
- densified ratio: 113.72%.
- densified length: 3628;
- densified ratio: 114.38%.
- densified length: 3250;
- densified ratio: 114.44%.
- densified length: 3058;
- densified ratio: 114.40%.
- densified length: 3168;
- densified ratio: 106.20%.
- densified length: 3190;
- densified ratio: 112.72%.
- densified length: 3528;
- densified ratio: 113.88%.
- densified length: 3760;
- densified ratio: 114.74%.
- densified length: 3275;
- densified ratio: 114.71%.


## 3 Save

In [38]:
# Write one CSV per (model, driver) entry into the processed-data folder.
out_dir = 'data/1-processed'
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)
for key, entry in data_dict.items():
    out_path = '%s/%s-%s.csv' % (out_dir, entry['model'], entry['driver'])
    entry['record'].to_csv(out_path, index=False)