In [1]:
import os
import pandas as pd
import sys
import time
from datetime import datetime
import time
from collections import defaultdict
import numpy as np

## 인자값 입력
### 1) 완속충전구간 데이터파일이 있는 디렉토리 경로
### 2) 급속충전구간 데이터파일이 있는 디렉토리 경로
### 3) 방전구간 데이터파일이 있는 디렉토리 경로

In [2]:
_file_input_path1 = './data/slow_charge'
_file_input_path2 = './data/fast_charge'
_file_input_path3 = './data/discharge'

In [3]:

# YYYY-mm-dd HH:MM:SS -> epoch
def convertTimeToEpoch(_time):
    date_time = "%s.%s.%s %s:%s:%s" %(_time[8:10], _time[5:7], _time[:4], _time[11:13], _time[14:16], _time[17:19])
    pattern = "%d.%m.%Y %H:%M:%S"
    epoch = int (time.mktime(time.strptime(date_time, pattern)))
    return epoch


def printProgressBar(iteration, total, prefix = 'Progress', suffix = 'Complete',\
                      decimals = 1, length = 50, fill = '█'): 
    # 작업의 진행상황을 표시
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    print('\r%s |%s| %s%% %s' %(prefix, bar, percent, suffix), end='\r')
    sys.stdout.flush()
    if iteration == total:
        print()


def recursive_search_dir(_nowDir, _filelist, _form = 'csv'):
    dir_list = []  # 현재 디렉토리의 서브디렉토리가 담길 list
    if _nowDir[-1] == '/':
        _nowDir = _nowDir[0:-1]
    f_list = os.listdir(_nowDir)
    for fname in f_list:
        if os.path.isdir(_nowDir + "/" + fname):
            dir_list.append(_nowDir + "/" + fname)
        elif os.path.isfile(_nowDir + "/" + fname):
            file_extension = os.path.splitext(fname)[1]
            if file_extension == '.' + _form.lower() or file_extension == '.' + _form.upper():  # csv
                _filelist.append(_nowDir + "/" + fname)

    for toDir in dir_list:
        recursive_search_dir(toDir, _filelist, _form)

In [4]:
file_list = []
recursive_search_dir(_file_input_path1, file_list)

charge_statistic_df = pd.DataFrame(columns=['car_id', 'car_type', 'date_start', 'start_soc', 'end_soc', 'charge_quan', 'charge_time', 'charge_speed', 'charge_power_quan', 'mileage', 'current_avg', 'volt_avg', 'extern_temp_avg', 'soc_avg', 'battery_module1_temp_avg'])

cnt = 1
t_cnt = len(file_list)
for csv in file_list:
    printProgressBar(cnt, t_cnt)
    df = pd.read_csv(csv)
    car_id = df.iloc[0]['dev_id']
    car_type = df.iloc[0]['car_type']
    date_start = df.iloc[0]['coll_dt']
    start_soc = df.iloc[0]['b_soc']
    end_soc = df.iloc[-1]['b_soc']
    charge_quan = end_soc - start_soc
    charge_time = convertTimeToEpoch(df.iloc[-1]['coll_dt']) - convertTimeToEpoch(df.iloc[0]['coll_dt'])
    charge_speed = (charge_quan / charge_time) * 3600  # 1시간당 충전 SOC 양
    charge_power_quan = df.iloc[-1]['b_accum_charg_power_quan'] - df.iloc[0]['b_accum_charg_power_quan']
    charge_power_per_hour = (charge_power_quan / charge_time) * 3600  # 1시간당 충전 전력 (kW)
    accum_mileage = df.iloc[0]['c_mileage']
    current_avg = df['b_pack_current'].mean()
    volt_avg = df['b_pack_volt'].mean()
    extern_temp_avg = df['a_extern_temp_sensor'].mean()
    soc_avg = df['b_soc'].mean()
    battery_module1_temp_avg = df['b_modul_1_temp'].mean()

    temperature_avg = df['temperature'].mean()
    wind_speed_avg = df['wind_speed'].mean()
    humidity_avg = df['humidity'].mean()
    rainfall_avg = df['rainfall'].mean()
    snowfall_avg = df['snowfall'].mean()

    traffic_volume_avg = df['traffic_volume'].mean()

    # 대기오염 데이터가 없으면, 해당 필드는 NaN으로 처리
    try:
        air_co_avg = df['air_co'].mean()
        air_no2_avg = df['air_no2'].mean()
        air_o3_avg = df['air_o3'].mean()
        air_pm10_avg = df['air_pm10'].mean()
        air_pm25_avg = df['air_pm25'].mean()
        air_so2_avg = df['air_so2'].mean()
    except:
        air_co_avg = np.nan
        air_no2_avg = np.nan
        air_o3_avg = np.nan
        air_pm10_avg = np.nan
        air_pm25_avg = np.nan
        air_so2_avg = np.nan

    append_row = [car_id, car_type, date_start, start_soc, end_soc, charge_quan, charge_time, charge_speed, charge_power_quan, charge_power_per_hour, accum_mileage, current_avg, volt_avg, extern_temp_avg, soc_avg, battery_module1_temp_avg, temperature_avg, wind_speed_avg, humidity_avg, rainfall_avg, snowfall_avg, traffic_volume_avg, air_co_avg, air_no2_avg, air_o3_avg, air_pm10_avg, air_pm25_avg, air_so2_avg]
    append_row_df = pd.DataFrame([append_row], columns=['car_id', 'car_type', 'date_start', 'start_soc', 'end_soc', 'charge_quan', 'charge_time', 'charge_speed', 'charge_power_quan', 'charge_power_per_hour', 'accum_mileage', 'current_avg', 'volt_avg', 'extern_temp_avg', 'soc_avg', 'battery_module1_temp_avg', 'temperature_avg', 'wind_speed_avg', 'humidity_avg', 'rainfall_avg', 'snowfall_avg', 'traffic_volume_avg', 'air_co_avg', 'air_no2_avg', 'air_o3_avg', 'air_pm10_avg', 'air_pm25_avg', 'air_so2_avg'])
    charge_statistic_df = pd.concat([charge_statistic_df, append_row_df])
    cnt+=1
charge_statistic_df = charge_statistic_df.sort_values(by=['date_start'])
charge_statistic_df.reset_index(drop=True, inplace=True)

charge_statistic_df

Progress |██████████████████████████████████████████████████| 100.0% Complete


Unnamed: 0,car_id,car_type,date_start,start_soc,end_soc,charge_quan,charge_time,charge_speed,charge_power_quan,mileage,...,humidity_avg,rainfall_avg,snowfall_avg,traffic_volume_avg,air_co_avg,air_no2_avg,air_o3_avg,air_pm10_avg,air_pm25_avg,air_so2_avg
0,M1200607457,IONIQ5,2022-01-05 19:28:00.000,36.5,48.5,12.0,5393,8.010384,9.1,,...,43.288889,,,,,,,,,
1,M1200607457,IONIQ5,2022-01-05 23:58:09.000,72.0,90.0,18.0,8492,7.630711,15.2,,...,53.205189,,,,,,,,,


In [5]:
file_list = []
recursive_search_dir(_file_input_path2, file_list)

charge_statistic_df = pd.DataFrame(columns=['car_id', 'car_type', 'date_start', 'start_soc', 'end_soc', 'charge_quan', 'charge_time', 'charge_speed', 'charge_power_quan', 'mileage', 'current_avg', 'volt_avg', 'extern_temp_avg', 'soc_avg', 'battery_module1_temp_avg'])

cnt = 1
t_cnt = len(file_list)
for csv in file_list:
    printProgressBar(cnt, t_cnt)
    df = pd.read_csv(csv)
    car_id = df.iloc[0]['dev_id']
    car_type = df.iloc[0]['car_type']
    date_start = df.iloc[0]['coll_dt']
    start_soc = df.iloc[0]['b_soc']
    end_soc = df.iloc[-1]['b_soc']
    charge_quan = end_soc - start_soc
    charge_time = convertTimeToEpoch(df.iloc[-1]['coll_dt']) - convertTimeToEpoch(df.iloc[0]['coll_dt'])
    charge_speed = (charge_quan / charge_time) * 3600  # 1시간당 충전 SOC 양
    charge_power_quan = df.iloc[-1]['b_accum_charg_power_quan'] - df.iloc[0]['b_accum_charg_power_quan']
    charge_power_per_hour = (charge_power_quan / charge_time) * 3600  # 1시간당 충전 전력 (kW)
    accum_mileage = df.iloc[0]['c_mileage']
    current_avg = df['b_pack_current'].mean()
    volt_avg = df['b_pack_volt'].mean()
    extern_temp_avg = df['a_extern_temp_sensor'].mean()
    soc_avg = df['b_soc'].mean()
    battery_module1_temp_avg = df['b_modul_1_temp'].mean()

    temperature_avg = df['temperature'].mean()
    wind_speed_avg = df['wind_speed'].mean()
    humidity_avg = df['humidity'].mean()
    rainfall_avg = df['rainfall'].mean()
    snowfall_avg = df['snowfall'].mean()

    traffic_volume_avg = df['traffic_volume'].mean()

    # 대기오염 데이터가 없으면, 해당 필드는 NaN으로 처리
    try:
        air_co_avg = df['air_co'].mean()
        air_no2_avg = df['air_no2'].mean()
        air_o3_avg = df['air_o3'].mean()
        air_pm10_avg = df['air_pm10'].mean()
        air_pm25_avg = df['air_pm25'].mean()
        air_so2_avg = df['air_so2'].mean()
    except:
        air_co_avg = np.nan
        air_no2_avg = np.nan
        air_o3_avg = np.nan
        air_pm10_avg = np.nan
        air_pm25_avg = np.nan
        air_so2_avg = np.nan

    append_row = [car_id, car_type, date_start, start_soc, end_soc, charge_quan, charge_time, charge_speed, charge_power_quan, charge_power_per_hour, accum_mileage, current_avg, volt_avg, extern_temp_avg, soc_avg, battery_module1_temp_avg, temperature_avg, wind_speed_avg, humidity_avg, rainfall_avg, snowfall_avg, traffic_volume_avg, air_co_avg, air_no2_avg, air_o3_avg, air_pm10_avg, air_pm25_avg, air_so2_avg]
    append_row_df = pd.DataFrame([append_row], columns=['car_id', 'car_type', 'date_start', 'start_soc', 'end_soc', 'charge_quan', 'charge_time', 'charge_speed', 'charge_power_quan', 'charge_power_per_hour', 'accum_mileage', 'current_avg', 'volt_avg', 'extern_temp_avg', 'soc_avg', 'battery_module1_temp_avg', 'temperature_avg', 'wind_speed_avg', 'humidity_avg', 'rainfall_avg', 'snowfall_avg', 'traffic_volume_avg', 'air_co_avg', 'air_no2_avg', 'air_o3_avg', 'air_pm10_avg', 'air_pm25_avg', 'air_so2_avg'])
    charge_statistic_df = pd.concat([charge_statistic_df, append_row_df])
    cnt+=1
charge_statistic_df = charge_statistic_df.sort_values(by=['date_start'])
charge_statistic_df.reset_index(drop=True, inplace=True)

charge_statistic_df

Progress |██████████████████████████████████████████████████| 100.0% Complete


Unnamed: 0,car_id,car_type,date_start,start_soc,end_soc,charge_quan,charge_time,charge_speed,charge_power_quan,mileage,...,humidity_avg,rainfall_avg,snowfall_avg,traffic_volume_avg,air_co_avg,air_no2_avg,air_o3_avg,air_pm10_avg,air_pm25_avg,air_so2_avg
0,M2210703926,IONIQ5,2022-01-02 17:21:23.000,54.5,89.0,34.5,2375,52.294737,28.4,,...,44.846939,,,,,,,,,
1,M2210703775,IONIQ5,2022-01-10 07:54:25.000,9.5,23.0,13.5,591,82.233503,10.2,,...,82.866667,,,,,,,,,
2,M2210703914,IONIQ5,2022-01-10 12:08:46.000,67.5,78.0,10.5,341,110.85044,8.8,,...,60.0,,,,,,,,,
3,M2210703926,IONIQ5,2022-01-10 21:57:43.000,36.0,52.5,16.5,921,64.495114,12.9,,...,64.795699,0.0,,,,,,,,
4,M2210703914,IONIQ5,2022-01-12 09:26:09.000,28.5,64.5,36.0,2392,54.180602,28.2,,...,53.075,,0.2,,,,,,,
5,M1200607327,IONIQ5,2022-01-13 10:59:46.000,25.0,67.0,42.0,1821,83.031301,33.3,,...,45.021978,,,,,,,,,
6,M2210703775,IONIQ5,2022-01-13 11:26:07.000,3.0,10.0,7.0,301,83.72093,4.9,,...,45.0,,,,,,,,,
7,M2211123122,IONIQ5,2022-01-21 11:43:29.000,67.0,86.5,19.5,1189,59.041211,12.8,,...,53.789474,,,,,,,,,
8,M2211123122,IONIQ5,2022-01-26 16:11:10.000,26.0,88.0,62.0,2000,111.6,40.3,,...,40.0,,,,,,,,,
9,M2211123122,IONIQ5,2022-02-04 11:07:51.000,25.0,82.0,57.0,2387,85.965647,36.2,,...,38.0,,,,,,,,,


In [6]:
file_list = []
recursive_search_dir(_file_input_path3, file_list)

discharge_statistic_df = pd.DataFrame(columns=['car_id', 'car_type', 'date_start', 'start_soc', 'end_soc', 'discharge_quan', 'discharge_time', 'discharge_speed', 'discharge_power_quan', 'mileage', 'km_per_kWh', 'current_avg', 'volt_avg', 'car_speed_avg', 'accel_pedal_avg', 'extern_temp_avg', 'soc_avg', 'battery_module1_temp_avg'])

cnt = 1
t_cnt = len(file_list)
for csv in file_list:
    printProgressBar(cnt, t_cnt)
    df = pd.read_csv(csv)
    car_id = df.iloc[0]['dev_id']
    car_type = df.iloc[0]['car_type']
    date_start = df.iloc[0]['coll_dt']
    start_soc = df.iloc[0]['b_soc']
    end_soc = df.iloc[-1]['b_soc']
    discharge_quan = start_soc - end_soc
    discharge_time = convertTimeToEpoch(df.iloc[-1]['coll_dt']) - convertTimeToEpoch(df.iloc[0]['coll_dt'])
    discharge_speed = (discharge_quan / discharge_time) * 3600  # 1시간당 방전 SOC 양
    discharge_power_quan = df.iloc[-1]['b_accum_discharg_power_quan'] - df.iloc[0]['b_accum_discharg_power_quan']
    discharge_power_per_hour = (discharge_power_quan / discharge_time) * 3600  # 1시간당 방전 전력 (kW)
    if df.iloc[-1]['c_mileage'] != 0 and df.iloc[0]['c_mileage'] != 0:
        mileage = df.iloc[-1]['c_mileage'] - df.iloc[0]['c_mileage']
        if mileage >= 10000 or mileage <= 0: # 1) 충전 없이 10,000km 이상 방전한 경우는 오류로 판단, 2) 방전 중 주행거리가 0 이하일 경우 오류로 판단
            mileage = float('nan')
        else:
            accum_mileage = df.iloc[0]['c_mileage'] + (mileage / 2)

    else:
        mileage = float('nan')
    km_per_kWh = mileage / discharge_power_quan

    current_avg = df['b_pack_current'].mean()
    volt_avg = df['b_pack_volt'].mean()

    filtered_df = df['v_car_speed']
    car_speed_avg = filtered_df.mean()
    if car_speed_avg < 0 or car_speed_avg > 180:
        print(car_speed_avg)
        print(filtered_df[filtered_df['v_car_speed'] < 0 or filtered_df['v_car_speed'] > 180])
        break

    try:
        accel_pedal_avg = df['v_accel_pedal_depth'].mean()
    except:
        # 22년 4월 데이터에서, v_accel_pedal_depth 필드 없음.. 필드가 없을 경우, 해당 필드는 NaN으로 처리
        accel_pedal_avg = np.nan

    extern_temp_avg = df['a_extern_temp_sensor'].mean()
    soc_avg = df['b_soc'].mean()
    battery_module1_temp_avg = df['b_modul_1_temp'].mean()

    temperature_avg = df['temperature'].mean()
    wind_speed_avg = df['wind_speed'].mean()
    humidity_avg = df['humidity'].mean()
    rainfall_avg = df['rainfall'].mean()
    snowfall_avg = df['snowfall'].mean()

    traffic_volume_avg = df['traffic_volume'].mean()

    # 대기오염 데이터가 없으면, 해당 필드는 NaN으로 처리
    try:
        air_co_avg = df['air_co'].mean()
        air_no2_avg = df['air_no2'].mean()
        air_o3_avg = df['air_o3'].mean()
        air_pm10_avg = df['air_pm10'].mean()
        air_pm25_avg = df['air_pm25'].mean()
        air_so2_avg = df['air_so2'].mean()
    except:
        air_co_avg = np.nan
        air_no2_avg = np.nan
        air_o3_avg = np.nan
        air_pm10_avg = np.nan
        air_pm25_avg = np.nan
        air_so2_avg = np.nan

    append_row = [car_id, car_type, date_start, start_soc, end_soc, discharge_quan, discharge_time, discharge_speed, discharge_power_quan, discharge_power_per_hour, accum_mileage, mileage, km_per_kWh, current_avg, volt_avg, car_speed_avg, accel_pedal_avg, extern_temp_avg, soc_avg, battery_module1_temp_avg, temperature_avg, wind_speed_avg, humidity_avg, rainfall_avg, snowfall_avg, traffic_volume_avg, air_co_avg, air_no2_avg, air_o3_avg, air_pm10_avg, air_pm25_avg, air_so2_avg]
    append_row_df = pd.DataFrame([append_row], columns=['car_id', 'car_type', 'date_start', 'start_soc', 'end_soc', 'discharge_quan', 'discharge_time', 'discharge_speed', 'discharge_power_quan', 'discharge_power_per_hour', 'accum_mileage', 'mileage', 'km_per_kWh', 'current_avg', 'volt_avg', 'car_speed_avg', 'accel_pedal_avg', 'extern_temp_avg', 'soc_avg', 'battery_module1_temp_avg', 'temperature_avg', 'wind_speed_avg', 'humidity_avg', 'rainfall_avg', 'snowfall_avg', 'traffic_volume_avg', 'air_co_avg', 'air_no2_avg', 'air_o3_avg', 'air_pm10_avg', 'air_pm25_avg', 'air_so2_avg'])
    discharge_statistic_df = pd.concat([discharge_statistic_df, append_row_df])
    cnt+=1
discharge_statistic_df = discharge_statistic_df.sort_values(by=['date_start'])
discharge_statistic_df.reset_index(drop=True, inplace=True)
discharge_statistic_df

Progress |██████████████████████████████████████████████████| 100.0% Complete


Unnamed: 0,car_id,car_type,date_start,start_soc,end_soc,discharge_quan,discharge_time,discharge_speed,discharge_power_quan,mileage,...,humidity_avg,rainfall_avg,snowfall_avg,traffic_volume_avg,air_co_avg,air_no2_avg,air_o3_avg,air_pm10_avg,air_pm25_avg,air_so2_avg
0,M2210703914,IONIQ5,2022-01-05 19:31:31.000,50.5,19.0,31.5,4978,22.780233,24.4,118.0,...,43.291577,,,,,,,,,
1,M2210703926,IONIQ5,2022-01-10 09:44:23.000,60.0,50.5,9.5,5393,6.341554,9.8,36.0,...,77.437037,,,,,,,,,
2,M2210703914,IONIQ5,2022-01-10 10:29:23.000,83.0,64.0,19.0,3081,22.200584,16.7,70.0,...,74.359223,,,,,,,,,
3,M2210703926,IONIQ5,2022-01-10 21:06:08.000,43.5,36.0,7.5,3012,8.964143,8.4,,...,58.0,0.0,,,,,,,,
4,M2210703914,IONIQ5,2022-01-11 21:23:12.000,72.5,47.5,25.0,2982,30.181087,19.9,86.0,...,54.521739,,0.2,,,,,,,
5,M2210703914,IONIQ5,2022-01-11 22:33:15.000,40.0,32.5,7.5,1500,18.0,6.1,,...,56.0,,0.2,,,,,,,
6,M2210703926,IONIQ5,2022-01-12 09:05:24.000,29.5,22.5,7.0,3582,7.035176,6.4,24.0,...,53.481894,,0.2,,,,,,,
7,M2210703926,IONIQ5,2022-01-13 10:59:34.000,39.0,33.5,5.5,2992,6.617647,5.1,20.0,...,45.02,,,,,,,,,
8,M1200607457,IONIQ5,2022-01-15 12:16:48.000,83.5,72.0,11.5,2942,14.07206,11.3,57.0,...,41.244068,,,,,,,,,
9,M2211123122,IONIQ5,2022-01-18 10:32:37.000,40.0,32.0,8.0,1189,24.222035,5.2,26.0,...,51.0,,0.4,,,,,,,
