In [1]:
import os
import pandas as pd
import sys
from collections import defaultdict
import numpy as np
import time
import datetime
import re
import shutil

sys.path.append('/')
import classify_funcs

## 인자값 입력

### 1) 전처리할 파일들이 있는 디렉토리 경로
### 2) 출력파일 디렉토리 경로

In [1]:
# 데이터 파일 디렉토리 경로
data_path = './data'
# 출력 파일 경로
out_path = './out'

if data_path[-1] == '/':
    data_path = data_path[:-1]
if out_path[-1] == '/':
    out_path = out_path[:-1]

## (사용자 설정 디렉토리 내) 전처리대상 파일 리스트 자동 추출

In [3]:
def recursive_search_dir(_nowDir, _filelist):
    dir_list = []  # 현재 디렉토리의 서브디렉토리가 담길 list
    f_list = os.listdir(_nowDir)
    for fname in f_list:
        if fname == "checked_files" or fname == "result_files":  # 이미 전처리한 폴더는 통과
            continue
        if os.path.isdir(_nowDir + "/" + fname):
            dir_list.append(_nowDir + "/" + fname)
        elif os.path.isfile(_nowDir + "/" + fname):
            file_extension = os.path.splitext(fname)[1]
            if file_extension == ".csv" or file_extension == ".CSV":  # csv
                _filelist.append(_nowDir + "/" + fname)

    for toDir in dir_list:
        recursive_search_dir(toDir, _filelist)
        
csv_list = []
print('CSV 파일 목록 불러오는 중..')
recursive_search_dir(data_path, csv_list)
print('총 CSV파일 수 : {}'.format(len(csv_list)))

CSV 파일 목록 불러오는 중..
총 CSV파일 수 : 10


## 데이터 전처리 및 규격화

In [4]:
def printProgressBar(iteration, total, prefix = 'Progress', suffix = 'Complete',\
                      decimals = 1, length = 50, fill = '█'): 
    # 작업의 진행상황을 표시
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    print('\r%s |%s| %s%% %s' %(prefix, bar, percent, suffix), end='\r')
    sys.stdout.flush()
    if iteration == total:
        print()


print('\n완속 충전구간 분류 중..')
cnt=0
usecols = list('dev_id|coll_dt|b_soc|b_slow_charg_con_sts|b_fast_charg_con_sts|b_pack_current|b_pack_volt|b_modul_1_temp|b_modul_2_temp|b_modul_3_temp|b_modul_4_temp|car_type|b_accum_charg_power_quan|c_mileage|a_extern_temp_sensor'.split('|'))
for csv_file in csv_list:
    printProgressBar(cnt, len(csv_list))
    classify_funcs.slow_func(csv_file, data_path, out_path, usecols)
    cnt+=1
printProgressBar(cnt, len(csv_list))

print('\n급속 충전구간 분류 중..')
cnt=0
usecols = list('dev_id|coll_dt|b_soc|b_slow_charg_con_sts|b_fast_charg_con_sts|b_pack_current|b_pack_volt|b_modul_1_temp|b_modul_2_temp|b_modul_3_temp|b_modul_4_temp|car_type|b_accum_charg_power_quan|c_mileage|a_extern_temp_sensor'.split('|'))
for csv_file in csv_list:
    printProgressBar(cnt, len(csv_list))
    classify_funcs.fast_func(csv_file, data_path, out_path, usecols)
    cnt+=1
printProgressBar(cnt, len(csv_list))

print('\n방전구간 분류 중..')
cnt=0
usecols = list('dev_id|coll_dt|b_soc|b_slow_charg_con_sts|b_fast_charg_con_sts|b_pack_current|b_pack_volt|b_modul_1_temp|b_modul_2_temp|b_modul_3_temp|b_modul_4_temp|car_type|b_accum_discharg_power_quan|c_mileage|v_car_speed|a_extern_temp_sensor|b_charg_lamp_sts_1|v_accel_pedal_depth'.split('|'))
for csv_file in csv_list:
    printProgressBar(cnt, len(csv_list))
    classify_funcs.discharge_func(csv_file, data_path, out_path, usecols)
    cnt+=1
printProgressBar(cnt, len(csv_list))



완속 충전구간 분류 중..
Progress |██████████████████████████████████████████████████| 100.0% Complete

급속 충전구간 분류 중..
Progress |██████████████████████████████████████████████████| 100.0% Complete

완속 충전구간 분류 중..
Progress |██████████████████████████████████████████████████| 100.0% Complete


## 출력 파일 다운로드

In [5]:
shutil.make_archive('out', 'zip', out_path)
from IPython.display import FileLink
FileLink('out.zip')

In [6]:
# YYYY-mm-dd HH:MM:SS -> epoch
def convertTimeToEpoch(_time):
    date_time = "%s.%s.%s %s:%s:%s" %(_time[8:10], _time[5:7], _time[:4], _time[11:13], _time[14:16], _time[17:19])
    pattern = "%d.%m.%Y %H:%M:%S"
    epoch = int (time.mktime(time.strptime(date_time, pattern)))
    return epoch

file_list = []
recursive_search_dir('./out/fast_charge/', file_list)

charge_statistic_df = pd.DataFrame(columns=['car_id', 'car_type', 'date_start', 'start_soc', 'end_soc', 'charge_quan', 'charge_time', 'charge_speed', 'charge_power_quan', 'mileage', 'current_avg', 'volt_avg', 'extern_temp_avg', 'soc_avg', 'battery_module1_temp_avg'])

cnt = 1
t_cnt = len(file_list)
for csv in file_list:
    printProgressBar(cnt, t_cnt)
    df = pd.read_csv(csv)
    car_id = df.iloc[0]['dev_id']
    car_type = df.iloc[0]['car_type']
    date_start = df.iloc[0]['coll_dt']
    start_soc = df.iloc[0]['b_soc']
    end_soc = df.iloc[-1]['b_soc']
    charge_quan = end_soc - start_soc
    charge_time = convertTimeToEpoch(df.iloc[-1]['coll_dt']) - convertTimeToEpoch(df.iloc[0]['coll_dt'])
    charge_speed = (charge_quan / charge_time) * 3600  # 1시간당 충전 SOC 양
    charge_power_quan = df.iloc[-1]['b_accum_charg_power_quan'] - df.iloc[0]['b_accum_charg_power_quan']
    charge_power_per_hour = (charge_power_quan / charge_time) * 3600  # 1시간당 충전 전력 (kW)
    accum_mileage = df.iloc[0]['c_mileage']
    current_avg = df['b_pack_current'].mean()
    volt_avg = df['b_pack_volt'].mean()
    extern_temp_avg = df['a_extern_temp_sensor'].mean()
    soc_avg = df['b_soc'].mean()
    battery_module1_temp_avg = df['b_modul_1_temp'].mean()

    temperature_avg = df['temperature'].mean()
    wind_speed_avg = df['wind_speed'].mean()
    humidity_avg = df['humidity'].mean()
    rainfall_avg = df['rainfall'].mean()
    snowfall_avg = df['snowfall'].mean()

    traffic_volume_avg = df['traffic_volume'].mean()

    # 대기오염 데이터가 없으면, 해당 필드는 NaN으로 처리
    try:
        air_co_avg = df['air_co'].mean()
        air_no2_avg = df['air_no2'].mean()
        air_o3_avg = df['air_o3'].mean()
        air_pm10_avg = df['air_pm10'].mean()
        air_pm25_avg = df['air_pm25'].mean()
        air_so2_avg = df['air_so2'].mean()
    except:
        air_co_avg = np.nan
        air_no2_avg = np.nan
        air_o3_avg = np.nan
        air_pm10_avg = np.nan
        air_pm25_avg = np.nan
        air_so2_avg = np.nan

    append_row = [car_id, car_type, date_start, start_soc, end_soc, charge_quan, charge_time, charge_speed, charge_power_quan, charge_power_per_hour, accum_mileage, current_avg, volt_avg, extern_temp_avg, soc_avg, battery_module1_temp_avg, temperature_avg, wind_speed_avg, humidity_avg, rainfall_avg, snowfall_avg, traffic_volume_avg, air_co_avg, air_no2_avg, air_o3_avg, air_pm10_avg, air_pm25_avg, air_so2_avg]
    append_row_df = pd.DataFrame([append_row], columns=['car_id', 'car_type', 'date_start', 'start_soc', 'end_soc', 'charge_quan', 'charge_time', 'charge_speed', 'charge_power_quan', 'charge_power_per_hour', 'accum_mileage', 'current_avg', 'volt_avg', 'extern_temp_avg', 'soc_avg', 'battery_module1_temp_avg', 'temperature_avg', 'wind_speed_avg', 'humidity_avg', 'rainfall_avg', 'snowfall_avg', 'traffic_volume_avg', 'air_co_avg', 'air_no2_avg', 'air_o3_avg', 'air_pm10_avg', 'air_pm25_avg', 'air_so2_avg'])
    charge_statistic_df = pd.concat([charge_statistic_df, append_row_df])
    cnt+=1
charge_statistic_df = charge_statistic_df.sort_values(by=['date_start'])
charge_statistic_df.reset_index(drop=True, inplace=True)

charge_statistic_df.to_csv(_file_output, index=False)

Progress |████----------------------------------------------| 9.1% Complete

KeyError: 'temperature'