In [61]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta,timezone
import matplotlib.pyplot as plt

from sklearn.cluster import DBSCAN,KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler,StandardScaler,LabelEncoder,OneHotEncoder

# Plot text setup: select the 'Malgun Gothic' font family and turn off the
# Unicode minus glyph for axis tick labels.
# NOTE(review): presumably needed so the Korean column names render in
# figures; 'Malgun Gothic' has to be installed on the machine — confirm.
plt.rcParams.update({
    'font.family': 'Malgun Gothic',
    'axes.unicode_minus': False,
})

In [62]:
# Columns holding timestamps (parsed to datetimes below).
time_cols = [
    '마지막충전종료시간',
    '연결시작시간',
    '충전시작시간',
    '충전종료시간',
    '연결종료시간',
    '출발예상시간',
]
# Charger / session attribute columns that are kept as loaded.
evse_cols = [
    '충전소위치',
    '충전기이름',
    '충전기타입',
    '방전지원여부',
    '예약충전',
    '충전량(kWh)',
    '요청충전량(kWh)',
]

# Read the raw CSV, then work on an independent copy restricted to the
# columns listed above so the loaded frame `df` is never modified.
df = pd.read_csv('../csv/50area_dummy1_232,000.csv')
dfc = df.loc[:, time_cols + evse_cols].copy()

# Parse each timestamp column from its ISO 8601 text form.
for col in time_cols:
    dfc[col] = pd.to_datetime(dfc[col], format='ISO8601')

In [None]:
# Derived per-session features. All "time" features below are timedeltas
# obtained by subtracting two of the parsed timestamp columns.

# Day of week of the plug-in time (pandas convention: Monday=0 ... Sunday=6).
dfc['요일'] = dfc['연결시작시간'].dt.weekday

# Idle time of the charger before this session (author's proxy for charger popularity).
dfc['사용하지않은시간'] = dfc['연결시작시간'] - dfc['마지막충전종료시간']

# Stay the user announced: expected departure minus plug-in time.
dfc['사용자예상충전소이용시간'] = dfc['출발예상시간'] - dfc['연결시작시간']

# Expected departure minus charge end. Negative means charging took longer
# than the user expected, i.e. the user misjudged the charging time.
dfc['사용자예상충전소요시간차이'] = dfc['출발예상시간'] - dfc['충전종료시간']

# Actual plugged-in duration and actual charging duration.
dfc['실제총충전소이용시간'] = dfc['연결종료시간'] - dfc['연결시작시간']
dfc['실제충전시간'] = dfc['충전종료시간'] - dfc['충전시작시간']

# Delay between plugging in and charging start (users who plug in without
# charging; the author flags this as a factor in charger availability).
dfc['충전시작소요시간'] = dfc['충전시작시간'] - dfc['연결시작시간']

# Charge end minus unplug time. Negative means the plug stayed connected
# after charging had finished (late unplug).
dfc['충전완료후출발소요시간'] = dfc['충전종료시간'] - dfc['연결종료시간']

# Expected departure minus unplug time. Negative means the user unplugged
# later than the departure time they entered themselves.
dfc['실제이용시간과예상출발시간차이'] = dfc['출발예상시간'] - dfc['연결종료시간']

# Requested minus delivered energy. Negative: charged more than requested;
# positive: charged less than requested.
dfc['요청충전량차이'] = dfc['요청충전량(kWh)'] - dfc['충전량(kWh)']

# Announced stay per requested kWh: how long the user believes one kWh takes.
dfc['충전량당시간이해관계'] = dfc['사용자예상충전소이용시간'] / dfc['요청충전량(kWh)']

# Delivered energy per hour of actual charging (kWh / h).
# A zero-length charging interval would turn this into +/-inf, which later
# scaling/clustering steps cannot digest, so zero durations are masked to NaN.
charge_hours = dfc['실제충전시간'].dt.total_seconds() / 3600
dfc['이용시간당전력사용량'] = dfc['충전량(kWh)'] / charge_hours.where(charge_hours != 0)

In [64]:
# Notebook display of dfc, including the derived feature columns.
dfc

Unnamed: 0,마지막충전종료시간,연결시작시간,충전시작시간,충전종료시간,연결종료시간,출발예상시간,충전소위치,충전기이름,충전기타입,방전지원여부,예약충전,충전량(kWh),요청충전량(kWh),요일,사용하지않은시간,사용자예상충전소이용시간,사용자예상충전소요시간차이,실제총충전소이용시간,실제충전시간,충전시작소요시간,충전완료후출발소요시간,실제이용시간과예상출발시간차이,요청충전량차이,충전량당시간이해관계,이용시간당전력사용량
0,2019-11-01 00:00:00+00:00,2019-11-03 00:32:00+00:00,2019-11-03 00:36:00+00:00,2019-11-03 00:50:00+00:00,2019-11-03 01:00:00+00:00,2019-11-03 00:55:00+00:00,st-00,st-00_evse-00,FC,n,n,12.83,12.62,6,2 days 00:32:00,0 days 00:23:00,0 days 00:05:00,0 days 00:28:00,0 days 00:14:00,0 days 00:04:00,-1 days +23:50:00,-1 days +23:55:00,-0.21,0 days 00:01:49.350237717,54.985714
1,2019-11-03 01:00:00+00:00,2019-11-03 19:24:00+00:00,2019-11-03 19:46:00+00:00,2019-11-03 19:55:00+00:00,2019-11-03 20:05:00+00:00,2019-11-03 19:54:00+00:00,st-00,st-00_evse-00,FC,n,n,14.00,13.69,6,0 days 18:24:00,0 days 00:30:00,-1 days +23:59:00,0 days 00:41:00,0 days 00:09:00,0 days 00:22:00,-1 days +23:50:00,-1 days +23:49:00,-0.31,0 days 00:02:11.482834185,93.333333
2,2019-11-03 20:05:00+00:00,2019-11-06 09:18:00+00:00,2019-11-06 09:28:00+00:00,2019-11-06 09:48:00+00:00,2019-11-06 09:52:00+00:00,2019-11-06 09:37:00+00:00,st-00,st-00_evse-00,FC,n,n,22.56,22.88,2,2 days 13:13:00,0 days 00:19:00,-1 days +23:49:00,0 days 00:34:00,0 days 00:20:00,0 days 00:10:00,-1 days +23:56:00,-1 days +23:45:00,0.32,0 days 00:00:49.825174825,67.680000
3,2019-11-06 09:52:00+00:00,2019-11-09 08:24:00+00:00,2019-11-09 08:25:00+00:00,2019-11-09 08:34:00+00:00,2019-11-09 08:38:00+00:00,2019-11-09 08:47:00+00:00,st-00,st-00_evse-00,FC,n,n,9.07,8.99,5,2 days 22:32:00,0 days 00:23:00,0 days 00:13:00,0 days 00:14:00,0 days 00:09:00,0 days 00:01:00,-1 days +23:56:00,0 days 00:09:00,-0.08,0 days 00:02:33.503893214,60.466667
4,2019-11-09 08:38:00+00:00,2019-11-12 04:27:00+00:00,2019-11-12 04:56:00+00:00,2019-11-12 05:08:00+00:00,2019-11-12 05:20:00+00:00,2019-11-12 05:36:00+00:00,st-00,st-00_evse-00,FC,n,n,12.48,12.53,1,2 days 19:49:00,0 days 01:09:00,0 days 00:28:00,0 days 00:53:00,0 days 00:12:00,0 days 00:29:00,-1 days +23:48:00,0 days 00:16:00,0.05,0 days 00:05:30.407023144,62.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219695,2024-12-24 06:31:00+00:00,2024-12-24 12:33:00+00:00,2024-12-24 12:51:00+00:00,2024-12-24 13:06:00+00:00,2024-12-24 13:16:00+00:00,NaT,st-49,st-49_evse-04,FC,n,n,13.80,13.42,1,0 days 06:02:00,NaT,NaT,0 days 00:43:00,0 days 00:15:00,0 days 00:18:00,-1 days +23:50:00,NaT,-0.38,NaT,55.200000
219696,2024-12-24 13:16:00+00:00,2024-12-25 08:36:00+00:00,2024-12-25 09:06:00+00:00,2024-12-25 09:14:00+00:00,2024-12-25 09:26:00+00:00,2024-12-25 09:23:00+00:00,st-49,st-49_evse-04,FC,n,y,6.35,6.48,2,0 days 19:20:00,0 days 00:47:00,0 days 00:09:00,0 days 00:50:00,0 days 00:08:00,0 days 00:30:00,-1 days +23:48:00,-1 days +23:57:00,0.13,0 days 00:07:15.185185185,47.625000
219697,2024-12-25 09:26:00+00:00,2024-12-26 23:46:00+00:00,2024-12-26 23:53:00+00:00,2024-12-27 00:15:00+00:00,2024-12-27 00:20:00+00:00,2024-12-27 00:43:00+00:00,st-49,st-49_evse-04,FC,n,y,17.23,17.24,3,1 days 14:20:00,0 days 00:57:00,0 days 00:28:00,0 days 00:34:00,0 days 00:22:00,0 days 00:07:00,-1 days +23:55:00,0 days 00:23:00,0.01,0 days 00:03:18.375870069,46.990909
219698,2024-12-27 00:20:00+00:00,2024-12-28 11:58:00+00:00,2024-12-28 12:27:00+00:00,2024-12-28 12:58:00+00:00,2024-12-28 13:13:00+00:00,2024-12-28 12:47:00+00:00,st-49,st-49_evse-04,FC,n,y,33.83,34.43,5,1 days 11:38:00,0 days 00:49:00,-1 days +23:49:00,0 days 01:15:00,0 days 00:31:00,0 days 00:29:00,-1 days +23:45:00,-1 days +23:34:00,0.60,0 days 00:01:25.390647690,65.477419


In [None]:
# TODO: grading (binning) step — presumably Jenks natural breaks; the original note read "jenkins", confirm.

In [None]:
# Convert every time-like column to integer seconds and record missing values
# in a companion 0/1 flag column:
#   <col>_ts      - datetimes: seconds since the Unix epoch;
#                   timedeltas: duration in seconds (fraction truncated toward zero);
#                   missing values are stored as 0.
#   <col>_missing - 1 where the source value is missing, else 0.
# NOTE (original author's TODO): flags are created for every column, but only
# the columns where missingness is meaningful should eventually keep one.
new_time_col = ['사용하지않은시간','사용자예상충전소이용시간','사용자예상충전소요시간차이','실제총충전소이용시간','실제충전시간','충전시작소요시간','충전완료후출발소요시간','실제이용시간과예상출발시간차이','충전량당시간이해관계']
# dict.fromkeys drops duplicates while keeping order, so re-running this cell
# does not keep appending the same names to time_cols.
time_cols = list(dict.fromkeys(time_cols + new_time_col))

for col in time_cols:
    values = dfc[col]
    if pd.api.types.is_timedelta64_dtype(values):
        seconds = values.dt.total_seconds()
    else:
        # Assumes the column has a datetime dtype (parsed earlier in the
        # notebook); the epoch is built with the column's own tz so that
        # tz-aware and tz-naive columns both subtract cleanly.
        epoch = pd.Timestamp(0, tz=values.dt.tz)
        seconds = (values - epoch).dt.total_seconds()
    # NaT becomes NaN in `seconds`; store it as 0 and rely on the flag below.
    dfc[f'{col}_ts'] = seconds.fillna(0).astype('int64')
    dfc[f'{col}_missing'] = values.isnull().astype(int)

Unnamed: 0,마지막충전종료시간,연결시작시간,충전시작시간,충전종료시간,연결종료시간,출발예상시간,충전소위치,충전기이름,충전기타입,방전지원여부,예약충전,충전량(kWh),요청충전량(kWh),요일,사용하지않은시간,사용자예상충전소이용시간,사용자예상충전소요시간차이,실제총충전소이용시간,실제충전시간,충전시작소요시간,충전완료후출발소요시간,실제이용시간과예상출발시간차이,요청충전량차이,충전량당시간이해관계,이용시간당전력사용량,마지막충전종료시간_ts,마지막충전종료시간_missing,연결시작시간_ts,연결시작시간_missing,충전시작시간_ts,충전시작시간_missing,충전종료시간_ts,충전종료시간_missing,연결종료시간_ts,연결종료시간_missing,출발예상시간_ts,출발예상시간_missing,사용하지않은시간_ts,사용하지않은시간_missing,사용자예상충전소이용시간_ts,사용자예상충전소이용시간_missing,사용자예상충전소요시간차이_ts,사용자예상충전소요시간차이_missing,실제총충전소이용시간_ts,실제총충전소이용시간_missing,실제충전시간_ts,실제충전시간_missing,충전시작소요시간_ts,충전시작소요시간_missing,충전완료후출발소요시간_ts,충전완료후출발소요시간_missing,실제이용시간과예상출발시간차이_ts,실제이용시간과예상출발시간차이_missing,충전량당시간이해관계_ts,충전량당시간이해관계_missing
0,2019-11-01 00:00:00+00:00,2019-11-03 00:32:00+00:00,2019-11-03 00:36:00+00:00,2019-11-03 00:50:00+00:00,2019-11-03 01:00:00+00:00,2019-11-03 00:55:00+00:00,st-00,st-00_evse-00,FC,n,n,12.83,12.62,6,2 days 00:32:00,0 days 00:23:00,0 days 00:05:00,0 days 00:28:00,0 days 00:14:00,0 days 00:04:00,-1 days +23:50:00,-1 days +23:55:00,-0.21,0 days 00:01:49.350237717,54.985714,1572566400,0,1572741120,0,1572741360,0,1572742200,0,1572742800,0,1572742500,0,174720,0,1380,0,300,0,1680,0,840,0,240,0,-600,0,-300,0,109,0
1,2019-11-03 01:00:00+00:00,2019-11-03 19:24:00+00:00,2019-11-03 19:46:00+00:00,2019-11-03 19:55:00+00:00,2019-11-03 20:05:00+00:00,2019-11-03 19:54:00+00:00,st-00,st-00_evse-00,FC,n,n,14.00,13.69,6,0 days 18:24:00,0 days 00:30:00,-1 days +23:59:00,0 days 00:41:00,0 days 00:09:00,0 days 00:22:00,-1 days +23:50:00,-1 days +23:49:00,-0.31,0 days 00:02:11.482834185,93.333333,1572742800,0,1572809040,0,1572810360,0,1572810900,0,1572811500,0,1572810840,0,66240,0,1800,0,-60,0,2460,0,540,0,1320,0,-600,0,-660,0,131,0
2,2019-11-03 20:05:00+00:00,2019-11-06 09:18:00+00:00,2019-11-06 09:28:00+00:00,2019-11-06 09:48:00+00:00,2019-11-06 09:52:00+00:00,2019-11-06 09:37:00+00:00,st-00,st-00_evse-00,FC,n,n,22.56,22.88,2,2 days 13:13:00,0 days 00:19:00,-1 days +23:49:00,0 days 00:34:00,0 days 00:20:00,0 days 00:10:00,-1 days +23:56:00,-1 days +23:45:00,0.32,0 days 00:00:49.825174825,67.680000,1572811500,0,1573031880,0,1573032480,0,1573033680,0,1573033920,0,1573033020,0,220380,0,1140,0,-660,0,2040,0,1200,0,600,0,-240,0,-900,0,49,0
3,2019-11-06 09:52:00+00:00,2019-11-09 08:24:00+00:00,2019-11-09 08:25:00+00:00,2019-11-09 08:34:00+00:00,2019-11-09 08:38:00+00:00,2019-11-09 08:47:00+00:00,st-00,st-00_evse-00,FC,n,n,9.07,8.99,5,2 days 22:32:00,0 days 00:23:00,0 days 00:13:00,0 days 00:14:00,0 days 00:09:00,0 days 00:01:00,-1 days +23:56:00,0 days 00:09:00,-0.08,0 days 00:02:33.503893214,60.466667,1573033920,0,1573287840,0,1573287900,0,1573288440,0,1573288680,0,1573289220,0,253920,0,1380,0,780,0,840,0,540,0,60,0,-240,0,540,0,153,0
4,2019-11-09 08:38:00+00:00,2019-11-12 04:27:00+00:00,2019-11-12 04:56:00+00:00,2019-11-12 05:08:00+00:00,2019-11-12 05:20:00+00:00,2019-11-12 05:36:00+00:00,st-00,st-00_evse-00,FC,n,n,12.48,12.53,1,2 days 19:49:00,0 days 01:09:00,0 days 00:28:00,0 days 00:53:00,0 days 00:12:00,0 days 00:29:00,-1 days +23:48:00,0 days 00:16:00,0.05,0 days 00:05:30.407023144,62.400000,1573288680,0,1573532820,0,1573534560,0,1573535280,0,1573536000,0,1573536960,0,244140,0,4140,0,1680,0,3180,0,720,0,1740,0,-720,0,960,0,330,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219695,2024-12-24 06:31:00+00:00,2024-12-24 12:33:00+00:00,2024-12-24 12:51:00+00:00,2024-12-24 13:06:00+00:00,2024-12-24 13:16:00+00:00,NaT,st-49,st-49_evse-04,FC,n,n,13.80,13.42,1,0 days 06:02:00,NaT,NaT,0 days 00:43:00,0 days 00:15:00,0 days 00:18:00,-1 days +23:50:00,NaT,-0.38,NaT,55.200000,1735021860,0,1735043580,0,1735044660,0,1735045560,0,1735046160,0,0,1,21720,0,0,1,0,1,2580,0,900,0,1080,0,-600,0,0,1,0,1
219696,2024-12-24 13:16:00+00:00,2024-12-25 08:36:00+00:00,2024-12-25 09:06:00+00:00,2024-12-25 09:14:00+00:00,2024-12-25 09:26:00+00:00,2024-12-25 09:23:00+00:00,st-49,st-49_evse-04,FC,n,y,6.35,6.48,2,0 days 19:20:00,0 days 00:47:00,0 days 00:09:00,0 days 00:50:00,0 days 00:08:00,0 days 00:30:00,-1 days +23:48:00,-1 days +23:57:00,0.13,0 days 00:07:15.185185185,47.625000,1735046160,0,1735115760,0,1735117560,0,1735118040,0,1735118760,0,1735118580,0,69600,0,2820,0,540,0,3000,0,480,0,1800,0,-720,0,-180,0,435,0
219697,2024-12-25 09:26:00+00:00,2024-12-26 23:46:00+00:00,2024-12-26 23:53:00+00:00,2024-12-27 00:15:00+00:00,2024-12-27 00:20:00+00:00,2024-12-27 00:43:00+00:00,st-49,st-49_evse-04,FC,n,y,17.23,17.24,3,1 days 14:20:00,0 days 00:57:00,0 days 00:28:00,0 days 00:34:00,0 days 00:22:00,0 days 00:07:00,-1 days +23:55:00,0 days 00:23:00,0.01,0 days 00:03:18.375870069,46.990909,1735118760,0,1735256760,0,1735257180,0,1735258500,0,1735258800,0,1735260180,0,138000,0,3420,0,1680,0,2040,0,1320,0,420,0,-300,0,1380,0,198,0
219698,2024-12-27 00:20:00+00:00,2024-12-28 11:58:00+00:00,2024-12-28 12:27:00+00:00,2024-12-28 12:58:00+00:00,2024-12-28 13:13:00+00:00,2024-12-28 12:47:00+00:00,st-49,st-49_evse-04,FC,n,y,33.83,34.43,5,1 days 11:38:00,0 days 00:49:00,-1 days +23:49:00,0 days 01:15:00,0 days 00:31:00,0 days 00:29:00,-1 days +23:45:00,-1 days +23:34:00,0.60,0 days 00:01:25.390647690,65.477419,1735258800,0,1735387080,0,1735388820,0,1735390680,0,1735391580,0,1735390020,0,128280,0,2940,0,-660,0,4500,0,1860,0,1740,0,-900,0,-1560,0,85,0


In [None]:
# String column processing (cell is still empty)

In [None]:
# Numeric column processing (cell is still empty)

In [None]:
# Keep all current column labels of dfc as a plain Python list; the bare
# name on the last line makes the notebook print it.
colu = dfc.columns.tolist()
colu

['마지막충전종료시간',
 '연결시작시간',
 '충전시작시간',
 '충전종료시간',
 '연결종료시간',
 '출발예상시간',
 '충전소위치',
 '충전기이름',
 '충전기타입',
 '방전지원여부',
 '예약충전',
 '충전량(kWh)',
 '요청충전량(kWh)',
 '요일',
 '사용하지않은시간',
 '사용자예상충전소이용시간',
 '사용자예상충전소요시간차이',
 '실제총충전소이용시간',
 '실제충전시간',
 '충전시작소요시간',
 '충전완료후출발소요시간',
 '실제이용시간과예상출발시간차이',
 '요청충전량차이',
 '충전량당시간이해관계',
 '이용시간당전력사용량',
 '마지막충전종료시간_ts',
 '마지막충전종료시간_missing',
 '연결시작시간_ts',
 '연결시작시간_missing',
 '충전시작시간_ts',
 '충전시작시간_missing',
 '충전종료시간_ts',
 '충전종료시간_missing',
 '연결종료시간_ts',
 '연결종료시간_missing',
 '출발예상시간_ts',
 '출발예상시간_missing',
 '사용하지않은시간_ts',
 '사용하지않은시간_missing',
 '사용자예상충전소이용시간_ts',
 '사용자예상충전소이용시간_missing',
 '사용자예상충전소요시간차이_ts',
 '사용자예상충전소요시간차이_missing',
 '실제총충전소이용시간_ts',
 '실제총충전소이용시간_missing',
 '실제충전시간_ts',
 '실제충전시간_missing',
 '충전시작소요시간_ts',
 '충전시작소요시간_missing',
 '충전완료후출발소요시간_ts',
 '충전완료후출발소요시간_missing',
 '실제이용시간과예상출발시간차이_ts',
 '실제이용시간과예상출발시간차이_missing',
 '충전량당시간이해관계_ts',
 '충전량당시간이해관계_missing']

In [None]:
# Split off the training data.
# NOTE(review): this only binds `x` to the same DataFrame object as `dfc`
# (no copy, no column selection), so in-place changes made through `x` also
# show up in `dfc` — confirm this is intended.
x = dfc

In [None]:
# Notebook display of the current contents of dfc.
dfc