<a href="https://colab.research.google.com/github/p25-c4/kerly_project/blob/main/03_18_sy_%EB%8D%B0%EC%9D%B4%ED%84%B0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Korean font setup: use NanumGothic so Hangul text renders in matplotlib figures
mpl.rc("font",family = "NanumGothic")
# Draw negative signs with the ASCII hyphen instead of the Unicode minus
# (presumably because the chosen font lacks that glyph — TODO confirm)
mpl.rc("axes", unicode_minus = False)

# Data load

In [None]:
# Load the items, orders and users CSV files from the local data directory.
items = pd.read_csv("/home/piai/bigdata/on_items.csv")
items.head()  # NOTE: result is discarded — a notebook only renders the cell's last expression
orders = pd.read_csv("/home/piai/bigdata/on_orders.csv")
orders.head()  # NOTE: result is discarded as well
# The users file is decoded as cp949 instead of pandas' default encoding
users = pd.read_csv("/home/piai/bigdata/on_users.csv",encoding='cp949')
users.head()

Unnamed: 0,idUser,Gender,Age,FamilyCount,MemberYN
0,U10001,여성,26,2,Y
1,U10002,남성,61,2,Y
2,U10003,여성,34,2,Y
3,U10004,남성,26,1,N
4,U10005,여성,33,3,Y


## orders 에 Delay_YN 열 추가

In [None]:
# Parse the order and delivery timestamps (text in '%d%b%Y:%H:%M:%S' form) into datetimes
orders[['OrderDT', 'DeliveryDT']] = orders[['OrderDT', 'DeliveryDT']].apply(
    pd.to_datetime, format='%d%b%Y:%H:%M:%S'
)
# Derived flag: 1 when the delivery's hour-of-day is 07 or later (delayed), otherwise 0.
# Only the hour of DeliveryDT is inspected; the delivery date is not considered.
orders['Delay_YN'] = (orders['DeliveryDT'].dt.hour >= 7).astype('int64')

orders.head()

Unnamed: 0,idUser,idOrder,OrderDT,ItemCode,Price,DeliveryDT,Delay_YN
0,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M12-S0070-1113,17030,2021-12-04 03:26:00,0
1,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M15-S0140-1247,7680,2021-12-04 03:26:00,0
2,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M23-S0580-1047,12420,2021-12-04 03:26:00,0
3,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M12-S0640-1010,6250,2021-12-04 03:26:00,0
4,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M17-S0130-1018,14450,2021-12-04 03:26:00,0


## users에 Agegroup 추가

In [None]:
## Add AgeGroup to users
# Bucket the Age column into two bands: 2030s / 4060s
bins = [19,39,69]  # bin edges; with right=True the intervals are (19, 39] and (39, 69]
labels = ['2030s','4060s']  # label assigned to each band, in bin order
# Ages outside (19, 69] fall in no bin and get NaN from pd.cut
users['AgeGroup'] = pd.cut(users['Age'], bins=bins, labels=labels, right=True)

In [None]:
# Convert AgeGroup from the categorical dtype produced by pd.cut to plain object dtype
users['AgeGroup'] = users['AgeGroup'].astype("object")
# Display the resulting column
users['AgeGroup']

0       2030s
1       4060s
2       2030s
3       2030s
4       2030s
        ...  
2995    4060s
2996    2030s
2997    2030s
2998    2030s
2999    2030s
Name: AgeGroup, Length: 3000, dtype: object

# DataFrame 합치기

In [None]:
# Sanity check: (rows, columns) of each frame before merging
print(users.shape, items.shape, orders.shape)

(3000, 6) (11019, 11) (1043272, 7)


In [None]:
# orders + items
# Drop the last three columns of items by position (the price-related variables, per the
# original note) so they do not create duplicates after the merge.
# NOTE(review): this is a positional slice — it silently selects the wrong columns if the
# column order of on_items.csv ever changes; verify against the file's header.
items_NoPrice = items.iloc[:,:-3]  # remove the Price-related variables from items to avoid duplicates
# Left join keeps every order line and attaches the item attributes by ItemCode
order_item = pd.merge(orders, items_NoPrice, on='ItemCode', how='left') # orders + items
# Remove fully identical rows and renumber the index.
# NOTE(review): this would also collapse genuinely repeated order lines (same item twice in
# one order), if any exist — confirm that is intended.
order_item = order_item.drop_duplicates(ignore_index= True) # remove duplicate rows
order_item.shape

(1043272, 14)

In [None]:
# order_item + users: left join attaches each user's attributes to every order line by idUser
order_user_item = pd.merge(order_item, users, on='idUser', how='left') # order_item + users
order_user_item.shape

(1043272, 19)

In [None]:
# Count missing values per column to confirm the left joins found a match for every row
order_user_item.isnull().sum()

idUser            0
idOrder           0
OrderDT           0
ItemCode          0
Price             0
DeliveryDT        0
Delay_YN          0
ItemLargeCode     0
ItemLargeName     0
ItemMiddleCode    0
ItemMiddleName    0
ItemSmallCode     0
ItemSmallName     0
ItemName          0
Gender            0
Age               0
FamilyCount       0
MemberYN          0
AgeGroup          0
dtype: int64

## not_food 처리

In [None]:
# Hand-curated list of non-food markers (compiling it took a long time).
# Entries are either short keywords or full product names; they are matched as
# substrings of ItemName by the filtering cell below.
# NOTE(review): one ring listing appears twice in a row (end of one line, start of the
# next), so len(not_food) counts one more entry than there are distinct strings.
not_food = ["냄비", "기계", "디스펜서", "퍼즐", "냉장고", "밀폐용기",
            "액자", "수동 반죽펴기", "채칼", "캔따기", "모자",
            "카지노","용기","케이스","만두통", "바람개비", "레깅스", "수박 나무라고요",
            "스텐 상추바구니 2호 야채 튀김 과일 바스켓 채반",
            "스테인레스 상추바구니 1호 메쉬바구니 소쿠리 채반",
            "스텐 타공상추바구니 2호","수박과도 大 (10개묶음) 0151","양상추 1cm 샌들 슬리퍼  JG6 (7157830)",
            "후르츠커버포켓크로스백(수박)", "양상추 여름 블라우스 JAP60 (7271217)", "양상추 여자 슬랙스 BF33 (7416269)",
            "양상추 여자 여름샌들 JK131 (7162818)", "MT124 A4 참외색 매직터치 두성 180g 10매입 X ( 3세트 )",
            "에이프릴래빗 양상추 스트링 빅포켓 숏야상점퍼 MA0906 (6660376)", "사랑에 빠진 과일토퍼 - 참외",
            "[총알배송] 제오닉 화채 수박통 8L 채반 믹싱볼 샐러드볼 판촉물 답례품 사은",
            "익사이팅 컬러링토이 유화물감 스포츠 스포츠 참외",
            "[하프클럽/한샘]SOK 프레시 여름 사각 수박용기 4L 2P",
            "참외 모형 1P (소)", "양상추키우기 7종", "양상추키우기 7종 채소키우기", "나 채소",
            "초 가을 계절 개성 캐쥬얼 참외 껍질 쿨링 여성", "22511712S3488857739", "타공 상추 바구니 2호 과일채소 씻어 빠른건조 주방",
            "OBSESSION 100g150g200g250g300g Kabura Bullet 금속 지그 헤드 오징어 어시", "나혼자키운다(대파)",
            "밀폐용기 42호 3L 2P세트 수박보관통 과일 대파 냉장고수납 야채 냉동실정리", "양파 대파 감자 마늘 생강 조개 과일망 메시 메쉬 그물망 일자형",
            "기타보관용기 지혜 냉동만두 수납함 다층 물만두 전용", "냉장고박스 Shujiabao.만두케이스 냉동만두 다층", "(5개묶음)까기쉬운 삼덕 밤가위 색상랜덤발송 밤까기",
            "낮과 밤을 만들어요 5명 set", "달의 위상과 지구의 낮과 밤 5인용", "반고흐 패브릭 포스터 별이빛나는밤 M", "밤깍는 생활 생밤칼 주방","별이 빛나는 밤에 반지 폭3mm 전체925실버",
            "별이 빛나는 밤에 반지 폭3mm 전체925실버", "코르크메모지 액자(산타는 밤에 와요) 5인용 집콕놀이", "통조림 캔따개 황도 참치 꽁치 따개 캔 오프너 업소용",
            "황도12궁풀러렌축구공 10인용", "그린 고사리 부쉬 조화", "그린 고사리설악초 페이퍼바스켓세트 조화 실크플라워", "인조나무 인테리어 조화 화분 솔향고사리 2P SET",
            "기타보관용기 만두통 냉동만두 가정용 급속냉동", "냉장고박스 만두케이스 계란 신선보존 훈툰만두 냉동",
            "냉장고박스 만두케이스 냉동만두 가정용 급속냉동", "냉장고박스 만두케이스 냉동만두 다층 가정용 급속냉동 혼돈",
            "쟁반 급속냉동 반달형 물만두 수납함 냉동만두", "정리함 만두통 냉동만두 전용 물만두 받침판 냉장고","초대형 물고기 미끼 20cm 80g 대형 포퍼 지깅 도구 저크 베이트 낚시 루어 태",
            "뇨끼판 뇨끼 보드 파스타 반죽 국수 면 만드는 도구", "클레어 파스타집게 다용도 샐러드 면요리용 조리도구"
            ]
# Number of entries in the list (duplicates included)
len(not_food)

68

In [None]:
# Select the rows whose ItemName contains at least one not_food entry (substring match).
# The generator yields the membership test itself, so the outcome depends only on whether
# a keyword occurs in the name, never on the truthiness of the keyword string.
not_food_order_item = order_user_item[
    order_user_item['ItemName'].apply(lambda x: any(item in x for item in not_food))
]
print(not_food_order_item.shape)
not_food_order_item.head()

(24561, 19)


Unnamed: 0,idUser,idOrder,OrderDT,ItemCode,Price,DeliveryDT,Delay_YN,ItemLargeCode,ItemLargeName,ItemMiddleCode,ItemMiddleName,ItemSmallCode,ItemSmallName,ItemName,Gender,Age,FamilyCount,MemberYN,AgeGroup
103,U10001,U10001-O2021-1021,2021-09-11 08:18:12,L4-M12-S0430-1008,1320,2021-09-13 05:56:00,0,L4,신선식품,M12,과일,S0430,수박,DIY614 수박바람개비만들기,여성,26,2,Y,2030s
107,U10001,U10001-O2021-1022,2021-08-31 17:06:38,L1-M15-S0140-1357,26770,2021-09-01 06:36:00,0,L1,가공식품,M15,냉동,S0140,냉동,쟁반 급속냉동 반달형 물만두 수납함 냉동만두,여성,26,2,Y,2030s
186,U10001,U10001-O2022-1009,2022-09-28 18:40:30,L4-M12-S0560-1016,13780,2022-09-29 05:41:00,0,L4,신선식품,M12,과일,S0560,참외,초 가을 계절 개성 캐쥬얼 참외 껍질 쿨링 여성 22511712S3488857739,여성,26,2,Y,2030s
284,U10001,U10001-O2023-1003,2023-11-29 19:40:33,L1-M11-S0280-1051,26940,2023-11-30 06:58:00,0,L1,가공식품,M11,곡물,S0280,밤,낮과 밤을 만들어요 5명 set,여성,26,2,Y,2030s
295,U10001,U10001-O2023-1004,2023-12-06 15:25:42,L4-M12-S0430-1013,22900,2023-12-07 06:32:00,0,L4,신선식품,M12,과일,S0430,수박,[하프클럽/한샘]SOK 프레시 여름 사각 수박용기 4L 2P,여성,26,2,Y,2030s


    - 총 24561 개(총 주문 데이터의 약 2%)
    - 분석 대상이 아님 --> 제외

In [None]:
# Remove the non-food rows from order_user_item (not from items), using their index labels.
# The remaining rows keep their original index values, so the index now has gaps.
order_user_item = order_user_item.drop(index = not_food_order_item.index)

## Date 추가

In [None]:
# Calendar date of the order as a 'YYYY-MM-DD' string (a str column, not a datetime)
order_user_item['Date'] = order_user_item['OrderDT'].dt.strftime('%Y-%m-%d')

## Year, Month, Day, Hour 추가

In [None]:
# Ensure OrderDT is a datetime column. It was already parsed when the Delay_YN flag was
# built, so this is a no-op here; no `format` is passed because the values are no longer
# in the raw '%d%b%Y:%H:%M:%S' text form (same call as in the reorder-interval cell).
order_user_item['OrderDT'] = pd.to_datetime(order_user_item['OrderDT'])

# Year of the order timestamp -> new column Year
order_user_item['Year'] = order_user_item['OrderDT'].dt.year
# Month of the order timestamp -> new column Month
order_user_item['Month'] = order_user_item['OrderDT'].dt.month
# Day of month of the order timestamp -> new column Day
order_user_item['Day'] = order_user_item['OrderDT'].dt.day
# Hour of day of the order timestamp -> new column Hour
order_user_item['Hour'] = order_user_item['OrderDT'].dt.hour
order_user_item.head()

Unnamed: 0,idUser,idOrder,OrderDT,ItemCode,Price,DeliveryDT,Delay_YN,ItemLargeCode,ItemLargeName,ItemMiddleCode,...,Gender,Age,FamilyCount,MemberYN,AgeGroup,Date,Year,Month,Day,Hour
0,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M12-S0070-1113,17030,2021-12-04 03:26:00,0,L1,가공식품,M12,...,여성,26,2,Y,2030s,2021-12-03,2021,12,3,20
1,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M15-S0140-1247,7680,2021-12-04 03:26:00,0,L1,가공식품,M15,...,여성,26,2,Y,2030s,2021-12-03,2021,12,3,20
2,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M23-S0580-1047,12420,2021-12-04 03:26:00,0,L1,가공식품,M23,...,여성,26,2,Y,2030s,2021-12-03,2021,12,3,20
3,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M12-S0640-1010,6250,2021-12-04 03:26:00,0,L4,신선식품,M12,...,여성,26,2,Y,2030s,2021-12-03,2021,12,3,20
4,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M17-S0130-1018,14450,2021-12-04 03:26:00,0,L4,신선식품,M17,...,여성,26,2,Y,2030s,2021-12-03,2021,12,3,20


## ItemName list col 생성

In [None]:
# Lower-case ItemName in place. The vectorised .str accessor replaces a per-row Python
# lambda and passes missing values through instead of raising on them.
order_user_item['ItemName'] = order_user_item['ItemName'].str.lower()

In [None]:
import re

# Substrings to strip outright: HTML entities left in the item names
stop_words = ["&amp;", "&times;"]

# Punctuation that is replaced by a space. Every member is spelled out and escaped:
#   ? ( ) [ ] \ - / ~ _ : < > = #
# This is exactly the set the earlier pattern matched, where the backslash was only
# included through an accidental `[`-to-`]` range inside the character class.
special_chars_re = re.compile(r"[?()\[\]\\\-/~_:<>=#]")
# One alternation of the escaped stop words, compiled once instead of once per row
stop_words_re = re.compile('|'.join(map(re.escape, stop_words)))

# CleanedItemName = ItemName with special characters, then stop words, replaced by spaces.
# Runs of spaces are left as they are.
order_user_item['CleanedItemName'] = (
    order_user_item['ItemName']
    .str.replace(special_chars_re, ' ', regex=True)
    .str.replace(stop_words_re, ' ', regex=True)
)
order_user_item.head()

Unnamed: 0,idUser,idOrder,OrderDT,ItemCode,Price,DeliveryDT,Delay_YN,ItemLargeCode,ItemLargeName,ItemMiddleCode,...,Age,FamilyCount,MemberYN,AgeGroup,Date,Year,Month,Day,Hour,CleanedItemName
0,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M12-S0070-1113,17030,2021-12-04 03:26:00,0,L1,가공식품,M12,...,26,2,Y,2030s,2021-12-03,2021,12,3,20,이츠웰 후르츠칵테일 과일통조림 3k 슬라이스
1,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M15-S0140-1247,7680,2021-12-04 03:26:00,0,L1,가공식품,M15,...,26,2,Y,2030s,2021-12-03,2021,12,3,20,베지가든 수제교자만두 매콤한 김치맛 720g 360gx2개입
2,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M23-S0580-1047,12420,2021-12-04 03:26:00,0,L1,가공식품,M23,...,26,2,Y,2030s,2021-12-03,2021,12,3,20,매일유업 매일 상하치즈 피자용 모짜렐라 슈레드치즈 200gx5봉
3,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M12-S0640-1010,6250,2021-12-04 03:26:00,0,L4,신선식품,M12,...,26,2,Y,2030s,2021-12-03,2021,12,3,20,계양구 배달 싱싱한 방울토마토 750g 작전 과일나라
4,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M17-S0130-1018,14450,2021-12-04 03:26:00,0,L4,신선식품,M17,...,26,2,Y,2030s,2021-12-03,2021,12,3,20,낙지호롱 900g 20 25개 낙지꼬치구이 냉동낙지 양념소스70gx1팩 제공


## 재주문 간격

In [None]:
# Make sure OrderDT is a datetime column before doing time arithmetic on it
order_user_item['OrderDT'] = pd.to_datetime(order_user_item['OrderDT'])

# Sort by user, then by order time, so each user's rows are in chronological order
# (ItemCode is not part of the sort key)
order_user_sorted = order_user_item.sort_values(by=['idUser', 'OrderDT'])

# Within each user, time elapsed since the previous row. Rows of the same order share one
# OrderDT, so only the first row of an order carries the gap to the previous order (the
# others are 0 days), and the first row of every user is NaT.
# groupby does not modify its input, so no defensive copy of the frame is needed.
order_user_sorted['TimeBetweenOrders'] = order_user_sorted.groupby('idUser')['OrderDT'].diff()

order_user_sorted.head(100)

Unnamed: 0,idUser,idOrder,OrderDT,ItemCode,Price,DeliveryDT,Delay_YN,ItemLargeCode,ItemLargeName,ItemMiddleCode,...,FamilyCount,MemberYN,AgeGroup,Date,Year,Month,Day,Hour,CleanedItemName,TimeBetweenOrders
7,U10001,U10001-O2021-1002,2021-01-07 17:08:51,L1-M15-S0140-1323,22520,2021-01-08 06:24:00,0,L1,가공식품,M15,...,2,Y,2030s,2021-01-07,2021,1,7,17,오뚜기 듬뿍 새우볶음밥450g 2인분 x 5봉지,NaT
8,U10001,U10001-O2021-1002,2021-01-07 17:08:51,L1-M21-S0540-1082,3780,2021-01-08 06:24:00,0,L1,가공식품,M21,...,2,Y,2030s,2021-01-07,2021,1,7,17,동원 양반 차돌된장찌개 460g,0 days 00:00:00
9,U10001,U10001-O2021-1002,2021-01-07 17:08:51,L4-M12-S0350-1036,21630,2021-01-08 06:24:00,0,L4,신선식품,M12,...,2,Y,2030s,2021-01-07,2021,1,7,17,산지직송 새콤달콤 부사 사과 5kg 13과내,0 days 00:00:00
10,U10001,U10001-O2021-1002,2021-01-07 17:08:51,L4-M17-S0530-1026,33310,2021-01-08 06:24:00,0,L4,신선식품,M17,...,2,Y,2030s,2021-01-07,2021,1,7,17,완도 활전복 1kg 중 22 25미,0 days 00:00:00
11,U10001,U10001-O2021-1003,2021-01-14 16:50:14,L1-M11-S0220-1033,3950,2021-01-15 06:28:00,0,L1,가공식품,M11,...,2,Y,2030s,2021-01-14,2021,1,14,16,농심 봉지라면 멀티팩 골라담기 농심 신라면 5개입 1팩,6 days 23:41:23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,U10001,U10001-O2021-1022,2021-08-31 17:06:38,L4-M17-S0130-1040,23380,2021-09-01 06:36:00,0,L4,신선식품,M17,...,2,Y,2030s,2021-08-31,2021,8,31,17,프라임 냉동낙지 3kg 2.5kg 활낙지 냉동 제품,0 days 00:00:00
110,U10001,U10001-O2021-1022,2021-08-31 17:06:38,L4-M17-S0810-1001,10140,2021-09-01 06:36:00,0,L4,신선식품,M17,...,2,Y,2030s,2021-08-31,2021,8,31,17,법성포참맛 더 커진 프리미엄 영광굴비 100g,0 days 00:00:00
111,U10001,U10001-O2021-1022,2021-08-31 17:06:38,L4-M22-S0360-1007,12630,2021-09-01 06:36:00,0,L4,신선식품,M22,...,2,Y,2030s,2021-08-31,2021,8,31,17,자연맛남 자연농원 유기농 적상추 1kg,0 days 00:00:00
112,U10001,U10001-O2021-1023,2021-09-07 18:25:16,L1-M15-S0140-1413,4820,2021-09-08 05:13:00,0,L1,가공식품,M15,...,2,Y,2030s,2021-09-07,2021,9,7,18,해태제과 얇은피 고향만두 800g+150g 950g,7 days 01:18:38


In [None]:
# Build a per-order lookup of the reorder interval:
# keep only the key columns plus TimeBetweenOrders ...
df_for_merge = order_user_sorted[['idOrder','OrderDT', 'TimeBetweenOrders']]
# ... and keep the first row of each order in sorted order, i.e. the row whose diff is
# the gap to the user's previous row rather than one of the within-order 0-day diffs
df_for_merge = df_for_merge.drop_duplicates(subset='idOrder', keep='first')

In [None]:
# Merge: attach the per-order TimeBetweenOrders to every line of that order
# (left join on idOrder + OrderDT; the result gets a fresh 0..n-1 index)
order_user_item = pd.merge(order_user_item, df_for_merge, on=['idOrder','OrderDT'], how='left')

In [None]:
# Inspect the first 100 rows to check the merged TimeBetweenOrders values
order_user_item[:100]

Unnamed: 0,idUser,idOrder,OrderDT,ItemCode,Price,DeliveryDT,Delay_YN,ItemLargeCode,ItemLargeName,ItemMiddleCode,...,FamilyCount,MemberYN,AgeGroup,Date,Year,Month,Day,Hour,CleanedItemName,TimeBetweenOrders
0,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M12-S0070-1113,17030,2021-12-04 03:26:00,0,L1,가공식품,M12,...,2,Y,2030s,2021-12-03,2021,12,3,20,이츠웰 후르츠칵테일 과일통조림 3k 슬라이스,3 days 09:06:40
1,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M15-S0140-1247,7680,2021-12-04 03:26:00,0,L1,가공식품,M15,...,2,Y,2030s,2021-12-03,2021,12,3,20,베지가든 수제교자만두 매콤한 김치맛 720g 360gx2개입,3 days 09:06:40
2,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M23-S0580-1047,12420,2021-12-04 03:26:00,0,L1,가공식품,M23,...,2,Y,2030s,2021-12-03,2021,12,3,20,매일유업 매일 상하치즈 피자용 모짜렐라 슈레드치즈 200gx5봉,3 days 09:06:40
3,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M12-S0640-1010,6250,2021-12-04 03:26:00,0,L4,신선식품,M12,...,2,Y,2030s,2021-12-03,2021,12,3,20,계양구 배달 싱싱한 방울토마토 750g 작전 과일나라,3 days 09:06:40
4,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M17-S0130-1018,14450,2021-12-04 03:26:00,0,L4,신선식품,M17,...,2,Y,2030s,2021-12-03,2021,12,3,20,낙지호롱 900g 20 25개 낙지꼬치구이 냉동낙지 양념소스70gx1팩 제공,3 days 09:06:40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,U10001,U10001-O2021-1020,2021-06-06 14:52:31,L1-M19-S0190-1003,3380,2021-06-07 06:38:00,0,L1,가공식품,M19,...,2,Y,2030s,2021-06-06,2021,6,6,14,당일발송 추천오뚜기 1000아일랜드드레싱250g,6 days 21:17:50
96,U10001,U10001-O2021-1020,2021-06-06 14:52:31,L4-M18-S0110-1084,18330,2021-06-07 06:38:00,0,L4,신선식품,M18,...,2,Y,2030s,2021-06-06,2021,6,6,14,하선정 하선정 시원한포기김치 5kg,6 days 21:17:50
97,U10001,U10001-O2021-1020,2021-06-06 14:52:31,L4-M22-S0170-1031,17840,2021-06-07 06:38:00,0,L4,신선식품,M22,...,2,Y,2030s,2021-06-06,2021,6,6,14,말린 도라지 길경 1kg 도라지차 요리 육수 중국산,6 days 21:17:50
98,U10001,U10001-O2021-1020,2021-06-06 14:52:31,L4-M22-S0700-1064,4240,2021-06-07 06:38:00,0,L4,신선식품,M22,...,2,Y,2030s,2021-06-06,2021,6,6,14,애호박 2개 600g내외 팔팔마켓 생 풋호박 인큐 호박,6 days 21:17:50


## Combined 추가

In [None]:
# Combined = "<AgeGroup>, <Gender>" segment label, e.g. "2030s, 여성".
# Built with vectorised string concatenation instead of a row-wise apply, which runs a
# Python function per row; astype(str) renders each value the same way an f-string does.
order_user_item['Combined'] = (
    order_user_item['AgeGroup'].astype(str) + ', ' + order_user_item['Gender'].astype(str)
)
order_user_item[:100]

Unnamed: 0,idUser,idOrder,OrderDT,ItemCode,Price,DeliveryDT,Delay_YN,ItemLargeCode,ItemLargeName,ItemMiddleCode,...,MemberYN,AgeGroup,Date,Year,Month,Day,Hour,CleanedItemName,TimeBetweenOrders,Combined
0,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M12-S0070-1113,17030,2021-12-04 03:26:00,0,L1,가공식품,M12,...,Y,2030s,2021-12-03,2021,12,3,20,이츠웰 후르츠칵테일 과일통조림 3k 슬라이스,3 days 09:06:40,"2030s, 여성"
1,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M15-S0140-1247,7680,2021-12-04 03:26:00,0,L1,가공식품,M15,...,Y,2030s,2021-12-03,2021,12,3,20,베지가든 수제교자만두 매콤한 김치맛 720g 360gx2개입,3 days 09:06:40,"2030s, 여성"
2,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M23-S0580-1047,12420,2021-12-04 03:26:00,0,L1,가공식품,M23,...,Y,2030s,2021-12-03,2021,12,3,20,매일유업 매일 상하치즈 피자용 모짜렐라 슈레드치즈 200gx5봉,3 days 09:06:40,"2030s, 여성"
3,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M12-S0640-1010,6250,2021-12-04 03:26:00,0,L4,신선식품,M12,...,Y,2030s,2021-12-03,2021,12,3,20,계양구 배달 싱싱한 방울토마토 750g 작전 과일나라,3 days 09:06:40,"2030s, 여성"
4,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M17-S0130-1018,14450,2021-12-04 03:26:00,0,L4,신선식품,M17,...,Y,2030s,2021-12-03,2021,12,3,20,낙지호롱 900g 20 25개 낙지꼬치구이 냉동낙지 양념소스70gx1팩 제공,3 days 09:06:40,"2030s, 여성"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,U10001,U10001-O2021-1020,2021-06-06 14:52:31,L1-M19-S0190-1003,3380,2021-06-07 06:38:00,0,L1,가공식품,M19,...,Y,2030s,2021-06-06,2021,6,6,14,당일발송 추천오뚜기 1000아일랜드드레싱250g,6 days 21:17:50,"2030s, 여성"
96,U10001,U10001-O2021-1020,2021-06-06 14:52:31,L4-M18-S0110-1084,18330,2021-06-07 06:38:00,0,L4,신선식품,M18,...,Y,2030s,2021-06-06,2021,6,6,14,하선정 하선정 시원한포기김치 5kg,6 days 21:17:50,"2030s, 여성"
97,U10001,U10001-O2021-1020,2021-06-06 14:52:31,L4-M22-S0170-1031,17840,2021-06-07 06:38:00,0,L4,신선식품,M22,...,Y,2030s,2021-06-06,2021,6,6,14,말린 도라지 길경 1kg 도라지차 요리 육수 중국산,6 days 21:17:50,"2030s, 여성"
98,U10001,U10001-O2021-1020,2021-06-06 14:52:31,L4-M22-S0700-1064,4240,2021-06-07 06:38:00,0,L4,신선식품,M22,...,Y,2030s,2021-06-06,2021,6,6,14,애호박 2개 600g내외 팔팔마켓 생 풋호박 인큐 호박,6 days 21:17:50,"2030s, 여성"


In [None]:
# Show the first five rows of the enriched frame
order_user_item.head()

Unnamed: 0,idUser,idOrder,OrderDT,ItemCode,Price,DeliveryDT,Delay_YN,ItemLargeCode,ItemLargeName,ItemMiddleCode,...,MemberYN,AgeGroup,Date,Year,Month,Day,Hour,CleanedItemName,TimeBetweenOrders,Combined
0,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M12-S0070-1113,17030,2021-12-04 03:26:00,0,L1,가공식품,M12,...,Y,2030s,2021-12-03,2021,12,3,20,이츠웰 후르츠칵테일 과일통조림 3k 슬라이스,3 days 09:06:40,"2030s, 여성"
1,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M15-S0140-1247,7680,2021-12-04 03:26:00,0,L1,가공식품,M15,...,Y,2030s,2021-12-03,2021,12,3,20,베지가든 수제교자만두 매콤한 김치맛 720g 360gx2개입,3 days 09:06:40,"2030s, 여성"
2,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M23-S0580-1047,12420,2021-12-04 03:26:00,0,L1,가공식품,M23,...,Y,2030s,2021-12-03,2021,12,3,20,매일유업 매일 상하치즈 피자용 모짜렐라 슈레드치즈 200gx5봉,3 days 09:06:40,"2030s, 여성"
3,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M12-S0640-1010,6250,2021-12-04 03:26:00,0,L4,신선식품,M12,...,Y,2030s,2021-12-03,2021,12,3,20,계양구 배달 싱싱한 방울토마토 750g 작전 과일나라,3 days 09:06:40,"2030s, 여성"
4,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M17-S0130-1018,14450,2021-12-04 03:26:00,0,L4,신선식품,M17,...,Y,2030s,2021-12-03,2021,12,3,20,낙지호롱 900g 20 25개 낙지꼬치구이 냉동낙지 양념소스70gx1팩 제공,3 days 09:06:40,"2030s, 여성"


## 요일 추가

In [None]:
# Parse the Date strings back into datetimes; values that cannot be parsed become NaT
order_user_item['Date'] = pd.to_datetime(order_user_item['Date'], errors='coerce')
# Day of the week as a number (Monday=0 ... Sunday=6)
order_user_item['Weekday'] = order_user_item['Date'].dt.dayofweek
# Lookup table from the weekday number to its English name
weekday_map = dict(enumerate(
    ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
))
order_user_item['WeekdayName'] = order_user_item['Weekday'].map(weekday_map)

order_user_item.head()

Unnamed: 0,idUser,idOrder,OrderDT,ItemCode,Price,DeliveryDT,Delay_YN,ItemLargeCode,ItemLargeName,ItemMiddleCode,...,Date,Year,Month,Day,Hour,CleanedItemName,TimeBetweenOrders,Combined,Weekday,WeekdayName
0,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M12-S0070-1113,17030,2021-12-04 03:26:00,0,L1,가공식품,M12,...,2021-12-03,2021,12,3,20,이츠웰 후르츠칵테일 과일통조림 3k 슬라이스,3 days 09:06:40,"2030s, 여성",4,Friday
1,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M15-S0140-1247,7680,2021-12-04 03:26:00,0,L1,가공식품,M15,...,2021-12-03,2021,12,3,20,베지가든 수제교자만두 매콤한 김치맛 720g 360gx2개입,3 days 09:06:40,"2030s, 여성",4,Friday
2,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L1-M23-S0580-1047,12420,2021-12-04 03:26:00,0,L1,가공식품,M23,...,2021-12-03,2021,12,3,20,매일유업 매일 상하치즈 피자용 모짜렐라 슈레드치즈 200gx5봉,3 days 09:06:40,"2030s, 여성",4,Friday
3,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M12-S0640-1010,6250,2021-12-04 03:26:00,0,L4,신선식품,M12,...,2021-12-03,2021,12,3,20,계양구 배달 싱싱한 방울토마토 750g 작전 과일나라,3 days 09:06:40,"2030s, 여성",4,Friday
4,U10001,U10001-O2021-1001,2021-12-03 20:59:27,L4-M17-S0130-1018,14450,2021-12-04 03:26:00,0,L4,신선식품,M17,...,2021-12-03,2021,12,3,20,낙지호롱 900g 20 25개 낙지꼬치구이 냉동낙지 양념소스70gx1팩 제공,3 days 09:06:40,"2030s, 여성",4,Friday


# csv 저장

In [None]:
# Write the final enriched frame to kurly.csv in the working directory, without the index.
# (The previous line was corrupted by a pasted error-message fragment and did not parse.)
order_user_item.to_csv("kurly.csv", index=False)