In [2]:
import pandas as pd
import numpy as np

# Load the raw delivery-time records from the CSV in the working directory.
df = pd.read_csv('Food_Delivery_Times.csv')

# Basic overview: the shape goes on one line, then each remaining section is
# printed as a heading followed by its value.
print("데이터 크기:", df.shape)
overview_sections = [
    ("\n컬럼명:", df.columns.tolist()),
    ("\n처음 5행:", df.head()),
    ("\n데이터 타입:", df.dtypes),
    ("\n결측치 확인:", df.isnull().sum()),
]
for heading, content in overview_sections:
    print(heading)
    print(content)

데이터 크기: (1000, 9)

컬럼명:
['Order_ID', 'Distance_km', 'Weather', 'Traffic_Level', 'Time_of_Day', 'Vehicle_Type', 'Preparation_Time_min', 'Courier_Experience_yrs', 'Delivery_Time_min']

처음 5행:
   Order_ID  Distance_km Weather Traffic_Level Time_of_Day Vehicle_Type  \
0       522         7.93   Windy           Low   Afternoon      Scooter   
1       738        16.42   Clear        Medium     Evening         Bike   
2       741         9.52   Foggy           Low       Night      Scooter   
3       661         7.44   Rainy        Medium   Afternoon      Scooter   
4       412        19.03   Clear           Low     Morning         Bike   

   Preparation_Time_min  Courier_Experience_yrs  Delivery_Time_min  
0                    12                     1.0                 43  
1                    20                     2.0                 84  
2                    28                     1.0                 59  
3                     5                     1.0                 37  
4             

In [6]:
# Trim leading/trailing whitespace from the categorical text columns.
# NOTE(review): the astype(str) cast presumably turns missing values into the
# literal text "nan", which the next cell matches on - confirm for the pandas
# version in use.
for text_col in ("Weather", "Traffic_Level", "Time_of_Day", "Vehicle_Type"):
    df[text_col] = df[text_col].astype(str).str.strip()

In [7]:
# Missing-value handling for the text columns.
# These columns were cast with astype(str) in the whitespace-cleanup cell, so
# missing entries are matched by their string form and relabelled "Unknown".
# Vehicle_Type is included because it goes through the same string cast; without
# it, a missing vehicle type would be exported as a literal "nan" category.
missing_tokens = ["nan", "None", "NaN"]
for text_col in ("Weather", "Traffic_Level", "Time_of_Day", "Vehicle_Type"):
    df.loc[df[text_col].isin(missing_tokens), text_col] = "Unknown"

In [10]:
# Convert the measurement columns to numeric dtype; values that cannot be
# parsed become NaN (errors="coerce").
# NOTE(review): the execution counts suggest this cell was last run after the
# median-fill cell below - confirm the notebook still works on Restart & Run All.
numeric_cols = (
    "Distance_km",
    "Preparation_Time_min",
    "Courier_Experience_yrs",
    "Delivery_Time_min",
)
for num_col in numeric_cols:
    df[num_col] = pd.to_numeric(df[num_col], errors="coerce")

In [9]:
# Missing-value handling: fill missing courier experience with the column median.
experience_median = df["Courier_Experience_yrs"].median()
df["Courier_Experience_yrs"] = df["Courier_Experience_yrs"].fillna(experience_median)

In [13]:
# Build the extra columns used by the Tableau dashboard.

# Delay flag: a delivery counts as delayed when it takes longer than this many minutes.
DELAY_THRESHOLD_MIN = 60
df["Delayed"] = df["Delivery_Time_min"] > DELAY_THRESHOLD_MIN
# Minutes beyond the threshold; deliveries at or under the threshold are floored to 0.
df["Delay_Over"] = (df["Delivery_Time_min"] - DELAY_THRESHOLD_MIN).clip(lower=0)

# Travel time = total delivery time - preparation time.
df["Move_Time"] = df["Delivery_Time_min"] - df["Preparation_Time_min"]

# Binning notes (apply to all three pd.cut calls below):
# - Bins are right-closed, e.g. (3, 7]. include_lowest=True also closes the first
#   bin on the left, so a value of exactly 0 (for example a courier with 0 years
#   of experience) lands in the first bucket instead of becoming NaN.
# - np.inf as the last edge keeps the top bucket open-ended rather than capping
#   it at an arbitrary 1000.

# Distance buckets
df["Distance"] = pd.cut(
    df["Distance_km"],
    bins=[0, 3, 7, 12, np.inf],
    labels=["0-3km", "3-7km", "7-12km", "12km+"],
    include_lowest=True,
)

# Preparation-time buckets
df["PrepTime"] = pd.cut(
    df["Preparation_Time_min"],
    bins=[0, 10, 20, 30, np.inf],
    labels=["<=10m", "11-20m", "21-30m", "31m+"],
    include_lowest=True,
)

# Courier-experience buckets
df["Experience"] = pd.cut(
    df["Courier_Experience_yrs"],
    bins=[0, 1, 3, 5, np.inf],
    labels=["<=1y", "1-3y", "3-5y", "5y+"],
    include_lowest=True,
)

In [14]:
# Save only the columns needed downstream, grouped here by role.
id_cols = ["Order_ID"]
category_cols = ["Time_of_Day", "Traffic_Level", "Weather", "Vehicle_Type"]
measure_cols = [
    "Distance_km",
    "Preparation_Time_min",
    "Courier_Experience_yrs",
    "Delivery_Time_min",
]
derived_cols = ["Move_Time", "Delayed", "Delay_Over"]
bucket_cols = ["Distance", "PrepTime", "Experience"]

cols = id_cols + category_cols + measure_cols + derived_cols + bucket_cols

# Write the selection without the index, encoded as UTF-8 with a BOM ("utf-8-sig").
tableau_df = df[cols]
tableau_df.to_csv("Food_Delivery_Times_tableau.csv", index=False, encoding="utf-8-sig")

print("저장완료")
print(tableau_df.head())

저장완료
   Order_ID Time_of_Day Traffic_Level Weather Vehicle_Type  Distance_km  \
0       522   Afternoon           Low   Windy      Scooter         7.93   
1       738     Evening        Medium   Clear         Bike        16.42   
2       741       Night           Low   Foggy      Scooter         9.52   
3       661   Afternoon        Medium   Rainy      Scooter         7.44   
4       412     Morning           Low   Clear         Bike        19.03   

   Preparation_Time_min  Courier_Experience_yrs  Delivery_Time_min  Move_Time  \
0                    12                     1.0                 43         31   
1                    20                     2.0                 84         64   
2                    28                     1.0                 59         31   
3                     5                     1.0                 37         32   
4                    16                     5.0                 68         52   

   Delayed  Delay_Over Distance PrepTime Experience  
0  