In [None]:
# pip install simpy

# 시뮬레이션
- 개인금융
- 큐(대기시간) DF
- 서비스시간 DF

## Train Data (2024-11월)

In [None]:
import simpy
import pandas as pd
import random
from datetime import datetime, timedelta

# Simulation parameters. Every clock value below is expressed in minutes
# elapsed since the bank opens at 9:00 AM.
BANK_OPEN_TIME = 0  # 9:00 AM
BANK_CLOSE_TIME = 60 * 7  # 4:00 PM, i.e. seven hours after opening
NUM_TELLERS = 5  # capacity of the teller resource
START_DATE = datetime(year=2024, month=11, day=1, hour=9, minute=0)  # first simulated day
END_DATE = datetime(year=2024, month=11, day=30, hour=16, minute=0)  # simulation stops here

# Inter-arrival time distribution (Weibull), keyed by (window start, window end).
# Each value is (shape, scale): customer_arrival_time hands the second element
# to random.weibullvariate as the scale and the first as the shape.
# Original note: intended mean inter-arrival time of about 3 minutes.
# NOTE(review): with these parameters the Weibull mean is closer to 4.7 minutes
# - confirm which one is intended.
WEIBULL_PARAMS = {
    (0, 120): (4.0, 5.158),  # 9:00 - 11:00
    (120, 300): (5.0, 5.062),  # 11:00 - 14:00
    (300, 420): (5.0, 5.062),  # 14:00 - 16:00
}

# Service time distribution (Gamma), keyed the same way; values are
# (shape, scale) in minutes and control the form and spread of the distribution.
# Original note: intended service time of about 15 minutes, doubled over lunch
# because only half of the windows are staffed.
# NOTE(review): shape 6.0 x scale 4.5 gives a 27-minute mean, and all three
# windows currently share the same parameters - confirm.
GAMMA_PARAMS = {
    (0, 120): (6.0, 4.5),  # 9:00 - 11:00
    (120, 300): (6.0, 4.5),  # 11:00 - 14:00 (lunch)
    (300, 420): (6.0, 4.5),  # 14:00 - 16:00
}

# Module-level collectors filled in while the simulation runs.
customer_id = 0  # running counter used as the customer's unique ID
queue_data = []  # one record per arrival
service_data = []  # one record per completed service


def customer_arrival_time(env_now):
    """Sample the gap, in minutes, until the next customer arrives.

    The time window of WEIBULL_PARAMS that contains ``env_now`` supplies a
    (shape, scale) pair; a Weibull variate is drawn from it and floored at
    2 minutes. When ``env_now`` lies outside every window, 1 is returned.
    """
    window_params = next(
        (
            params
            for (start, end), params in WEIBULL_PARAMS.items()
            if start <= env_now < end
        ),
        None,
    )
    if window_params is None:
        return 1  # fallback gap outside the configured windows

    shape, scale = window_params
    # random.weibullvariate takes the scale first and the shape second.
    return max(random.weibullvariate(scale, shape), 2)


def customer_service_time(env_now):
    """Sample a service duration, in minutes, for a service starting at ``env_now``.

    The time window of GAMMA_PARAMS that contains ``env_now`` supplies the
    (shape, scale) pair for a Gamma variate, which is floored at 7 minutes.
    When ``env_now`` lies outside every window, 3 is returned.
    """
    for window, (shape, scale) in GAMMA_PARAMS.items():
        opens, closes = window
        if opens <= env_now < closes:
            drawn = random.gammavariate(shape, scale)
            return max(drawn, 7)  # never shorter than 7 minutes
    return 3  # fallback duration outside the configured windows


def customer(env, customer_id, bank_tellers, current_date):
    """SimPy process for one customer: arrive, wait for a teller, get served.

    On arrival a snapshot is appended to ``queue_data``. It is taken before
    this customer requests a teller, so "Queue Length" does not include the
    customer itself. Once the service has finished, a record with the wait
    and service durations is appended to ``service_data``.

    ``current_date`` is the datetime that simulation minute 0 maps to; it is
    used to turn simulation minutes into calendar timestamps.
    """
    arrived_at = env.now
    arrival_stamp = current_date + timedelta(minutes=arrived_at)

    # Arrival snapshot: requests still queued and tellers currently busy.
    queue_data.append({
        "ID": customer_id,
        "Arrival Date": arrival_stamp,
        "Arrival Time": arrived_at,
        "Queue Length": len(bank_tellers.queue),
        "In Service": min(bank_tellers.count, bank_tellers.capacity),
    })

    with bank_tellers.request() as teller_slot:
        yield teller_slot  # resumes once a teller is granted

        started_at = env.now
        service_minutes = customer_service_time(started_at)
        yield env.timeout(service_minutes)

        # Only reached when the service completes within the simulated day.
        service_data.append({
            "ID": customer_id,
            "Arrival Date": arrival_stamp,
            "Arrival Time": arrived_at,
            "Start Date": current_date + timedelta(minutes=started_at),
            "Finish Date": current_date + timedelta(minutes=started_at + service_minutes),
            "Waiting Duration": started_at - arrived_at,
            "Service Duration": service_minutes,
        })


def customer_arrival(env, bank_tellers, current_date):
    """SimPy process that spawns customers until closing time.

    A gap is sampled with customer_arrival_time; as long as the resulting
    arrival falls strictly before BANK_CLOSE_TIME the process waits for that
    gap, bumps the global ``customer_id`` and starts a ``customer`` process.
    """
    global customer_id

    gap = customer_arrival_time(env.now)
    while env.now + gap < BANK_CLOSE_TIME:
        yield env.timeout(gap)
        customer_id += 1
        env.process(customer(env, customer_id, bank_tellers, current_date))
        gap = customer_arrival_time(env.now)


def run_day(env, bank_tellers, current_date):
    """Simulate one business day on ``env``, stopping at BANK_CLOSE_TIME."""
    arrivals = customer_arrival(env, bank_tellers, current_date)
    env.process(arrivals)
    env.run(until=BANK_CLOSE_TIME)


def run_month():
    """Simulate every day from START_DATE up to (not including) END_DATE.

    The module-level collectors and the customer counter are reset first.
    Each day gets a fresh SimPy environment and teller resource.

    Returns:
        (queue_df, service_df): DataFrames built from ``queue_data`` and
        ``service_data`` respectively.
    """
    global customer_id

    # Start from a clean slate so repeated calls do not accumulate records.
    del queue_data[:]
    del service_data[:]
    customer_id = 0

    simulated_day = START_DATE
    one_day = timedelta(days=1)
    while simulated_day < END_DATE:
        env = simpy.Environment()
        tellers = simpy.Resource(env, capacity=NUM_TELLERS)
        run_day(env, tellers, simulated_day)
        simulated_day += one_day

    return pd.DataFrame(queue_data), pd.DataFrame(service_data)


# Run the simulation
queue_df, service_df = run_month()

# Display results
print("Queue DataFrame:")
print(queue_df)

print("\nService DataFrame:")
print(service_df)

# queue_df holds one row per arrival, whereas service_df only holds customers
# whose service finished before the day's run stopped, so the served count
# must come from service_df.
print(f"\nTotal Number of Customers Served: {len(service_df)}")

In [None]:
# Largest queue length recorded at any arrival in the simulated month.
longest_queue = queue_df['Queue Length'].max()
longest_queue

In [None]:
# Persist the training-month simulation output. index=False keeps the positional
# row index out of the files, so reloading them with pd.read_csv does not
# produce a spurious "Unnamed: 0" column.
queue_df.to_csv('Bank_Queue_train.csv', index=False)
service_df.to_csv('Bank_Service_train.csv', index=False)

## Test Data (2024-12월)

In [None]:
# Re-point the simulation at December; run_month reads these module globals.
START_DATE = datetime(2024, 12, 1, 9, 0)  # Start date and time
END_DATE = datetime(2024, 12, 31, 16, 0)  # End date and time

# Run the simulation
queue_df, service_df = run_month()

# Display results
print("Queue DataFrame:")
print(queue_df)

print("\nService DataFrame:")
print(service_df)

# queue_df holds one row per arrival, whereas service_df only holds customers
# whose service finished before the day's run stopped, so the served count
# must come from service_df.
print(f"\nTotal Number of Customers Served: {len(service_df)}")

In [None]:
# Largest queue length recorded at any arrival in the simulated month.
longest_queue = queue_df['Queue Length'].max()
longest_queue

In [None]:
# Persist the test-month simulation output. index=False keeps the positional
# row index out of the files, so reloading them with pd.read_csv does not
# produce a spurious "Unnamed: 0" column.
queue_df.to_csv('Bank_Queue_test.csv', index=False)
service_df.to_csv('Bank_Service_test.csv', index=False)

In [None]:
# 오후 세시에 예약을 할 때
# 인풋 : 전날 데이터 + 당일 오후 15시 전 데이터 : 7*60 + 3*60 분
# 아웃풋: 오후 세시의 대기 인수와 대기시간 예측 

# 데이터프레임 2개 
# 생성 대기열 도착시간 
# 나가는 대기열
# 1분:3명
# 2분:4명


# 데이터프레임 전처리

In [None]:
import pandas as pd

# Load the simulated train and test CSV files back into DataFrames.
queue_train_df, service_train_df, queue_test_df, service_test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Bank_Queue_train.csv',
        'Bank_Service_train.csv',
        'Bank_Queue_test.csv',
        'Bank_Service_test.csv',
    )
)

In [None]:
# The CSV round trip leaves timestamps as text; add parsed "*_dt" companions.
for queue_frame in (queue_train_df, queue_test_df):
    queue_frame['Arrival Date_dt'] = pd.to_datetime(queue_frame['Arrival Date'])

for service_frame in (service_train_df, service_test_df):
    service_frame['Arrival Date_dt'] = pd.to_datetime(service_frame['Arrival Date'])
    service_frame['Finish Date_dt'] = pd.to_datetime(service_frame['Finish Date'])

# 예측모델
- 방문하려고 하는 시간의
- 예상 대기인수 & 예상 대기시간 예측
<br></br>
- 대기가 적은 시간대에 예약이 몰리면 그 때의 혼잡도는 어떻게 예측?

## 데이터 필터링하여 예측
1. 모델 재사용
    - 모든 데이터로 한 번만 모델을 학습하고, 예측 단계에서는 동일한 모델을 재사용합니다.
2. 필터링 효율화
    - 데이터를 한 번 필터링하고, 반복적으로 활용해 병합/필터링 시간을 줄입니다.
3. 병렬 처리 도입
    - joblib 같은 라이브러리를 사용해 반복 작업을 병렬 처리하면 실행 시간을 단축할 수 있습니다.

### 입력한 데이터(예측하려는 일자)와 가장 가까운 날짜(single day)를 기준으로 예측

In [None]:
from joblib import Parallel, delayed
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, root_mean_squared_error
from datetime import datetime, timedelta

# Render floats with two decimal places whenever pandas displays them.
pd.set_option('display.float_format', '{:.2f}'.format)

# 1. Data preparation
def prepare_data(queue_df, service_df):
    """Join the arrival log with the service log into one reusable frame.

    The frames are inner-joined on "ID"; columns present in both receive the
    suffixes "_queue" and "_service". "Arrival Time_queue" is then set to the
    whole minutes elapsed since 9:00, derived from the hour and minute of
    "Arrival Date_dt_queue" (seconds are dropped).
    """
    joined = queue_df.merge(service_df, on="ID", suffixes=('_queue', '_service'))
    arrival_clock = joined['Arrival Date_dt_queue'].dt
    minutes_since_open = (arrival_clock.hour - 9) * 60 + arrival_clock.minute
    joined['Arrival Time_queue'] = minutes_since_open
    return joined

# 2. Model training and evaluation
def train_and_evaluate(train_data, test_data):
    """Fit the two forests on ``train_data`` and print their RMSE on ``test_data``.

    One RandomForestRegressor predicts 'Queue Length' and another predicts
    'Waiting Duration'; both use the features 'Arrival Time_queue',
    'Queue Length' and 'In Service'.

    NOTE(review): 'Queue Length' is both a feature and the target of the
    first model - confirm that this is intended.

    Returns:
        (waiting_customers_model, waiting_time_model), both fitted.
    """
    feature_columns = ['Arrival Time_queue', 'Queue Length', 'In Service']
    train_features = train_data[feature_columns]
    test_features = test_data[feature_columns]

    def fit_and_score(target_column):
        # Fit one forest for the given target and score it on the test rows.
        forest = RandomForestRegressor(n_estimators=100, random_state=42)
        forest.fit(train_features, train_data[target_column])
        predicted = forest.predict(test_features)
        return forest, root_mean_squared_error(test_data[target_column], predicted)

    waiting_customers_model, rmse_customers = fit_and_score('Queue Length')
    waiting_time_model, rmse_time = fit_and_score('Waiting Duration')

    # Report the test-set error of each model (RMSE).
    print(f"RMSE (대기 고객 수): {rmse_customers}")
    print(f"RMSE (대기 시간): {rmse_time}")

    return waiting_customers_model, waiting_time_model

# 3. Prediction
def predict_single_day(merged_data, target_date, waiting_customers_model, waiting_time_model):
    """Predict queue length and waiting time for one target timestamp.

    The row of ``merged_data`` whose 'Arrival Date_dt_queue' is closest to
    ``target_date`` supplies the 'Queue Length' and 'In Service' features;
    the arrival-time feature is the whole minutes between 9:00 and
    ``target_date``. ``merged_data`` is left untouched.

    Args:
        merged_data: frame with 'Arrival Date_dt_queue', 'Queue Length' and
            'In Service' columns.
        target_date: datetime to predict for.
        waiting_customers_model: fitted model exposing ``predict``.
        waiting_time_model: fitted model exposing ``predict``.

    Returns:
        dict with "Date", "Predicted Waiting Customers" (rounded to 0
        decimals) and "Predicted Waiting Time (minutes)" (rounded to 2).

    Raises:
        ValueError: if ``merged_data`` has no rows.
    """
    if len(merged_data) == 0:
        raise ValueError("merged_data is empty; no reference row to predict from")

    # Keep the distance in a local Series rather than writing a helper column
    # into the caller's DataFrame.
    time_gap = (merged_data['Arrival Date_dt_queue'] - target_date).abs()
    closest_row = merged_data.loc[time_gap.idxmin()]

    queue_length = closest_row['Queue Length']
    in_service = closest_row['In Service']
    arrival_time = (target_date.hour - 9) * 60 + target_date.minute

    input_data = pd.DataFrame([[arrival_time, queue_length, in_service]],
                              columns=['Arrival Time_queue', 'Queue Length', 'In Service'])

    predicted_customers = waiting_customers_model.predict(input_data)[0]
    predicted_time = waiting_time_model.predict(input_data)[0]

    return {
        "Date": target_date,
        "Predicted Waiting Customers": round(predicted_customers, 0),
        "Predicted Waiting Time (minutes)": round(predicted_time, 2)
    }

# 4. Rolling forecast
def rolling_forecast(queue_train_df, queue_test_df, service_train_df, service_test_df, target_date):
    """Train on the train frames, evaluate on the test frames, then predict.

    ``target_date`` is a string in "%Y-%m-%d %H:%M" format. Both models are
    fitted anew on every call. The prediction uses the merged test frame as
    the pool of reference rows.

    Returns:
        The dict produced by predict_single_day.
    """
    # Merge each queue/service pair once.
    merged_train = prepare_data(queue_train_df, service_train_df)
    merged_test = prepare_data(queue_test_df, service_test_df)

    # Fit both models; the test RMSE is printed as a side effect.
    customers_model, time_model = train_and_evaluate(merged_train, merged_test)

    # Parse the requested timestamp and predict for it.
    requested_moment = datetime.strptime(target_date, "%Y-%m-%d %H:%M")
    return predict_single_day(merged_test, requested_moment, customers_model, time_model)

# 5. Run
input_date = input("예측하려는 날짜를 입력하세요 (형식: YYYY-MM-DD HH:MM): ")
predicted_result = rolling_forecast(
    queue_train_df, queue_test_df, service_train_df, service_test_df, input_date
)

# Report the predicted queue length and waiting time.
expected_customers = predicted_result['Predicted Waiting Customers']
expected_wait = predicted_result['Predicted Waiting Time (minutes)']
print(f"예상 대기 고객 수: {expected_customers}")
print(f"예상 대기 시간: {expected_wait} minutes")


### Train / Test 를 활용하여 12월 데이터 예측 및 성능평가

In [None]:


# 2. Training-performance evaluation (rolling fit over the training month)
def evaluate_train_performance(train_data, start_date, end_date):
    """Walk-forward evaluation of both models inside the training data.

    For every evaluation day D from ``start_date + 7 days`` through
    ``end_date`` (inclusive), the two forests are fitted on the rows of the
    seven days before D and used to predict the rows of D. Days for which
    either slice is empty are skipped. The predictions of all evaluated days
    are pooled into one RMSE per target.

    Args:
        train_data: merged frame with 'Arrival Date_dt_queue',
            'Arrival Time_queue', 'Queue Length', 'In Service' and
            'Waiting Duration' columns.
        start_date: datetime of the first day of the first training window.
        end_date: datetime of the last day that may be evaluated.

    Returns:
        (rmse_customers, rmse_time)

    Raises:
        ValueError: if no day could be evaluated.
    """
    feature_columns = ['Arrival Time_queue', 'Queue Length', 'In Service']
    window = timedelta(days=7)
    one_day = timedelta(days=1)

    # Extract the calendar date of every row once instead of once per window.
    arrival_days = train_data['Arrival Date_dt_queue'].dt.date

    predictions_customers = []
    predictions_time = []
    actual_customers = []
    actual_time = []

    eval_day = start_date + window
    while eval_day <= end_date:
        window_start = (eval_day - window).date()
        window_end = eval_day.date()
        eval_day += one_day  # advance up front so `continue` cannot skip it

        # Fit on the seven days before the evaluation day ...
        fit_rows = train_data[(arrival_days >= window_start) & (arrival_days < window_end)]
        # ... and evaluate on the evaluation day itself.
        eval_rows = train_data[arrival_days == window_end]
        if len(fit_rows) == 0 or len(eval_rows) == 0:
            continue

        X_train = fit_rows[feature_columns]
        X_test = eval_rows[feature_columns]

        waiting_customers_model = RandomForestRegressor(n_estimators=100, random_state=42)
        waiting_customers_model.fit(X_train, fit_rows['Queue Length'])
        predictions_customers.extend(waiting_customers_model.predict(X_test))
        actual_customers.extend(eval_rows['Queue Length'].values)

        waiting_time_model = RandomForestRegressor(n_estimators=100, random_state=42)
        waiting_time_model.fit(X_train, fit_rows['Waiting Duration'])
        predictions_time.extend(waiting_time_model.predict(X_test))
        actual_time.extend(eval_rows['Waiting Duration'].values)

    if not actual_customers:
        raise ValueError(
            "no evaluation day between start_date + 7 days and end_date had both "
            "training and evaluation rows"
        )

    # root_mean_squared_error replaces the deprecated
    # mean_squared_error(..., squared=False) form.
    rmse_customers = root_mean_squared_error(actual_customers, predictions_customers)
    rmse_time = root_mean_squared_error(actual_time, predictions_time)

    return rmse_customers, rmse_time

# 3. Generalization evaluation (fit on the training month, score on the test month)
def evaluate_test_performance(train_data, test_data):
    """Fit both forests on ``train_data`` and return their RMSE on ``test_data``.

    Both frames need the 'Arrival Time_queue', 'Queue Length', 'In Service'
    and 'Waiting Duration' columns.

    Returns:
        (rmse_customers, rmse_time)
    """
    feature_columns = ['Arrival Time_queue', 'Queue Length', 'In Service']
    X_train = train_data[feature_columns]
    X_test = test_data[feature_columns]

    # Fit one forest per target.
    waiting_customers_model = RandomForestRegressor(n_estimators=100, random_state=42)
    waiting_customers_model.fit(X_train, train_data['Queue Length'])

    waiting_time_model = RandomForestRegressor(n_estimators=100, random_state=42)
    waiting_time_model.fit(X_train, train_data['Waiting Duration'])

    # Predict the test rows.
    y_pred_customers = waiting_customers_model.predict(X_test)
    y_pred_time = waiting_time_model.predict(X_test)

    # root_mean_squared_error replaces the deprecated
    # mean_squared_error(..., squared=False) form.
    rmse_customers = root_mean_squared_error(test_data['Queue Length'], y_pred_customers)
    rmse_time = root_mean_squared_error(test_data['Waiting Duration'], y_pred_time)

    return rmse_customers, rmse_time

# 4. Run
# Merge the train frames and the test frames.
train_data = prepare_data(queue_train_df, service_train_df)
test_data = prepare_data(queue_test_df, service_test_df)

# Walk-forward performance inside the training month.
train_start_date = datetime(2024, 11, 1)
train_end_date = datetime(2024, 11, 30)
rmse_train_customers, rmse_train_time = evaluate_train_performance(
    train_data, train_start_date, train_end_date
)

# Performance on the test month with models fitted on the training month.
rmse_test_customers, rmse_test_time = evaluate_test_performance(train_data, test_data)

# Print both sets of scores.
print("\n학습 데이터 성능 (Training Performance)")
print(f"RMSE (대기 고객 수): {rmse_train_customers}")
print(f"RMSE (대기 시간): {rmse_train_time}")

print("\n테스트 데이터 성능 (Generalization Performance)")
print(f"RMSE (대기 고객 수): {rmse_test_customers}")
print(f"RMSE (대기 시간): {rmse_test_time}")
