In [2]:
import pandas as pd
import numpy as np
import folium
from folium.plugins import MarkerCluster
from geopy.distance import geodesic
import random

# ==============================
# 1. CSV 파일 경로 및 컬럼 설정
# ==============================
file_path = r"C:\Users\enjoy\Study\project\teamproject\FAF5.5.1_HiLoForecasts.csv"

use_cols = [
    "dms_orig", "dms_dest", "dms_mode", "sctg2", "trade_type",
    "tons_2020", "tons_2021", "tons_2022", "tons_2023", 
    "value_2020", "value_2021", "value_2022", "value_2023"
]

chunksize = 1_000_000
filtered_chunks = []

In [3]:
# ==============================
# 2. CSV 읽고 필터링 + 파생 컬럼 생성
# ==============================
for chunk in pd.read_csv(file_path, usecols=use_cols, chunksize=chunksize):
    chunk_filtered = chunk[(chunk["dms_mode"] == 1) & (chunk["trade_type"] == 1)].copy()
    
    # 연도별 톤당 운임료
    for year in range(2020, 2024):
        chunk_filtered[f"avg_price_per_ton_{year}"] = chunk_filtered[f"value_{year}"] / chunk_filtered[f"tons_{year}"]
    
    # 총 톤수, 총 운임료, 평균 톤당 운임료
    chunk_filtered["tons_total"] = chunk_filtered[["tons_2020","tons_2021","tons_2022","tons_2023"]].sum(axis=1)
    chunk_filtered["value_total"] = chunk_filtered[["value_2020","value_2021","value_2022","value_2023"]].sum(axis=1)
    chunk_filtered["avg_price_per_ton_mean"] = chunk_filtered[[
        "avg_price_per_ton_2020","avg_price_per_ton_2021","avg_price_per_ton_2022","avg_price_per_ton_2023"
    ]].mean(axis=1)
    
    # 성장률
    chunk_filtered["tons_growth"] = (chunk_filtered["tons_2023"] - chunk_filtered["tons_2020"]) / chunk_filtered["tons_2020"]
    chunk_filtered["value_growth"] = (chunk_filtered["value_2023"] - chunk_filtered["value_2020"]) / chunk_filtered["value_2020"]
    
    # 거리 시뮬레이션
    np.random.seed(42)
    chunk_filtered["distance_km"] = np.random.randint(50, 2000, size=len(chunk_filtered))
    
    # 기사 매칭용 합성 컬럼
    n_drivers = 1000
    chunk_filtered["driver_id"] = np.random.randint(1, n_drivers+1, size=len(chunk_filtered))
    vehicle_types = ["소형", "중형", "대형"]
    chunk_filtered["vehicle_type"] = np.random.choice(vehicle_types, size=len(chunk_filtered))
    vehicle_load = {"소형":5, "중형":15, "대형":25}  # 톤
    chunk_filtered["vehicle_max_load"] = chunk_filtered["vehicle_type"].map(vehicle_load)
    
    # 기사 현재 위치 시뮬레이션
    chunk_filtered["current_location_lat"] = np.random.uniform(30, 50, size=len(chunk_filtered))
    chunk_filtered["current_location_lon"] = np.random.uniform(-120, -70, size=len(chunk_filtered))
    
    # 예상 수익
    chunk_filtered["expected_profit"] = chunk_filtered["tons_total"] * chunk_filtered["avg_price_per_ton_mean"]
    
    filtered_chunks.append(chunk_filtered)

In [4]:
# ==============================
# 3. 전체 데이터 합치기
# ==============================
df = pd.concat(filtered_chunks, ignore_index=True)
print(f"최종 데이터 행 수: {len(df):,}")
print(f"최종 데이터 열 수: {df.shape[1]:,}")

최종 데이터 행 수: 282,850
최종 데이터 열 수: 29


In [6]:
# ==============================
# 4. 기사 매칭 시뮬레이션
# ==============================
sample_size = 1000
df_sample = df.sample(sample_size, random_state=42).copy()

driver_location = (40.7128, -74.0060)  # 기사 위치 (뉴욕)
cost_per_km = 0.5
driver_max_load = 15

# 거리 계산
df_sample["driver_distance_km"] = df_sample.apply(
    lambda row: geodesic(driver_location, (row["current_location_lat"], row["current_location_lon"])).km,
    axis=1
)
df_sample["match_possible"] = df_sample["tons_total"] <= driver_max_load
df_sample["transport_cost"] = df_sample["driver_distance_km"] * cost_per_km
df_sample["actual_profit"] = df_sample["expected_profit"] - df_sample["transport_cost"]

# 필터링 후 상위 매칭
df_matches = df_sample[df_sample["match_possible"]].copy()
df_matches_top10 = df_matches.sort_values("actual_profit", ascending=False).head(10)
print("🚚 기사 매칭 상위 10건")
print(df_matches_top10[["dms_orig","dms_dest","tons_total","expected_profit","driver_distance_km","transport_cost","actual_profit"]])

🚚 기사 매칭 상위 10건
        dms_orig  dms_dest  tons_total  expected_profit  driver_distance_km  \
154452       512       394    1.440843       292.893685          311.481018   
221413       399        92    0.361079       380.067310          716.557749   
176022       409       299    4.464595        64.748607          133.784763   
279207       451       132    4.362023        27.581034          100.987828   
202128       262       223   12.006204        80.140591          209.902543   
3412          69       511    8.262485        49.571880          198.558126   
35808        171       201    9.917953        10.575830          121.692249   
132168       372       419    2.931656       143.384911          393.790774   
122810       372        63    0.044161         2.084254          132.967255   
252048       131       421    2.469233        22.215942          179.784774   

        transport_cost  actual_profit  
154452      155.740509     137.153176  
221413      358.278874      21.7884

In [7]:
# ==============================
# 5. Folium 지도 시각화 (MarkerCluster)
# ==============================
m = folium.Map(location=driver_location, zoom_start=5)
folium.Marker(location=driver_location, popup="기사 위치 (New York)", icon=folium.Icon(color="blue", icon="truck", prefix="fa")).add_to(m)

marker_cluster = MarkerCluster().add_to(m)
for idx, row in df_matches.iterrows():
    folium.Marker(
        location=[row["current_location_lat"], row["current_location_lon"]],
        popup=(
            f"출발지: {row['dms_orig']} → 도착지: {row['dms_dest']}<br>"
            f"톤수: {row['tons_total']} 톤<br>"
            f"예상 운임: {row['expected_profit']:.2f} USD<br>"
            f"거리: {row['driver_distance_km']:.1f} km<br>"
            f"운송 비용: {row['transport_cost']:.2f} USD<br>"
            f"<b>실수익: {row['actual_profit']:.2f} USD</b>"
        ),
        icon=folium.Icon(color="green", icon="usd", prefix="fa")
    ).add_to(marker_cluster)

m