# 항공사 데이터_회귀분석_LightGBM

In [1]:
import pandas as pd
import numpy as np

# ---------------------------------------------------------------
# Feature definitions
# ---------------------------------------------------------------
# Numeric inputs, used as-is.
num_cols = ["dep_hour", "dep_min", "dep_weekday", "is_weekend"]  # "dep_min" follows the CSV column name

# Categorical inputs (airline, origin, arrival code, flight type).
cat_cols = ["항공사", "출발지", "arrival_code", "flight_type"]

# ---------------------------------------------------------------
# Load the airline data (version with the weather data removed) and
# keep only rows that have a regression target (delay in minutes).
# ---------------------------------------------------------------
df = (
    pd.read_csv("new_flight_analysis_summary.csv", encoding="utf-8-sig")
    .loc[lambda frame: frame["지연_분"].notna()]
    .copy()
)

print("✅ 회귀 대상 데이터 수:", len(df))

# ---------------------------------------------------------------
# Design matrix (numeric columns first, then categorical) and target
# ---------------------------------------------------------------
X = df[[*num_cols, *cat_cols]]
y = df["지연_분"]


✅ 회귀 대상 데이터 수: 2836872


In [2]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from lightgbm import LGBMRegressor

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Numeric columns pass through unchanged; categorical columns are mapped to
# integer codes. Categories not seen while fitting are encoded as -1 instead
# of raising at transform time.
prep = ColumnTransformer([
    ("num", "passthrough", num_cols),
    ("cat", OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1), cat_cols)
])

# Fit the encoder on the training rows only, then reuse it for the test rows
# so no information from the hold-out set leaks into the preprocessing.
Xtr = prep.fit_transform(X_train)
Xte = prep.transform(X_test)

# NOTE(review): the encoded categorical columns are handed to LightGBM without
# a `categorical_feature` argument, so they appear to be treated as ordinary
# ordered numeric features — confirm this is intended.
model = LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    random_state=42
)

model.fit(Xtr, y_train)
pred = model.predict(Xte)

print("MAE:", mean_absolute_error(y_test, pred))
# RMSE is taken as sqrt(MSE): the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so this form runs on both old and new releases and yields the same value.
print("RMSE:", np.sqrt(mean_squared_error(y_test, pred)))
print("R2:", r2_score(y_test, pred))


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.020689 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 390
[LightGBM] [Info] Number of data points in the train set: 2269497, number of used features: 8
[LightGBM] [Info] Start training from score 24.460704
MAE: 11.100222251403794
RMSE: 22.196636212215378
R2: 0.272046456061224
