In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, average_precision_score

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Dense, Embedding, Flatten, Concatenate, Dropout, BatchNormalization
)
from tensorflow.keras.optimizers import Adam


In [2]:
# Read the merged flight/weather table from the working directory.
# low_memory=False parses each column in a single pass rather than in chunks.
df = pd.read_csv(
    "flight_weather_merged.csv",
    low_memory=False,
)

# Row count as a quick sanity check on the load.
print(df.shape[0])


FileNotFoundError: [Errno 2] No such file or directory: 'flight_weather_merged.csv'

In [None]:
def make_delay_label(x):
    """Return 1 when a remark string mentions a delay, cancellation or turn-back.

    Parameters
    ----------
    x : object
        A single remark value. Only ``str`` values are inspected.

    Returns
    -------
    int
        1 if ``x`` is a string containing any of "지연" (delay), "결항"
        (cancellation) or "회항" (turn-back); otherwise 0.
    """
    # Missing values (NaN / None) and any other non-text value carry no remark.
    # Checking the type up front also keeps the substring test below from raising
    # TypeError on numeric values.
    if not isinstance(x, str):
        return 0
    return int(any(k in x for k in ("지연", "결항", "회항")))

# Binary target: label every remark in RMK_KOR with make_delay_label.
remarks = df["RMK_KOR"]
df["is_delay"] = remarks.apply(make_delay_label)


In [None]:
# Parse the timestamp column once, then derive the calendar features from it.
df["flight_datetime"] = pd.to_datetime(df["flight_datetime"])
stamp = df["flight_datetime"].dt

# weekday / month / hour are added in this order, matching the accessor names.
for part in ("weekday", "month", "hour"):
    df[part] = getattr(stamp, part)

# Calendar date without the time of day; used later for the chronological split.
df["flight_date"] = stamp.date


In [None]:
# Categorical feature candidates, kept only if the column exists in df.
cat_cols = [
    name
    for name in (
        "AIRLINE_KOREAN",
        "BOARDING_KOR",
        "ARRIVED_KOR",
        "LINE",
        "IO",
        "일기현상",
        "1층 운형",
    )
    if name in df.columns
]

# Numeric feature candidates (weather readings plus the derived calendar fields),
# filtered the same way.
num_cols = [
    name
    for name in (
        "기온(°C)",
        "이슬점온도(°C)",
        "습도(%)",
        "풍속_ms",
        "강수량(mm)",
        "전운량_okta",
        "적설(cm)",
        "weekday",
        "month",
        "hour",
    )
    if name in df.columns
]


In [None]:
# Numeric features: replace missing values with that column's median.
for name in num_cols:
    column = df[name]
    df[name] = column.fillna(column.median())

# Categorical features: missing values become their own "UNKNOWN" category.
for name in cat_cols:
    column = df[name]
    df[name] = column.fillna("UNKNOWN")


In [None]:
# Per-column fitted encoders and the number of distinct codes each one produced.
label_encoders = {}
cat_cardinality = {}

for col in cat_cols:
    values = df[col]

    # Filling missing categories with the string "UNKNOWN" can leave a
    # numeric-coded column holding both numbers and text. LabelEncoder refuses
    # such mixed input, so object columns are normalised to plain strings first.
    # Columns that are already all-string are unaffected.
    if values.dtype == object:
        values = values.astype(str)

    le = LabelEncoder()
    df[col] = le.fit_transform(values)  # column now holds integer codes 0..n-1
    label_encoders[col] = le

    # Every class was seen during fit, so this equals the number of distinct codes.
    cat_cardinality[col] = len(le.classes_)


In [None]:
def _feature_arrays(frame):
    """Return (categorical matrix, numeric matrix, target vector) for ``frame``."""
    return (
        frame[cat_cols].values,
        frame[num_cols].values,
        frame["is_delay"].values,
    )


# Chronological split: rows dated at or before the 0.8 quantile of flight_date
# form the training set, strictly later rows form the test set.
df = df.sort_values("flight_date")
split_date = df["flight_date"].quantile(0.8)

train_df = df.loc[df["flight_date"] <= split_date]
test_df = df.loc[df["flight_date"] > split_date]

X_train_cat, X_train_num, y_train = _feature_arrays(train_df)
X_test_cat, X_test_num, y_test = _feature_arrays(test_df)


In [None]:
# === Categorical inputs & embeddings ===
cat_inputs = []
cat_embeddings = []

for i, col in enumerate(cat_cols):
    # Layers are named by position (cat0_input, cat0_embed, ...) rather than by
    # column label. The labels contain Hangul and spaces, which TensorFlow releases
    # that validate layer names as graph scope names reject with a ValueError.
    # Inputs are fed to the model as a positional list, so the names do not affect
    # training or prediction.
    input_i = Input(shape=(1,), name=f"cat{i}_input")

    # The embedding table has one more row than the number of observed codes.
    vocab_size = cat_cardinality[col] + 1
    # Embedding width: half the table size (integer division), capped at 50.
    embed_dim = min(50, vocab_size // 2)

    embed_i = Embedding(
        input_dim=vocab_size,
        output_dim=embed_dim,
        name=f"cat{i}_embed"
    )(input_i)

    # (batch, 1, embed_dim) -> (batch, embed_dim) so it can be concatenated.
    embed_i = Flatten()(embed_i)

    cat_inputs.append(input_i)
    cat_embeddings.append(embed_i)

# === Numeric input ===
num_input = Input(shape=(len(num_cols),), name="num_input")

# === Merge embeddings with the numeric features ===
x = Concatenate()(cat_embeddings + [num_input])

# === DNN ===
x = Dense(256, activation="relu")(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)

x = Dense(128, activation="relu")(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)

x = Dense(64, activation="relu")(x)

# Single sigmoid unit: predicted probability of the positive (is_delay == 1) class.
output = Dense(1, activation="sigmoid")(x)

# Input order is: one tensor per categorical column (in cat_cols order), then the
# numeric block. Callers must pass arrays in the same order.
model = Model(inputs=cat_inputs + [num_input], outputs=output)


In [None]:
# Metrics tracked during training alongside the loss.
training_metrics = [
    tf.keras.metrics.AUC(name="auc"),
    tf.keras.metrics.Precision(name="precision"),
    tf.keras.metrics.Recall(name="recall"),
]

# Binary classification set-up: Adam optimiser with binary cross-entropy loss.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=training_metrics,
)

model.summary()


In [None]:
# Loss weights per class: positive (label 1) samples count 70x as much as negatives.
class_weight = {0: 1, 1: 70}

# One 1-D array per categorical column (in cat_cols order), followed by the numeric
# matrix, matching the order of the model's inputs.
train_inputs = list(X_train_cat.T) + [X_train_num]

history = model.fit(
    train_inputs,
    y_train,
    validation_split=0.1,
    epochs=20,
    batch_size=4096,
    class_weight=class_weight,
    verbose=2,
)


In [None]:
# Same input layout as in training: one array per categorical column, then numerics.
test_inputs = list(X_test_cat.T) + [X_test_num]

# Predicted positive-class probabilities, flattened to one dimension.
y_prob = model.predict(test_inputs, batch_size=4096).ravel()

# Hard labels at a 0.5 probability threshold.
is_positive = y_prob >= 0.5
y_pred = is_positive.astype(int)

print(classification_report(y_test, y_pred))

# Threshold-free ranking metrics computed from the raw probabilities.
for label, scorer in (
    ("ROC-AUC:", roc_auc_score),
    ("PR-AUC :", average_precision_score),
):
    print(label, scorer(y_test, y_prob))
