In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. 데이터 불러오기
df = pd.read_csv('./dataset/must_use_final.csv')
df['TIMESTAMP'] = pd.to_datetime(df['TIMESTAMP'])
df = df.sort_values(by=['VSL_ID', 'TIMESTAMP'])

# 2. 3시간 전 데이터 생성 (10분 × 18 = 3시간)
shift_steps = 18
features = ['LAT', 'LON', 'COG', 'HEADING']
for col in features:
    df[f'{col}_t-18'] = df.groupby('VSL_ID')[col].shift(shift_steps)

# 3. 결측치 제거
df = df.dropna().reset_index(drop=True)

# 4. 군집별 결과 저장용 리스트
cluster_results = []

# 5. 군집(CLUSTER_1)별 모델 학습 및 평가
unique_clusters = sorted(df['CLUSTER_1'].unique())

for cluster in unique_clusters:
    cluster_df = df[df['CLUSTER_1'] == cluster]

    # 항구 클래스가 1개뿐이면 분류 불가 → 스킵
    if cluster_df['PORT_NAME'].nunique() < 2:
        continue

    X = cluster_df[[f'{col}_t-18' for col in features]]
    y = cluster_df['PORT_NAME']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Soft Voting 모델 정의
    rf_clf = RandomForestClassifier()
    svc_clf = SVC(probability=True)

    voting_clf = VotingClassifier(
        estimators=[('rf', rf_clf), ('svc', svc_clf)],
        voting='soft'
    )

    # 학습
    voting_clf.fit(X_train, y_train)

    # 예측
    y_pred = voting_clf.predict(X_test)

    # 성능 기록
    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)

    cluster_results.append({
        'CLUSTER_1': cluster,
        'accuracy': acc,
        'classes': y.unique().tolist(),
        'f1_macro': report['macro avg']['f1-score'],
        'f1_weighted': report['weighted avg']['f1-score']
    })

# 6. 결과 정리
results_df = pd.DataFrame(cluster_results)
print(results_df)


   CLUSTER_1  accuracy                                            classes  \
0          1  0.802319  [KRKAN, JPMOJ, JPIMI, JPHIJ, JPNGS, JPIMB, KRI...   
1          2  0.925825                       [CNNGB, CNSHA, CNTAC, CNNJI]   
2          3  0.545473  [JPMKX, JPUKB, JPTYO, JPYOK, JPSMZ, JPKIJ, JPO...   
3          4  0.770228                       [CNHUA, HKHKG, TWKHH, TWKEL]   
4          6  0.822139                [CNQDG, CNDAG, CNLYG, CNRZH, CNTXG]   
5          7  0.970441                                     [RUVVO, RUNJK]   

   f1_macro  f1_weighted  
0  0.707210     0.792817  
1  0.689121     0.914170  
2  0.513165     0.541406  
3  0.786664     0.771338  
4  0.779599     0.816008  
5  0.961901     0.969962  
