In [None]:
# 클러스터링 분석 코드

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the merged exchange-rate / KOSPI dataset.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# relative to the repository so the analysis can run elsewhere.
data_path = "C:/Users/유하민/git/exchange_rate_and_KOSPI/data/processed/merged_exchange_kospi_data.csv"
merged_df = pd.read_csv(data_path)

# Feature extraction: row-over-row relative change of each series.
# pct_change() has no previous value for the first row, so that row is NaN;
# fillna(0) records it (and any other NaN) as "no change".
merged_df['Exchange_Rate_Change'] = merged_df['Exchange_Rate'].pct_change().fillna(0)
merged_df['KOSPI_Change'] = merged_df['Closing_Price'].pct_change().fillna(0)

features = merged_df[['Exchange_Rate_Change', 'KOSPI_Change']]

# Standardize both features (zero mean, unit variance) so that neither one
# dominates the distance computation inside KMeans.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Number of random restarts per KMeans fit. It is set explicitly because the
# scikit-learn default changed from 10 to 'auto' in version 1.4 (and versions
# 1.2/1.3 warn when it is omitted), which would otherwise make the elbow curve
# and the final labels depend on the installed scikit-learn version.
n_init = 10

# Candidate cluster counts, shared by the fitting loop and the plot's x-axis
# so the two can never get out of sync.
k_values = range(1, 11)

# Elbow method: record the inertia (within-cluster sum of squares) per k.
inertia = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, n_init=n_init, random_state=42)
    kmeans.fit(scaled_features)
    inertia.append(kmeans.inertia_)

plt.figure(figsize=(8, 4))
plt.plot(k_values, inertia, marker='o')
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k')
plt.show()

# Final clustering.
# k=3 is an example value, assumed to have been read off the elbow plot above;
# adjust it if the curve suggests a different bend.
optimal_k = 3
kmeans = KMeans(n_clusters=optimal_k, n_init=n_init, random_state=42)
merged_df['Cluster'] = kmeans.fit_predict(scaled_features)

# Visualize the clusters in the original (unscaled) change space, colored by
# the assigned cluster label.
plt.figure(figsize=(10, 6))
plt.scatter(
    merged_df['Exchange_Rate_Change'],
    merged_df['KOSPI_Change'],
    c=merged_df['Cluster'],
    cmap='viridis',
)
plt.xlabel('Exchange Rate Change')
plt.ylabel('KOSPI Change')
plt.title('KMeans Clustering of Exchange Rate and KOSPI Changes')
plt.show()
