In [None]:
import numpy as np
import pandas as pd
import pywt
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Synthetic example data: a multivariate time series with one random walk per column.
n_samples, n_variables = 1024, 3
np.random.seed(1)  # fixed seed so the generated series is reproducible
increments = np.random.randn(n_samples, n_variables)  # standard-normal step for every (time, variable)
time_series = increments.cumsum(axis=0)  # running sum along time -> Brownian motion

# Wavelet-based noise removal, applied to each variable (column) separately.
wavelet = 'db1'
threshold = 0.5

# Level-5 multilevel decomposition of every column.
coeffs_list = [
    pywt.wavedec(time_series[:, col], wavelet, level=5)
    for col in range(n_variables)
]

# Soft-threshold every coefficient array except the first one (index 0), which is
# passed through untouched.
coeffs_thresholded_list = [
    coeffs[:1] + [pywt.threshold(detail, threshold, mode='soft') for detail in coeffs[1:]]
    for coeffs in coeffs_list
]

# Rebuild each column from its thresholded coefficients; stacking gives one row per
# variable, so transpose to get one row per time step.
denoised_time_series = np.vstack([
    pywt.waverec(kept, wavelet) for kept in coeffs_thresholded_list
]).T

# Functional time-series PCA (FTS-PCA) features + random-forest one-step-ahead forecast.
window_size = 20

# Each row of X is one sliding window of `window_size` consecutive time steps across
# all variables, flattened into a single feature vector.
X = np.array([
    denoised_time_series[i:i + window_size].flatten()
    for i in range(len(denoised_time_series) - window_size)
])
# Target: the first variable's value at the step right after each window.
y = denoised_time_series[window_size:, 0]

# Split chronologically (shuffle=False) BEFORE fitting any transformer. Fitting the
# scaler / PCA on all windows would let test-period statistics leak into the training
# features and make the reported error optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Standardisation parameters are estimated from the training windows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# The PCA basis is likewise learned from the training windows only.
pca = PCA(n_components=5)
X_train = pca.fit_transform(X_train_scaled)
X_test = pca.transform(X_test_scaled)

# Whole-sample arrays kept under their original names (train rows first, then test rows).
X_scaled = np.vstack([X_train_scaled, X_test_scaled])
principal_components = np.vstack([X_train, X_test])

# NOTE(review): `denoised_time_series` is produced upstream by denoising the full series
# before this split, so inputs and targets are denoised values that may still carry some
# test-period information -- consider denoising the training span only for a strict evaluation.

# Train the regressor on the principal-component features.
model = RandomForestRegressor(n_estimators=100, random_state=0)
model.fit(X_train, y_train)

# Predict on the held-out tail of the series and report the error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Visualisation: three stacked panels on one figure.
fig, (ax_series, ax_forecast, ax_error) = plt.subplots(3, 1, figsize=(15, 10))

# Top: the denoised first variable over the whole sample.
ax_series.plot(denoised_time_series[:, 0], label='Denoised Time Series (First Variable)')

# Middle: held-out targets against the model's predictions.
ax_forecast.plot(y_test, label='True Values')
ax_forecast.plot(y_pred, label='Predicted Values', linestyle='--')

# Bottom: prediction error (true minus predicted).
ax_error.plot(y_test - y_pred, label='Difference', color='red')

for panel in (ax_series, ax_forecast, ax_error):
    panel.legend()

fig.tight_layout()
plt.show()


In [None]:
import numpy as np
import pandas as pd
import pywt
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Synthetic example data: a multivariate time series with one random walk per column.
n_samples, n_variables = 1024, 5
np.random.seed(1245)  # fixed seed so the generated series is reproducible
increments = np.random.randn(n_samples, n_variables)  # standard-normal step for every (time, variable)
time_series = increments.cumsum(axis=0)  # running sum along time -> Brownian motion

# Wavelet-based noise removal, applied to each variable (column) separately.
wavelet = 'db1'
threshold = 0.5

# Level-5 multilevel decomposition of every column.
coeffs_list = [
    pywt.wavedec(time_series[:, col], wavelet, level=5)
    for col in range(n_variables)
]

# Soft-threshold every coefficient array except the first one (index 0), which is
# passed through untouched.
coeffs_thresholded_list = [
    coeffs[:1] + [pywt.threshold(detail, threshold, mode='soft') for detail in coeffs[1:]]
    for coeffs in coeffs_list
]

# Rebuild each column from its thresholded coefficients; stacking gives one row per
# variable, so transpose to get one row per time step.
denoised_time_series = np.vstack([
    pywt.waverec(kept, wavelet) for kept in coeffs_thresholded_list
]).T

# Functional time-series PCA (FTS-PCA), one PCA per variable, + random-forest forecast.
window_size = 20

# X stacks sliding windows of `window_size` consecutive time steps; it is indexed below
# as X[window, step_in_window, variable].
X = np.array([
    denoised_time_series[i:i + window_size]
    for i in range(len(denoised_time_series) - window_size)
])
# Target: the first variable's value at the step right after each window.
y = denoised_time_series[window_size:, 0]

# Chronological split (shuffle=False) decided BEFORE fitting any transformer. Fitting
# the PCAs / scaler on all windows would let test-period statistics leak into the
# training features and make the reported error optimistic.
train_idx, test_idx = train_test_split(np.arange(len(X)), test_size=0.2, shuffle=False)

# One PCA per variable: fitted on the training windows only, then applied to every window.
n_components = 5
pca_models = [PCA(n_components=n_components) for _ in range(n_variables)]
X_pca = np.zeros((X.shape[0], n_components * n_variables))

for i in range(n_variables):
    block = slice(i * n_components, (i + 1) * n_components)  # columns owned by variable i
    pca_models[i].fit(X[train_idx, :, i])
    X_pca[:, block] = pca_models[i].transform(X[:, :, i])

# Standardise using the training rows' mean / variance only.
scaler = StandardScaler()
scaler.fit(X_pca[train_idx])
X_scaled = scaler.transform(X_pca)

# NOTE(review): `denoised_time_series` is produced upstream by denoising the full series
# before this split, so inputs and targets are denoised values that may still carry some
# test-period information -- consider denoising the training span only for a strict evaluation.

# Train the regressor on the principal-component features.
X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
y_train, y_test = y[train_idx], y[test_idx]
model = RandomForestRegressor(n_estimators=100, random_state=0)
model.fit(X_train, y_train)

# Predict on the held-out tail of the series and report the error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Visualisation: three stacked panels on one figure.
fig, (ax_series, ax_forecast, ax_error) = plt.subplots(3, 1, figsize=(15, 10))

# Top: the denoised first variable over the whole sample.
ax_series.plot(denoised_time_series[:, 0], label='Denoised Time Series (First Variable)')

# Middle: held-out targets against the model's predictions.
ax_forecast.plot(y_test, label='True Values')
ax_forecast.plot(y_pred, label='Predicted Values', linestyle='--')

# Bottom: prediction error (true minus predicted).
ax_error.plot(y_test - y_pred, label='Difference', color='red')

for panel in (ax_series, ax_forecast, ax_error):
    panel.legend()

fig.tight_layout()
plt.show()


In [None]:
# Wavelet Decomposition Example

import pywt
import numpy as np
import matplotlib.pyplot as plt

# Example signal: a single random walk of 1024 samples.
np.random.seed(0)  # fixed seed so the generated signal is reproducible
steps = np.random.randn(1024)  # standard-normal increments
time_series = steps.cumsum()  # running sum -> Brownian motion

# Level-5 multilevel wavelet decomposition of the signal.
wavelet = 'db1'  # Daubechies wavelet
coeffs = pywt.wavedec(time_series, wavelet, level=5)

# Separate the coefficient list into its first entry (approximation coefficients) and
# the remaining entries (detail coefficients).
cA, *cD = coeffs

# Rebuild the signal from the untouched coefficients.
reconstructed_time_series = pywt.waverec(coeffs, wavelet)

# Compare the original signal with its reconstruction on three stacked panels.
fig, (ax_original, ax_rebuilt, ax_residual) = plt.subplots(3, 1, figsize=(12, 12))

ax_original.plot(time_series, label='Original Time Series')
ax_rebuilt.plot(reconstructed_time_series, label='Reconstructed Time Series', linestyle='--')
# Residual: original minus reconstruction.
ax_residual.plot(time_series - reconstructed_time_series, label='Difference', color='red')

for panel in (ax_original, ax_rebuilt, ax_residual):
    panel.legend()

fig.tight_layout()
fig.subplots_adjust(hspace=0.5)  # widen the vertical gap between the panels
plt.show()
