<a href="https://colab.research.google.com/github/Alpha-mon/AI-RoboAdvisor/blob/main/Stock_Price_Prediction_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [4]:
# 야후 파이낸스 주식 정보 가져오기
# 주식 종목 지정 필수

ticker = 'NVDA'
end_date = datetime.now()
start_date = end_date - timedelta(days=3650)
data = yf.download(ticker, start=start_date, end=end_date)

closing_prices = data['Close']
volume = data['Volume']
moving_average = closing_prices.rolling(window=120).mean()

[*********************100%%**********************]  1 of 1 completed


In [5]:
# 6개 알고리즘 (Bollinger, MACD, MOK, RSI, STCK, WR) 전체 코드


# Bollinger

def calculate_bollinger_bands(closing_prices, window=20, num_std=2):
    rolling_mean = closing_prices.rolling(window=window).mean()
    rolling_std = closing_prices.rolling(window=window).std()

    upper_band = rolling_mean + num_std * rolling_std
    lower_band = rolling_mean - num_std * rolling_std
    return upper_band, lower_band

upper_band, lower_band = calculate_bollinger_bands(closing_prices)
bollinger_bands_data = pd.DataFrame({'Upper Band': upper_band, 'Lower Band': lower_band})

bollinger_bands_data.dropna(inplace=True)
result = np.where(closing_prices > upper_band, (closing_prices - upper_band) / (upper_band - lower_band), np.where(closing_prices < lower_band, (closing_prices - lower_band) / (lower_band - upper_band), np.nan))
bollinger_bands_data['Result'] = pd.Series(result, index=closing_prices.index)

bollinger_bands_data['Result'].fillna(method='ffill', inplace=True)

bollinger_bands_data['Prediction'] = bollinger_bands_data['Result'].shift(-1)
bollinger_bands_data.dropna(inplace=True)
bollinger_bands_data['Trend'] = bollinger_bands_data['Prediction'].diff()
bollinger_bands_data['Trend'] = (bollinger_bands_data['Trend'] + 1) / 2

min_pred = bollinger_bands_data['Prediction'].min()
max_pred = bollinger_bands_data['Prediction'].max()
bollinger_bands_data['Prediction'] = (bollinger_bands_data['Prediction'] - min_pred) / (max_pred - min_pred)

min_trend = bollinger_bands_data['Trend'].min()
max_trend = bollinger_bands_data['Trend'].max()
bollinger_bands_data['Trend'] = (bollinger_bands_data['Trend'] - min_trend) / (max_trend - min_trend)*100

pd.set_option('display.float_format', '{:.4f}'.format)


# MACD

def calculate_macd(closing_prices):

    short_ema = closing_prices.ewm(span=26, adjust=False).mean()
    long_ema = closing_prices.ewm(span=12, adjust=False).mean()

    dif = short_ema - long_ema
    signal_line = dif.ewm(span=9, adjust=False).mean()
    histogram = dif - signal_line

    return dif, signal_line, histogram

dif, signal_line, histogram = calculate_macd(closing_prices)
macd_data = pd.DataFrame({'DIF': dif, 'Signal Line': signal_line, 'Histogram': histogram})
macd_data['Result'] = np.where(macd_data['DIF'] > macd_data['Signal Line'], 1, 0)

macd_data['Result'] = (macd_data['Result'].rolling(window=20, min_periods=1).mean())*100
macd_data['Prediction'] = macd_data['Result'].shift(-1)
macd_data.dropna(inplace=True)

# MOK

def calculate_mok(closing_prices, period=14, ma_period=20):
    returns = closing_prices.pct_change()

    moving_average = closing_prices.rolling(window=ma_period).mean()
    momentum = closing_prices.diff(period)
    normalized_momentum = 100 * (momentum - np.min(momentum)) / (np.max(momentum) - np.min(momentum))

    return normalized_momentum

mok_values = calculate_mok(closing_prices)

# RSI

def calculate_rsi(closing_prices, window=20):
    price_changes = closing_prices.diff()
    up_changes = price_changes.where(price_changes > 0, 0)
    down_changes = -price_changes.where(price_changes < 0, 0)

    avg_up_changes = up_changes.rolling(window=window, min_periods=1).mean()
    avg_down_changes = down_changes.rolling(window=window, min_periods=1).mean()

    rs = avg_up_changes / avg_down_changes
    rsi = 100 - (100 / (1 + rs))

    return rsi

rsi_result = calculate_rsi(closing_prices, window=len(closing_prices)//10)  # 기간을 데이터의 1/10로 동적으로 설정
rsi_result.dropna(inplace=True)

# STCK

def calculate_stck(closing_prices, window=20):
    lowest_low = closing_prices.rolling(window=window).min()
    highest_high = closing_prices.rolling(window=window).max()

    stck = 100 * (closing_prices - lowest_low) / (highest_high - lowest_low)

    stck_ma = stck.rolling(window=20).mean()

    return stck_ma

stck_result = calculate_stck(closing_prices)
stck_result.dropna(inplace=True)

# WR

def calculate_williams_r(closing_prices, period=14):
    high_prices = data['High']
    low_prices = data['Low']
    moving_average = closing_prices.rolling(window=20).mean()

    highest_high = high_prices.rolling(window=period).max()
    lowest_low = low_prices.rolling(window=period).min()

    williams_r = (highest_high - moving_average) / (highest_high - lowest_low) * -100

    normalized_williams_r = 100 * (williams_r + 100) / 100

    return normalized_williams_r

wr_result = calculate_williams_r(closing_prices)

In [6]:
# 6개 지표들을 하나의 데이터 프레임에 합치기

final_df = pd.DataFrame({
    'Bollinger': bollinger_bands_data['Trend'],
    'MACD': macd_data['Prediction'],
    'MOK': mok_values,
    'RSI': rsi_result,
    'STCK': stck_result,
    'WR': wr_result
})

# NaN 값 제거
final_df.dropna(inplace=True)

# 최종 데이터 프레임 출력
print(final_df)

            Bollinger    MACD     MOK     RSI    STCK      WR
Date                                                         
2014-02-13    43.8505 55.0000 40.3087 55.0434 50.1006 27.0795
2014-02-14    28.7469 55.0000 40.3868 56.6366 51.5429 25.5513
2014-02-18    42.4711 55.0000 40.3676 56.6001 53.3726 28.8534
2014-02-19    51.9447 55.0000 40.4145 57.2606 54.9226 29.2618
2014-02-20    47.6999 55.0000 40.4602 58.9275 56.5726 28.8429
...               ...     ...     ...     ...     ...     ...
2023-10-10    47.6999 60.0000 57.2054 60.5174 35.5890 51.8735
2023-10-11    47.6999 55.0000 67.9281 60.7924 38.7841 45.8897
2023-10-12    47.6999 50.0000 65.7451 60.8578 41.9000 41.5991
2023-10-13    47.6999 45.0000 55.6667 60.1984 45.1522 38.4900
2023-10-16    47.6999 40.0000 60.2106 60.6209 49.3747 40.2701

[2435 rows x 6 columns]


In [7]:
# LSTM 모델


final_df_values = final_df.values
data_values = data.values

# 입력 시퀀스에 대한 타임 스텝(T)을 정의합니다.

T = 10  # 원하는대로 조정할 수 있습니다.

# 입력 및 타겟을 위한 데이터 시퀀스 생성
final_df_sequences = []
data_sequences = []

for i in range(len(data_values) - T):
    final_df_sequences.append(final_df_values[i:i+T])
    data_sequences.append(closing_prices.iloc[i+T])

filtered_final_df_sequences = []
filtered_data_sequences = []

# 시퀀스 길이 맞추기
for i, seq in enumerate(final_df_sequences):
    if len(seq) == 10:
        filtered_final_df_sequences.append(seq)
        filtered_data_sequences.append(data_sequences[i])

final_df_sequences = filtered_final_df_sequences
data_sequences = filtered_data_sequences

# 시퀀스를 넘파이 배열로 변환
X = np.array(final_df_sequences)
y = np.array(data_sequences)


# 데이터를 훈련 및 테스트 세트로 분할
split_ratio = 0.8  # 분할 비율을 조정할 수 있습니다.
split_index = int(split_ratio * len(X))

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# LSTM 모델 구축
model = Sequential()

# 첫 번째 LSTM 레이어 (시퀀스 출력을 반환하여 다음 레이어로 전달)
model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(T, 6)))

# 두 번째 LSTM 레이어
model.add(LSTM(50, activation='relu', return_sequences=True))

# 세 번째 LSTM 레이어 (시퀀스 출력을 반환하지 않음)
model.add(LSTM(50, activation='relu'))

# 출력 레이어
model.add(Dense(1))

# 모델 컴파일
model.compile(optimizer='adam', loss='mean_squared_error')


# 모델 훈련
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# 테스트 데이터에서 모델 평가
loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')

# 예측 수행
predictions = model.predict(X_test)
print(predictions)

# 예측 결과를 데이터 프레임으로 변환
predictions_df = pd.DataFrame(predictions, columns=["Predictions"])
print(predictions_df)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 45369.60546875
[[ 1.14638653e+01]
 [ 1.38425789e+01]
 [ 1.44404945e+01]
 [ 1.59301834e+01]
 [ 1.74010105e+01]
 [ 1.83350792e+01]
 [ 1.40595207e+01]
 [ 2.16099205e+01]
 [ 1.96277809e+01]
 [ 2.45463257e+01]
 [ 2.46257515e+01]
 [ 2.03941689e+01]
 [ 2.03253937e+01]
 [ 1.69576721e+01]
 [ 1.78264503e+01]
 [ 2.40258636e+01]
 [ 2.41442909e+01]
 [ 2.61205139e+01]
 [ 2.84595108e+01]
 [ 2.79103699e+01]
 [ 2.70832787e+01]
 [ 2.65989380e+01]
 [ 2.73157196e+01]
 [ 2.78280487e+01]
 [ 2.56143494e+01]
 [ 2.37369499e+01]
 [ 2.17223282e+01]
 [ 2.03315964e+01]
 [ 1.88406868e+01]
 [ 1.80932961e+01]
 [ 1.71140289e+01]
 [ 1.47027626e+01]
 [ 1.33933182e+01]
 [ 1.06101952e+01]
 [ 7.46698809e+00]
 [ 5.70870638e+00]
 [ 4.41621065e+00]
 [ 2.45288491e+00]
 [ 9.88823175e-01]
 [ 2.55933785e+00]
 [ 1.14828873e+01]
 [ 1.34011307e+01]
 [ 6.27295036e+01]
 [ 2.50208664e+01]
 [ 2.45111923e+01]
 [ 2.491

In [8]:
# LSTM 모델의 예측 값을 6개 지표 값과 함께 생성 모델 입력값으로 넣기 위해

# T 타임 스텝 만큼의 길이를 고려하여 LSTM 예측값 넣기
final_df["LSTM_Predictions"] = np.nan
final_df["LSTM_Predictions"].iloc[-len(predictions_df):] = predictions_df["Predictions"].values

# NaN 값 제거
final_df.dropna(inplace=True)

# 다시 시퀀스로 변환
final_df_values = final_df.values

final_df_sequences = []
for i in range(len(data_values) - T):
    final_df_sequences.append(final_df_values[i:i+T])

X = np.array(final_df_sequences)

  X = np.array(final_df_sequences)


In [9]:
# 생성 및 구분 모델

from tensorflow.keras.layers import Reshape, Flatten, LeakyReLU, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# 생성 모델 G (주식 가격의 시퀀스를 생성)
def build_generator(input_shape=(T, 7)):  # 입력 값 : LSTM 모델 예측 + 6개 지표 값
    model = Sequential()

    # LSTM 레이어와 BatchNormalization, LeakyReLU 활성화 함수를 이용하여 시퀀스를 학습
    model.add(LSTM(128, return_sequences=True, input_shape=input_shape))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))

    # Flatten layer를 제거하고, T 길이의 시퀀스를 생성하도록 수정
    model.add(LSTM(128))
    model.add(Dense(T, activation='linear'))

    noise = tf.keras.layers.Input(shape=input_shape)
    generated_sequence = model(noise)

    return Model(noise, generated_sequence)

# 구분 모델 D (주가의 실제 시퀀스와 생성된 시퀀스를 구분)
def build_discriminator(input_shape=(T, 1)):
    model = Sequential()

    # LSTM 레이어와 LeakyReLU 활성화 함수를 사용하여 시퀀스를 처리
    model.add(LSTM(128, input_shape=input_shape))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))

    sequence = tf.keras.layers.Input(shape=input_shape)
    validity = model(sequence)

    # 주어진 시퀀스가 실제인지 생성된 것인지에 대한 확률
    return Model(sequence, validity)

In [10]:
# 학습 함수 (생성된 주식 가격 시퀀스와 실제 주식 가격 시퀀스를 사용하여 판별자를 학습)

def train_gan(generator, discriminator, combined, epochs, batch_size=32):
    valid = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        sequences = X_train[idx]

        # 주식 예측값 생성
        predicted_stock = model.predict(sequences)

        # LSTM 예측값을 sequences에 추가
        predicted_stock_reshaped = predicted_stock.reshape(batch_size, 1, 1)  # (batch_size, 1, 1) 형태로 변환
        predicted_stock_expanded = np.repeat(predicted_stock_reshaped, T, axis=1)  # predicted_stock_reshaped를 T 타임 스텝만큼 확장
        X_train_combined = np.concatenate([sequences, predicted_stock_expanded], axis=2)

        generated_stock = generator.predict(X_train_combined)
        generated_stock_reshaped = generated_stock.reshape(batch_size, T, 1)

        # 판별자 학습
        d_loss_real = discriminator.train_on_batch(predicted_stock_expanded, valid)
        d_loss_fake = discriminator.train_on_batch(generated_stock_reshaped, fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # 생성자 학습
        g_loss = combined.train_on_batch(X_train_combined, valid)

        print(f"{epoch}/{epochs} [D loss: {d_loss[0]} | D accuracy: {100 * d_loss[1]}] [G loss: {g_loss}]")

In [11]:
# 판별자 및 생성자 모델 초기화, 생성자와 판별자를 결합하여

discriminator = build_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=['accuracy'])

generator = build_generator()
z = tf.keras.layers.Input(shape=(T, 7))
generated_sequence = generator(z)

discriminator.trainable = False
validity = discriminator(generated_sequence)

combined = Model(z, validity)
combined.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))

#학습 시작
train_gan(generator, discriminator, combined, epochs=10, batch_size=32)

# 학습 후 최종 예측을 수행하는 함수
def final_predictions(generator, test_data, batch_size=32):
    predicted_stock_lstm = model.predict(test_data)

    # LSTM 예측값을 test_data에 추가
    predicted_stock_expanded = np.repeat(predicted_stock_lstm[:, np.newaxis], T, axis=1)  # (batch_size, T)로 형태 변경
    test_data_combined = np.concatenate([test_data, predicted_stock_expanded], axis=2)

    # G 모델을 사용하여 합성 주가를 생성
    generated_stock = generator.predict(test_data_combined)

    # LSTM 예측과 G 모델의 예측을 평균
    final_predicted_stock = (predicted_stock_lstm + generated_stock.mean(axis=1)) / 2.0

    return final_predicted_stock

# 학습 후 예측
after_gan_predictions = final_predictions(generator, X_test, batch_size=32)

# 전체 예측 출력
print(after_gan_predictions)

# 최종 예측 출력 (평균)
average_prediction = np.mean(after_gan_predictions)
print(average_prediction)

0/10 [D loss: 0.6467320322990417 | D accuracy: 48.4375] [G loss: 0.6923903822898865]
1/10 [D loss: 0.6353811025619507 | D accuracy: 48.4375] [G loss: 0.6917588710784912]
2/10 [D loss: 0.621012419462204 | D accuracy: 57.8125] [G loss: 0.6911375522613525]
3/10 [D loss: 0.609836995601654 | D accuracy: 65.625] [G loss: 0.6899081468582153]
4/10 [D loss: 0.5848531275987625 | D accuracy: 82.8125] [G loss: 0.6919918656349182]
5/10 [D loss: 0.572172611951828 | D accuracy: 75.0] [G loss: 0.690685510635376]
6/10 [D loss: 0.5602634102106094 | D accuracy: 73.4375] [G loss: 0.6893919706344604]
7/10 [D loss: 0.554436445236206 | D accuracy: 79.6875] [G loss: 0.6892921924591064]
8/10 [D loss: 0.5505445152521133 | D accuracy: 68.75] [G loss: 0.6864761710166931]
9/10 [D loss: 0.556775376200676 | D accuracy: 59.375] [G loss: 0.6840354204177856]
[[5.6647887 5.6450806 5.625985  ... 5.920446  5.9143167 5.90173  ]
 [6.8541455 6.8344374 6.815342  ... 7.1098027 7.1036735 7.091087 ]
 [7.1531034 7.133395  7.1143 

In [12]:
# 전체 코드

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# 야후 파이낸스 주식 정보 가져오기
# 주식 종목 지정 필수

ticker = 'NVDA'
end_date = datetime.now()
start_date = end_date - timedelta(days=3650)
data = yf.download(ticker, start=start_date, end=end_date)

closing_prices = data['Close']
volume = data['Volume']
moving_average = closing_prices.rolling(window=120).mean()

# 6개 알고리즘 (Bollinger, MACD, MOK, RSI, STCK, WR) 전체 코드


# Bollinger

def calculate_bollinger_bands(closing_prices, window=20, num_std=2):
    rolling_mean = closing_prices.rolling(window=window).mean()
    rolling_std = closing_prices.rolling(window=window).std()

    upper_band = rolling_mean + num_std * rolling_std
    lower_band = rolling_mean - num_std * rolling_std
    return upper_band, lower_band

upper_band, lower_band = calculate_bollinger_bands(closing_prices)
bollinger_bands_data = pd.DataFrame({'Upper Band': upper_band, 'Lower Band': lower_band})

bollinger_bands_data.dropna(inplace=True)
result = np.where(closing_prices > upper_band, (closing_prices - upper_band) / (upper_band - lower_band), np.where(closing_prices < lower_band, (closing_prices - lower_band) / (lower_band - upper_band), np.nan))
bollinger_bands_data['Result'] = pd.Series(result, index=closing_prices.index)

bollinger_bands_data['Result'].fillna(method='ffill', inplace=True)

bollinger_bands_data['Prediction'] = bollinger_bands_data['Result'].shift(-1)
bollinger_bands_data.dropna(inplace=True)
bollinger_bands_data['Trend'] = bollinger_bands_data['Prediction'].diff()
bollinger_bands_data['Trend'] = (bollinger_bands_data['Trend'] + 1) / 2

min_pred = bollinger_bands_data['Prediction'].min()
max_pred = bollinger_bands_data['Prediction'].max()
bollinger_bands_data['Prediction'] = (bollinger_bands_data['Prediction'] - min_pred) / (max_pred - min_pred)

min_trend = bollinger_bands_data['Trend'].min()
max_trend = bollinger_bands_data['Trend'].max()
bollinger_bands_data['Trend'] = (bollinger_bands_data['Trend'] - min_trend) / (max_trend - min_trend)*100

pd.set_option('display.float_format', '{:.4f}'.format)


# MACD

def calculate_macd(closing_prices):

    short_ema = closing_prices.ewm(span=26, adjust=False).mean()
    long_ema = closing_prices.ewm(span=12, adjust=False).mean()

    dif = short_ema - long_ema
    signal_line = dif.ewm(span=9, adjust=False).mean()
    histogram = dif - signal_line

    return dif, signal_line, histogram

dif, signal_line, histogram = calculate_macd(closing_prices)
macd_data = pd.DataFrame({'DIF': dif, 'Signal Line': signal_line, 'Histogram': histogram})
macd_data['Result'] = np.where(macd_data['DIF'] > macd_data['Signal Line'], 1, 0)

macd_data['Result'] = (macd_data['Result'].rolling(window=20, min_periods=1).mean())*100
macd_data['Prediction'] = macd_data['Result'].shift(-1)
macd_data.dropna(inplace=True)

# MOK

def calculate_mok(closing_prices, period=14, ma_period=20):
    returns = closing_prices.pct_change()

    moving_average = closing_prices.rolling(window=ma_period).mean()
    momentum = closing_prices.diff(period)
    normalized_momentum = 100 * (momentum - np.min(momentum)) / (np.max(momentum) - np.min(momentum))

    return normalized_momentum

mok_values = calculate_mok(closing_prices)

# RSI

def calculate_rsi(closing_prices, window=20):
    price_changes = closing_prices.diff()
    up_changes = price_changes.where(price_changes > 0, 0)
    down_changes = -price_changes.where(price_changes < 0, 0)

    avg_up_changes = up_changes.rolling(window=window, min_periods=1).mean()
    avg_down_changes = down_changes.rolling(window=window, min_periods=1).mean()

    rs = avg_up_changes / avg_down_changes
    rsi = 100 - (100 / (1 + rs))

    return rsi

rsi_result = calculate_rsi(closing_prices, window=len(closing_prices)//10)  # 기간을 데이터의 1/10로 동적으로 설정
rsi_result.dropna(inplace=True)

# STCK

def calculate_stck(closing_prices, window=20):
    lowest_low = closing_prices.rolling(window=window).min()
    highest_high = closing_prices.rolling(window=window).max()

    stck = 100 * (closing_prices - lowest_low) / (highest_high - lowest_low)

    stck_ma = stck.rolling(window=20).mean()

    return stck_ma

stck_result = calculate_stck(closing_prices)
stck_result.dropna(inplace=True)

# WR

def calculate_williams_r(closing_prices, period=14):
    high_prices = data['High']
    low_prices = data['Low']
    moving_average = closing_prices.rolling(window=20).mean()

    highest_high = high_prices.rolling(window=period).max()
    lowest_low = low_prices.rolling(window=period).min()

    williams_r = (highest_high - moving_average) / (highest_high - lowest_low) * -100

    normalized_williams_r = 100 * (williams_r + 100) / 100

    return normalized_williams_r

wr_result = calculate_williams_r(closing_prices)

# 6개 지표들을 하나의 데이터 프레임에 합치기

final_df = pd.DataFrame({
    'Bollinger': bollinger_bands_data['Trend'],
    'MACD': macd_data['Prediction'],
    'MOK': mok_values,
    'RSI': rsi_result,
    'STCK': stck_result,
    'WR': wr_result
})

# NaN 값 제거
final_df.dropna(inplace=True)

# 최종 데이터 프레임 출력
print(final_df)

# LSTM 모델


final_df_values = final_df.values
data_values = data.values

# 입력 시퀀스에 대한 타임 스텝(T)을 정의합니다.

T = 10  # 원하는대로 조정할 수 있습니다.

# 입력 및 타겟을 위한 데이터 시퀀스 생성
final_df_sequences = []
data_sequences = []

for i in range(len(data_values) - T):
    final_df_sequences.append(final_df_values[i:i+T])
    data_sequences.append(closing_prices.iloc[i+T])

filtered_final_df_sequences = []
filtered_data_sequences = []

# 시퀀스 길이 맞추기
for i, seq in enumerate(final_df_sequences):
    if len(seq) == 10:
        filtered_final_df_sequences.append(seq)
        filtered_data_sequences.append(data_sequences[i])

final_df_sequences = filtered_final_df_sequences
data_sequences = filtered_data_sequences

# 시퀀스를 넘파이 배열로 변환
X = np.array(final_df_sequences)
y = np.array(data_sequences)


# 데이터를 훈련 및 테스트 세트로 분할
split_ratio = 0.8  # 분할 비율을 조정할 수 있습니다.
split_index = int(split_ratio * len(X))

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# LSTM 모델 구축
model = Sequential()

# 첫 번째 LSTM 레이어 (시퀀스 출력을 반환하여 다음 레이어로 전달)
model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(T, 6)))

# 두 번째 LSTM 레이어
model.add(LSTM(50, activation='relu', return_sequences=True))

# 세 번째 LSTM 레이어 (시퀀스 출력을 반환하지 않음)
model.add(LSTM(50, activation='relu'))

# 출력 레이어
model.add(Dense(1))

# 모델 컴파일
model.compile(optimizer='adam', loss='mean_squared_error')


# 모델 훈련
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# 테스트 데이터에서 모델 평가
loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')

# 예측 수행
predictions = model.predict(X_test)
print(predictions)

# 예측 결과를 데이터 프레임으로 변환
predictions_df = pd.DataFrame(predictions, columns=["Predictions"])
print(predictions_df)

# LSTM 모델의 예측 값을 6개 지표 값과 함께 생성 모델 입력값으로 넣기 위해

# T 타임 스텝 만큼의 길이를 고려하여 LSTM 예측값 넣기
final_df["LSTM_Predictions"] = np.nan
final_df["LSTM_Predictions"].iloc[-len(predictions_df):] = predictions_df["Predictions"].values

# NaN 값 제거
final_df.dropna(inplace=True)

# 다시 시퀀스로 변환
final_df_values = final_df.values

final_df_sequences = []
for i in range(len(data_values) - T):
    final_df_sequences.append(final_df_values[i:i+T])

X = np.array(final_df_sequences)

# 생성 및 구분 모델

from tensorflow.keras.layers import Reshape, Flatten, LeakyReLU, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# 생성 모델 G (주식 가격의 시퀀스를 생성)
def build_generator(input_shape=(T, 7)):  # 입력 값 : LSTM 모델 예측 + 6개 지표 값
    model = Sequential()

    # LSTM 레이어와 BatchNormalization, LeakyReLU 활성화 함수를 이용하여 시퀀스를 학습
    model.add(LSTM(128, return_sequences=True, input_shape=input_shape))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))

    # Flatten layer를 제거하고, T 길이의 시퀀스를 생성하도록 수정
    model.add(LSTM(128))
    model.add(Dense(T, activation='linear'))

    noise = tf.keras.layers.Input(shape=input_shape)
    generated_sequence = model(noise)

    return Model(noise, generated_sequence)

# 구분 모델 D (주가의 실제 시퀀스와 생성된 시퀀스를 구분)
def build_discriminator(input_shape=(T, 1)):
    model = Sequential()

    # LSTM 레이어와 LeakyReLU 활성화 함수를 사용하여 시퀀스를 처리
    model.add(LSTM(128, input_shape=input_shape))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))

    sequence = tf.keras.layers.Input(shape=input_shape)
    validity = model(sequence)

    # 주어진 시퀀스가 실제인지 생성된 것인지에 대한 확률
    return Model(sequence, validity)

    # 학습 함수 (생성된 주식 가격 시퀀스와 실제 주식 가격 시퀀스를 사용하여 판별자를 학습)

def train_gan(generator, discriminator, combined, epochs, batch_size=32):
    valid = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        sequences = X_train[idx]

        # 주식 예측값 생성
        predicted_stock = model.predict(sequences)

        # LSTM 예측값을 sequences에 추가
        predicted_stock_reshaped = predicted_stock.reshape(batch_size, 1, 1)  # (batch_size, 1, 1) 형태로 변환
        predicted_stock_expanded = np.repeat(predicted_stock_reshaped, T, axis=1)  # predicted_stock_reshaped를 T 타임 스텝만큼 확장
        X_train_combined = np.concatenate([sequences, predicted_stock_expanded], axis=2)

        generated_stock = generator.predict(X_train_combined)
        generated_stock_reshaped = generated_stock.reshape(batch_size, T, 1)

        # 판별자 학습
        d_loss_real = discriminator.train_on_batch(predicted_stock_expanded, valid)
        d_loss_fake = discriminator.train_on_batch(generated_stock_reshaped, fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # 생성자 학습
        g_loss = combined.train_on_batch(X_train_combined, valid)

        print(f"{epoch}/{epochs} [D loss: {d_loss[0]} | D accuracy: {100 * d_loss[1]}] [G loss: {g_loss}]")

        # 판별자 및 생성자 모델 초기화, 생성자와 판별자를 결합하여

discriminator = build_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=['accuracy'])

generator = build_generator()
z = tf.keras.layers.Input(shape=(T, 7))
generated_sequence = generator(z)

discriminator.trainable = False
validity = discriminator(generated_sequence)

combined = Model(z, validity)
combined.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))

#학습 시작
train_gan(generator, discriminator, combined, epochs=10, batch_size=32)

# 학습 후 최종 예측을 수행하는 함수
def final_predictions(generator, test_data, batch_size=32):
    predicted_stock_lstm = model.predict(test_data)

    # LSTM 예측값을 test_data에 추가
    predicted_stock_expanded = np.repeat(predicted_stock_lstm[:, np.newaxis], T, axis=1)  # (batch_size, T)로 형태 변경
    test_data_combined = np.concatenate([test_data, predicted_stock_expanded], axis=2)

    # G 모델을 사용하여 합성 주가를 생성
    generated_stock = generator.predict(test_data_combined)

    # LSTM 예측과 G 모델의 예측을 평균
    final_predicted_stock = (predicted_stock_lstm + generated_stock.mean(axis=1)) / 2.0

    return final_predicted_stock

# 학습 후 예측
after_gan_predictions = final_predictions(generator, X_test, batch_size=32)

# 전체 예측 출력
print(after_gan_predictions)

# 최종 예측 출력 (평균)
average_prediction = np.mean(after_gan_predictions)
print(average_prediction)

[*********************100%%**********************]  1 of 1 completed
            Bollinger    MACD     MOK     RSI    STCK      WR
Date                                                         
2014-02-13    43.8505 55.0000 40.3087 55.0434 50.1006 27.0795
2014-02-14    28.7469 55.0000 40.3868 56.6366 51.5429 25.5513
2014-02-18    42.4711 55.0000 40.3676 56.6001 53.3726 28.8534
2014-02-19    51.9447 55.0000 40.4145 57.2606 54.9226 29.2618
2014-02-20    47.6999 55.0000 40.4602 58.9275 56.5726 28.8429
...               ...     ...     ...     ...     ...     ...
2023-10-10    47.6999 60.0000 57.2054 60.5174 35.5890 51.8735
2023-10-11    47.6999 55.0000 67.9281 60.7924 38.7841 45.8897
2023-10-12    47.6999 50.0000 65.7451 60.8578 41.9000 41.5991
2023-10-13    47.6999 45.0000 55.6667 60.1984 45.1522 38.4900
2023-10-16    47.6999 40.0000 60.2106 60.6209 49.3747 40.2701

[2435 rows x 6 columns]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


  X = np.array(final_df_sequences)


0/10 [D loss: 0.730454295873642 | D accuracy: 15.625] [G loss: 0.6917109489440918]
1/10 [D loss: 0.7123456001281738 | D accuracy: 15.625] [G loss: 0.692314624786377]
2/10 [D loss: 0.6933015584945679 | D accuracy: 48.4375] [G loss: 0.6927284002304077]
3/10 [D loss: 0.6804961264133453 | D accuracy: 75.0] [G loss: 0.693375825881958]
4/10 [D loss: 0.6684160828590393 | D accuracy: 93.75] [G loss: 0.6934597492218018]
5/10 [D loss: 0.6569936573505402 | D accuracy: 100.0] [G loss: 0.694181501865387]
6/10 [D loss: 0.6436269581317902 | D accuracy: 100.0] [G loss: 0.6949526071548462]
7/10 [D loss: 0.6321579813957214 | D accuracy: 96.875] [G loss: 0.6948208212852478]
8/10 [D loss: 0.6099732518196106 | D accuracy: 89.0625] [G loss: 0.6959587931632996]
9/10 [D loss: 0.603618860244751 | D accuracy: 93.75] [G loss: 0.6964036226272583]
[[18.858595  18.858597  18.85802   ... 18.997744  18.970577  18.923082 ]
 [16.947802  16.947803  16.947227  ... 17.08695   17.059784  17.01229  ]
 [22.86097   22.860971 