In [1]:
import pandas as pd
df = pd.read_csv("C:/Users/seonahryu/Desktop/urp/duplicated_sentiment_nasdaq.csv", index_col='date')

-1~1 정규화

In [2]:
from sklearn.preprocessing import MinMaxScaler

# 데이터 정규화
features_to_scale = ['past_day_close', 'Open', 'High', 'Low', 'Volume', 'Adj Close'] # nasdaq 지수만 정규화

scaler = MinMaxScaler(feature_range=(-1,1))
df[features_to_scale] = scaler.fit_transform(df[features_to_scale])

print(df.head(10))

                                                         text  \
date                                                            
2009-05-05  Donald Trump will be appearing on The View tom...   
2009-05-08  Donald Trump reads Top Ten Financial Tips on L...   
2009-05-09  New Blog Post: Celebrity Apprentice Finale and...   
2009-05-12  """My persona will never be that of a wallflow...   
2009-05-12  "Miss USA Tara Conner will not be fired - ""I'...   
2009-05-13  Listen to an interview with Donald Trump discu...   
2009-05-14  """Strive for wholeness and keep your sense of...   
2009-05-15  "Enter the ""Think Like A Champion"" signed bo...   
2009-05-19  """...these days...we could all use a little o...   
2009-05-20  """Always know you could be on the precipice o...   

            vader_sentiment  roberta_sentiment  past_day_close      Open  \
date                                                                       
2009-05-05                1                  0       -0.989263 -0.9

sentiment 범위 확인

In [3]:
# 최대값과 최소값 계산
vader_max = df['vader_sentiment'].max()
vader_min = df['vader_sentiment'].min()
roberta_max = df['roberta_sentiment'].max()
roberta_min = df['roberta_sentiment'].min()

# 결과 출력
print(f"VADER Sentiment - 최대값: {vader_max}, 최소값: {vader_min}")
print(f"Roberta Sentiment - 최대값: {roberta_max}, 최소값: {roberta_min}")

VADER Sentiment - 최대값: 1, 최소값: -1
Roberta Sentiment - 최대값: 1, 최소값: -1


# LSTM

In [None]:
# 하이퍼파라미터 튜닝을 위해 keras tuner 설치
!pip install keras-tuner

In [5]:
import numpy as np

# 데이터셋 생성 함수 정의
def create_dataset(X, y, time_step=30):
    Xs, ys = [], []
    for i in range(len(X) - time_step):
        Xs.append(X[i:(i + time_step)])
        ys.append(y[i + time_step])
    return np.array(Xs), np.array(ys)

In [9]:
from sklearn.metrics import mean_squared_error, r2_score

# 예측 및 성능 평가 함수 정의
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r_squared = r2_score(y_test, y_pred)
    return mse, rmse, r_squared

input : VADER

In [6]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['vader_sentiment']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 1), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 1  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [12]:
import keras_tuner as kt
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping

# 하이퍼파라미터 튜닝을 위한 모델 정의
def build_model(hp):
    model = Sequential()
    model.add(LSTM(units=hp.Int('units', min_value=10, max_value=100, step=10), 
                   return_sequences=True,
                   input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(rate=hp.Float('dropout_rate', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(LSTM(units=hp.Int('units_2', min_value=10, max_value=100, step=10)))
    model.add(Dropout(rate=hp.Float('dropout_rate_2', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(Dense(1, activation='tanh')) # -1~1 정규화해서 출력층 tanh func
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Keras Tuner 설정
tuner = kt.Hyperband(build_model,
                     objective='val_loss',
                     max_epochs=200,
                     factor=3,
                     directory='my_dir',
                     project_name='lstm')

# 조기 종료 콜백 정의
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

Reloading Tuner from my_dir\lstm\tuner0.json


In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

Trial 86 Complete [00h 00m 40s]
val_loss: 0.2339242845773697

Best val_loss So Far: 0.23280903697013855
Total elapsed time: 00h 53m 47s

Search: Running Trial #87

Value             |Best Value So Far |Hyperparameter
100               |60                |units
0.2               |0.5               |dropout_rate
30                |60                |units_2
0.1               |0.5               |dropout_rate_2
3                 |3                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
4                 |4                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/3
[1m260/289[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 30ms/step - loss: 0.2447

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : RoBERTa

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['roberta_sentiment']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 1), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 1  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : VADER + RoBERTa

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['vader_sentiment', 'roberta_sentiment']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 2), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 2  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : past_day_close

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['past_day_close']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 1), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 1  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : past_day_close + VADER

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['past_day_close', 'vader_sentiment']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 2), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 2  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : past_day_close + RoBERTa

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['past_day_close', 'roberta_sentiment']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 2), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 2  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : past_day_close + VADER + RoBERTa

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['past_day_close', 'vader_sentiment', 'roberta_sentiment']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 3), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 3  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : Open + High + Low + Volume

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['Open', 'High', 'Low', 'Volume']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 4), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 4  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : Open + High + Low + Volume + past_day_close

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['Open', 'High', 'Low', 'Volume', 'past_day_close']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 5), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 5  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : Open + High + Low + Volume + past_day_close + VADER

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['Open', 'High', 'Low', 'Volume', 'past_day_close', 'vader_sentiment']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 6), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 6  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : Open + High + Low + Volume + past_day_close + RoBERTa

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['Open', 'High', 'Low', 'Volume', 'past_day_close', 'roberta_sentiment']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 6), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 6  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

input : Open + High + Low + Volume + past_day_close + VADER + RoBERTa 

In [None]:
from sklearn.model_selection import train_test_split

# df에서 input과 Adj Close 준비
features_1d = df[['Open', 'High', 'Low', 'Volume', 'past_day_close', 'vader_sentiment', 'roberta_sentiment']].values.flatten()
target_1d = df['Adj Close'].values.flatten()  # Adj Close 열 사용

# 데이터셋을 7:3 비율로 나누기
X_train, X_test, y_train, y_test = train_test_split(features_1d.reshape(-1, 7), target_1d, test_size=0.3, random_state=42)

# 데이터셋 생성
X_train, y_train = create_dataset(X_train, y_train)
X_test, y_test = create_dataset(X_test, y_test)

# feature 수 정의
num_features = 7  # X의 feature 수

# X의 형태 조정: (samples, time_steps, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], num_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], num_features)

In [None]:
# 하이퍼파라미터 탐색
# 배치 사이즈를 하이퍼파라미터로 추가
batch_sizes = [128, 256, 512, 1024, 2048]  # 다양한 배치 사이즈 설정
history_dict = {}  # 손실 기록 저장
predictions_dict = {}  # 예측값 저장

for batch_size in batch_sizes:
    tuner.search(X_train, y_train,
                 epochs=20,  # 짧은 에포크 수로 조정
                 validation_data=(X_test, y_test),
                 callbacks=[early_stopping],
                 batch_size=batch_size)

# 최적의 하이퍼파라미터 가져오기
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
for hp, value in best_hyperparameters.values.items():
    print(f"{hp}: {value}")

# 최적의 모델 가져오기
best_model = tuner.get_best_models(num_models=1)[0]

# 최적의 배치 사이즈로 모델 학습
final_batch_size = best_hyperparameters.get('batch_size', 512)  # 하이퍼파라미터에서 가져오거나 기본값 설정
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=final_batch_size,
    verbose=1,
    callbacks=[early_stopping]
)

# 손실 기록 저장
history_dict[final_batch_size] = history.history

# 모델 평가 및 예측 수행
mse, rmse, r_squared = evaluate_model(best_model, X_test, y_test)

# 최종 성능 출력
print(f"Test MSE: {mse:.3f}, Test RMSE: {rmse:.3f}, R-squared: {r_squared:.2f}")

# 예측값 저장
predictions_dict[final_batch_size] = best_model.predict(X_test).flatten()

In [None]:
import matplotlib.pyplot as plt

for batch_size in batch_sizes:
    plt.figure(figsize=(20, 7))
    
    # Plot actual values
    plt.plot(y_test.flatten(), label='Actual Values', color='blue', alpha=0.5)
    
    # Plot predicted values
    plt.plot(predictions_dict[batch_size], label=f'Predicted Values (Batch Size: {batch_size})', color='red')
    
    plt.title(f'Actual vs Predicted Values (Batch Size: {batch_size})')
    plt.xlabel('Sample Index')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
for batch_size in batch_sizes:
    plt.figure(figsize=(12, 6))
    plt.plot(history_dict[batch_size]['loss'], label='Train Loss')
    plt.plot(history_dict[batch_size]['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves (Batch Size: {batch_size})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()