In [1]:
import pandas as pd
import numpy as np
import os
import joblib

from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, f_classif

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt
import seaborn as sns

2025-07-28 18:04:30.548284: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-28 18:04:30.706041: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-07-28 18:04:31.689752: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
TICKER = 'AAPL'
DATA_DIR = '../data'
SPLIT_RATIO = 0.8
LSTM_MODEL_DIR = '../models_lstm'

os.makedirs(LSTM_MODEL_DIR, exist_ok=True)


LSTM modelleri '../models_lstm' klasörüne kaydedilecek.


In [3]:
def load_and_clean_data(file_path):
    try:
        df = pd.read_csv(file_path)
        df['Date'] = pd.to_datetime(df['Date'])
        df.set_index('Date', inplace=True)
        df.dropna(how='any', inplace=True)
        price_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
        df = df[(df[price_cols] > 0).all(axis=1)]
        return df
    except Exception as e:
        return None

def feature_engineering(df):
    df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()
    df['EMA_20'] = df['Close'].ewm(span=20, adjust=False).mean()
    df['EMA_50'] = df['Close'].ewm(span=50, adjust=False).mean()
    delta = df['Close'].diff(1); gain = delta.where(delta > 0, 0); loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=14).mean(); avg_loss = loss.rolling(window=14).mean()
    df['RSI'] = 100 - (100 / (1 + (avg_gain / avg_loss)))
    high_low = df['High'] - df['Low']; high_close = np.abs(df['High'] - df['Close'].shift()); low_close = np.abs(df['Low'] - df['Close'].shift())
    ranges = pd.concat([high_low, high_close, low_close], axis=1)
    df['ATR'] = np.max(ranges, axis=1).rolling(window=14).mean()
    ema_12 = df['Close'].ewm(span=12, adjust=False).mean(); ema_26 = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = ema_12 - ema_26
    df['MACD_signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
    sma_20 = df['Close'].rolling(window=20).mean(); std_20 = df['Close'].rolling(window=20).std()
    df['BB_upper'] = sma_20 + (std_20 * 2); df['BB_lower'] = sma_20 - (std_20 * 2)
    df['BB_width'] = df['BB_upper'] - df['BB_lower']
    df['ROC'] = ((df['Close'] - df['Close'].shift(14)) / df['Close'].shift(14)) * 100
    df['Target'] = (df['Close'] > df['Open']).astype(int)
    df['Target'] = df['Target'].shift(-1)
    df.dropna(inplace=True)
    df['Target'] = df['Target'].astype(int)
    return df

def create_lstm_sequences(X_data, y_data, sequence_length):
    X_sequences, y_sequences = [], []
    for i in range(len(X_data) - sequence_length):
        X_sequences.append(X_data[i:(i + sequence_length)])
        y_sequences.append(y_data[i + sequence_length])
    return np.array(X_sequences), np.array(y_sequences)

In [4]:
def run_lstm_optimization(ticker, sequence_length, lstm_units, k_features):
    print("-" * 60)
    print(f"Deney Başlatılıyor: SeqLen={sequence_length}, LSTM_Units={lstm_units}, K_Features={k_features}")
    file_path = os.path.join(DATA_DIR, f"{ticker}.csv")
    df = load_and_clean_data(file_path)
    if df is None: return None
    df_featured = feature_engineering(df)
    
    X = df_featured.drop('Target', axis=1)
    y = df_featured['Target'].values

    split_index = int(len(X) * SPLIT_RATIO)
    X_train_raw, X_test_raw = X[:split_index], X[split_index:]
    y_train_raw, y_test_raw = y[:split_index], y[split_index:]
    selector = SelectKBest(f_classif, k=min(k_features, X_train_raw.shape[1]))
    selector.fit(X_train_raw, y_train_raw)
    selected_cols = X_train_raw.columns[selector.get_support()]
    X_train_selected = X_train_raw[selected_cols]
    X_test_selected = X_test_raw[selected_cols]
    
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train_selected)
    X_test_scaled = scaler.transform(X_test_selected)     
    
    X_train, y_train = create_lstm_sequences(X_train_scaled, y_train_raw, sequence_length)
    X_test, y_test = create_lstm_sequences(X_test_scaled, y_test_raw, sequence_length)

    if len(X_train) == 0 or len(X_test) == 0: return None

    model = Sequential([
        Input(shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(units=lstm_units, return_sequences=True), Dropout(0.3),
        LSTM(units=lstm_units), Dropout(0.3),
        Dense(units=1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, callbacks=[early_stopping], verbose=0)
    
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    
    model_filename = f"lstm_{ticker}_seq{sequence_length}_units{lstm_units}_k{k_features}.keras"
    model_filepath = os.path.join(LSTM_MODEL_DIR, model_filename)
    model.save(model_filepath)
    
    print(f"Deney Tamamlandı -> Test Doğruluğu: {test_accuracy:.4f}")

    return {
        'sequence_length': sequence_length,
        'lstm_units': lstm_units,
        'k_features': k_features,
        'test_accuracy': test_accuracy,
        'model_filepath': model_filepath
    }

In [5]:
sequence_lengths_to_try = [3,10,20, 40,100]
lstm_units_to_try = [32, 50]
k_features_to_try = [5,8, 12]

all_results = []

for seq_len in sequence_lengths_to_try:
    for units in lstm_units_to_try:
        for k in k_features_to_try:
            try:
                res = run_lstm_optimization(
                    ticker=TICKER,
                    sequence_length=seq_len, 
                    lstm_units=units, 
                    k_features=k
                )
                if res:
                    all_results.append(res)
            except Exception as e:
                print(f"HATA: {seq_len}, {units}, {k} parametreleriyle deneyde hata: {e}")

print("\n--- Optimizasyon Tamamlandı ---")

------------------------------------------------------------
Deney Başlatılıyor: SeqLen=3, LSTM_Units=32, K_Features=5
Deney Tamamlandı -> Test Doğruluğu: 0.4800
------------------------------------------------------------
Deney Başlatılıyor: SeqLen=3, LSTM_Units=32, K_Features=8
Deney Tamamlandı -> Test Doğruluğu: 0.5190
------------------------------------------------------------
Deney Başlatılıyor: SeqLen=3, LSTM_Units=32, K_Features=12
Deney Tamamlandı -> Test Doğruluğu: 0.5190
------------------------------------------------------------
Deney Başlatılıyor: SeqLen=3, LSTM_Units=50, K_Features=5
Deney Tamamlandı -> Test Doğruluğu: 0.4825
------------------------------------------------------------
Deney Başlatılıyor: SeqLen=3, LSTM_Units=50, K_Features=8
Deney Tamamlandı -> Test Doğruluğu: 0.5073
------------------------------------------------------------
Deney Başlatılıyor: SeqLen=3, LSTM_Units=50, K_Features=12
Deney Tamamlandı -> Test Doğruluğu: 0.4876
--------------------------

In [6]:
if all_results:
    results_df = pd.DataFrame(all_results)
    results_df_sorted = results_df.sort_values(by='test_accuracy', ascending=False)

    print("Hiper-parametre Optimizasyon Sonuçları:")
    display(results_df_sorted)
    
    # En iyi sonucu alan modeli yüklemek için dosya yolunu alabilirsiniz
    best_model_path = results_df_sorted.iloc[0]['model_filepath']
    print(f"\nEn iyi modelin yolu: {best_model_path}")
else:
    print("Hiçbir deney başarıyla tamamlanamadı.")

Hiper-parametre Optimizasyon Sonuçları:


Unnamed: 0,sequence_length,lstm_units,k_features,test_accuracy,model_filepath
27,100,50,5,0.521832,../models_lstm/lstm_AAPL_seq100_units50_k5.keras
1,3,32,8,0.518987,../models_lstm/lstm_AAPL_seq3_units32_k8.keras
2,3,32,12,0.518987,../models_lstm/lstm_AAPL_seq3_units32_k12.keras
8,10,32,12,0.518801,../models_lstm/lstm_AAPL_seq10_units32_k12.keras
7,10,32,8,0.518801,../models_lstm/lstm_AAPL_seq10_units32_k8.keras
24,100,32,5,0.518637,../models_lstm/lstm_AAPL_seq100_units32_k5.keras
29,100,50,12,0.518637,../models_lstm/lstm_AAPL_seq100_units50_k12.keras
28,100,50,8,0.515442,../models_lstm/lstm_AAPL_seq100_units50_k8.keras
4,3,50,8,0.507342,../models_lstm/lstm_AAPL_seq3_units50_k8.keras
15,20,50,5,0.499489,../models_lstm/lstm_AAPL_seq20_units50_k5.keras



En iyi modelin yolu: ../models_lstm/lstm_AAPL_seq100_units50_k5.keras
