### Introduction 

Implementation of senol2009stock. The paper predicts stock return direction using TSI and whether they outperform Logistic regression on the ISE-30 index of Turkey. 

Total of 27 stocks were trained on. The training period was `1998-01-05 to 2005-12-29`. The testing period was `2006-01-06 to 2007-08-31`. Data source utilised were `Daily prices` which were used to compute technical indicators. The TSI computed were: 
- *MA14*: 14-day Moving Average          
- *MA37*: 37-day Moving Average          
- *%K14*:14-day Stochastic Oscillator   
- *%D3*:3-day Moving Average of %K14   
- *RSI14*:14-day Relative Strength Index 




### Imports

In [14]:
import yfinance as yf
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf

### Tickers

We use Yahoo finance to extract relevant tickers. The istanbul ticker is .IS which stands for Istanbul Stock Exchange.

In [4]:

tickers = [
    "AKBNK.IS", "ARCLK.IS", "DOHOL.IS", "DYHOL.IS", "EREGL.IS", "FINBN.IS", "FORTS.IS",
    "GARAN.IS", "GSDHO.IS", "HURGZ.IS", "ISCTR.IS", "ISGYO.IS", "KCHOL.IS", "MIGRS.IS",
    "PETKM.IS", "PTOFS.IS", "SAHOL.IS", "SISE.IS", "SKBNK.IS", "TCELL.IS", "THYAO.IS",
    "TOASO.IS", "TSKB.IS", "TUPRS.IS", "ULKER.IS", "VESTL.IS", "YKBNK.IS"
]

# the date for which we would train
start_train = "1998-01-05"
end_test = "2007-08-31"

data = yf.download(
    tickers,
    start=start_train,
    end=end_test,
    progress=False
) # extracting historical data
# we will use return as the classification target

# before we start training, we need to check if the data is available for all tickers. Some tickers have been delisted, so we need to check. 
available_tickers = []
missing_tickers = []

for ticker in tickers:
    try:
        if not data['Close'][ticker].dropna().empty:
            available_tickers.append(ticker)
        else:
            missing_tickers.append(ticker)
    except KeyError:
        missing_tickers.append(ticker)

available_tickers, missing_tickers


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/FORTS.IS?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=FORTS.IS&crumb=5mqGWiPG4um

6 Failed downloads:
['PTOFS.IS', 'MIGRS.IS', 'DYHOL.IS', 'FINBN.IS']: YFPricesMissingError('possibly delisted; no price data found  (1d 1998-01-05 -> 2007-08-31)')
['ISCTR.IS']: YFPricesMissingError('possibly delisted; no price data found  (1d 1998-01-05 -> 2007-08-31) (Yahoo error = "Data doesn\'t exist for startDate = 883951200, endDate = 1188507600")')
['FORTS.IS']: AttributeError("'NoneType' object has no attribute 'update'")


(['AKBNK.IS',
  'ARCLK.IS',
  'DOHOL.IS',
  'EREGL.IS',
  'GARAN.IS',
  'GSDHO.IS',
  'HURGZ.IS',
  'ISGYO.IS',
  'KCHOL.IS',
  'PETKM.IS',
  'SAHOL.IS',
  'SISE.IS',
  'SKBNK.IS',
  'TCELL.IS',
  'THYAO.IS',
  'TOASO.IS',
  'TSKB.IS',
  'TUPRS.IS',
  'ULKER.IS',
  'VESTL.IS',
  'YKBNK.IS'],
 ['DYHOL.IS', 'FINBN.IS', 'FORTS.IS', 'ISCTR.IS', 'MIGRS.IS', 'PTOFS.IS'])

### UP and Down Days of individual tickers

We need to check this to ensure that our classification is not trivial - that is, our classifier is not obtaining a high result by just predicinting the majority class. 

In [6]:

train_start = "1998-01-05"
train_end = "2005-12-29"
test_start = "2006-01-06"
test_end = "2007-08-31"

# Download historical data
data = yf.download(available_tickers, start=train_start, end=test_end, progress=False)['Close']

# Function to calculate up/down counts
def count_up_down_days(close_prices):
    returns = close_prices.pct_change()
    directions = returns.applymap(lambda x: 'Up' if x > 0 else ('Down' if x < 0 else 'Same'))

    # Separate training and testing periods
    train_directions = directions[train_start:train_end]
    test_directions = directions[test_start:test_end]

    # Count up/down days
    train_counts = train_directions.apply(lambda col: col.value_counts())
    test_counts = test_directions.apply(lambda col: col.value_counts())

    return train_counts.fillna(0).astype(int), test_counts.fillna(0).astype(int)

train_counts, test_counts = count_up_down_days(data)

  directions = returns.applymap(lambda x: 'Up' if x > 0 else ('Down' if x < 0 else 'Same'))


In [12]:
train_counts.head()


Ticker,AKBNK.IS,ARCLK.IS,DOHOL.IS,EREGL.IS,GARAN.IS,GSDHO.IS,HURGZ.IS,ISGYO.IS,KCHOL.IS,PETKM.IS,...,SISE.IS,SKBNK.IS,TCELL.IS,THYAO.IS,TOASO.IS,TSKB.IS,TUPRS.IS,ULKER.IS,VESTL.IS,YKBNK.IS
Down,543,553,575,537,565,552,567,559,532,539,...,526,579,576,568,559,501,536,200,531,557
Same,356,360,313,363,320,403,316,382,393,423,...,345,397,355,375,344,432,374,1102,405,351
Up,573,559,584,572,587,517,589,531,547,510,...,601,496,541,529,569,539,562,170,536,564


In [13]:
test_counts.head()

Ticker,AKBNK.IS,ARCLK.IS,DOHOL.IS,EREGL.IS,GARAN.IS,GSDHO.IS,HURGZ.IS,ISGYO.IS,KCHOL.IS,PETKM.IS,...,SISE.IS,SKBNK.IS,TCELL.IS,THYAO.IS,TOASO.IS,TSKB.IS,TUPRS.IS,ULKER.IS,VESTL.IS,YKBNK.IS
Down,170,176,179,177,185,186,190,177,177,174,...,183,191,169,171,169,183,170,177,184,188
Same,64,71,74,59,50,52,53,69,71,79,...,60,58,61,85,61,47,72,70,76,53
Up,190,177,171,188,189,186,181,178,176,171,...,181,175,194,168,194,194,182,177,164,183


### Computing Indicators 

We 

In [16]:
def compute_indicators(df):
    df = df.copy()  # don't wanna mess up original

    # moving averages for trend
    df['MA14'] = df['Close'].rolling(14).mean()
    df['MA37'] = df['Close'].rolling(37).mean()

    # stochastic %K and %D
    low14 = df['Low'].rolling(14).min()
    high14 = df['High'].rolling(14).max()
    df['K14'] = 100 * (df['Close'] - low14) / (high14 - low14)
    df['D3'] = df['K14'].rolling(3).mean()

    # RSI
    delta = df['Close'].diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(14).mean()
    avg_loss = loss.rolling(14).mean()
    rs = avg_gain / avg_loss
    df['RSI14'] = 100 - (100 / (1 + rs))

    # direction label (up = 1, else 0)
    df['Direction'] = (df['Close'].diff() > 0).astype(int)

    # just drop the NaNs from all the rolling ops
    return df.dropna()


### Model

We performed a grid-search to find the optimum ANN. Here we give the structure of the ANN that performs the best: 3-11-1. 

In [None]:
# 3) build and train a simple ANN
def build_and_train_ann(X_tr, y_tr):
    # basic feedforward NN: one hidden layer, sigmoid everywhere
    model = Sequential()
    model.add(Dense(HIDDEN_NEURONS, activation='sigmoid', input_shape=(X_tr.shape[1],)))
    model.add(Dense(1, activation='sigmoid'))  # just binary output

    # use SGD with high-ish learning rate and momentum (tune later maybe)
    opt = tf.keras.optimizers.SGD(learning_rate=0.2, momentum=0.5)

    # mse is fine for now – could try binary_crossentropy later
    model.compile(optimizer=opt, loss='mean_squared_error')

    # quick train — no validation yet, just want to see if it learns
    model.fit(X_tr, y_tr, epochs=EPOCHS, verbose=0)  # silence output for now

    return model


# Training

In [17]:

TICKERS = available_tickers
START_TRAIN, END_TRAIN = "1998-01-05", "2005-12-29"
START_TEST,  END_TEST  = "2006-01-06", "2007-08-31"
FEATURES = ['RSI14','K14','D3']   
HIDDEN_NEURONS = 11
EPOCHS = 10000

raw = yf.download(TICKERS, start=START_TRAIN, end=END_TEST, progress=False)

results = []
for ticker in TICKERS:
    # assemble OHLCV
    df = pd.DataFrame({
        'Close': raw['Close'][ticker],
        'High':  raw['High'][ticker],
        'Low':   raw['Low'][ticker],
        'Volume':raw['Volume'][ticker]
    }).dropna()
    df = compute_indicators(df)

    # split indices
    tr_idx = df.index <= END_TRAIN
    te_idx = df.index >= START_TEST

    X = df[FEATURES].values
    y = df['Direction'].values

    X_tr, y_tr = X[tr_idx], y[tr_idx]
    X_te, y_te = X[te_idx], y[te_idx]

    # train & evaluate ANN
    ann = build_and_train_ann(X_tr, y_tr)
    preds = (ann.predict(X_te) > 0.5).astype(int).ravel()
    acc  = accuracy_score(y_te, preds)
    results.append({'ticker': ticker, 'accuracy': acc})
    print(f"{ticker:7s} → Test accuracy: {acc:.4f}")

# 5) Summary
res_df = pd.DataFrame(results)
print("\nAverage Test Accuracy across available stocks: "
      f"{res_df['accuracy'].mean():.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AKBNK.IS → Test accuracy: 0.7877


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
ARCLK.IS → Test accuracy: 0.8184


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
DOHOL.IS → Test accuracy: 0.8373


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
EREGL.IS → Test accuracy: 0.8325


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
GARAN.IS → Test accuracy: 0.7877


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
GSDHO.IS → Test accuracy: 0.7877


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
HURGZ.IS → Test accuracy: 0.8019


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
ISGYO.IS → Test accuracy: 0.8208


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
KCHOL.IS → Test accuracy: 0.8278


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
PETKM.IS → Test accuracy: 0.8137


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
SAHOL.IS → Test accuracy: 0.7524


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
SISE.IS → Test accuracy: 0.7807


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
SKBNK.IS → Test accuracy: 0.7123


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
TCELL.IS → Test accuracy: 0.8019


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
THYAO.IS → Test accuracy: 0.8396


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
TOASO.IS → Test accuracy: 0.7995


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
TSKB.IS → Test accuracy: 0.7948


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
TUPRS.IS → Test accuracy: 0.8066


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
ULKER.IS → Test accuracy: 0.8066


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
VESTL.IS → Test accuracy: 0.8325


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
YKBNK.IS → Test accuracy: 0.8160

Average Test Accuracy across 27 stocks: 0.8028
