In [2]:
from datetime import datetime, timedelta
import pandas as pd
import xgboost as xgb
from statsmodels.tools.eval_measures import mse, rmse
from sklearn.metrics import r2_score, mean_absolute_error
import numpy as np
from sklearn.model_selection import cross_val_predict

from app.services.dataframe_service import get_technical_data_as_dataframe, get_data_as_dataframe

In [12]:
today = datetime.today().strftime("%Y-%m-%d")

top_50_stock = ["AEFES", "AKBNK", "AKSA", "AKSEN", "ALARK", "ARCLK", "ASELS", "BERA", "BIMAS", "DOHOL",
                "EGEEN", "EKGYO", "ENJSA", "ENKAI", "EREGL", "FROTO", "GARAN", "GESAN", "GUBRF",
                "HALKB", "HEKTS", "ISCTR", "ISGYO", "KCHOL", "KONTR", "KORDS", "KOZAA", "KOZAA",
                "KOZAL", "KRDMD", "MGROS", "ODAS", "OYAKC", "PETKM", "PGSUS", "SAHOL", "SASA",
                "SISE", "SMRTG", "SOKM", "TAVHL", "TCELL", "THYAO", "TKFEN", "TOASO", "TSKB",
                "TTKOM", "TUPRS", "VAKBN", "VESTL", "YKBNK"]

top_1_stock = ["THYAO"]

for stock_symbol in top_1_stock:
    data_technical_info = get_technical_data_as_dataframe(schema_name="technical", table_name=stock_symbol)
    data_basic_info = get_data_as_dataframe(schema_name="public", table_name=stock_symbol)

In [13]:
data_df = data_technical_info.merge(data_basic_info, on="date", how="left")
data_df["percentage"] = data_df["percentage"].shift(-1)
data_df = data_df.dropna()

In [14]:
selected_columns = ["date", "RSI_14", "STOCH_Interpretation", "STOCHRSI_Interpretation", "MACD_Interpretation", "ADX_Return", "WILLR_14", 
                    "CCI_14_0.015", "Percentage_ATR", "HL_Ratio_14",
                    "UO_7_14_28", "ROC_14", "Bull_Power_13", "Bear_Power_13", "SMA_5_Interpretation", "SMA_10_Interpretation",
                    "SMA_20_Interpretation", "SMA_50_Interpretation",
                    "EMA_5_Interpretation", "EMA_10_Interpretation", "EMA_20_Interpretation", "EMA_50_Interpretation",
                    "Classic_Pivot_Interpretation", "Fibonacci_Pivot_Interpretation",
                    "Camarilla_Pivot_Interpretation", "Woodie_Pivot_Interpretation", "Demark_Pivot_Interpretation", "percentage"]

data = data_df[selected_columns]


In [15]:
categoric_interpretation_columns = ["STOCH_Interpretation", "STOCHRSI_Interpretation", "MACD_Interpretation"]
categoric_mapping = {
    "Güçlü Sat": 1,
    "Sat": 2,
    "Nötr": 3,
    "Al": 4,
    "Güçlü Al": 5
}
for column in categoric_interpretation_columns:
    data.loc[:, column] = data[column].map(categoric_mapping)
    data[column] = data[column].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column] = data[column].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column] = data[column].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column] = data[column].astype(int)


In [17]:
def model_evaluation(models, X, y):
    for i, v in models:
        #prediction = cross_val_predict(v, X, y, cv=2)
        prediction = v.predict(X)
        r2 = r2_score(y, prediction)
        n = X.shape[0]  # örnek sayısı
        p = X.shape[1]  # özellik sayısı
        adjusted_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)

        print(f"----------------------------- {i} Model Evaluation -----------------------------")
        print("R-Kare değeri                     : {}".format(r2))
        print("Adj. R-Kare değeri                : {}".format(adjusted_r2))
        print("Ortalama Mutlak Hata (MAE)        : {}".format(mean_absolute_error(y, prediction)))
        print("Ortalama Kare Hata (MSE)          : {}".format(mse(y, prediction)))
        print("Kök Ortalama Kare Hata (RMSE)     : {}".format(rmse(y, prediction)))

In [18]:
from sklearn.model_selection import train_test_split


y = data["percentage"]
X = data.drop(["date", "percentage"], axis=1)

#X_train, X_test, y_train, y_test = X.iloc[:-100, :], X.iloc[-100:, :], y[:-100], y[-100:]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
xgb_clf = xgb.XGBRegressor()
xgb_clf.fit(X_train, y_train)
models = []
models.append(('XGBOOST', xgb_clf))
model_evaluation(models, X_test, y_test)

predictions = xgb_clf.predict(X)
data["predictions"] = predictions

----------------------------- XGBOOST Model Evaluation -----------------------------
R-Kare değeri                     : -0.053158533042521716
Adj. R-Kare değeri                : -0.39120942019197313
Ortalama Mutlak Hata (MAE)        : 2.736998784032449
Ortalama Kare Hata (MSE)          : 13.784827351251717
Kök Ortalama Kare Hata (RMSE)     : 3.712792392694711


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["predictions"] = predictions


In [None]:
def strategy(data):
    BUDGET = 10000
    stocks = 0
    # İşaret değişimlerini ve ilgili tarihleri saklamak için boş bir liste oluşturma
    sign_changes = []
    data = data.reset_index()
    # İlk satırdan başlayarak her satırı kontrol etme
    for i in range(1, len(data)):
        try:
            current_pred = data.loc[i, 'predictions']
            previous_pred = data.loc[i - 1, 'predictions']
            
            # İşaret değişimi kontrolü
            if (current_pred > 0 and previous_pred < 0) or (current_pred < 0 and previous_pred > 0):
                date = data.loc[i, 'date']
                change_type = 'Negatiften Pozitife' if current_pred > 0 else 'Pozitiften Negatife'
                real_date = date + timedelta(days=1)
                open_value = data.loc[i+1, 'open']

                if change_type == 'Negatiften Pozitife':
                    stocks_to_buy = BUDGET // open_value
                    stocks += stocks_to_buy
                    BUDGET -= stocks_to_buy * open_value

                elif change_type == 'Pozitiften Negatife':
                    BUDGET += stocks * open_value
                    stocks = 0

                sign_changes.append((date, change_type, real_date, open_value, BUDGET, stocks))
        except:
            pass

    # İşaret değişimlerini ve ilgili tarihleri yazdırma
    for change in sign_changes:
        print(f"Tarih: {change[0]}, İşaret Değişimi: {change[1]}, Alınması yada satılması gereken tarih(Sabah): {change[2]}, Açılış Değeri: {change[3]}, Bütçe: {change[4]}, Hisseler: {change[5]}")

    return sign_changes[-1][4] + sign_changes[-1][5] * sign_changes[-1][3]


predictions = xgb_clf.predict(X)
data_df["predictions"] = predictions

BUDGET = strategy(data_df.iloc[-100:, :])
print(BUDGET)


In [47]:
observation = data[["date", "percentage", "predictions"]].iloc[-100:,:]

In [19]:
import pickle

filename = './app/ml_models/test.pkl'
pickle.dump(xgb_clf, open(filename, 'wb'))

In [20]:
for i in X.columns.values:
    print(i, end=", ")

RSI_14, STOCH_Interpretation, STOCHRSI_Interpretation, MACD_Interpretation, ADX_Return, WILLR_14, CCI_14_0.015, Percentage_ATR, HL_Ratio_14, UO_7_14_28, ROC_14, Bull_Power_13, Bear_Power_13, SMA_5_Interpretation, SMA_10_Interpretation, SMA_20_Interpretation, SMA_50_Interpretation, EMA_5_Interpretation, EMA_10_Interpretation, EMA_20_Interpretation, EMA_50_Interpretation, Classic_Pivot_Interpretation, Fibonacci_Pivot_Interpretation, Camarilla_Pivot_Interpretation, Woodie_Pivot_Interpretation, Demark_Pivot_Interpretation, 