In [2]:
import sys

# NOTE(review): absolute, machine-specific Windows path. This cell will not work
# on another machine as written; consider deriving it from a configurable
# location (environment variable or a path relative to the notebook).
src_repo = r"C:\Users\xyz\Desktop\bodi2\awesome0\src"
# Add the directory to the module search path; presumably the project-local
# modules imported below live there — verify.
sys.path.append(src_repo)

import os
import datetime
from BinanceDataDownload import fetch_binance_data
from config import *
from extractLastFeaturesForClfTraining import *
from datalabelling import add_labels
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import LocalOutlierFactor
import joblib
import re

from config import *

def most_recent_file(directory, prefix):
    """Return the path of the newest regular file in ``directory`` whose name starts with ``prefix``.

    "Newest" is decided by ``os.path.getctime``. Note that this is the creation
    time on Windows but the last metadata-change time on Unix-like systems.

    Only regular files are considered: a sub-directory that happens to share
    the prefix is ignored, so the result can always be opened as a file.

    Args:
        directory: Directory to scan (not recursive).
        prefix: File-name prefix to match.

    Returns:
        The joined path ``directory/<name>`` of the newest match, or ``None``
        when no regular file matches.

    Raises:
        OSError: If ``directory`` cannot be listed (e.g. it does not exist).
    """
    matching_paths = (
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.startswith(prefix)
    )
    # Skip directories and other non-file entries that merely share the prefix.
    candidates = [path for path in matching_paths if os.path.isfile(path)]
    if not candidates:
        return None
    return max(candidates, key=os.path.getctime)

def train_and_save_models(clf_dir, symbol='ETHUSDT', timeframe='1h', retrain_time=datetime.timedelta(weeks=1)):
    """Retrain and save the SVC and LOF models when the saved ones are missing or stale.

    The newest ``signal_clf_*`` and ``novelty_detection_*`` files in ``clf_dir``
    are inspected. Retraining happens when either file is missing, when its
    file name carries no parseable ``YYYYMMDD_HHMMSS`` timestamp (its age is
    unknown, so it is treated as stale), or when that timestamp is older than
    ``retrain_time``. Otherwise the function prints a message and returns.

    Args:
        clf_dir: Directory holding the saved models; new models are written here.
        symbol: Symbol passed to ``fetch_binance_data``.
        timeframe: Timeframe passed to ``fetch_binance_data``.
        retrain_time: Maximum age (``datetime.timedelta``) of a saved model
            before it is considered stale.

    Returns:
        None. New models are written to ``clf_dir`` as
        ``signal_clf_<timestamp>.pkl`` and ``novelty_detection_<timestamp>.pkl``.
    """
    recent_svc = most_recent_file(clf_dir, "signal_clf_")
    recent_lof = most_recent_file(clf_dir, "novelty_detection_")

    retrain = False
    now = datetime.datetime.now()

    for model in [recent_svc, recent_lof]:
        if not model:
            retrain = True
            break

        # Look for the timestamp in the file name only: a date-like directory
        # name in the path must not be mistaken for the model's training time.
        date_str = re.search(r'(\d{4}\d{2}\d{2}_\d{2}\d{2}\d{2})', os.path.basename(model))
        recent_date = None
        if date_str:
            try:
                recent_date = datetime.datetime.strptime(date_str.group(1), '%Y%m%d_%H%M%S')
            except ValueError:
                # Digits matched the pattern but do not form a valid date/time.
                recent_date = None

        # Unknown age counts as stale; otherwise check whether retrain_time
        # has elapsed since the last training.
        if recent_date is None or now - recent_date > retrain_time:
            retrain = True
            break

    if not retrain:
        print("No retraining required!")
        return

    # Download data from Binance.
    # TRAIN_WINDOW_SIZE and feature_lookbacks are presumably provided by the
    # star import from config — verify.
    timerange = TRAIN_WINDOW_SIZE + 2 * max(feature_lookbacks)
    print("timerange", timerange)
    df = fetch_binance_data(symbol=symbol, timeframe=timeframe, timerange=timerange)

    # Extract features and labels.
    df_with_feats = integrate_features_to_df(df)
    df_with_feats_and_labels = add_labels(df_with_feats)

    # Prepare the data: rows without a label cannot be used for training.
    df_with_feats_and_labels = df_with_feats_and_labels.dropna(subset=['label'])

    print("training from", df_with_feats_and_labels.iloc[0].Open_time)
    print("training till", df_with_feats_and_labels.iloc[-1].Open_time)
    # Feature columns are identified by the substring 'Feature' in their name.
    feature_columns = [col for col in df_with_feats_and_labels.columns if 'Feature' in col]
    X = df_with_feats_and_labels[feature_columns]
    y = df_with_feats_and_labels['label']
    # Missing feature values are replaced with 0 before fitting.
    X = X.fillna(0)

    # Train the SVC classifier.
    svc_model = train_model_wf(X, y)

    # Train the LOF model.
    lof_model = train_lof_model(X)

    # Save both models under the same timestamp so they can be matched later.
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    joblib.dump(svc_model, os.path.join(clf_dir, f"signal_clf_{timestamp}.pkl"))
    joblib.dump(lof_model, os.path.join(clf_dir, f"novelty_detection_{timestamp}.pkl"))
    print(f"Models saved with timestamp {timestamp}")


def train_model_wf(X_train, y_train):
    """Fit a scaler + SVC pipeline on the training data and return it.

    The pipeline has two named steps: ``'scaler'`` (a ``StandardScaler``) and
    ``'svc'`` (an ``SVC`` with ``max_iter=1000000``).

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``Pipeline``.
    """
    steps = [
        ('scaler', StandardScaler()),
        ('svc', SVC(max_iter=1000000)),
    ]
    model = Pipeline(steps)
    model.fit(X_train, y_train)
    return model

def train_lof_model(X_train, contamination=0.0025):
    """Fit a ``LocalOutlierFactor`` in novelty-detection mode and return it.

    Args:
        X_train: Training features the detector is fitted on.
        contamination: Forwarded to ``LocalOutlierFactor`` as its
            ``contamination`` setting.

    Returns:
        The fitted ``LocalOutlierFactor`` (created with ``novelty=True``).
    """
    detector = LocalOutlierFactor(contamination=contamination, novelty=True)
    detector.fit(X_train)
    return detector

# Example call:
# A retrain_time of one second means any saved model whose file-name timestamp
# is more than a second old counts as stale, so this effectively forces a
# retraining run.
test_dir = "testdir"
os.makedirs(test_dir, exist_ok=True)
retrain_time = datetime.timedelta(seconds=1)
train_and_save_models(clf_dir=test_dir, symbol='ETHUSDT', timeframe='1h', retrain_time=retrain_time)


       Open time     Open     High      Low    Close      Volume  \
0  1676988000000  1669.88  1684.93  1668.00  1674.47  287928.579   
1  1676991600000  1674.47  1677.39  1656.50  1671.83  327047.798   
2  1676995200000  1671.83  1674.11  1655.00  1661.63  298489.158   
3  1676998800000  1661.62  1677.63  1661.07  1677.17  143808.598   
4  1677002400000  1677.16  1686.80  1672.35  1679.28  228363.095   

      Close time  Quote asset volume  Number of trades  \
0  1676991599999        4.828528e+08          179930.0   
1  1676995199999        5.455276e+08          205255.0   
2  1676998799999        4.967755e+08          173259.0   
3  1677002399999        2.398557e+08          103208.0   
4  1677005999999        3.835786e+08          141153.0   

   Taker buy base asset volume  Taker buy quote asset volume  Ignore  \
0                   151763.602                  2.545146e+08     0.0   
1                   156739.585                  2.614559e+08     0.0   
2                   132042

In [17]:
# Display the current value of test_dir.
test_dir

'testdir'

In [22]:
# NOTE(review): `df` is not assigned at notebook level in the visible cells
# (it only exists as a local inside train_and_save_models). This cell seems to
# depend on kernel state from a cell that is not shown — confirm it survives
# Restart & Run All.
df.to_csv("tst.csv")

In [12]:
def load_recent_models(clf_dir):
    """Load the newest saved SVC and LOF models found in ``clf_dir``.

    Args:
        clf_dir: Directory searched for ``signal_clf_*`` and
            ``novelty_detection_*`` files.

    Returns:
        A ``(svc_model, lof_model)`` tuple, or ``(None, None)`` (after printing
        a message) when either model file cannot be found.
    """
    # Resolve the newest file for each model prefix: SVC first, then LOF.
    prefixes = ("signal_clf_", "novelty_detection_")
    model_paths = [most_recent_file(clf_dir, prefix) for prefix in prefixes]

    # Both files are required; bail out if either lookup came back empty.
    if not all(model_paths):
        print("Could not find models in the specified directory!")
        return None, None

    svc_path, lof_path = model_paths
    # Deserialize with joblib, in the same order the paths were resolved.
    return joblib.load(svc_path), joblib.load(lof_path)

# Load the newest saved models from test_dir; both names are None if either
# model file is missing.
svc_model, lof_model = load_recent_models(test_dir)

In [16]:
# NOTE(review): `X` is not assigned at notebook level in the visible cells
# (it only exists as a local inside train_and_save_models). This cell seems to
# depend on kernel state from a cell that is not shown — confirm.
lof_model.predict(X)



array([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1])

In [20]:
# Disabled alternative: run feature extraction on the Close and Volume columns only.
#df_with_feats = integrate_features_to_df(df[["Close", "Volume"]])
# Cast Close and Volume to float before feature extraction.
# NOTE(review): this overwrites columns of `df` in place, and `df` itself is
# not assigned at notebook level in the visible cells — confirm where it comes from.
df["Close"] = df.Close.astype("float")
df["Volume"] = df.Volume.astype(float)
df_with_feats = integrate_features_to_df(df)

0      39427.524
1      38991.601
2      49293.709
3      28408.226
4      15179.287
         ...    
145    21764.488
146    43936.229
147    25778.054
148    44918.421
149    15865.812
Name: Volume, Length: 150, dtype: float64

In [8]:
# Inspect the column names of df; the trailing .drop(...) call is left disabled.
df.columns#.drop("Opentime")

Index(['Open time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close time',
       'Quote asset volume', 'Number of trades', 'Taker buy base asset volume',
       'Taker buy quote asset volume', 'Ignore', 'Open_time', 'Close_time'],
      dtype='object')