In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from datetime import datetime, timedelta

def get_ticker():
    """Prompt for a stock ticker symbol and return it upper-cased.

    Surrounding whitespace is stripped from the answer; an empty answer
    selects the default symbol ``'AAPL'``.
    """
    answer = input("Bitte geben Sie das Tickersymbol der Aktie ein (Standard ist AAPL): ")
    symbol = answer.strip().upper()
    return symbol if symbol else 'AAPL'

def get_prediction_date():
    """Ask for a date in ``YYYY-MM-DD`` form, repeating until one parses.

    Returns:
        The entered date as a ``datetime`` object.
    """
    prompt = "Bitte geben Sie das Datum für die Vorhersage im Format 'YYYY-MM-DD' ein: "
    parsed = None
    while parsed is None:
        raw = input(prompt)
        try:
            parsed = datetime.strptime(raw, '%Y-%m-%d')
        except ValueError:
            # Unparseable answer: tell the user and ask again.
            print("Ungültiges Datum. Bitte geben Sie das Datum im Format 'YYYY-MM-DD' ein.")
    return parsed

def predict_for_date(prediction_date, data, model, features):
    """Classify *prediction_date* from the most recent row before it.

    The model input is the last row of ``data`` that falls inside the ten
    calendar days ending the day before ``prediction_date``; only the
    ``features`` columns of that row are passed to ``model.predict``.

    Returns:
        ``(prediction_date, price, next_day_price, label)`` where

        * ``price`` is ``'Adj Close'`` on ``prediction_date``, or ``None``
          when that date is not in ``data.index``;
        * ``next_day_price`` is the same lookup for the following calendar
          day;
        * ``label`` is ``'Kauf'`` for class 1, ``'Verkauf'`` for class -1
          and ``'Halten'`` otherwise.

        When the look-back window contains no rows, all four entries are
        ``None``.
    """
    one_day = timedelta(days=1)
    window = data.loc[prediction_date - timedelta(days=10):prediction_date - one_day]
    if window.empty:
        return None, None, None, None

    def adj_close_on(day):
        # Dates absent from the index yield None instead of raising.
        return data.loc[day]['Adj Close'] if day in data.index else None

    predicted_class = model.predict(window[features].tail(1))[0]
    if predicted_class == 1:
        label = 'Kauf'
    elif predicted_class == -1:
        label = 'Verkauf'
    else:
        label = 'Halten'

    return (
        prediction_date,
        adj_close_on(prediction_date),
        adj_close_on(prediction_date + one_day),
        label,
    )

def predict_for_date_raw(prediction_date, data, model, scaler):
    """Classify *prediction_date* from the latest raw OHLCV row before it.

    The last row of ``data`` within the ten calendar days ending the day
    before ``prediction_date`` is reduced to the columns ``Open``, ``High``,
    ``Low``, ``Close`` and ``Volume``, passed through ``scaler.transform``
    and then through ``model.predict``.

    Returns:
        ``(prediction_date, price, next_day_price, label)``. Both prices are
        ``'Adj Close'`` look-ups (on ``prediction_date`` and on the following
        calendar day) and are ``None`` when the respective date is missing
        from ``data.index``. ``label`` is ``'Kauf'`` for class 1,
        ``'Verkauf'`` for class -1, otherwise ``'Halten'``. If the look-back
        window is empty, four ``None`` values are returned.
    """
    raw_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    window_start = prediction_date - timedelta(days=10)
    window_end = prediction_date - timedelta(days=1)

    recent_rows = data.loc[window_start:window_end]
    if recent_rows.empty:
        return None, None, None, None

    latest_scaled = scaler.transform(recent_rows[raw_columns].tail(1))
    predicted_class = model.predict(latest_scaled)[0]

    # Price look-ups: a date that is not part of the index gives None.
    prices = []
    for day in (prediction_date, prediction_date + timedelta(days=1)):
        if day in data.index:
            prices.append(data.loc[day]['Adj Close'])
        else:
            prices.append(None)
    price_today, price_next = prices

    if predicted_class == 1:
        label = 'Kauf'
    elif predicted_class == -1:
        label = 'Verkauf'
    else:
        label = 'Halten'

    return prediction_date, price_today, price_next, label

def train_and_predict_raw_data(data, verbose=False, threshold=2):
    """Train and evaluate a logistic-regression model on raw OHLCV columns.

    Rows are labelled from their ``'PriceChange'`` value: 1 above
    ``threshold``, -1 below ``-threshold``, 0 otherwise. The features are the
    unmodified ``Open``/``High``/``Low``/``Close``/``Volume`` columns,
    standardised with a ``StandardScaler`` fitted on the training rows only.

    The train/test split uses the module-level names ``train_start``,
    ``train_end`` and ``test_start``; these must be assigned (``main`` does
    so) before this function is called.

    Args:
        data: DataFrame with the five raw columns and ``'PriceChange'``.
            It is not modified; labelling happens on a copy.
        verbose: When true, print a header, the classification report and
            the accuracy for the test rows. When false, nothing is printed.
        threshold: Absolute ``'PriceChange'`` value above which a row is
            labelled as a buy/sell signal.

    Returns:
        ``(model_raw, scaler)``: the fitted ``LogisticRegression`` and the
        fitted ``StandardScaler``.
    """
    raw_features = ['Open', 'High', 'Low', 'Close', 'Volume']

    # Label a copy so the caller's frame neither gains a 'Signal' column nor
    # loses rows through dropna().
    labelled = data.copy()
    labelled['Signal'] = 0
    labelled.loc[labelled['PriceChange'] > threshold, 'Signal'] = 1
    labelled.loc[labelled['PriceChange'] < -threshold, 'Signal'] = -1
    labelled = labelled.dropna()

    # NOTE(review): 'Signal' is derived from the same row's 'PriceChange', so
    # the model is fitted to classify a row's move from that row's own OHLCV
    # values - confirm this matches how the predictions are consumed.
    train_data = labelled.loc[train_start:train_end]
    test_data = labelled.loc[test_start:]

    X_train_raw = train_data[raw_features]
    y_train_raw = train_data['Signal']
    X_test_raw = test_data[raw_features]
    y_test_raw = test_data['Signal']

    # Fit the scaler on the training rows only, then reuse it for the test rows.
    scaler = StandardScaler()
    X_train_raw = scaler.fit_transform(X_train_raw)
    X_test_raw = scaler.transform(X_test_raw)

    model_raw = LogisticRegression(random_state=42)
    model_raw.fit(X_train_raw, y_train_raw)

    y_pred_raw = model_raw.predict(X_test_raw)

    # The whole report is gated by `verbose`, so the metrics never appear
    # without the header that says which model they belong to.
    if verbose:
        print("Ergebnisse für Rohdaten-Modell:")
        print(classification_report(y_test_raw, y_pred_raw))
        print("Accuracy:", accuracy_score(y_test_raw, y_pred_raw))

    return model_raw, scaler

def simulate_trading(df, initial_balance=1000, verbose=False):
    """Replay buy/sell signals with an all-in / all-out strategy.

    A ``'Kauf'`` signal converts the whole cash balance into shares at that
    row's price; a ``'Verkauf'`` signal converts all shares back into cash.
    Any other signal value leaves the position unchanged. A signal equal to
    the previously executed action is ignored when it arrives at most one
    day after that action.

    Args:
        df: DataFrame with the columns ``'Datum'``, ``'Preis'`` and a signal
            column, ``'Vorhersage_RF'`` if present, otherwise
            ``'Vorhersage_Raw'``.
        initial_balance: Cash available before the first row.
        verbose: When true, print every executed trade and the final value.

    Returns:
        ``(balance, end_value)``: the cash left after the last row, and that
        cash plus any open position valued at the last row's price.
    """
    balance = initial_balance
    holdings = 0
    last_action = None
    last_action_date = None
    last_price = None

    if len(df) > 0:
        # The signal column is the same for every row, so resolve it once.
        signal_column = 'Vorhersage_RF' if 'Vorhersage_RF' in df.columns else 'Vorhersage_Raw'
        rows = zip(df['Datum'], df['Preis'], df[signal_column])
    else:
        rows = ()

    for date, price, action in rows:
        last_price = price

        # True when the same action was already executed no more than one
        # day earlier; such a repeated signal is skipped.
        is_recent_repeat = (
            last_action == action
            and last_action_date is not None
            and (date - last_action_date).days <= 1
        )

        if action == 'Kauf' and not is_recent_repeat:
            if balance > 0:
                holdings = balance / price
                balance = 0
                last_action = 'Kauf'
                last_action_date = date
                if verbose:
                    print(f"{date}: Kauf für {price:.2f}. Guthaben: {balance:.2f}, Bestände: {holdings:.2f} Aktien")

        elif action == 'Verkauf' and not is_recent_repeat:
            if holdings > 0:
                balance = holdings * price
                holdings = 0
                last_action = 'Verkauf'
                last_action_date = date
                if verbose:
                    print(f"{date}: Verkauf für {price:.2f}. Guthaben: {balance:.2f}, Bestände: {holdings:.2f} Aktien")

    # Value an open position at the last price seen; with no open position
    # (including an empty frame) the end value is just the cash balance.
    end_value = balance + holdings * last_price if holdings > 0 else balance
    if verbose:
        print(f"Endwert der Simulation: {end_value:.2f} Euro")

    return balance, end_value

def _add_indicators(data):
    """Add the indicator columns derived from 'Adj Close' in place, then drop NaN rows."""
    adj_close = data['Adj Close']

    data['MA5'] = adj_close.rolling(window=5).mean()
    data['MA10'] = adj_close.rolling(window=10).mean()

    # RSI built from 14-row simple averages of the gains and of the losses.
    delta = adj_close.diff(1)
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    rs = gain.rolling(window=14).mean() / loss.rolling(window=14).mean()
    data['RSI'] = 100 - (100 / (1 + rs))

    # Bands: 20-row mean plus/minus two 20-row standard deviations.
    rolling_20 = adj_close.rolling(window=20)
    band_offset = 2 * rolling_20.std()
    data['MiddleBand'] = rolling_20.mean()
    data['UpperBand'] = data['MiddleBand'] + band_offset
    data['LowerBand'] = data['MiddleBand'] - band_offset

    data['Momentum'] = adj_close - adj_close.shift(4)
    data['ROC'] = adj_close.pct_change(periods=10) * 100
    data['PriceChange'] = adj_close.pct_change() * 100
    data['Volatility'] = adj_close.rolling(window=10).std()

    # The rolling windows leave NaN in the first rows; remove them.
    data.dropna(inplace=True)


def _label_signals(frame, threshold):
    """Set frame['Signal'] to 1 / -1 / 0 from 'PriceChange' relative to +/-threshold."""
    frame['Signal'] = 0
    frame.loc[frame['PriceChange'] > threshold, 'Signal'] = 1
    frame.loc[frame['PriceChange'] < -threshold, 'Signal'] = -1


def _format_price(value):
    """Format a price as '$x.xx', or 'n/v' when no price is available (None)."""
    return "n/v" if value is None else f"${value:.2f}"


def main():
    """Run the full workflow: download, train, back-test 2022, then answer queries.

    Steps:
        1. Ask for a ticker and download daily prices with yfinance.
        2. Compute indicator columns and train a random forest on them; print
           its test-set report.
        3. Train the raw-data logistic regression via
           ``train_and_predict_raw_data``.
        4. Produce one prediction per calendar day of 2022 for both models,
           write them to CSV and run ``simulate_trading`` on each.
        5. Loop interactively, printing both models' prediction for any date
           the user enters.

    Side effects: assigns the module-level names ``train_start``,
    ``train_end`` and ``test_start`` (read by ``train_and_predict_raw_data``),
    writes four CSV files into the working directory and reads from stdin.
    """
    global train_start, train_end, test_start

    ticker = get_ticker()
    start_date = '2010-01-01'
    end_date = '2023-08-21'

    # auto_adjust=False keeps the separate 'Adj Close' column that the rest of
    # this program relies on; recent yfinance releases adjust prices by
    # default and then omit that column.
    data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)
    if data is None or data.empty:
        print(f"Keine Kursdaten für {ticker} gefunden.")
        return
    # Recent yfinance releases return (field, ticker) MultiIndex columns even
    # for a single ticker; reduce them to the plain field names.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    _add_indicators(data)

    train_start = '2010-01-01'
    train_end = '2020-12-31'
    test_start = '2021-01-01'

    train_data = data.loc[train_start:train_end].copy()
    test_data = data.loc[test_start:].copy()

    threshold = 2
    _label_signals(train_data, threshold)
    _label_signals(test_data, threshold)

    features = ['MA5', 'MA10', 'RSI', 'UpperBand', 'LowerBand', 'Momentum', 'ROC', 'Volatility']
    X_train = train_data[features]
    y_train = train_data['Signal']
    X_test = test_data[features]
    y_test = test_data['Signal']

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print("Ergebnisse für RandomForest-Modell:")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}\n")

    # verbose=True so the raw-data report is printed together with its header.
    model_raw, scaler = train_and_predict_raw_data(data, verbose=True)

    # One prediction per calendar day of 2022 for each model; days without a
    # price on the prediction date itself are skipped.
    results_rf = []
    results_raw = []
    first_day = datetime(2022, 1, 1)
    last_day = datetime(2022, 12, 31)
    for offset in range((last_day - first_day).days + 1):
        current_date = first_day + timedelta(days=offset)
        rf_result = predict_for_date(current_date, data, model, features)
        raw_result = predict_for_date_raw(current_date, data, model_raw, scaler)

        if rf_result[0] is not None and rf_result[1] is not None:
            results_rf.append(rf_result)
        if raw_result[0] is not None and raw_result[1] is not None:
            results_raw.append(raw_result)

    # Convert to DataFrames, drop the next-day price column and any rows
    # without a price.
    df_rf = pd.DataFrame(results_rf, columns=['Datum', 'Preis', 'Nächster Tag Preis', 'Vorhersage_RF'])
    df_raw = pd.DataFrame(results_raw, columns=['Datum', 'Preis', 'Nächster Tag Preis', 'Vorhersage_Raw'])
    df_rf = df_rf.drop(columns=['Nächster Tag Preis']).dropna(subset=['Preis'])
    df_raw = df_raw.drop(columns=['Nächster Tag Preis']).dropna(subset=['Preis'])

    df_rf.to_csv('results_rf_2022.csv', index=False)
    df_raw.to_csv('results_raw_2022.csv', index=False)

    print("CSV-Dateien für das Jahr 2022 wurden gespeichert.")

    # Run the trading simulation for both models and store a one-row summary.
    print("Simulation für technischer Indikator:")
    _, end_balance_rf = simulate_trading(df_rf, verbose=False)
    print(f"Simulation technischer Indikator: Start: 1000€, Ende: {end_balance_rf:.2f}€")
    pd.DataFrame([{'Start': 1000, 'Ende': end_balance_rf}]).to_csv('simulation_rf_2022_summary.csv', index=False)
    print("Auswertung wurde im Datei simulation_rf_2022_summary.csv aufgezeichnet")
    print("\nSimulation für Rohdaten-Indikator:")
    _, end_balance_raw = simulate_trading(df_raw, verbose=False)
    print(f"Simulation Rohdaten-Indikator: Start: 1000€, Ende: {end_balance_raw:.2f}€")
    pd.DataFrame([{'Start': 1000, 'Ende': end_balance_raw}]).to_csv('simulation_raw_2022_summary.csv', index=False)
    print("Auswertung wurde im Datei simulation_raw_2022_summary.csv aufgezeichnet")

    while True:
        verbose = input("Möchten Sie detaillierte Daten der letzten 10 Tage anzeigen? (ja/nein): ").strip().lower() == 'ja'
        prediction_date = get_prediction_date()

        if verbose:
            # Show the same look-back window the predictors draw their input row from.
            window = data.loc[prediction_date - timedelta(days=10):prediction_date - timedelta(days=1)]
            if not window.empty:
                print(window)

        # Either price can be None (date or following day missing from the
        # data), so format them through _format_price instead of ':.2f'.
        pred_date, pred_price, next_day_price, pred_label = predict_for_date(prediction_date, data, model, features)
        if pred_date is not None:
            print(f"Vorhersage für {pred_date.strftime('%Y-%m-%d')}: Preis = {_format_price(pred_price)}, Nächster Tag Preis = {_format_price(next_day_price)}, Vorhersage = {pred_label}")
        else:
            print(f"Keine Vorhersage möglich für {prediction_date.strftime('%Y-%m-%d')}.")

        pred_date_raw, pred_price_raw, next_day_price_raw, pred_label_raw = predict_for_date_raw(prediction_date, data, model_raw, scaler)
        if pred_date_raw is not None:
            print(f"Rohdaten-Vorhersage für {pred_date_raw.strftime('%Y-%m-%d')}: Preis = {_format_price(pred_price_raw)}, Nächster Tag Preis = {_format_price(next_day_price_raw)}, Vorhersage = {pred_label_raw}")
        else:
            print(f"Keine Rohdaten-Vorhersage möglich für {prediction_date.strftime('%Y-%m-%d')}.")

        another_day = input("Möchten Sie ein weiteres Datum eingeben? (ja/nein): ").strip().lower()
        if another_day != 'ja':
            print("Programm beendet.")
            break

# Start the main program
main()


Bitte geben Sie das Tickersymbol der Aktie ein (Standard ist AAPL):  


[*********************100%***********************]  1 of 1 completed


Ergebnisse für RandomForest-Modell:
              precision    recall  f1-score   support

          -1       0.29      0.15      0.20        74
           0       0.78      0.92      0.85       505
           1       0.41      0.13      0.20        82

    accuracy                           0.74       661
   macro avg       0.49      0.40      0.41       661
weighted avg       0.68      0.74      0.69       661

Accuracy: 0.74

              precision    recall  f1-score   support

          -1       0.55      0.62      0.58        74
           0       0.86      0.50      0.63       505
           1       0.23      0.80      0.36        82

    accuracy                           0.55       661
   macro avg       0.55      0.64      0.52       661
weighted avg       0.74      0.55      0.59       661

Accuracy: 0.5476550680786687
CSV-Dateien für das Jahr 2022 wurden gespeichert.
Simulation für technischer Indikator:
Simulation technischer Indikator: Start: 1000€, Ende: 738.12€

Simula