In [2]:
pip install -q category_encoders pandas catboost scikit-learn

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.7/85.7 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [20]:
from category_encoders.cat_boost import CatBoostEncoder
import pandas as pd
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split
import numpy as np

In [11]:
df_with_ta = pd.read_csv("stocks_with_indicators_20.csv").dropna()

In [12]:
df_with_ta.Date = pd.to_datetime(df_with_ta.Date)

In [13]:
df_with_ta.isna().sum()

Unnamed: 0,0
Date,0
Open,0
High,0
Low,0
Close,0
Volume,0
symbol,0
future_price,0
annual_return,0
SMA_50,0


In [29]:
split_date = 2023
target_col='annual_return'

df_train = df_with_ta[df_with_ta.Date.dt.year < split_date]
df_test = df_with_ta[df_with_ta.Date.dt.year >= split_date]

X_train = df_train.drop(columns=[target_col, 'Date', 'future_price'])
y_train = df_train[target_col]

X_test = df_test.drop(columns=[target_col, 'Date', 'future_price'])
y_test = df_test[target_col]

encoder = CatBoostEncoder(cols=['symbol'])

X_train_encoded = encoder.fit_transform(X_train, y_train)
X_test_encoded = encoder.transform(X_test)

X_train_encoded, X_val_encoded, y_train, y_val = train_test_split(
    X_train_encoded, y_train, test_size=0.2, random_state=42
)

In [33]:
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split

def select_features_with_catboost(df, target_col='annual_return', top_n=15):
    """Выбирает топ-N наиболее важных фичей"""
    # Подготовка данных
    X = df.drop(columns=[target_col, 'Date', 'symbol', 'future_price'])
    y = df[target_col]

    # Удаляем строки с пропусками
    X = X.dropna(axis=1)
    X = X.fillna(0)

    # Разделяем данные
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Обучаем CatBoost
    model = CatBoostRegressor(
        iterations=500,
        learning_rate=0.1,
        depth=6,
        verbose=0
    )
    model.fit(X_train, y_train, eval_set=(X_test, y_test))

    # Получаем важность фичей
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)

    return feature_importance.head(top_n)['feature'].tolist()

top_15_features = select_features_with_catboost(df_train, target_col='annual_return', top_n=15)
top_15_features

['EMA_200',
 'BBL_20_2.0',
 'Volume',
 'ADX_50',
 'BBB_20_2.0',
 'DMP_50',
 'ATRr_14',
 'SMA_50',
 'BBM_20_2.0',
 'Low',
 'BBU_20_2.0',
 'Close',
 'MACD_signal_',
 'Open',
 'DMN_50']

In [31]:
from sklearn.metrics import mean_squared_error, r2_score
def catboost_train(X_train, X_val, y_train, y_val, target_col='annual_return'):

    # Обучаем CatBoost
    model = CatBoostRegressor(
        iterations=500,
        learning_rate=0.1,
        depth=6,
        verbose=0
    )
    model.fit(X_train, y_train,
              eval_set=(X_val, y_val))

    y_train_predict = model.predict(X_train)
    y_val_predict = model.predict(X_val)

    rmse_train = np.sqrt(mean_squared_error(y_train, y_train_predict))
    rmse_val = np.sqrt(mean_squared_error(y_val, y_val_predict))

    r2_train = r2_score(y_train, y_train_predict)
    r2_val = r2_score(y_val, y_val_predict)

    print(f"MSE Train: {rmse_train}, MSE Test: {rmse_val}")
    print(f"R2 Train: {r2_train}, R2 Test: {r2_val}")

    return model

model = catboost_train(X_train_encoded , X_val_encoded, y_train, y_val,
                                           target_col='annual_return')

MSE Train: 0.1009728920563348, MSE Test: 0.11403130484477353
R2 Train: 0.9730292059270236, R2 Test: 0.9671843529742955


In [24]:
pip install -q prophet

In [None]:
a

In [32]:
import prophet
def prepare_prophet_data_with_features(df, important_features):
    """Подготавливает данные для Prophet с отобранными фичами"""
    # Базовые колонки
    prophet_data = df[['Date', 'symbol', 'annual_return']].copy()
    prophet_data = prophet_data.rename(columns={
        'Date': 'ds',
        'annual_return': 'y'
    })

    # Добавляем отобранные фичи
    for feature in important_features:
        prophet_data[feature] = df[feature]

    return prophet_data.dropna()

def train_prophet_with_features(data, important_features):
    """Обучает Prophet с отобранными фичами"""
    models = {}

    for symbol in data['symbol'].unique():
        symbol_data = data[data['symbol'] == symbol].copy()

        model = Prophet(
            yearly_seasonality=True,
            changepoint_prior_scale=0.05,
            seasonality_mode='additive'
        )

        # Добавляем регрессоры
        for feature in important_features:
            model.add_regressor(feature)

        model.fit(symbol_data)
        models[symbol] = model

    return models

In [34]:
%%time
from prophet import Prophet
prophet_data = prepare_prophet_data_with_features(df_train, top_15_features)
prophet_model = train_prophet_with_features(prophet_data, top_15_features)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpifl71tbb/d4rww23p.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpifl71tbb/few5f5z6.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=14729', 'data', 'file=/tmp/tmpifl71tbb/d4rww23p.json', 'init=/tmp/tmpifl71tbb/few5f5z6.json', 'output', 'file=/tmp/tmpifl71tbb/prophet_modelyrht7guy/prophet_model-20250415145515.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:55:15 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:55:16 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpif

CPU times: user 6 s, sys: 269 ms, total: 6.26 s
Wall time: 29.1 s


In [35]:
prophet_model

{'AAPL': <prophet.forecaster.Prophet at 0x7a8df9002410>,
 'AMZN': <prophet.forecaster.Prophet at 0x7a8df9578e10>,
 'AVGO': <prophet.forecaster.Prophet at 0x7a8e21cb2010>,
 'BRK-B': <prophet.forecaster.Prophet at 0x7a8df9582490>,
 'COST': <prophet.forecaster.Prophet at 0x7a8e1f70a410>,
 'GOOG': <prophet.forecaster.Prophet at 0x7a8df90ce5d0>,
 'GOOGL': <prophet.forecaster.Prophet at 0x7a8e1f741150>,
 'JPM': <prophet.forecaster.Prophet at 0x7a8e1f681390>,
 'LLY': <prophet.forecaster.Prophet at 0x7a8df953aa50>,
 'MA': <prophet.forecaster.Prophet at 0x7a8df89b9510>,
 'META': <prophet.forecaster.Prophet at 0x7a8e21a1dd50>,
 'MSFT': <prophet.forecaster.Prophet at 0x7a8dfa5377d0>,
 'NFLX': <prophet.forecaster.Prophet at 0x7a8e21d25210>,
 'NVDA': <prophet.forecaster.Prophet at 0x7a8e1f808e50>,
 'PG': <prophet.forecaster.Prophet at 0x7a8df88851d0>,
 'TSLA': <prophet.forecaster.Prophet at 0x7a8df8533f50>,
 'UNH': <prophet.forecaster.Prophet at 0x7a8df8827710>,
 'V': <prophet.forecaster.Prophet at

In [37]:
pip install -q pyportfolioopt

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/62.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/220.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.1/220.1 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25h