# Importing Modules

In [None]:
import requests
import datetime
import numpy as np
import pandas as pd
import tti.indicators as ti  # Импорт всех индикаторов
import inspect
import warnings
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.metrics import make_scorer, root_mean_squared_error, mean_absolute_percentage_error, roc_auc_score
from scipy.stats import randint, uniform
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor
import tensorflow as tf
from sklearn.tree import DecisionTreeRegressor
from tensorflow.keras.optimizers import AdamW # type: ignore
from sklearn.model_selection import TimeSeriesSplit

<hr>

# Data

In [None]:
from dotenv import load_dotenv
import os

# Load variables from the .env file so the API key is not hard-coded here.
load_dotenv()
api_key = os.getenv("API_KEY")


url = 'https://api.coingecko.com/api/v3/coins/bitcoin/market_chart'

headers = {
    "accept": "application/json",
    "x-cg-api-key": api_key,
}

# Request parameters
params = {
    'vs_currency': 'usd',  # Currency the price is quoted in (e.g. USD)
    'days': '365',         # Data for the last year
    'interval': 'daily'    # One data point per day
}
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, params=params, headers=headers, timeout=30)
# Fail here on an HTTP error instead of later, when the error payload
# turns out not to contain the expected "prices" key.
response.raise_for_status()
data = response.json()


In [None]:
# Parameters of the OHLC request
coin_id = 'bitcoin'
vs_currency = 'usd'
days = '365'

# Request the OHLC candles; `headers` is the dict built for the
# market_chart request and is reused as-is.
url_ohlc = f'https://api.coingecko.com/api/v3/coins/{coin_id}/ohlc?vs_currency={vs_currency}&days={days}'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url_ohlc, headers=headers, timeout=30)
# Surface HTTP errors immediately rather than parsing an error payload.
response.raise_for_status()
ohlc_data = response.json()
# NOTE(review): the sample output for days=365 shows candles spaced four
# days apart (timestamps differ by 345,600,000 ms), not daily candles.
ohlc_data[:5]

[[1697932800000, 29677.0, 30253.0, 29484.0, 29920.0],
 [1698278400000, 29920.0, 35066.0, 29741.0, 34472.0],
 [1698624000000, 34498.0, 34819.0, 33450.0, 34556.0],
 [1698969600000, 34525.0, 35878.0, 34108.0, 34924.0],
 [1699315200000, 34937.0, 35366.0, 34123.0, 35031.0]]

In [None]:
# Column names, in the order the values appear in each OHLC row.
columns = ['timestamp', 'open', 'high', 'low', 'close']

# Build the frame and convert the epoch-millisecond timestamps to
# datetimes in a single expression.
df_ohlc = pd.DataFrame(ohlc_data, columns=columns).assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'], unit='ms')
)
df_ohlc.head()

Unnamed: 0,timestamp,open,high,low,close
0,2023-10-22,29677.0,30253.0,29484.0,29920.0
1,2023-10-26,29920.0,35066.0,29741.0,34472.0
2,2023-10-30,34498.0,34819.0,33450.0,34556.0
3,2023-11-03,34525.0,35878.0,34108.0,34924.0
4,2023-11-07,34937.0,35366.0,34123.0,35031.0


In [None]:
# Flatten the market_chart payload into rows of
# [date, price, market_cap, total_volume].
#
# The three series are walked in lockstep (zip stops at the shortest one);
# each entry is indexed as [timestamp_ms, value], and the timestamp is taken
# from the "prices" entry.
formatted_data = [
    [
        # Convert epoch milliseconds to a calendar date in UTC. Without an
        # explicit tz, fromtimestamp() uses the machine's local timezone,
        # which can shift the date by a day relative to df_ohlc (built with
        # pd.to_datetime(unit='ms'), i.e. UTC) and break the later merge.
        datetime.datetime.fromtimestamp(
            price_row[0] / 1000, tz=datetime.timezone.utc
        ).strftime('%Y-%m-%d'),
        price_row[1],
        cap_row[1],
        volume_row[1],
    ]
    for price_row, cap_row, volume_row in zip(
        data["prices"], data["market_caps"], data["total_volumes"]
    )
]

# Build the DataFrame with columns "timestamp", "prices", "market_caps",
# "total_volumes" and parse the date strings into datetimes.
df = pd.DataFrame(formatted_data, columns=['timestamp', 'prices', 'market_caps', 'total_volumes'])
df['timestamp'] = pd.to_datetime(df['timestamp'])
# Keep only the first and last columns: timestamp and total_volumes.
df = df.iloc[:, [0, -1]]
# Show the DataFrame
df.head()

Unnamed: 0,timestamp,total_volumes
0,2023-10-22,7452489000.0
1,2023-10-23,11330300000.0
2,2023-10-24,35558860000.0
3,2023-10-25,46464710000.0
4,2023-10-26,23840460000.0


In [None]:
# Inner-join the OHLC candles with the first 365 volume rows on the shared
# timestamp column.
df_final = df_ohlc.merge(df.iloc[:365], how='inner', on='timestamp')
# Rename to the date/volume naming, index by date, newest row first.
df_final = df_final.rename(columns={'timestamp':'date', 'total_volumes':'volume'})
df_final = df_final.set_index('date')
df_final = df_final.sort_index(ascending=False)
# Keep an independent copy of the merged frame.
df_final_copy = df_final.copy()
df_final_copy.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-10-20,67103.0,68970.0,66739.0,68389.0,14110890000.0
2024-10-16,62465.0,67803.0,62060.0,66962.0,51797930000.0
2024-10-12,62211.0,63362.0,58935.0,62392.0,32008660000.0
2024-10-08,60749.0,64500.0,60470.0,62287.0,33878880000.0
2024-10-04,65603.0,65603.0,59954.0,60728.0,37711140000.0


<hr>

# Feature engineering

## To-do
- try log 
- try data sampling 
- try forecast target

In [None]:
# Silence FutureWarning output; this filter stays active for the rest of the
# session, not only for this cell.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Start from a copy of the merged OHLC + volume data; indicator columns are
# joined onto it one indicator at a time.
combined_df = df_final.copy()

# Every class exposed by tti.indicators is treated as an indicator candidate.
indicator_classes = [cls for _, cls in inspect.getmembers(ti, inspect.isclass)]

# Indicators that could not be computed or joined, keyed by class name, with
# the reason kept for later inspection.
failed_indicators = {}

for indicator_class in indicator_classes:
    try:
        # Instantiate the indicator on the merged input frame.
        indicator = indicator_class(input_data=df_final)

        # Retrieve the calculated indicator values.
        indicator_data = indicator.getTiData()

        # Attach them as new columns, aligned on the index (left join).
        combined_df = combined_df.join(indicator_data, how='left')
    except Exception as exc:
        # Best effort: one failing indicator must not abort the others, but
        # the failure is recorded instead of being silently discarded.
        failed_indicators[indicator_class.__name__] = repr(exc)

if failed_indicators:
    # One summary warning; details are available in `failed_indicators`.
    warnings.warn(
        f"Skipped {len(failed_indicators)} indicator(s): "
        f"{', '.join(sorted(failed_indicators))}",
        RuntimeWarning,
    )

# Show the combined DataFrame
combined_df.head()

Unnamed: 0_level_0,open,high,low,close,volume,adl,middle_band,upper_band,lower_band,cmf,...,tp,uosc,vhf,vch,vosc,vrc,wc,ws,wad,wr
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-10-16,62465.0,67803.0,62060.0,66962.0,51797930000.0,361902942122,61053.7,66929.4047,55177.9953,0.186,...,65608.3333,59.0712,0.4556,-11.2464,8234233000.0,98.5535,65946.75,62896.2072,4902.0,-9.4835
2024-10-12,62211.0,63362.0,58935.0,62392.0,32008660000.0,325275499076,61108.35,67218.5053,54998.1947,-0.112,...,61563.0,55.9873,0.5272,-19.7979,4416763000.0,147.3988,61770.25,61879.759,3457.0,-53.9312
2024-10-08,60749.0,64500.0,60470.0,62287.0,33878880000.0,307293674868,61285.85,67731.8764,54839.8236,-0.1993,...,62419.0,57.4873,0.3999,-24.7038,11082120000.0,-0.9134,62386.0,61751.6988,1817.0,-57.4512
2024-10-04,65603.0,65603.0,59954.0,60728.0,37711140000.0,310622715799,61506.0,68361.5014,54650.4986,-0.0346,...,62095.0,60.8172,0.4822,-20.0747,554574500.0,18.1436,61753.25,61617.8734,-4936.0,-64.3131
2024-09-30,63146.0,66439.0,62812.0,65664.0,12948710000.0,337999840733,61711.35,68705.9634,54716.7366,0.4622,...,64971.6667,65.5435,0.5498,-23.6821,-4098893000.0,-63.6838,65144.75,61840.3418,2852.0,-7.1323


In [None]:
# Sort chronologically (oldest first) BEFORE deriving anything that depends
# on row order. sort_index() returns a new frame, so its result must be
# assigned; otherwise the frame stays newest-first and both columns below
# are computed backwards in time.
combined_df = combined_df.sort_index(ascending=True)

# Relative change of the close versus the previous (older) row.
combined_df['price_change'] = combined_df['close'].pct_change()

# Prediction target: the close of the next (newer) row.
combined_df['close_target'] = combined_df['close'].shift(-1)

# The newest row has no following close, so its target is NaN; drop it.
combined_df = combined_df.iloc[:-1]
combined_df.head()

Unnamed: 0_level_0,open,high,low,close,volume,adl,middle_band,upper_band,lower_band,cmf,...,vhf,vch,vosc,vrc,wc,ws,wad,wr,price_change,close_target
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-10-16,62465.0,67803.0,62060.0,66962.0,51797930000.0,361902942122,61053.7,66929.4047,55177.9953,0.186,...,0.4556,-11.2464,8234233000.0,98.5535,65946.75,62896.2072,4902.0,-9.4835,,62392.0
2024-10-12,62211.0,63362.0,58935.0,62392.0,32008660000.0,325275499076,61108.35,67218.5053,54998.1947,-0.112,...,0.5272,-19.7979,4416763000.0,147.3988,61770.25,61879.759,3457.0,-53.9312,-0.068248,62287.0
2024-10-08,60749.0,64500.0,60470.0,62287.0,33878880000.0,307293674868,61285.85,67731.8764,54839.8236,-0.1993,...,0.3999,-24.7038,11082120000.0,-0.9134,62386.0,61751.6988,1817.0,-57.4512,-0.001683,60728.0
2024-10-04,65603.0,65603.0,59954.0,60728.0,37711140000.0,310622715799,61506.0,68361.5014,54650.4986,-0.0346,...,0.4822,-20.0747,554574500.0,18.1436,61753.25,61617.8734,-4936.0,-64.3131,-0.025029,65664.0
2024-09-30,63146.0,66439.0,62812.0,65664.0,12948710000.0,337999840733,61711.35,68705.9634,54716.7366,0.4622,...,0.5498,-23.6821,-4098893000.0,-63.6838,65144.75,61840.3418,2852.0,-7.1323,0.08128,63152.0
