In [29]:
# !pip install feedparser

## Notebooks for data loading

In [1]:

import requests
import pandas as pd
import time
from datetime import datetime, timedelta

import concurrent.futures




Source: **CoinGecko** + **Binance**

In [3]:
#––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
# --- 1. Get the list of top cryptocurrencies from CoinGecko ---
#––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
def get_top_cryptos(count=1000):
    """
    Return USDT pair names for the top `count` coins by market capitalisation.

    Coins are read page by page from the CoinGecko ``/coins/markets``
    endpoint, ordered by descending market cap. Each coin's ticker symbol is
    upper-cased and suffixed with ``'USDT'`` (``'btc'`` -> ``'BTCUSDT'``).
    No check is made here that the resulting pair is actually tradable.

    Parameters
    ----------
    count : int, default 1000
        Number of coins wanted. A value <= 0 yields an empty list without
        issuing any request.

    Returns
    -------
    list of str
        At most `count` pair names in market-cap order. The list is shorter
        when the API runs out of coins or answers with a non-200 status
        (the status is printed and whatever was collected so far is returned).
    """
    url = "https://api.coingecko.com/api/v3/coins/markets"
    per_page_limit = 250
    request_timeout = 30  # seconds; without it a stalled connection blocks forever

    # The server locates a page as (page - 1) * per_page, so the page size
    # must stay the same for every request. Shrinking it on the last page
    # (as a "remaining coins" batch size would) makes the API return an
    # earlier, already-seen slice instead of the next one.
    page_size = min(count, per_page_limit)

    all_coins = []
    page = 1

    while len(all_coins) < count:
        params = {
            'vs_currency': 'usd',
            'order': 'market_cap_desc',
            'per_page': page_size,
            'page': page,
            'sparkline': False
        }
        response = requests.get(url, params=params, timeout=request_timeout)
        if response.status_code != 200:
            print(f"Ошибка получения данных: {response.status_code}")
            break

        data = response.json()
        if not data:
            # Ran past the last available page.
            break

        # Trim the surplus of the final page locally.
        remaining = count - len(all_coins)
        for coin in data[:remaining]:
            all_coins.append(coin['symbol'].upper() + 'USDT')

        page += 1

    return all_coins

#––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
# --- 2. Hourly data from Binance API ---
#––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
def get_binance_historical_data(symbol, interval, start_time, end_time):
    """
    Fetch a single page of candlesticks from the Binance ``/api/v3/klines`` endpoint.

    Parameters
    ----------
    symbol : str
        Trading pair, e.g. ``'BTCUSDT'``.
    interval : str
        Candle interval understood by the API, e.g. ``'1h'``.
    start_time, end_time : int
        Window boundaries, sent as ``startTime`` / ``endTime``. 'Open time'
        is parsed below with ``unit='ms'``, i.e. the API works in epoch
        milliseconds.

    Returns
    -------
    pandas.DataFrame or None
        One row per candle (at most 1000, the ``limit`` sent with the
        request) with the twelve kline columns. 'Open time' is converted to
        ``datetime64``; every other column is left exactly as delivered by
        the API ('Close time' therefore stays a raw millisecond integer).
        ``None`` is returned, after printing a message, when the request
        fails, the status is not 200, or the response holds no candles.
    """
    url = "https://api.binance.com/api/v3/klines"
    request_timeout = 30  # seconds; without it a stalled connection blocks forever
    params = {
        "symbol": symbol,
        "interval": interval,
        "startTime": start_time,
        "endTime": end_time,
        "limit": 1000
    }

    try:
        response = requests.get(url, params=params, timeout=request_timeout)
    except requests.RequestException as exc:
        # Transport problems (timeout, DNS, reset connection) follow the same
        # "report and return None" contract as an HTTP error status.
        print(f"Ошибка для {symbol}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Ошибка для {symbol}: {response.status_code}")
        return None

    data = response.json()
    if not data:
        print(f"Нет данных для {symbol} в указанный период.")
        return None

    columns = ['Open time', 'Open', 'High', 'Low', 'Close', 'Volume',
               'Close time', 'Quote asset volume', 'Number of trades',
               'Taker buy base asset volume', 'Taker buy quote asset volume', 'Ignore']
    df = pd.DataFrame(data, columns=columns)
    df['Open time'] = pd.to_datetime(df['Open time'], unit='ms')
    return df

#––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
# --- 3. Collect data for the requested period ---
#––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
def get_full_historical_data(symbol, interval, start_time, end_time):
    """
    Download all candles for `symbol` between `start_time` and `end_time`.

    Pages through `get_binance_historical_data`: after each page the window
    start moves to one past the 'Close time' of the last row received, and
    the loop ends once the window is exhausted or a page comes back as
    ``None`` / empty.

    Returns the pages concatenated into one DataFrame with a fresh index, or
    ``None`` (after printing a warning) when nothing was fetched.
    """
    pages = []
    cursor = start_time

    while cursor < end_time:
        page = get_binance_historical_data(symbol, interval, cursor, end_time)
        if page is None or not len(page):
            break
        pages.append(page)
        # Resume right after the last candle already received.
        cursor = page['Close time'].iloc[-1] + 1
        time.sleep(0.1)  # stay under the request rate limit

    if not pages:
        print(f"⚠️ Нет данных для {symbol} за указанный период.")
        return None

    return pd.concat(pages, ignore_index=True)

#––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
# --- 4. Final pipeline ---
#––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
def collect_data_for_top(start_date, end_date):
    """
    Download hourly candles for every top-list pair and save them to one CSV.

    The pair list comes from `get_top_cryptos()`; each pair is downloaded
    with `get_full_historical_data(symbol, '1h', ...)`. Pairs that yield no
    data are skipped. All collected frames get a 'symbol' column, are
    concatenated, written to ``cryptos_hourly_data.csv`` in the current
    working directory, and returned.

    Parameters
    ----------
    start_date, end_date : str
        Window boundaries in ``YYYY-MM-DD`` format, interpreted as midnight
        UTC so they line up with the exchange timestamps regardless of the
        machine's local time zone.

    Returns
    -------
    pandas.DataFrame or None
        The combined data with a continuous 0..n-1 index, or ``None`` (after
        printing a message) when no pair produced any rows.

    Raises
    ------
    ValueError
        If a date string does not match ``YYYY-MM-DD``.
    """
    # Local import: the file-level import only brings in datetime/timedelta.
    from datetime import timezone

    output_path = "cryptos_hourly_data.csv"

    def _to_utc_ms(date_str):
        """Convert a YYYY-MM-DD string to epoch milliseconds at 00:00 UTC."""
        parsed = datetime.strptime(date_str, '%Y-%m-%d').replace(tzinfo=timezone.utc)
        return int(parsed.timestamp()) * 1000

    # The upstream list can contain the same pair more than once (distinct
    # coins may share a ticker). Keep the first occurrence only, otherwise the
    # same candles are downloaded and stored twice.
    top_cryptos = list(dict.fromkeys(get_top_cryptos()))
    all_data = []

    start_time = _to_utc_ms(start_date)
    end_time = _to_utc_ms(end_date)

    for symbol in top_cryptos:
        print(f"Загружаем данные для {symbol}...")
        df = get_full_historical_data(symbol, '1h', start_time, end_time)
        if df is not None and not df.empty:
            df['symbol'] = symbol
            all_data.append(df)

        time.sleep(1)  # pause between symbols to stay clear of rate limits

    if all_data:
        # ignore_index avoids repeated index labels across the per-symbol frames.
        final_df = pd.concat(all_data, ignore_index=True)
        final_df.to_csv(output_path, index=False)
        # Report the file that was actually written.
        print(f"Данные сохранены в {output_path}")
        return final_df
    else:
        print("Не удалось собрать данные.")
        return None

#––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
# - Data collection
#––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
if __name__ == "__main__":
    # Download window boundaries, in the YYYY-MM-DD format that
    # collect_data_for_top parses.
    start_date, end_date = '2024-01-01', '2025-09-17'

    print("Начинаем сбор данных...")
    # Runs the whole pipeline; df is the combined frame, or None if nothing was collected.
    df = collect_data_for_top(start_date, end_date)
    print("Сбор данных завершён!")

Начинаем сбор данных...


In [13]:
# cryp_df = pd.read_csv('top_1000_cryptos_hourly_data.csv')

In [16]:
# cryp_df = pd.read_csv('top_100_cryptos_hourly_data.csv')