In [None]:
import ccxt
import pandas as pd
import os
import logging
from datetime import datetime, timedelta

# Config
# Market symbols to download; fetch_data() loops over this list and writes one CSV per symbol.
COINS = ['BTC/USDT']
# Candle size passed to exchange.fetch_ohlcv().
TIMEFRAME = '1d'
# Requested date range as ISO 8601 UTC strings; both are parsed with exchange.parse8601().
START_DATE = '2020-12-01T00:00:00Z'
# Candles with a timestamp later than this are filtered out before saving.
END_DATE = '2022-03-31T23:59:59Z'
# Directory that receives the generated '<SYMBOL>_ohlcv.csv' files.
SAVE_PATH = r'C:\Users\Kasper Hassing\Desktop\Speciale_KryptoSentiment\data\market_data'

# Setup
# Binance client created without passing any API credentials.
exchange = ccxt.binance()
# Emit INFO-level progress messages from the root logger.
logging.basicConfig(level=logging.INFO)
# Make sure the output directory exists; no error if it is already there.
os.makedirs(SAVE_PATH, exist_ok=True)

def _fetch_candles(coin, start_timestamp, end_timestamp):
    """Download raw OHLCV rows for *coin* by paging forward in time.

    Args:
        coin: Market symbol understood by the exchange, e.g. ``'BTC/USDT'``.
        start_timestamp: First candle open time to request, in milliseconds.
        end_timestamp: Paging stops once the cursor moves past this time
            (milliseconds). The last batch may still contain later candles.

    Returns:
        A list of ``[timestamp_ms, open, high, low, close, volume]`` rows in
        the order the exchange returned them. On a fetch error the rows
        collected so far are returned (best effort), matching the logged error.
    """
    rows = []
    current_timestamp = start_timestamp

    while current_timestamp <= end_timestamp:
        # pd.to_datetime renders the same naive-UTC text as the deprecated
        # datetime.utcfromtimestamp(), so the log output is unchanged.
        batch_start = pd.to_datetime(current_timestamp, unit='ms')
        logging.info(f"Henter data for {coin} fra {batch_start}...")

        try:
            ohlcv = exchange.fetch_ohlcv(coin, timeframe=TIMEFRAME, since=current_timestamp, limit=1000)
        except Exception as e:
            # Best effort: keep whatever was downloaded before the failure.
            logging.error(f"Fejl ved {coin}: {str(e)}")
            break

        if not ohlcv:
            logging.warning(f"Ingen data for {coin} ved {batch_start}")
            break

        # Resume one millisecond after the last candle so it is not fetched twice.
        # The raw millisecond value is used directly; no float round trip.
        next_timestamp = ohlcv[-1][0] + 1
        if next_timestamp <= current_timestamp:
            # The response did not advance the cursor; continuing would
            # request the same page forever.
            logging.warning(f"Ingen nye data for {coin} efter {batch_start}; stopper")
            break

        rows.extend(ohlcv)
        current_timestamp = next_timestamp

    return rows


def _build_frame(rows, end_timestamp):
    """Turn raw OHLCV rows into a cleaned DataFrame limited to *end_timestamp*.

    Args:
        rows: Non-empty list of ``[timestamp_ms, open, high, low, close, volume]``.
        end_timestamp: Inclusive upper bound for candle timestamps, in milliseconds.

    Returns:
        DataFrame with a datetime ``timestamp`` column, prices rounded to two
        decimals and volume rounded to whole units. May be empty if every row
        lies after *end_timestamp*.
    """
    price_columns = ['open', 'high', 'low', 'close']
    frame = pd.DataFrame(rows, columns=['timestamp'] + price_columns + ['volume'])
    frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')

    # Drop candles after the requested end date. The explicit copy makes the
    # rounding assignments below operate on an independent frame instead of a
    # slice (avoids pandas' SettingWithCopyWarning).
    in_range = frame['timestamp'] <= pd.to_datetime(end_timestamp, unit='ms')
    frame = frame.loc[in_range].copy()

    frame[price_columns] = frame[price_columns].round(2)
    frame['volume'] = frame['volume'].round(0)
    return frame


def fetch_data():
    """Download OHLCV candles for every symbol in COINS and save each as CSV.

    For each symbol the candles between START_DATE and END_DATE (inclusive)
    are fetched from the module-level ``exchange`` using TIMEFRAME, cleaned,
    and written to ``<SAVE_PATH>/<SYMBOL>_ohlcv.csv`` (the '/' in the symbol
    is removed). Symbols that yield no rows in range are skipped with a
    warning instead of producing an empty file.

    Returns:
        None. Results are written to disk and progress is logged.
    """
    start_timestamp = exchange.parse8601(START_DATE)
    end_timestamp = exchange.parse8601(END_DATE)

    for coin in COINS:
        rows = _fetch_candles(coin, start_timestamp, end_timestamp)
        if not rows:
            logging.warning(f"Ingen data at gemme for {coin}")
            continue

        final_df = _build_frame(rows, end_timestamp)
        if final_df.empty:
            # Everything returned was after the end date; nothing worth saving.
            logging.warning(f"Ingen data at gemme for {coin}")
            continue

        # Log the first and last date to confirm the covered range.
        logging.info(f"Første dato i datasæt: {final_df['timestamp'].min()}")
        logging.info(f"Sidste dato i datasæt: {final_df['timestamp'].max()}")

        filename = coin.replace('/', '') + '_ohlcv.csv'
        filepath = os.path.join(SAVE_PATH, filename)
        final_df.to_csv(filepath, index=False)
        logging.info(f"Data gemt til {filepath}")

# Script entry point: run the download only when this file is executed directly,
# not when it is imported, then log that the run finished.
if __name__ == "__main__":
    fetch_data()
    logging.info("Process completed")

INFO:root:Henter data for BTC/USDT fra 2020-12-01 00:00:00...
INFO:root:Første dato i datasæt: 2020-12-01 00:00:00
INFO:root:Sidste dato i datasæt: 2022-03-31 00:00:00
INFO:root:Data gemt til C:\Users\Kasper Hassing\Desktop\Speciale_KryptoSentiment\data\market_data\BTCUSDT_ohlcv.csv
INFO:root:Process completed
