In [17]:

import pandas as pd

import ccxt
import os
import time
# Per-interval settings keyed by interval label. Each entry holds a 'name'
# and 'dataOffset', the pandas Timedelta used to step past one candle of
# that interval when paging through downloads.
# NOTE(review): '2w' carries the name '1w' while every other entry's name
# equals its key — confirm whether that is intentional.
_INTERVAL_SPECS = (
    # (key, name, Timedelta keyword arguments)
    ('1M', '1M', {'days': 31}),
    ('2w', '1w', {'days': 14}),
    ('1w', '1w', {'days': 7}),
    ('3d', '3d', {'days': 3}),
    ('2d', '2d', {'days': 2}),
    ('1d', '1d', {'days': 1}),
    ('12h', '12h', {'hours': 12}),
    ('8h', '8h', {'hours': 8}),
    ('7h', '7h', {'hours': 7}),
    ('6h', '6h', {'hours': 6}),
    ('4h', '4h', {'hours': 4}),
    ('2h', '2h', {'hours': 2}),
    ('1h', '1h', {'hours': 1}),
    ('30m', '30m', {'minutes': 30}),
    ('15m', '15m', {'minutes': 15}),
    ('10m', '10m', {'minutes': 10}),
    ('5m', '5m', {'minutes': 5}),
    ('3m', '3m', {'minutes': 3}),
    ('1m', '1m', {'minutes': 1}),
)
interval_mapping = {
    key: {'name': name, 'dataOffset': pd.Timedelta(**span)}
    for key, name, span in _INTERVAL_SPECS
}

def download_newest_data(symbol, timeframe, since='2000-01-01 00:00:00', end='2030-02-01 00:00:00', batch=3, dataOffset=pd.Timedelta(days=1)):
    """Download OHLCV candles from Binance (via ccxt) in consecutive batches.

    Args:
        symbol: Market symbol passed to ``fetch_ohlcv``.
        timeframe: Candle interval passed to ``fetch_ohlcv``.
        since: First timestamp to request (anything ``pd.to_datetime`` accepts).
        end: Exclusive upper bound; rows with ``timestamp >= end`` are dropped.
        batch: Number of candles requested per call.
        dataOffset: Timedelta added to the last received timestamp to get the
            start of the next batch.

    Returns:
        A DataFrame with columns timestamp, open, high, low, close, volume,
        or ``None`` when the download fails (network retries exhausted or a
        non-network ccxt error).
    """
    max_retries = 3
    retry_delay_seconds = 2

    # Keep the paging cursor as a Timestamp; the millisecond value handed to
    # the exchange is derived from it on every request, so a retry never sees
    # an already-converted integer.
    cursor = pd.to_datetime(since)
    end = pd.to_datetime(end)

    exchange = ccxt.binance()
    data_frames = []
    trials = 0

    while True:
        try:
            since_ms = exchange.parse8601(cursor.isoformat())
            data = exchange.fetch_ohlcv(symbol, timeframe=timeframe, since=since_ms, limit=batch)
        except ccxt.NetworkError as e:
            # Transient failure: retry a bounded number of times.
            print(f"⚠️ Błąd podczas pobierania danych: {e}")
            if trials >= max_retries:
                print("❌ Nie udało się pobrać danych po kilku próbach.")
                return None
            print("⏳ Czekam 2 sekundy przed ponowną próbą...")
            trials += 1
            time.sleep(retry_delay_seconds)
            continue
        except ccxt.BaseError as e:
            # Any other ccxt error will not be cured by retrying the same
            # request, so give up instead of looping forever.
            print("Inny błąd CCXT:", e)
            return None

        trials = 0  # a successful request resets the retry budget

        df = pd.DataFrame(data, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
        data_frames.append(df)

        # A short batch means the exchange has no more candles to return.
        if len(data) != batch:
            break

        next_since = df.iloc[-1]['timestamp'] + dataOffset
        if 'M' in timeframe:
            # Snap to day 1 so the next request starts on the first day of
            # the following month.
            next_since = next_since.replace(day=1)
        if next_since >= end:
            break
        cursor = next_since

    # Merge all batches and keep only rows strictly before `end`.
    if data_frames:
        result_df = pd.concat(data_frames, ignore_index=True)
        result_df = result_df[result_df['timestamp'] < end]
        return result_df
    return None
    
    

    
def upload_data( folder,ticker, interval, start_date, end_date, add_indicators=False):
    """Download candles for *ticker* and store them as ``<folder>/<symbol>/<interval>.csv``.

    Args:
        folder: Root directory of the data store.
        ticker: Market symbol; any '/' is replaced by '.' for the directory name.
        interval: Candle interval; must be a key of ``interval_mapping``.
        start_date: First timestamp to download.
        end_date: Exclusive end of the download range.
        add_indicators: Not used by this function; kept so existing calls
            keep working.

    Returns:
        The downloaded DataFrame (also written to disk).

    Raises:
        RuntimeError: If the download returned no data.
    """
    symbol = ticker.replace('/', '.')
    symbol_directory = os.path.join(folder, symbol)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(symbol_directory, exist_ok=True)
    file_path = os.path.join(symbol_directory, interval + '.csv')

    combined_data = download_newest_data(
        ticker,
        interval,
        start_date,
        end_date,
        batch=1000,
        dataOffset=interval_mapping[interval]['dataOffset'],
    )
    if combined_data is None:
        # download_newest_data signals failure with None; fail with a clear
        # message rather than an AttributeError on None.to_csv.
        raise RuntimeError(f"No data downloaded for {ticker} ({interval}); nothing written to {file_path}")

    combined_data.to_csv(file_path, index=False)
    return combined_data
    
def save_resampled_data(folder, df, ticker, interval):
    """Write *df* to ``<folder>/<symbol>/<interval>.csv`` without the index.

    The symbol directory name is *ticker* with every '/' replaced by '.'.
    The directory is not created here. Returns *df* unchanged.
    """
    target_path = os.path.join(folder, ticker.replace('/', '.'), interval + '.csv')
    df.to_csv(target_path, index=False)
    return df

import os
import pandas as pd

def load_data(ticker, interval, folder):
    """Read ``<folder>/<symbol>/<interval>.csv`` into a DataFrame.

    The symbol directory (ticker with '/' replaced by '.') is created when it
    is missing. The 'timestamp' column is parsed as dates. When the CSV file
    does not exist a message is printed and ``None`` is returned.
    """
    symbol_directory = os.path.join(folder, ticker.replace('/', '.'))
    if not os.path.exists(symbol_directory):
        os.makedirs(symbol_directory)

    file_path = os.path.join(symbol_directory, interval + '.csv')
    if not os.path.exists(file_path):
        print(f"File {file_path} does not exist.")
        return None

    return pd.read_csv(file_path, parse_dates=['timestamp'])

# Example usage, assuming a folder named 'data' that holds 'BTC.USD/1h.csv':
#   df = load_data('BTC/USD', '1h', 'data')



def resample_data(df, interval):
    """Aggregate OHLCV candles in *df* into coarser candles of length *interval*.

    Args:
        df: DataFrame with columns timestamp, open, high, low, close, volume.
            It is not modified.
        interval: Target interval such as '5m', '1h' or '1d'.

    Returns:
        A new DataFrame with one row per non-empty target candle and the
        columns timestamp, open, high, low, close, volume.
    """
    # Translate the interval suffix into a pandas offset alias. 'min' is used
    # for minutes because the single-letter minute alias is deprecated in
    # recent pandas releases; 'D'/'W' are the canonical upper-case spellings.
    unit_aliases = {'m': 'min', 'd': 'D', 'w': 'W'}
    rule = interval
    if interval[-1:] in unit_aliases:
        rule = interval[:-1] + unit_aliases[interval[-1]]

    # Work on a copy so neither the caller's frame nor a column slice of it
    # is modified in place.
    candles = df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].copy()
    candles['timestamp'] = pd.to_datetime(candles['timestamp'])

    resampled_df = candles.set_index('timestamp').resample(rule).agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }).dropna()
    return resampled_df.reset_index()


def load_row_data_and_save(folder, symbol, intervals, start_date, end_date):
    """Download the base candles for *symbol* and store them on disk.

    Only the last entry of *intervals* is used as the interval to download.
    ``upload_data`` already writes the result to
    ``<folder>/<symbol>/<interval>.csv``, so no second write is done here.

    Returns:
        The downloaded DataFrame.
    """
    base_interval = intervals[-1]
    return upload_data(folder, symbol, base_interval, start_date, end_date, False)


def resame_data_and_save(basic_data,folder, symbol, intervals):
    """Resample *basic_data* to every interval in *intervals* and save each result.

    For each target interval the base frame is printed, resampled with
    ``resample_data`` and written with ``save_resampled_data``.
    """
    for target_interval in intervals:
        print(basic_data)
        save_resampled_data(
            folder,
            resample_data(basic_data, target_interval),
            symbol,
            target_interval,
        )


def save_row_data_1m_1d(ticker, folder='row_data', start_date='2006-01-01 00:00:00', end_date='2025-03-01 00:00:00'):
    """Download 1-minute candles for *ticker* and derive the 5m..1d files from them.

    Args:
        ticker: Market symbol to download.
        folder: Root directory of the data store.
        start_date: First timestamp to download.
        end_date: Exclusive end of the download range.
    """
    basic_data = load_row_data_and_save(folder, ticker, ['1m'], start_date, end_date)
    resame_data_and_save(basic_data, folder, ticker, ['5m', '15m', '30m', '1h', '2h', '4h', '6h', '12h', '1d'])

def save_row_data_5m_1d(ticker, folder='row_data', start_date='2006-01-01 00:00:00', end_date='2025-03-01 00:00:00'):
    """Download 5-minute candles for *ticker* and derive the 15m..1d files from them.

    Args:
        ticker: Market symbol to download.
        folder: Root directory of the data store.
        start_date: First timestamp to download.
        end_date: Exclusive end of the download range.
    """
    basic_data = load_row_data_and_save(folder, ticker, ['5m'], start_date, end_date)
    resame_data_and_save(basic_data, folder, ticker, ['15m', '30m', '1h', '2h', '4h', '6h', '12h', '1d'])
def save_row_data_15m_1d(ticker, folder='row_data', start_date='2006-01-01 00:00:00', end_date='2025-03-01 00:00:00'):
    """Download 15-minute candles for *ticker* and derive the 30m..1d files from them.

    Args:
        ticker: Market symbol to download.
        folder: Root directory of the data store.
        start_date: First timestamp to download.
        end_date: Exclusive end of the download range.
    """
    basic_data = load_row_data_and_save(folder, ticker, ['15m'], start_date, end_date)
    resame_data_and_save(basic_data, folder, ticker, ['30m', '1h', '2h', '4h', '6h', '12h', '1d'])


In [18]:
# Aggregation applied to every OHLCV resample below.
_OHLCV_AGGREGATION = {
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum',
}

# interval -> (resample rule,
#              half-width in days of the window searched around reference_date,
#              bar length in days, or None to pad by the absolute distance)
_REFERENCE_ALIGNED_RULES = {
    '2w': ('2W-MON', 12, None),
    '2d': ('2D', 1, 2),
    '3d': ('3D', 2, 3),
}


def _resample_ohlcv(frame, rule):
    """Aggregate a DatetimeIndex-ed frame into left-labelled, left-closed OHLCV bars."""
    return frame.resample(rule, label='left', closed='left').agg(_OHLCV_AGGREGATION).dropna()


def _days_to_pad(bars, reference_date, window_days, period_days):
    """Return how many days to prepend so that a bar edge lands on reference_date."""
    window = pd.Timedelta(days=window_days)
    in_window = (bars.index >= reference_date - window) & (bars.index <= reference_date + window)
    nearby = bars.index[in_window]
    if nearby.empty:
        # No bar close to the reference date: leave the alignment untouched.
        return 0
    offsets = [delta.days for delta in (nearby - reference_date)]
    closest = min(offsets, key=abs)
    if period_days is None:
        return abs(closest)
    # Prepending p days moves every edge p days earlier, so an edge that is
    # `closest` days away from the reference needs p == closest (mod period).
    # Using the signed distance matters when the nearest edge lies before
    # the reference date.
    return closest % period_days


def _prepend_flat_days(frame, n_days):
    """Prepend n_days zero-volume daily rows priced at the first open of frame."""
    first_open = frame['open'].iloc[0]
    pad_index = pd.date_range(
        end=frame.index[0] - pd.Timedelta(days=1),
        periods=n_days,
        freq='D',
        name=frame.index.name,  # keep the index name so the output column stays 'timestamp'
    )
    padding = pd.DataFrame(
        {'open': first_open, 'high': first_open, 'low': first_open, 'close': first_open, 'volume': 0},
        index=pad_index,
    )
    return pd.concat([padding, frame]).sort_index()


def resample_daily_to_weekly(df, interval, reference_date=None):
    """Resample daily OHLCV rows into multi-day, weekly, monthly or quarterly bars.

    Args:
        df: DataFrame with columns timestamp, open, high, low, close, volume.
            Rows are assumed to be in chronological order. It is not modified.
        interval: One of '1w', '2w', '1M', '2M', '2d', '3d', '3M'.
        reference_date: Date on which a bar should start. Required for '2w',
            '2d' and '3d'; the data is padded at the front with flat,
            zero-volume days so that a bar edge falls on this date. Ignored
            for the other intervals: their bars are anchored to the calendar
            (Mondays, month starts, quarter starts) and padding cannot move
            them.

    Returns:
        A DataFrame with columns timestamp, open, high, low, close, volume,
        one row per non-empty bar, labelled with the bar's start.

    Raises:
        ValueError: If *interval* is not supported, or *reference_date* is
            missing for an interval that needs it.
    """
    frame = df.copy()
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    frame = frame.set_index('timestamp')

    if interval == '1w':
        # Weekly bars starting on Monday.
        df_resampled = _resample_ohlcv(frame, 'W-MON')
    elif interval == '1M':
        df_resampled = _resample_ohlcv(frame, 'MS')
    elif interval == '2M':
        # NOTE(review): this keeps only rows from even-numbered months and
        # produces one bar per such month; rows from odd months are dropped.
        # Confirm whether true two-month bars were intended.
        df_resampled = _resample_ohlcv(frame[frame.index.month % 2 == 0], 'MS')
    elif interval == '3M':
        df_resampled = _resample_ohlcv(frame, 'QS')
    elif interval in _REFERENCE_ALIGNED_RULES:
        if reference_date is None:
            raise ValueError(f"reference_date is required for interval {interval!r}")
        reference_date = pd.Timestamp(reference_date)
        rule, window_days, period_days = _REFERENCE_ALIGNED_RULES[interval]

        df_resampled = _resample_ohlcv(frame, rule)
        pad_days = _days_to_pad(df_resampled, reference_date, window_days, period_days)
        if pad_days > 0:
            df_resampled = _resample_ohlcv(_prepend_flat_days(frame, pad_days), rule)
    else:
        raise ValueError(f"Unsupported interval: {interval!r}")

    # Return the bar start as a regular 'timestamp' column.
    return df_resampled.reset_index()

# Load your data (Assuming CSV format, adjust if JSON)


def resample_data_and_save(folder, symbol, interval, reference_data):
    """Build *interval* bars from the stored daily candles of *symbol* and save them.

    Args:
        folder: Root directory of the data store.
        symbol: Market symbol whose '1d' file is read.
        interval: Target interval passed to ``resample_daily_to_weekly``.
        reference_data: Reference date passed on as ``reference_date``.

    Raises:
        FileNotFoundError: If the daily ('1d') file for *symbol* is missing.
    """
    basic_data = load_data(symbol, '1d', folder)
    if basic_data is None:
        # load_data returns None for a missing file; stop here with a clear
        # message instead of failing later on a None frame.
        raise FileNotFoundError(f"No '1d' data found for {symbol} in {folder}")

    resampled_data = resample_daily_to_weekly(basic_data, interval, reference_date=reference_data)
    save_resampled_data(folder, resampled_data, symbol, interval)

    


In [19]:
# Load the stored '1m' file for BTCUSDT from 'row_data' and preview its last rows.
folder = 'row_data'
symbol="BTCUSDT"
basic_data = load_data(symbol, '1m', folder)
# NOTE(review): the output recorded for this cell shows month-start rows.
# On a case-insensitive filesystem '1m.csv' and '1M.csv' are the same file —
# confirm which data this actually loads.
basic_data.tail()

Unnamed: 0,timestamp,open,high,low,close,volume
86,2024-10-01,63327.6,73620.12,58946.0,70292.01,756010.9
87,2024-11-01,70292.01,99588.01,66835.0,96407.99,1343559.0
88,2024-12-01,96407.99,108353.0,90500.0,93576.0,1019450.0
89,2025-01-01,93576.0,109588.0,89256.69,102429.56,864534.7
90,2025-02-01,102429.56,102783.71,78258.52,84349.94,810850.2


In [20]:
# Download 1-minute data for each ticker, derive the intraday/daily files, then
# build the multi-day, weekly, monthly and quarterly files from the daily file.
folder = 'row_data'
# Each assignment below overwrites the previous one; only the last list is used.
tickers=['BTCUSDT', 'ETHUSDT', 'DOGEUSDT', 'LINKUSDT', 'SUIUSDT', 'ZROUSDT', 'ADAUSDT', 'TRXUSDT', 'WLDUSDT' , 'PEPEUSDT', 'DYMUSDT']
tickers=['WLDUSDT'  ,'PEPEUSDT', 'DYMUSDT']
tickers=['BTCUSDT']
for ticker in tickers:
    save_row_data_1m_1d(ticker)
    # reference_data is the date a bar of the given interval should start on.
    resample_data_and_save(folder, ticker, "2d", reference_data=pd.Timestamp("2025-01-31"))
    resample_data_and_save(folder, ticker, "3d", reference_data=pd.Timestamp("2025-01-31"))
    resample_data_and_save(folder, ticker, "1w", reference_data=pd.Timestamp("2024-12-30"))
    resample_data_and_save(folder, ticker, "2w", reference_data=pd.Timestamp("2024-12-30"))
    resample_data_and_save(folder, ticker, "1M", reference_data=pd.Timestamp("2024-12-01"))
    resample_data_and_save(folder, ticker, "3M", reference_data=pd.Timestamp("2024-10-01"))


                  timestamp      open      high       low     close     volume
0       2017-08-17 04:00:00   4261.48   4261.48   4261.48   4261.48   1.775183
1       2017-08-17 04:01:00   4261.48   4261.48   4261.48   4261.48   0.000000
2       2017-08-17 04:02:00   4280.56   4280.56   4280.56   4280.56   0.261074
3       2017-08-17 04:03:00   4261.48   4261.48   4261.48   4261.48   0.012008
4       2017-08-17 04:04:00   4261.48   4261.48   4261.48   4261.48   0.140796
...                     ...       ...       ...       ...       ...        ...
3955443 2025-02-28 23:55:00  84488.61  84488.62  84439.66  84439.67   5.438710
3955444 2025-02-28 23:56:00  84439.66  84439.66  84428.00  84433.11   3.063730
3955445 2025-02-28 23:57:00  84433.11  84433.12  84394.94  84394.95   5.789020
3955446 2025-02-28 23:58:00  84394.94  84414.55  84394.94  84414.55   2.353340
3955447 2025-02-28 23:59:00  84414.55  84414.55  84349.94  84349.94  10.462880

[3955448 rows x 6 columns]
                  timest

In [None]:
folder = 'row_data'
# Hand-picked ticker lists from earlier runs; both are superseded by the
# exchange-wide list built below.
tickers=[ 'ETHUSDT', 'DOGEUSDT', 'LINKUSDT', 'SUIUSDT', 'ZROUSDT', 'ADAUSDT', 'TRXUSDT', 'WLDUSDT' , 'PEPEUSDT', 'DYMUSDT'] # 'PNUTUSDT', 'AUCTIONUSDT', 'API3USDT', 'EIGENUSDT', 'COOKIEUSDT', 'AIXBTUSDT', 'OMUSDT', 'LTCUSDT', 'TONUSDT', 'AVAXUSDT', 'BERAUSDT', 'TRUMPUSDT', 'ENAUSDT', 'KASUSDT', 'RENDERUSDT', 'BONKUSDT', 'INJUSDT', 'JUPUSDT', 'TIAUSDT', 'VIRTUALUSDT',
tickers=[  'SOLUSDT', 'ARBUSDT', 'JUPUSDT', 'TAOUSDT', 'PYTHUSDT']


import requests
import pandas as pd

# Binance REST endpoint that lists the exchange's trading symbols
url = "https://api.binance.com/api/v3/exchangeInfo"

# Send the request. The timeout keeps a hung connection from blocking the
# notebook, and raise_for_status surfaces HTTP errors before the JSON is used.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# Extract the list of symbols
symbols = [s['symbol'] for s in data['symbols']]

# (Optional) keep them in a DataFrame
df_symbols = pd.DataFrame(symbols, columns=['Ticker'])

df_usdt = df_symbols[df_symbols['Ticker'].str.endswith('USDT')]

tickers = df_usdt['Ticker'].tolist()


dane_gieldowe = pd.read_excel('Market_data.xlsx')
tickers_do_usuniecia = dane_gieldowe['Plik'].dropna().tolist()

# Remove those tickers from the list (set lookup instead of scanning the list
# once per ticker)
excluded_tickers = set(tickers_do_usuniecia)
tickers = [t for t in tickers if t not in excluded_tickers]

# Download and resample every remaining ticker

for ticker in tickers:
    save_row_data_15m_1d(ticker)
    print(ticker)
    resample_data_and_save(folder, ticker, "2d", reference_data=pd.Timestamp("2025-02-20")) # Some series do not resample because the pair was delisted and there is no data near the reference date
    resample_data_and_save(folder, ticker, "3d", reference_data=pd.Timestamp("2025-02-27"))
    resample_data_and_save(folder, ticker, "1w", reference_data=pd.Timestamp("2025-02-24"))
    resample_data_and_save(folder, ticker, "2w", reference_data=pd.Timestamp("2025-02-17"))
    resample_data_and_save(folder, ticker, "1M", reference_data=pd.Timestamp("2025-02-01"))
    resample_data_and_save(folder, ticker, "3M", reference_data=pd.Timestamp("2025-01-01"))

                timestamp    open     high     low   close      volume
0     2017-11-11 08:15:00  802.00   802.00  802.00  802.00     1.00000
1     2017-11-11 08:30:00  949.99   949.99  949.99  949.99     0.27318
2     2017-11-11 08:45:00  949.99   949.99  949.99  949.99     0.00000
3     2017-11-11 09:00:00  949.99   988.00  949.99  988.00     2.89093
4     2017-11-11 09:15:00  988.00  1000.00  955.00  955.00    10.83094
...                   ...     ...      ...     ...     ...         ...
35186 2018-11-15 15:45:00  428.01   435.52  422.65  433.11  1691.56335
35187 2018-11-15 16:00:00  432.95   464.00  427.96  451.72  3161.07002
35188 2018-11-15 16:15:00  452.85   462.99  446.00  447.00  4192.07152
35189 2018-11-15 16:30:00  447.00   458.25  435.00  448.70  4215.63296
35190 2018-11-20 02:45:00  448.70   448.70  448.70  448.70     0.00000

[35191 rows x 6 columns]
                timestamp    open     high     low   close      volume
0     2017-11-11 08:15:00  802.00   802.00  802.00 