In [1]:
import pandas as pd
import pickle

# Yearly 1-minute BTC CSV exports that are merged into one DataFrame
csv_file_paths = [
    'data/csv/1m/BTC_1m2020.csv',
    'data/csv/1m/BTC_1m2021.csv',
    'data/csv/1m/BTC_1m2022.csv',
    'data/csv/1m/BTC_1m2023.csv'
]

# Only these columns are read from each CSV
columns_to_select = ['unix', 'symbol', 'open', 'high', 'low', 'close', 'volume']

# Read every file first and concatenate once at the end. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside the loop re-copies all of
# the previously loaded rows on every iteration.
yearly_frames = []
for csv_file_path in csv_file_paths:
    temp_df = pd.read_csv(csv_file_path, usecols=columns_to_select)

    # Normalise the header to lowercase, then expose the raw 'unix' column as 'date'
    temp_df.columns = temp_df.columns.str.lower()
    temp_df = temp_df.rename(columns={'unix': 'date'})

    yearly_frames.append(temp_df)

# ignore_index=True renumbers the rows 0..n-1 across all files
combined_df = pd.concat(yearly_frames, ignore_index=True)

# Display the combined DataFrame
print(combined_df)

# Save the combined DataFrame to a pickle file
pickle_file_path = 'data/pickle/1m_data_all.pkl'
with open(pickle_file_path, 'wb') as file:
    pickle.dump(combined_df, file)

# Round-trip check: load the frame back from the pickle that was just written
with open(pickle_file_path, 'rb') as file:
    loaded_df = pickle.load(file)

# Display the loaded combined DataFrame
print("\nLoaded DataFrame:")
print(loaded_df)


                  date    symbol      open      high       low     close  \
0        1609459140000  BTC/USDT  28923.66  28952.28  28903.86  28923.63   
1        1609459080000  BTC/USDT  28975.03  28977.10  28923.15  28923.67   
2        1609459020000  BTC/USDT  28975.06  28981.41  28955.35  28975.03   
3        1609458960000  BTC/USDT  28978.91  28988.06  28969.99  28975.06   
4        1609458900000  BTC/USDT  28977.83  28983.62  28956.76  28979.72   
...                ...       ...       ...       ...       ...       ...   
1959791  1672531440000   BTCUSDT  16534.12  16536.08  16527.51  16535.38   
1959792  1672531380000   BTCUSDT  16536.43  16537.28  16531.00  16533.65   
1959793  1672531320000   BTCUSDT  16539.31  16541.17  16534.52  16536.43   
1959794  1672531260000   BTCUSDT  16543.04  16544.41  16538.48  16539.31   
1959795  1672531200000   BTCUSDT  16541.77  16544.76  16538.45  16543.67   

               volume  
0        1.501321e+06  
1        1.044267e+06  
2        6.1783

In [2]:
# import pandas as pd
# import pickle

# Re-read the combined 1-minute frame from its pickle file
pickle_file_path = 'data/pickle/1m_data_all.pkl'

with open(pickle_file_path, 'rb') as pickle_handle:
    loaded_df = pickle.load(pickle_handle)

# Interpret 'date' as epoch milliseconds and promote it to the index,
# building a new frame instead of mutating the loaded one in place
date_as_datetime = pd.to_datetime(loaded_df['date'], unit='ms')
loaded_df = loaded_df.assign(date=date_as_datetime).set_index('date')

# Show the frame with its datetime index
print("\nLoaded DataFrame:")
print(loaded_df)



Loaded DataFrame:
                       symbol      open      high       low     close  \
date                                                                    
2020-12-31 23:59:00  BTC/USDT  28923.66  28952.28  28903.86  28923.63   
2020-12-31 23:58:00  BTC/USDT  28975.03  28977.10  28923.15  28923.67   
2020-12-31 23:57:00  BTC/USDT  28975.06  28981.41  28955.35  28975.03   
2020-12-31 23:56:00  BTC/USDT  28978.91  28988.06  28969.99  28975.06   
2020-12-31 23:55:00  BTC/USDT  28977.83  28983.62  28956.76  28979.72   
...                       ...       ...       ...       ...       ...   
2023-01-01 00:04:00   BTCUSDT  16534.12  16536.08  16527.51  16535.38   
2023-01-01 00:03:00   BTCUSDT  16536.43  16537.28  16531.00  16533.65   
2023-01-01 00:02:00   BTCUSDT  16539.31  16541.17  16534.52  16536.43   
2023-01-01 00:01:00   BTCUSDT  16543.04  16544.41  16538.48  16539.31   
2023-01-01 00:00:00   BTCUSDT  16541.77  16544.76  16538.45  16543.67   

                           volu

In [3]:
import pandas as pd
import pickle

# Human-readable timeframe labels, used in file names and progress messages
timeframes = ["10m", "15m", "24m", "30m"]
# pandas offset aliases, paired positionally with `timeframes`.
# "min" is used instead of "T": the single-letter minute alias is deprecated
# since pandas 2.2, while "min" is accepted by every pandas version.
frame = ["10min", "15min", "24min", "30min"]
# Define a function to change the timeframe
def changeTime(data, t, frame):
    """Resample OHLCV bars to a coarser timeframe.

    Args:
        data: DataFrame with 'open', 'high', 'low', 'close' and 'volume'
            columns and an index that ``pd.to_datetime`` can parse. The
            caller's frame is not modified.
        t: Label of the target timeframe (e.g. "10m"); only used in the
            progress message.
        frame: pandas offset alias handed to ``resample`` (e.g. "10min").

    Returns:
        The resampled DataFrame, indexed by a UTC DatetimeIndex labelled with
        the left edge of each bin; rows containing any NaN are dropped.
        ``None`` if any error occurs (a message is printed instead of raising).
    """
    ohlc = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }

    try:
        # Shallow copy: the index can be replaced without touching the
        # caller's frame and without duplicating the column data.
        data = data.copy(deep=False)
        data.index = pd.to_datetime(data.index, utc=True)
        print(f'Changing the 1m to {t}')

        # `kind='timestamp'` is omitted: it is deprecated and a DatetimeIndex
        # already yields timestamp labels.
        data = data.resample(frame, label='left').agg(ohlc).dropna()

    except pd.errors.EmptyDataError:
        print('Empty data received while changing the timeframe.')
        return None

    except pd.errors.OutOfBoundsDatetime:
        print('Error encountered in the datetime bounds while changing the timeframe.')
        return None

    except pd.errors.OutOfBoundsTimedelta:
        print('Error encountered in the timedelta bounds while changing the timeframe.')
        return None

    # NOTE: there is no `pd.errors.ResampleError`; naming it in an `except`
    # clause raised AttributeError during handler matching and made the
    # fallback below unreachable. Resampling failures are reported here.
    except Exception as e:
        print(f'An unexpected error occurred while changing the timeframe: {e}')
        return None

    return data

# Resample the 1-minute data to each timeframe and persist the result
for tm, t_frame in zip(timeframes, frame):
    df = changeTime(loaded_df.copy(), tm, t_frame)

    # changeTime reports failures by returning None; skip the timeframe rather
    # than pickling None and then crashing on df.to_csv.
    if df is None:
        print(f'Skipping {tm}: resampling failed, no files were written.')
        continue

    pickle_file_path = f'data/pickle/all/{tm}_data_all.pkl'

    # Save the DataFrame to a pickle file
    with open(pickle_file_path, 'wb') as file:
        pickle.dump(df, file)

    # Also export the resampled data as CSV
    df.to_csv(f'data/csv/all/{tm}_data_all.csv')

    # Round-trip check: load the DataFrame back from the pickle and display it
    with open(pickle_file_path, 'rb') as file:
        loaded_df_ = pickle.load(file)
        print(f"\nLoaded DataFrame for {tm}:")
        print(loaded_df_)

Changing the 1m to 10m

Loaded DataFrame for 10m:
                               open      high       low     close  \
date                                                                
2020-01-01 00:00:00+00:00   7182.43   7186.69   7172.94   7186.69   
2020-01-01 00:10:00+00:00   7186.83   7188.10   7172.43   7173.68   
2020-01-01 00:20:00+00:00   7173.50   7179.04   7170.69   7172.36   
2020-01-01 00:30:00+00:00   7172.79   7179.45   7170.61   7172.80   
2020-01-01 00:40:00+00:00   7172.98   7177.11   7171.96   7177.11   
...                             ...       ...       ...       ...   
2023-09-26 23:10:00+00:00  26168.27  26206.81  26168.27  26183.98   
2023-09-26 23:20:00+00:00  26183.98  26188.48  26175.00  26178.46   
2023-09-26 23:30:00+00:00  26178.46  26235.56  26176.35  26214.53   
2023-09-26 23:40:00+00:00  26214.53  26240.00  26214.53  26221.73   
2023-09-26 23:50:00+00:00  26221.74  26225.53  26210.06  26221.67   

                                 volume  
date      