In [4]:
from pathlib import Path
from typing import List, Optional, Union
import os
import numpy as np
import pandas as pd
from gluonts.dataset.arrow import ArrowWriter

In [29]:
def convert_to_arrow(
    path: Union[str, Path],
    time_series: Union[List[np.ndarray], np.ndarray],
    start_times: Optional[Union[List[np.datetime64], np.ndarray]] = None,
    compression: str = "lz4",
):
    """Write a collection of time series to a single Arrow file.

    Every series becomes one record of the form
    ``{"start": <timestamp>, "target": <series>}``; the list of records is
    then handed to ``ArrowWriter.write_to_file``.

    Args:
        path: Destination of the Arrow file.
        time_series: The series to store, one entry per series.
        start_times: One start timestamp per series. When omitted, every
            series is given the placeholder start ``2000-01-01 00:00``.
        compression: Compression setting forwarded to ``ArrowWriter``.

    Raises:
        AssertionError: If the number of start times does not match the
            number of series.
    """
    n_series = len(time_series)

    if start_times is None:
        # No timestamps supplied: reuse one arbitrary placeholder for all series.
        placeholder = np.datetime64("2000-01-01 00:00", "s")
        start_times = [placeholder] * n_series

    assert n_series == len(start_times)

    # Pair each series with its start timestamp, preserving input order.
    records = []
    for series, begin in zip(time_series, start_times):
        records.append({"start": begin, "target": series})

    writer = ArrowWriter(compression=compression)
    writer.write_to_file(records, path=path)

def open_txt_files_in_folder(
    read_path,
    save_path,
    cutoff="2022-01-01",
    output_name="crypto_all.arrow",
):
    """Collect the ``close`` series of every ``.txt`` file in a folder into one Arrow file.

    Each ``.txt`` file is read as CSV with a header row and must provide a
    ``datetime`` and a ``close`` column. Rows dated on or after ``cutoff`` are
    dropped; the remaining ``close`` values form one series whose start is the
    ``datetime`` of the first remaining row. All series are then written via
    ``convert_to_arrow`` to ``<save_path>/<output_name>``.

    Args:
        read_path: Folder containing the ``.txt`` input files.
        save_path: Folder receiving the Arrow file; created if missing.
        cutoff: Exclusive upper bound for the ``datetime`` column.
        output_name: File name of the Arrow file inside ``save_path``.
    """
    import warnings

    # os.listdir returns entries in arbitrary order; sort so that the order of
    # the series in the output file is reproducible between runs and machines.
    txt_files = sorted(
        name for name in os.listdir(read_path) if name.endswith(".txt")
    )

    time_series_list = []
    start_times_list = []

    for txt_file in txt_files:
        file_path = os.path.join(read_path, txt_file)

        df = pd.read_csv(file_path, header=0)
        df = df.astype({"datetime": "datetime64[ns]"})

        # Keep only the rows strictly before the cutoff date.
        df = df[df["datetime"] < cutoff]

        if df.empty:
            # Nothing left before the cutoff: there is no first row to take the
            # start time from, so skip the file instead of failing on iloc[0].
            warnings.warn(
                f"Skipping {file_path}: no rows before {cutoff}", stacklevel=2
            )
            continue

        time_series_list.append(df["close"].values)
        # NOTE(review): assumes rows are in chronological order so that the
        # first row carries the earliest timestamp -- confirm for the input data.
        start_times_list.append(df["datetime"].iloc[0])

    # The series are passed on as a plain list. Wrapping them in
    # np.array(..., dtype=object) would turn equal-length series into a 2-D
    # object array, so each "series" would become an object-dtype row rather
    # than the original numeric array.
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    arrows_location = os.path.join(save_path, output_name)
    convert_to_arrow(arrows_location, time_series_list, np.array(start_times_list))

In [30]:
if __name__ == "__main__":
    # Input folder holding the prepared .txt files and output folder for the
    # Arrow file, both relative to the current working directory.
    # The paths are assembled with os.path.join so the separator matches the
    # running platform; a literal backslash path only resolves on Windows.
    folder_30min = os.path.join("..", "Prepared_data", "Firstrate_data_30min")
    save_30min = os.path.join("..", "Arrows")
    open_txt_files_in_folder(folder_30min, save_30min)
    