In [2]:
import os
import logging
from pathlib import Path
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import sys
from autogluon.timeseries import TimeSeriesDataFrame
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Directory and ticker of the parquet file to load. Kept as named constants so
# a different machine or symbol only requires editing these two lines.
DATA_DIR = Path("/Users/akramchakrouni/Projects/time-series-forecasting-cluserting/data/chronos")
TICKER = "ADBE"

file_path = DATA_DIR / f"{TICKER}.parquet"
data = pd.read_parquet(file_path)
# The ticker label used by the later cells is derived from the file name.
ticker = file_path.stem

print(ticker)

data

ADBE


Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Open,High,Low,Volume
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ADBE,2022-03-21 09:30:00,0.476894,0.486494,0.506573,0.481384,0.090194
ADBE,2022-03-21 10:30:00,0.501947,0.476800,0.501381,0.480549,0.051298
ADBE,2022-03-21 11:30:00,0.498228,0.501588,0.502154,0.500097,0.041643
ADBE,2022-03-21 12:30:00,0.474711,0.498923,0.497156,0.476100,0.040267
ADBE,2022-03-21 13:30:00,0.494116,0.474452,0.492151,0.478992,0.043123
ADBE,...,...,...,...,...,...
ADBE,2025-02-14 11:30:00,0.510413,0.509736,0.511903,0.511331,0.022217
ADBE,2025-02-14 12:30:00,0.510400,0.510191,0.510853,0.514918,0.024441
ADBE,2025-02-14 13:30:00,0.514571,0.510191,0.515769,0.515168,0.026723
ADBE,2025-02-14 14:30:00,0.508093,0.514307,0.512455,0.510608,0.028794


In [11]:
def check_for_missing_values(data, ticker):
    """Report whether ``data`` is free of missing values.

    Args:
        data: Object exposing ``isnull()`` (a pandas DataFrame in this
            notebook) whose cells are inspected.
        ticker: Label used in the log messages.

    Returns:
        bool: True when no null cell is found. False when at least one cell
        is null, or when the check itself fails (for example ``data`` is
        None because an earlier step returned None).
    """
    try:
        # Per-column null counts, then summed into one grand total.
        total_missing = data.isnull().sum().sum()

        if total_missing > 0:
            logging.warning(f"❌ The data for {ticker} contains missing values and requires further processing before it can be added to the chronos directory.")
            return False

        logging.info(f"✅ The data of {ticker} has no missing values.")
        return True

    # Catch every failure (not only ValueError) so a bad input such as None
    # is logged and reported as False, matching the other helpers here.
    except Exception as e:
        logging.error(f"❌ An error occurred while checking missing values for {ticker}: {e}", exc_info=True)
        return False

# Quick sanity check on the loaded frame: prints "Ja" when the check passes
# and "Nee" when it does not.
print("Ja" if check_for_missing_values(data, ticker) else "Nee")

Ja


In [12]:
def normalize_data(data, ticker):
    """Return a copy of ``data`` with the OHLCV columns min-max scaled.

    The columns "Open", "High", "Low", "Close" and "Volume" are scaled with
    scikit-learn's ``MinMaxScaler`` using its default settings. All other
    columns are carried over unchanged. The input frame is not modified.

    Args:
        data: pandas DataFrame containing the five OHLCV columns.
        ticker: Label used in the log messages.

    Returns:
        A new DataFrame with the scaled columns, or None when a required
        column is missing or scaling fails (the error is logged).
    """
    try:
        required_columns = ["Open", "High", "Low", "Close", "Volume"]
        missing_cols = [col for col in required_columns if col not in data.columns]

        if missing_cols:
            logging.error(f"❌ Missing columns in {ticker}: {missing_cols}")
            return None

        # Scale into a copy: the caller's frame keeps its raw values, so
        # re-running the cell always starts from the same input and the raw
        # and normalized frames never alias each other.
        normalized = data.copy()
        normalized[required_columns] = MinMaxScaler().fit_transform(data[required_columns])

        logging.info(f"✅ Data for {ticker} normalized successfully.")
        return normalized

    except Exception as e:
        logging.error(f"❌  Normalization failed for {ticker}: {e}", exc_info=True)
        return None

# Normalize the loaded frame and keep the result under its own name.
data_norm = normalize_data(data, ticker)

# Display the returned frame (the value passed on to the conversion step)
# rather than the input variable.
data_norm


Unnamed: 0,Date,Open,High,Low,Close,Volume,item_id
0,2022-03-21 09:30:00,0.793242,0.796854,0.777098,0.784052,0.144648,BAC
1,2022-03-21 10:30:00,0.784243,0.784840,0.780157,0.783174,0.060604,BAC
2,2022-03-21 11:30:00,0.783145,0.781127,0.778846,0.785588,0.045272,BAC
3,2022-03-21 12:30:00,0.785779,0.780690,0.770105,0.771321,0.073013,BAC
4,2022-03-21 13:30:00,0.771293,0.769331,0.771853,0.776567,0.051724,BAC
...,...,...,...,...,...,...,...
5081,2025-02-14 11:30:00,0.947985,0.945391,0.948427,0.953945,0.022130,BAC
5082,2025-02-14 12:30:00,0.953911,0.948012,0.951923,0.953725,0.021505,BAC
5083,2025-02-14 13:30:00,0.953691,0.952818,0.955569,0.957676,0.031171,BAC
5084,2025-02-14 14:30:00,0.957862,0.954565,0.958916,0.959871,0.032044,BAC


In [20]:
def convert_to_chronos_format(data, ticker):
    """Build a ``TimeSeriesDataFrame`` from a flat OHLCV frame.

    The frame is given an ``item_id`` column (filled with ``ticker``) when it
    has none, reduced to the columns item_id, Date, Close, Open, High, Low,
    Volume in that order, and its ``Date`` column is renamed to
    ``timestamp`` before being handed to ``TimeSeriesDataFrame``. The input
    frame is not modified.

    Args:
        data: pandas DataFrame with at least the columns "Date", "Close",
            "Open", "High", "Low" and "Volume".
        ticker: Value used for ``item_id`` when the column is absent; also
            used to label the log messages.

    Returns:
        The ``TimeSeriesDataFrame`` on success, or None when conversion fails
        (the error is logged).
    """
    try:
        # assign() returns a new frame, so the caller's DataFrame does not
        # silently gain an item_id column.
        tagged = data if "item_id" in data.columns else data.assign(item_id=ticker)

        # Re-order and rename columns for Chronos
        chronos_data = tagged[
            ["item_id", "Date", "Close", "Open", "High", "Low", "Volume"]
        ].rename(columns={"Date": "timestamp"})

        # Convert the modified DataFrame to a TimeSeriesDataFrame
        ts_data = TimeSeriesDataFrame(chronos_data)

        logging.info(f"✅ The data of {ticker} successfully converted to Chronos format.")
        return ts_data

    except Exception as e:
        logging.error(f"❌ An error occurred when converting the data for {ticker} to Chronos format: {e}", exc_info=True)
        return None

# Convert the normalized frame; the result is None if the conversion failed.
chronos_data = convert_to_chronos_format(data_norm, ticker)

# Bare last expression so the notebook renders the converted frame.
chronos_data


Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Open,High,Low,Volume
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BAC,2022-03-21 09:30:00,0.784052,0.793242,0.796854,0.777098,0.144648
BAC,2022-03-21 10:30:00,0.783174,0.784243,0.784840,0.780157,0.060604
BAC,2022-03-21 11:30:00,0.785588,0.783145,0.781127,0.778846,0.045272
BAC,2022-03-21 12:30:00,0.771321,0.785779,0.780690,0.770105,0.073013
BAC,2022-03-21 13:30:00,0.776567,0.771293,0.769331,0.771853,0.051724
BAC,...,...,...,...,...,...
BAC,2025-02-14 11:30:00,0.953945,0.947985,0.945391,0.948427,0.022130
BAC,2025-02-14 12:30:00,0.953725,0.953911,0.948012,0.951923,0.021505
BAC,2025-02-14 13:30:00,0.957676,0.953691,0.952818,0.955569,0.031171
BAC,2025-02-14 14:30:00,0.959871,0.957862,0.954565,0.958916,0.032044


In [24]:
def saving_ts_chronos(data, ticker, path="nb_data"):
    """Write ``data`` to ``<path>/<ticker>.parquet``.

    The target directory is created (including parents) when it does not
    exist yet.

    Args:
        data: Object exposing ``to_parquet`` (the converted time-series frame
            in this notebook).
        ticker: Used as the file stem and to label the log messages.
        path: Output directory; defaults to "nb_data".

    Returns:
        The ``pathlib.Path`` of the written file on success, or None when
        saving fails (the error is logged with its traceback).
    """
    try:
        output_dir = Path(path)
        output_dir.mkdir(parents=True, exist_ok=True)
        file_path = output_dir / f"{ticker}.parquet"

        data.to_parquet(file_path, index=True)
        logging.info(f"✅ Chronos data saved for {ticker} at {file_path}.")
        return file_path

    except Exception as e:
        # exc_info=True keeps the traceback, matching the other helpers here.
        logging.error(f"❌ Saving Chronos data for {ticker} was unsuccessful. Error: {e}", exc_info=True)
        return None

# Write the converted frame to <ticker>.parquet in the function's default
# output directory ("nb_data").
saving_ts_chronos(chronos_data, ticker)


In [1]:
# NOTE(review): this cell repeats the earlier load cell verbatim. It needs
# `Path` and `pd` from the import cell; the recorded NameError shows it was
# executed before that cell had run. Run the imports first (Restart & Run All)
# or remove this duplicate.
file_path = Path("/Users/akramchakrouni/Projects/time-series-forecasting-cluserting/data/chronos/ADBE.parquet")
data = pd.read_parquet(file_path) 
# Ticker label is the file name without its extension.
ticker = file_path.stem

print(ticker)

data

NameError: name 'Path' is not defined