In [17]:
import pandas as pd
import os

# Path where your raw CSV files are saved
DATA_FOLDER = '../data/raw'  # Adjust if needed

def load_price_data(folder: str = DATA_FOLDER) -> pd.DataFrame:
    """Load every CSV file in ``folder`` and join them into one wide DataFrame.

    Each file is read with a two-row column header and its first column as a
    date-parsed index. The per-file frames are placed side by side
    (column-wise) and the result is sorted by index.

    Parameters
    ----------
    folder : str
        Directory to scan for files whose name ends in ``.csv``.

    Returns
    -------
    pandas.DataFrame
        Column-wise concatenation of all files, taken in sorted filename
        order, with rows sorted by index.

    Raises
    ------
    ValueError
        If ``folder`` contains no ``.csv`` files.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the column
    # layout identical from run to run and across machines.
    csv_names = sorted(name for name in os.listdir(folder) if name.endswith(".csv"))

    # Fail with a message that names the folder instead of letting pd.concat
    # raise its generic "No objects to concatenate" error.
    if not csv_names:
        raise ValueError(f"No CSV files found in {folder!r}")

    # Read with multi-index header; if a file holds several tickers, its
    # second column level carries them through unchanged.
    frames = [
        pd.read_csv(os.path.join(folder, name), header=[0, 1], index_col=0, parse_dates=True)
        for name in csv_names
    ]

    # Combine all files into a single dataframe (wide format)
    return pd.concat(frames, axis=1).sort_index()

# Build the combined wide-format price table from the raw CSV folder
price_data = load_price_data()

# Sanity check: the column level names (expected ['Price', 'Ticker']),
# followed by the first few rows
print(price_data.columns.names, price_data.head(), sep="\n")


['Price', 'Ticker']
Price       Adj Close      Close       High        Low       Open   Volume  \
Ticker         BNP.PA     BNP.PA     BNP.PA     BNP.PA     BNP.PA   BNP.PA   
Date                                                                         
2018-01-02  35.684517  62.090000  62.700001  61.660000  62.680000  2785984   
2018-01-03  36.000614  62.639999  63.270000  61.980000  62.320000  3766640   
2018-01-04  36.650051  63.770000  64.290001  62.709999  62.709999  4272372   
2018-01-05  36.719017  63.889999  64.589996  63.669998  64.139999  3125691   
2018-01-08  37.069603  64.500000  64.839996  64.010002  64.290001  3030090   

Price       Adj Close      Close       High        Low  ...       High  \
Ticker          ML.PA      ML.PA      ML.PA      ML.PA  ...      SU.PA   
Date                                                    ...              
2018-01-02  23.039497  30.012501  30.012501  29.350000  ...  71.040001   
2018-01-03  23.298582  30.350000  30.400000  29.962500  ...

In [19]:
# Make sure the output directory is present (no error if it already exists)
os.makedirs('../data/preprocessed', exist_ok=True)

# Persist the combined price table in parquet format (recommended)
price_data.to_parquet(path='../data/preprocessed/prices.parquet')


In [20]:
# Also write a CSV copy of the combined price table next to the parquet file
price_data.to_csv(path_or_buf='../data/preprocessed/prices.csv')
