In [19]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import os

In [20]:
def read_data(data_path, excluded_files):
    """Load every non-excluded entry of a directory as a pandas DataFrame.

    The directory is listed exactly once and the entries are sorted, so the
    two returned lists are guaranteed to be aligned with each other and their
    order is reproducible across runs and machines (``os.listdir`` itself
    returns entries in arbitrary order).

    Parameters
    ----------
    data_path : str or os.PathLike
        Directory whose entries are read with ``pd.read_csv``.
    excluded_files : collection of str
        File names (not full paths) to skip.

    Returns
    -------
    dataframes : list of pandas.DataFrame
        One frame per entry that was read; malformed lines are dropped
        (``on_bad_lines='skip'``).
    names : list of str
        Entry names, with ``names[i]`` being the source of ``dataframes[i]``.

    Raises
    ------
    FileNotFoundError
        If ``data_path`` does not exist.
    """
    # Single scan + sort: one source of truth for both returned lists.
    names = sorted(
        entry for entry in os.listdir(data_path) if entry not in excluded_files
    )
    dataframes = [
        pd.read_csv(os.path.join(data_path, name), on_bad_lines="skip")
        for name in names
    ]
    return dataframes, names

In [23]:
from pathlib import Path

# Input: one sub-directory per recording, each holding CSV files.
root_dir = Path("uncompressed")
# Output: one .npy file per recording directory.
output_dir = Path("numpy_data")
excluded_files = {"info.csv"}

# np.save does not create missing directories; without this, every save would
# raise FileNotFoundError when the output directory is absent.
output_dir.mkdir(parents=True, exist_ok=True)

for directory in root_dir.iterdir():
    if not directory.is_dir():
        continue

    print(f"{directory.as_posix()}:")

    # Only the read step is guarded, so a failure while saving is never
    # misreported as a missing input.
    try:
        dataframes, names = read_data(directory, excluded_files)
    except FileNotFoundError as err:
        print(f"Warning: could not read {directory.name}: {err}")
        continue

    # max() on an empty sequence raises ValueError; skip empty recordings.
    if not dataframes:
        print(f"Warning: no data files found in {directory.name}, skipping")
        continue

    max_len = max(df.shape[0] for df in dataframes)

    # Keep only numeric columns and NaN-pad every table at the bottom to
    # max_len rows so they can be stacked side by side.
    padded = []
    for df in dataframes:
        arr = df.select_dtypes(include=[np.number]).to_numpy(dtype=float)
        padded.append(
            np.pad(
                arr,
                pad_width=((0, max_len - arr.shape[0]), (0, 0)),
                mode="constant",
                constant_values=np.nan,
            )
        )

    # Resulting shape: (max_len, total number of numeric columns).
    combined_array = np.concatenate(padded, axis=1)

    np.save(output_dir / f"{directory.name}.npy", combined_array)

uncompressed/2025_06_17_14_29_felvetel:
uncompressed/2025_06_20_10_44_felvetel:
uncompressed/2025_05_30_15_15_felvetel:
uncompressed/2025_06_13_13_12_felvetel:
uncompressed/2025_07_09_11_00_felvetel:
uncompressed/2025_06_16_13_18_felvetel:
uncompressed/2025_07_10_13_40_felvetel:
uncompressed/2025_07_09_09_00_felvetel:
uncompressed/2025_05_16_felvetel:
uncompressed/2025_06_16_14_41_felvetel:
uncompressed/2025_05_29_10_30_2_felvetel:
uncompressed/2025_05_29_10_30_felvetel:
uncompressed/2025_07_09_14_00_felvetel:
uncompressed/2025_07_10_14_00_felvetel:
uncompressed/2025_07_09_13_30_felvetel:
uncompressed/2025_04_16_felvetel:
uncompressed/2025_05_20_15_00_felvetel:
uncompressed/2025_05_30_14_50_felvetel:
uncompressed/2025_06_13_12_43_felvetel:
uncompressed/2025_06_16_13_46_felvetel:
uncompressed/2025_06_11_14_30_felvetel:
uncompressed/2025_05_20_12_30_felvetel:
uncompressed/2025_06_10_15_10_felvetel:
uncompressed/2025_06_13_12_13_felvetel:
uncompressed/2025_05_28_14_00_felvetel:
uncompress