In [12]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import copy
from math import sqrt,ceil
from scipy.stats import linregress
import libraries as lib
import questionary
import click

In [13]:
# Root folder that holds all log files (a relative path, so it is resolved
# against the current working directory).
hoofdmap = '../../../../OneDrive - HvA/Logfiles'

# Top-level sub-folders of the root folder that should be indexed.
belangrijke_submappen = [
    "H2A",
    "SOLAR",
]


In [14]:
# One record (dict) per file found beneath the selected top-level folders.
data = []

for main_submap in belangrijke_submappen:
    main_submap_path = os.path.join(hoofdmap, main_submap)

    if os.path.exists(main_submap_path):
        for root, dirs, files in os.walk(main_submap_path):
            # Folder components of the directory being visited, relative to the root folder.
            relative_path = os.path.relpath(root, hoofdmap)
            path_parts = [] if relative_path == '.' else relative_path.split(os.sep)

            # The first component names the top-level folder; fall back to the
            # loop value when there are no components at all.
            main_folder = main_submap if not path_parts else path_parts[0]

            # Only files produce records; directories are merely traversed.
            for file_name in files:
                file_path = os.path.join(root, file_name)
                file_relative_path = os.path.relpath(file_path, hoofdmap)
                file_path_parts = file_relative_path.split(os.sep)

                # Strip the top-level folder (first part) and the file name (last part).
                sub_path_parts = file_path_parts[1:-1] if len(file_path_parts) > 1 else []

                data.append(dict(
                    main_folder=main_folder,
                    pad=file_path,
                    path_parts=sub_path_parts,
                    filename=file_name,
                ))
    else:
        # Missing top-level folder: warn and move on to the next one.
        print(f"Waarschuwing: {main_submap_path} bestaat niet.")


In [15]:
# Build a DataFrame from the collected file records. Naming the columns
# explicitly keeps the frame well-formed even when no files were found.
df = pd.DataFrame(data, columns=['main_folder', 'pad', 'path_parts', 'filename'])

# Keep at most this many folder levels as separate columns.
max_levels = 6

# Deepest folder nesting actually present (top-level folder and file name
# excluded), capped at max_levels. An empty index has no depth at all.
max_depth = min(int(df['path_parts'].map(len).max()), max_levels) if len(df) else 0

# Column names for every level: map_level_1, map_level_2, ...
kolomnamen = [f'map_level_{i+1}' for i in range(max_depth)]

# One column per folder level. Paths shallower than max_depth are padded with
# '' and deeper paths are truncated (the complete path remains available in
# 'pad'). Passing the raw, untruncated lists together with only max_levels
# column names made the DataFrame constructor fail for paths deeper than
# max_levels, so the cap is applied to the values here as well.
path_df = pd.DataFrame(
    {
        kolom: [parts[niveau] if niveau < len(parts) else '' for parts in df['path_parts']]
        for niveau, kolom in enumerate(kolomnamen)
    },
    index=df.index,
)

# Combine the fixed columns with the per-level columns and the file name.
df_final = pd.concat([df[['main_folder', 'pad']], path_df, df['filename']], axis=1)

# Explicit column order for readability.
kolom_volgorde = ['main_folder', 'pad'] + kolomnamen + ['filename']
df_final = df_final[kolom_volgorde]

# Display the resulting file index.
df_final


Unnamed: 0,main_folder,pad,map_level_1,map_level_2,map_level_3,map_level_4,map_level_5,filename
0,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2020\1...,2020,,,,,1_10-2020.zip
1,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2021\2...,2021,20211202_nulmeting_h2a,0024,,,4.csv
2,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2021\2...,2021,20211202_nulmeting_h2a,0024,,,F.csv
3,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2021\2...,2021,20211202_nulmeting_h2a,0024,,,I.csv
4,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2021\2...,2021,20211202_nulmeting_h2a,0024_new,,,4.csv
...,...,...,...,...,...,...,...,...
9132,SOLAR,../../../../OneDrive - HvA/Logfiles\SOLAR\2024...,2024,old,terugvaart_monaco-old,8-7-2024,0232,8.csv
9133,SOLAR,../../../../OneDrive - HvA/Logfiles\SOLAR\2024...,2024,old,terugvaart_monaco-old,8-7-2024,0232,cmlogger-status.log
9134,SOLAR,../../../../OneDrive - HvA/Logfiles\SOLAR\2024...,2024,old,terugvaart_monaco-old,8-7-2024,0232,F.csv
9135,SOLAR,../../../../OneDrive - HvA/Logfiles\SOLAR\2024...,2024,old,terugvaart_monaco-old,8-7-2024,0232,I.csv


In [16]:
def filter_dataframe(df, filter_conditions):
    """
    Return the rows of ``df`` that satisfy every condition in ``filter_conditions``.

    Parameters:
    - df (pd.DataFrame): The DataFrame to filter; it is not modified.
    - filter_conditions (dict): Maps a column name to the wanted value. A list,
      tuple, set or frozenset means "any of these values" (``Series.isin``);
      every other value is compared with ``==``.

    Returns:
    - pd.DataFrame: A new DataFrame holding the matching rows with their
      original index labels. With no conditions this is a copy of ``df``.

    Raises:
    - KeyError: If a column named in ``filter_conditions`` is not in ``df``.
    """
    filtered_df = df.copy()
    for column, value in filter_conditions.items():
        # Tuples and sets used to fall through to ``==``, where they were
        # compared as a single object and so did not select the listed values.
        if isinstance(value, (list, tuple, set, frozenset)):
            mask = filtered_df[column].isin(value)
        else:
            mask = filtered_df[column] == value
        filtered_df = filtered_df[mask]
    return filtered_df

In [32]:
def get_file_paths(df):
    """
    Collect the values of the 'pad' column as a plain Python list.

    Parameters:
    - df (pd.DataFrame): A DataFrame that has a 'pad' column with file paths.

    Returns:
    - list: The 'pad' values, in row order.
    """
    pad_column = df['pad']
    return pad_column.tolist()


In [33]:
def load_data_files(file_paths):
    """
    Load every file in ``file_paths`` through ``lib.DataInladen``.

    A file whose load raises an exception is reported on stdout and skipped,
    so the returned list can be shorter than ``file_paths`` and its positions
    do not necessarily line up with the positions in ``file_paths``.

    Parameters:
    - file_paths (list): The file paths to load.

    Returns:
    - list: The objects returned by ``lib.DataInladen`` for the files that
      loaded successfully, in input order.
    """
    loaded_items = []
    for path in file_paths:
        try:
            loaded = lib.DataInladen(path)
        except Exception as e:
            # Best effort: report the failure and carry on with the next file.
            print(f"Error loading {path}: {e}")
        else:
            loaded_items.append(loaded)
    return loaded_items


In [17]:
# Conditions applied to the file index; a row must satisfy all of them.
filter_conditions = dict(
    main_folder='H2A',                            # only the 'H2A' top-level folder
    map_level_1=['2021', '2022'],                 # any of these years
    map_level_2='20220628-20220701_race_nogaro',  # one specific sub-folder
    filename='1.csv',                             # only files with this exact name
)

filtered_df = filter_dataframe(df=df_final, filter_conditions=filter_conditions)

# Number of matching files, followed by the matching rows themselves.
print(filtered_df.shape[0])
filtered_df

22


Unnamed: 0,main_folder,pad,map_level_1,map_level_2,map_level_3,map_level_4,map_level_5,filename
682,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2022\2...,2022,20220628-20220701_race_nogaro,0002,,,1.csv
690,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2022\2...,2022,20220628-20220701_race_nogaro,0003,,,1.csv
703,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2022\2...,2022,20220628-20220701_race_nogaro,0005,,,1.csv
710,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2022\2...,2022,20220628-20220701_race_nogaro,0006,,,1.csv
716,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2022\2...,2022,20220628-20220701_race_nogaro,0007,,,1.csv
724,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2022\2...,2022,20220628-20220701_race_nogaro,0008,,,1.csv
732,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2022\2...,2022,20220628-20220701_race_nogaro,0009,,,1.csv
740,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2022\2...,2022,20220628-20220701_race_nogaro,0010,,,1.csv
747,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2022\2...,2022,20220628-20220701_race_nogaro,0011,,,1.csv
754,H2A,../../../../OneDrive - HvA/Logfiles\H2A\2022\2...,2022,20220628-20220701_race_nogaro,0012,,,1.csv


In [21]:
# Turn the filtered index rows into paths, then try to load each file.
file_paths = get_file_paths(df=filtered_df)
data_files = load_data_files(file_paths=file_paths)

Error loading ../../../../OneDrive - HvA/Logfiles\H2A\2022\20220628-20220701_race_nogaro\0006\1.csv: index 0 is out of bounds for axis 0 with size 0
Error loading ../../../../OneDrive - HvA/Logfiles\H2A\2022\20220628-20220701_race_nogaro\0016\1.csv: No columns to parse from file


In [37]:
# Show the item at position 8 of the loaded results. load_data_files skips
# files that fail to load, so this position need not match file_paths[8].
data_files[8]

Unnamed: 0,Datalogger port,"Dataloggertijd, in s",Format header (>08|03),NMEA type($GPRMC),Tijd,Status,Latitude,Noord of Zuid (N/S),Longitude,Oost of West (E/W),...,Mode indicator (A),Data in balancers,DegreesTrue,T,DegreesMagnetic,M,Snelheid tenopzichte van het water in Knots,N,Snelheid tenopzichte van het water in Kilometers,K
0,1,70.634,08|03,$GPRMC,120058.0,V,0.0,N,0.0,E,...,N,1100000011,,,,,,,,
1,1,71.6338,08|03,$GPRMC,120059.0,V,0.0,N,0.0,E,...,N,1100000011,,,,,,,,
2,1,72.634,08|03,$GPRMC,120100.0,V,0.0,N,0.0,E,...,N,1100000011,,,,,,,,
3,1,73.6339,08|03,$GPRMC,120101.0,V,0.0,N,0.0,E,...,N,1100000011,,,,,,,,
4,1,74.6342,08|03,$GPRMC,120102.0,V,0.0,N,0.0,E,...,N,1100000011,,,,,,,,
5,1,75.634,08|03,$GPRMC,120103.0,V,0.0,N,0.0,E,...,N,1100000011,,,,,,,,
6,1,76.6345,08|03,$GPRMC,120104.0,V,0.0,N,0.0,E,...,N,1100000011,,,,,,,,
7,1,77.635,08|03,$GPRMC,120105.0,V,0.0,N,0.0,E,...,N,1100000011,,,,,,,,
8,1,78.6349,08|03,$GPRMC,120106.0,V,0.0,N,0.0,E,...,N,1100000011,,,,,,,,
9,1,79.635,08|03,$GPRMC,120107.0,V,0.0,N,0.0,E,...,N,1100000011,,,,,,,,
