In [4]:
import sys  
import os
import importlib
sys.path.insert(1, os.path.abspath('../scripts'))
sys.path.insert(1, os.path.abspath('../configs'))
import data_processor
importlib.reload(data_processor)
from data_processor import DataProcessor
import config
import pandas as pd
import numpy as np
from IPython.display import display


def main() -> None:
    """Run the data-cleaning pipeline and save the result to Excel.

    Loads the workbook at ``config.file_path`` through ``DataProcessor``,
    applies the header/typo/BMI/age/numeric clean-up steps in a fixed order,
    tags each row as real or simulated, blanks out invalid values with NaN
    and finally calls ``processor.save_to_excel()``.

    The individual steps are implemented by ``DataProcessor`` (not visible
    here); this function only fixes their order and arguments.
    """
    # Last index label that is tagged 'Studierende'; every later label is
    # tagged 'simulierte Daten'. ``.loc`` slices by label and includes both
    # endpoints, so ``:75`` covers labels 0..75.
    last_student_label = 75

    processor = DataProcessor(config.file_path)
    processor.load_data()

    processor.combine_columns(17, 18)
    processor.drop_empty_columns()
    processor.set_headers(config.new_headers)
    for column in ('Geschlecht', 'Priorisierte Hand'):
        processor.apply_corrections(column, config.typos)

    # Write the labels through the processor's current frame rather than the
    # object returned by load_data(): if any step above rebinds processor.df,
    # a handle captured earlier would be stale and these columns would never
    # reach the saved file.
    # NOTE(review): assumes DataProcessor exposes its working frame as `.df`
    # — confirm against data_processor.py.
    frame = processor.df
    frame.loc[:last_student_label, 'Type'] = 'Studierende'
    frame.loc[last_student_label + 1:, 'Type'] = 'simulierte Daten'
    frame.loc[:last_student_label, 'Gruppe'] = 1

    processor.calculate_and_update_bmi()
    processor.calculate_or_correct_age()
    processor.correct_column()
    processor.convert_columns_to_numeric(config.numeric_columns)
    processor.standardize_data('Häufigkeit Blinzeln (/min)', std_unit=60, rel_unit=116)

    # Called for the invalid-row counts it logs; the returned rows are not
    # needed by the rest of the pipeline.
    processor.get_invalid_rows(config.expected_types)

    processor.mark_invalid_data(config.expected_types, placeholder=np.nan)

    # Save the DataFrame
    processor.save_to_excel()

    # Lift pandas' display truncation limits so the full frame can be
    # inspected afterwards in the same session, e.g. display(processor.df).
    for option in ('display.max_rows', 'display.max_columns',
                   'display.width', 'display.max_colwidth'):
        pd.set_option(option, None)


# Run the pipeline only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


INFO:root:Loading data from Excel file.
INFO:root:Combining columns: 17 and 18
INFO:root:Dropping empty columns.
INFO:root:Setting new headers.
INFO:root:Applying typo corrections to Geschlecht.
INFO:root:Applying typo corrections to Priorisierte Hand.
INFO:root:Calculating and updating BMI.
INFO:root:Missing or incorrect ages calculated/corrected using 'Geb.-Datum'.
INFO:root:Converting specified columns to numeric values.
INFO:root:Standardizing data in column Häufigkeit Blinzeln (/min) to per 60 seconds, relative to 116 seconds.
INFO:root:Getting invalid rows.
INFO:root:Type-based invalid rows: 54
INFO:root:Custom invalid rows: 32
INFO:root:Combined invalid rows: 55
INFO:root:Marking invalid data.
