In [105]:
import os
import pandas as pd
from datetime import datetime

def _get_cell_location_by_value(df: pd.DataFrame, value_to_locate="-"):
	"""Return the (row label, column label) of every cell equal to a value.

	Args:
		df: Frame to search.
		value_to_locate: Value compared against each cell with
			``DataFrame.eq``.

	Returns:
		A list of ``(row_label, column_label)`` tuples in row-major order;
		an empty list when nothing matches.
	"""
	# Read the hit positions straight from the boolean mask instead of
	# masking the frame and counting on ``stack()`` to discard the NaN
	# placeholders: whether ``stack()`` drops NaN depends on the pandas
	# version, so that approach can end up reporting every cell as a match.
	mask = df.eq(value_to_locate).to_numpy(dtype=bool)
	row_positions, col_positions = mask.nonzero()
	return [
		(df.index[r], df.columns[c])
		for r, c in zip(row_positions, col_positions)
	]

def _get_humidity_levels(df: pd.DataFrame):
	"""Average the readings found next to each "-" marker cell.

	For every cell equal to "-", the value located two columns to the right
	on the same row is converted with ``float``; the mean of those values is
	returned.

	Args:
		df: Sheet contents. The cells two columns right of each marker must
			be convertible with ``float``.

	Returns:
		The mean reading rounded to 6 decimal places, or ``nan`` when the
		frame contains no "-" marker.

	Raises:
		IndexError: If a marker sits in one of the last two columns.
		ValueError: If a reading is a string that is not a valid number.
	"""
	locations = _get_cell_location_by_value(df)
	if not locations:
		# No marker means no readings; report "no data" instead of
		# dividing by zero.
		return float("nan")

	columns = list(df.columns)
	readings = [
		float(df.at[row, columns[columns.index(col) + 2]])
		for row, col in locations
	]
	return round(sum(readings) / len(readings), 6)

def _get_time_in_seconds(time_object: datetime):
	"""Convert the time-of-day part of a datetime to seconds since midnight.

	The date part is ignored; microseconds become the fractional part of
	the result.
	"""
	whole_seconds = (time_object.hour * 60 + time_object.minute) * 60 + time_object.second
	fraction = time_object.microsecond / 1e6
	return whole_seconds + fraction

def _convert_time_column_to_relative_seconds(df:pd.DataFrame, time_column_name='Time'):
	"""Replace a timestamp column with seconds elapsed since its earliest entry.

	Args:
		df: Frame holding the timestamp column; it is not modified.
		time_column_name: Name of the column to convert. Values may be
			datetime-like objects or strings that ``pd.Timestamp`` can
			parse, with or without fractional seconds.

	Returns:
		A copy of ``df`` whose ``time_column_name`` column holds float
		seconds relative to the earliest timestamp in that column.
	"""
	raw_times = df[time_column_name]
	# Parse value by value so entries with and without a fractional part
	# can be mixed, and keep the full timestamp (date included) so a
	# recording that runs past midnight keeps increasing.
	timestamps = pd.Series(
		pd.to_datetime([pd.Timestamp(value) for value in raw_times]),
		index=raw_times.index,
	)
	elapsed = (timestamps - timestamps.min()).dt.total_seconds()

	response = df.copy()
	response[time_column_name] = elapsed
	return response

def _get_closest_line(target_in_seconds:float, df:pd.DataFrame, time_in_seconds_column_name='Time'):
	"""Return the index label of the row whose time is nearest to the target.

	When several rows are equally close, the first of them is returned.
	"""
	distances = df[time_in_seconds_column_name].sub(target_in_seconds).abs()
	return distances.idxmin()

def diggest_files_into_single_dataframe(data_path:str):
	"""Combine the Excel files of one collection folder into a single frame.

	Two kinds of files are read, told apart by the first letter of the name:

	* ``E...``: a log with a ``Time`` column, which is converted by
	  ``_convert_time_column_to_relative_seconds``. If several exist, the
	  alphabetically last one is used.
	* ``U...``: a sheet passed to ``_get_humidity_levels``. Its name carries
	  the sampling instant in minutes between the 6th character and
	  ``_min``.

	For each ``U`` file, the log row closest in time is copied and extended
	with the sheet's mean reading under ``'Umidade Produto [%]'``.

	Args:
		data_path: Directory containing the files.

	Returns:
		One row per ``U`` file, ordered by sampling instant; an empty frame
		when the folder has no ``U`` files.

	Raises:
		FileNotFoundError: If ``U`` files exist but no ``E`` file does.
		ValueError: If a ``U`` file name has no ``_min`` marker or the text
			before it is not an integer.
	"""
	filenames = sorted(
		name for name in os.listdir(data_path)
		if os.path.isfile(os.path.join(data_path, name))
	)

	# ``os.listdir`` yields names in arbitrary order, so the log is loaded in
	# its own pass: every humidity sheet needs it, wherever it was listed.
	data_df = None
	for filename in filenames:
		if filename.startswith('E'):
			df = pd.read_excel(os.path.join(data_path, filename))
			data_df = _convert_time_column_to_relative_seconds(df)

	measurements = []
	for filename in filenames:
		if not filename.startswith('U'):
			continue
		marker = filename.find('_min')
		if marker == -1:
			# A -1 from ``find`` would otherwise be used as a slice bound
			# and silently cut the last character of the name.
			raise ValueError(
				f"Cannot read the sampling time from file name {filename!r}: missing '_min'"
			)
		measurements.append((int(filename[5:marker]) * 60, filename))

	if measurements and data_df is None:
		raise FileNotFoundError(
			f"No log file starting with 'E' found in {data_path!r}"
		)

	rows = []
	for time, filename in sorted(measurements):
		closest_line = _get_closest_line(time, data_df)

		df = pd.read_excel(os.path.join(data_path, filename), dtype=str)
		humidity = _get_humidity_levels(df)

		line = dict(data_df.loc[closest_line])
		line['Umidade Produto [%]'] = humidity
		rows.append(line)

	# Build the frame once instead of concatenating inside the loop.
	return pd.DataFrame(rows)

# Folder with the collected Excel files; the path is relative to the
# current working directory.
data_path = '../data/collected_data/09_06_2023'

# NOTE(review): ``display`` is not defined in this file; presumably the
# IPython/Jupyter builtin that renders the frame as the cell output.
display(diggest_files_into_single_dataframe(data_path=data_path))

Unnamed: 0,Time,PT100 1 [ºC],PT100 2 [ºC],Temp. TH 1 [ºC],Umidade 1 [%],Temp. TH 2 [ºC],Umidade 2 [%],Umidade Produto [%]
0,0.0,23.681101,23.62614,23.174812,57.70434,24.255182,73.746678,55.223333
1,899.564,27.150231,27.212689,24.787157,47.790284,28.432172,87.234795,48.006667
2,1798.938,26.887455,26.675792,26.097392,44.201039,26.974166,92.332055,41.003333
3,2700.298,26.897853,26.742584,27.033274,42.788991,26.88222,93.655657,7.083333
4,3601.595,43.864988,39.788933,27.676899,40.073266,36.175378,36.28046,2.283333
