In [1]:
import pandas as pd
import numpy as np
import math
import os

In [2]:
def slice_params(df, param_name):
    """Split ``df`` into one single-row DataFrame per row.

    Args:
        df: DataFrame to split.
        param_name: Prefix used when naming the resulting entries.

    Returns:
        dict mapping ``"<param_name>_peak<k>"`` (``k`` counts rows from 1, in
        row order) to that row as a one-row DataFrame whose index is reset
        to 0. An empty ``df`` yields an empty dict.
    """
    return {
        f'{param_name}_peak{position}': df.iloc[[position - 1]].reset_index(drop=True)
        for position in range(1, len(df) + 1)
    }

In [3]:
def identify_mean_and_percent_diff(dfs, value, param):
    """Pick a baseline per peak and express each device column relative to it.

    Args:
        dfs: Mapping of peak name -> DataFrame; only row 0 of each is read.
        value: ``"r"`` takes the baseline from column position 1, ``"a"``
            from column position 7. Any other value stores no baseline, so
            the first device column then raises ``KeyError``.
            NOTE(review): positions 1 and 7 presumably correspond to the
            ``reference_<param>`` and ``mean_<param>`` columns -- confirm
            against the CSV layout.
        param: Parameter name. Columns ``datetime``, ``reference_<param>``
            and ``mean_<param>`` are skipped. For ``"temp"`` the plain
            difference is reported; otherwise the difference divided by the
            average of reading and baseline, as a percentage.

    Returns:
        ``(mean, percent_diffs)``: ``mean`` maps peak -> baseline value and
        ``percent_diffs`` maps peak -> list of strings formatted to two
        decimals (with a trailing ``%`` unless ``param`` is ``"temp"``), one
        per remaining column in column order.
    """
    # Column position of the baseline is the same for every peak.
    if value == "r":
        baseline_position = 1
    elif value == "a":
        baseline_position = 7
    else:
        baseline_position = None

    skipped = ("datetime", f"reference_{param}", f"mean_{param}")
    absolute_difference = param == "temp"

    mean = {}
    percent_diffs = {}
    for peak, frame in dfs.items():
        if baseline_position is not None:
            mean[peak] = frame.iloc[0, baseline_position]

        formatted = []
        percent_diffs[peak] = formatted
        for column in frame.columns:
            if column in skipped:
                continue
            reading = frame.iloc[0][column]
            baseline = mean[peak]
            if absolute_difference:
                delta = reading - baseline
                formatted.append(f"{delta:.2f}")
            else:
                # Difference relative to the midpoint of the two values.
                relative = (reading - baseline) / ((reading + baseline) / 2)
                formatted.append(f"{relative*100:.2f}%")

    return mean, percent_diffs

In [4]:
def evaluate_result_1(mean, sliced_df, param):
    """Grade each device reading against a fixed band around the baseline.

    Args:
        mean: Mapping of peak name -> baseline value (looked up with ``get``).
        sliced_df: Mapping of peak name -> DataFrame; only row 0 is read.
        param: ``"temp"`` passes readings within +/- 1 of the baseline,
            ``"humidity"`` passes readings within 92%..108% of it (bounds
            inclusive). For any other name no verdict is produced.

    Returns:
        dict mapping peak name -> list of ``"Pass"``/``"Fail"`` strings, one
        per column other than ``datetime``, ``reference_<param>`` and
        ``mean_<param>``, in column order.
    """
    ignored = ("datetime", f"reference_{param}", f"mean_{param}")

    def verdict(reading, target):
        # None signals "no rule for this parameter" so nothing is recorded.
        if param == "temp":
            within = target - 1 <= reading <= target + 1
        elif param == "humidity":
            within = target * 0.92 <= reading <= target * 1.08
        else:
            return None
        return "Pass" if within else "Fail"

    results_dict = {}
    for peak, frame in sliced_df.items():
        target = mean.get(peak)
        outcomes = (
            verdict(frame.iloc[0][column], target)
            for column in frame.columns
            if column not in ignored
        )
        results_dict[peak] = [outcome for outcome in outcomes if outcome is not None]

    return results_dict

In [5]:
# Tolerance specification per parameter:
# (low, mid, high, relative tolerance for [low, mid],
#  relative tolerance for (mid, high], absolute tolerance).
_RANGE_TOLERANCES = {
    "co2": (400, 2000, 5000, 0.03, 0.05, 50),
    "tvoc": (65, 440, 2180, 0.15, 0.20, 8.7),
    "pm2p5": (0, 150, 500, 0.15, 0.20, 5),
}


def evaluate_result_2(mean, sliced_df, param):
    """Grade each device reading with range-dependent tolerances.

    The tolerance band is ``baseline * (1 +/- rel) +/- abs`` where ``rel`` is
    chosen by which range the *reading* falls into: ``[low, mid]`` uses the
    first relative tolerance, ``(mid, high]`` the second.

    Args:
        mean: Mapping of peak name -> baseline value.
        sliced_df: Mapping of peak name -> DataFrame; only row 0 is read.
        param: One of ``"co2"``, ``"tvoc"``, ``"pm2p5"``.

    Returns:
        dict mapping peak name -> list of ``"Pass"``/``"Fail"`` strings with
        exactly one entry per column other than ``datetime``,
        ``reference_<param>`` and ``mean_<param>``, in column order.

    Raises:
        ValueError: If ``param`` has no tolerance specification, or if
            ``mean`` has no (non-None) entry for a peak in ``sliced_df``.
    """
    # Fail early with a clear message instead of hitting unbound thresholds
    # at the first device column.
    if param not in _RANGE_TOLERANCES:
        raise ValueError(
            f"Unsupported parameter: {param!r}; expected one of {sorted(_RANGE_TOLERANCES)}"
        )
    low, mid, high, error_percent_1, error_percent_2, error_number = _RANGE_TOLERANCES[param]
    excluded = ("datetime", f"reference_{param}", f"mean_{param}")

    results_dict = {}

    for peak, df in sliced_df.items():
        mean_value = mean.get(peak)
        if mean_value is None:
            raise ValueError(f"No mean value found for peak: {peak}")

        result_list = []
        for col_name in df.columns:
            if col_name in excluded:
                continue
            value = df.iloc[0][col_name]

            if low <= value <= mid:
                error_percent = error_percent_1
            elif mid < value <= high:
                error_percent = error_percent_2
            else:
                # Reading is outside the range the tolerances are defined for
                # (or is NaN). Record a verdict anyway so the list stays
                # aligned with the device columns; it cannot be shown to be
                # within tolerance, so it is graded "Fail".
                result_list.append("Fail")
                continue

            lower_bound = mean_value * (1 - error_percent) - error_number
            upper_bound = mean_value * (1 + error_percent) + error_number
            result_list.append("Pass" if lower_bound <= value <= upper_bound else "Fail")

        results_dict[peak] = result_list

    return results_dict

In [6]:
def transform_and_concatenate(percent_diffs_a, pass_fail_a, percent_diffs_r, pass_fail_r, param_df):
    """Assemble one four-row report table per peak.

    Args:
        percent_diffs_a: peak -> list placed in the ``percent_diff`` row.
        pass_fail_a: peak -> list placed in the ``pass_fail`` row.
        percent_diffs_r: peak -> list placed in the ``reference`` row.
        pass_fail_r: peak -> list placed in the ``reference_pass_fail`` row.
        param_df: DataFrame whose column names supply the table columns:
            every column except ``datetime`` and those starting with
            ``reference_`` or ``mean_``.

    Returns:
        dict mapping each key of ``percent_diffs_a`` to a DataFrame with the
        four rows above and one column per remaining ``param_df`` column.
    """
    device_columns = [
        column
        for column in param_df.columns.tolist()
        if column != 'datetime' and not column.startswith(('reference_', 'mean_'))
    ]

    # Row label -> per-peak source, in the order the rows appear in the table.
    row_sources = (
        ('percent_diff', percent_diffs_a),
        ('pass_fail', pass_fail_a),
        ('reference', percent_diffs_r),
        ('reference_pass_fail', pass_fail_r),
    )

    return {
        peak: pd.DataFrame(
            {label: source[peak] for label, source in row_sources},
            index=device_columns,
        ).T
        for peak in percent_diffs_a
    }

In [7]:
# Read the five *_peak.csv files, one DataFrame per measured parameter.
co2, tvoc, pm2p5, temp, humidity = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../1_Data/2_peak/co2_peak.csv",
        "../1_Data/2_peak/tvoc_peak.csv",
        "../1_Data/2_peak/pm2p5_peak.csv",
        "../1_Data/2_peak/temp_peak.csv",
        "../1_Data/2_peak/humidity_peak.csv",
    )
)

In [8]:
# Parameter name -> loaded DataFrame, in processing order.
params = dict(
    co2=co2,
    tvoc=tvoc,
    pm2p5=pm2p5,
    temp=temp,
    humidity=humidity,
)

In [9]:
# Split every parameter table into single-row frames keyed "<name>_peak<k>".
sliced_params = {
    param_name: slice_params(param_table, param_name)
    for param_name, param_table in params.items()
}

In [10]:
# Per parameter: (baselines, diffs) in "a" mode, i.e. baseline read from
# column position 7 of each peak frame.
results_a = {
    param_name: identify_mean_and_percent_diff(peak_frames, "a", param_name)
    for param_name, peak_frames in sliced_params.items()
}

In [11]:
# Per parameter: (baselines, diffs) in "r" mode, i.e. baseline read from
# column position 1 of each peak frame.
results_r = {
    param_name: identify_mean_and_percent_diff(peak_frames, "r", param_name)
    for param_name, peak_frames in sliced_params.items()
}

In [12]:
# Baseline modes understood by identify_mean_and_percent_diff.
types = ["a", "r"]

# Which grading routine applies to which parameter.
eval_functions = {
    "co2": evaluate_result_2,
    "tvoc": evaluate_result_2,
    "pm2p5": evaluate_result_2,
    "temp": evaluate_result_1,
    "humidity": evaluate_result_1
}

results = {}      # param -> mode -> (baselines, diffs)
pass_fails = {}   # param -> mode -> peak -> list of "Pass"/"Fail"
df_dicts = {}     # param -> peak -> report table

# Iterate the existing `params` mapping (name -> DataFrame) instead of
# rebinding `params` to a list of names: rebinding made earlier cells that
# call `params.items()` fail when re-run, and forced a `globals()` lookup to
# get the DataFrame back.
for param, param_df in params.items():
    results[param] = {}
    pass_fails[param] = {}
    eval_func = eval_functions[param]

    # `value_type` rather than `type`, which would shadow the builtin.
    for value_type in types:
        mean, diffs = identify_mean_and_percent_diff(sliced_params[param], value_type, param)
        results[param][value_type] = (mean, diffs)
        pass_fails[param][value_type] = eval_func(mean, sliced_params[param], param)

    df_dicts[param] = transform_and_concatenate(
        results[param]["a"][1],  # diffs_a
        pass_fails[param]["a"],
        results[param]["r"][1],  # diffs_r
        pass_fails[param]["r"],
        param_df  # original DataFrame
    )


In [13]:
# Convenience aliases: one name per parameter's dict of report tables.
dict_co2, dict_tvoc, dict_pm2p5, dict_temp, dict_humidity = (
    df_dicts[param_name]
    for param_name in ("co2", "tvoc", "pm2p5", "temp", "humidity")
)

In [14]:
# Root folder for the generated reports; each parameter gets a sub-folder.
base_directory = '../1_Data/3_report'
dict_to_directory = {
    'co2': 'co2',
    'tvoc': 'tvoc',
    'pm2p5': 'pm2p5',
    'temp': 'temp',
    'humidity': 'humidity'
}

df_dicts = {
    'co2': dict_co2,
    'tvoc': dict_tvoc,
    'pm2p5': dict_pm2p5,
    'temp': dict_temp,
    'humidity': dict_humidity
}

for key, directory_name in dict_to_directory.items():

    directory = os.path.join(base_directory, directory_name)
    # to_csv does not create missing folders, so make sure the target exists
    # (no-op when it is already there).
    os.makedirs(directory, exist_ok=True)
    df_dict = df_dicts[key]

    # One CSV per peak table, named after the peak key; the row labels are
    # written as the first column.
    for filename, df in df_dict.items():
        file_path = os.path.join(directory, f"{filename}.csv")
        df.to_csv(file_path, index=True)


In [15]:
# Re-fetch the CO2 report tables and display the one keyed 'co2_peak1',
# i.e. the table built from the first row of the CO2 peak data.
dict_co2 = df_dicts["co2"]
dict_co2['co2_peak1']

Unnamed: 0,Device1_co2,Device2_co2,Device3_co2,Device4_co2,Device5_co2
percent_diff,-0.17%,1.04%,-1.43%,-1.40%,1.92%
pass_fail,Pass,Pass,Pass,Pass,Pass
reference,13.81%,15.01%,12.56%,12.58%,15.89%
reference_pass_fail,Fail,Fail,Fail,Fail,Fail


In [16]:
# Re-fetch the TVOC report tables and display the one keyed 'tvoc_peak1',
# i.e. the table built from the first row of the TVOC peak data.
dict_tvoc = df_dicts["tvoc"]
dict_tvoc['tvoc_peak1']

Unnamed: 0,Device1_tvoc,Device2_tvoc,Device3_tvoc,Device4_tvoc,Device5_tvoc
percent_diff,1.51%,0.20%,-6.25%,4.02%,0.25%
pass_fail,Pass,Pass,Pass,Pass,Pass
reference,-42.76%,-44.01%,-50.10%,-40.35%,-43.96%
reference_pass_fail,Fail,Fail,Fail,Fail,Fail


In [17]:
# Re-fetch the temperature report tables and display the one keyed
# 'temp_peak1'. For "temp" the diff rows hold plain differences (no % sign).
dict_temp = df_dicts["temp"]
dict_temp['temp_peak1']

Unnamed: 0,Device1_temp,Device2_temp,Device3_temp,Device4_temp,Device5_temp
percent_diff,0.04,0.07,-0.07,0.05,-0.09
pass_fail,Pass,Pass,Pass,Pass,Pass
reference,0.73,0.77,0.62,0.75,0.61
reference_pass_fail,Pass,Pass,Pass,Pass,Pass
