In [None]:
import pandas as pd
from scipy import stats

In [None]:
# Load the two Op303Mobile CSV files into DataFrames; the tuple order below
# fixes which file ends up in `prod` and which in `exp`.
prod, exp = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/feature_data/Op303Mobile/prod.csv',
        './data/feature_data/Op303Mobile/exp.csv',
    )
)

In [None]:
# Tag every row with the frame it came from so the two groups can still be
# told apart once they are stacked into a single DataFrame.
prod['status'], exp['status'] = 'production', 'experiment'

# Column counts (including the new `status` column), one per line:
# production first, then experiment.
print(prod.shape[1], exp.shape[1], sep='\n')

# Stack production rows above experiment rows and renumber the index from 0.
df = pd.concat((prod, exp), ignore_index=True)

In [None]:
# Feature columns to test. Each name is '<statistic><variant>_PID': the 19
# statistics are listed once and repeated for the three name variants
# ('' , '_r', '_f'), followed by the three integral columns.
cols = [
    statistic + variant + '_PID'
    for variant in ('', '_r', '_f')
    for statistic in (
        'mean', 'std', 'min', '0.25', '0.50', '0.75', 'max',
        'Quart1_ampl', 'Quart2_ampl', 'Quart3_ampl',
        'crest_factor', 'roughnessAverage', 'rms', 'shape_factor', 'peak',
        'clearance_factor', 'impulse_factor',
        'Skewness_value', 'Kurtosis_value',
    )
] + ['integral' + variant + '_PID' for variant in ('', '_r', '_f')]

In [None]:
def _welch_degrees_of_freedom(sample_a, sample_b):
    """Return the Welch-Satterthwaite degrees of freedom for two samples.

    Both arguments are pandas Series without missing values. NaN is returned
    when the value is undefined: fewer than two observations in either
    sample, or zero variance in both samples.
    """
    n_a, n_b = len(sample_a), len(sample_b)
    if n_a < 2 or n_b < 2:
        return float('nan')

    # Squared standard error of each sample mean (unbiased variance / n).
    se2_a = float(sample_a.var(ddof=1)) / n_a
    se2_b = float(sample_b.var(ddof=1)) / n_b

    denominator = se2_a ** 2 / (n_a - 1) + se2_b ** 2 / (n_b - 1)
    # `not (x > 0)` is also true for NaN, so this covers both the
    # zero-variance case and any NaN that slipped through.
    if not denominator > 0:
        return float('nan')
    return (se2_a + se2_b) ** 2 / denominator


def t_test_run(features_list, training_df, p_val_cutoff=0.05):
    """Compare production and experiment rows with Welch's t-test per feature.

    Rows of ``training_df`` are split on the ``'status'`` column into
    ``'production'`` and ``'experiment'`` groups. For every feature an
    independent two-sample t-test with unequal variances
    (``equal_var=False``) is run between the two groups.

    Missing values are dropped per feature before testing, so one NaN no
    longer turns the whole result for that feature into NaN.

    Parameters
    ----------
    features_list : iterable of str
        Column names of ``training_df`` to test.
    training_df : pandas.DataFrame
        Must contain a ``'status'`` column and every column named in
        ``features_list``.
    p_val_cutoff : float, default 0.05
        A feature is flagged significant when its p-value is strictly below
        this value.

    Returns
    -------
    t_test_result : pandas.DataFrame
        One row per feature, in input order, with the columns ``'Variable'``,
        ``'Test statistic'``, ``'P values'``, ``'Degrees of freedom'`` and
        ``'Significance'`` (``'***'`` if significant, ``'.'`` otherwise).
        ``'Degrees of freedom'`` is the Welch-Satterthwaite approximation
        (a float), which is the value that matches an unequal-variance test;
        it is NaN when undefined.
    significant_features : list of str
        Names of the features whose p-value is below ``p_val_cutoff``.

    Raises
    ------
    KeyError
        If ``'status'`` or a requested feature is not a column of
        ``training_df``.
    """
    column_order = ['Variable', 'Test statistic', 'P values',
                    'Degrees of freedom', 'Significance']

    # Separate into production and experiment
    production_features = training_df[training_df['status'] == 'production']
    experiment_features = training_df[training_df['status'] == 'experiment']

    records = []
    for feature in features_list:
        # Test only the observed values; sample sizes used for the degrees
        # of freedom are therefore the non-missing counts.
        production_sample = production_features[feature].dropna()
        experiment_sample = experiment_features[feature].dropna()

        t_statistic, p_value = stats.ttest_ind(
            production_sample, experiment_sample, equal_var=False)

        records.append({
            'Variable': feature,
            'Test statistic': t_statistic,
            'P values': p_value,
            'Degrees of freedom': _welch_degrees_of_freedom(
                production_sample, experiment_sample),
            # A NaN p-value compares False and is reported as not significant.
            'Significance': '***' if p_value < p_val_cutoff else '.',
        })

    t_test_result = pd.DataFrame(records, columns=column_order)

    is_significant = t_test_result['P values'] < p_val_cutoff
    significant_features = list(t_test_result.loc[is_significant, 'Variable'])

    return t_test_result, significant_features

In [None]:
# Run the per-feature t-tests on the combined frame at the 5% level.
t_test_result, significant_parameter_df = t_test_run(
    features_list=cols,
    training_df=df,
    p_val_cutoff=0.05,
)

In [None]:
# Plain-text dump of the per-feature results table returned by `t_test_run`
# (one row per tested feature).
print(t_test_result)

In [None]:
# Despite the `_df` suffix this is a plain list: the names of the features
# whose p-value fell below the cutoff passed to `t_test_run`.
print(significant_parameter_df)