In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.stats.diagnostic as dg
from scipy import stats
from scipy.spatial.distance import mahalanobis
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from statsmodels.stats.diagnostic import linear_reset, het_breuschpagan
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.stattools import durbin_watson
from statsmodels.tools.tools import add_constant

# Load data

In [2]:
# Read the three per-stage CSV files (research, design, improvement) in one pass.
research, design, improvement = (
    pd.read_csv(csv_path)
    for csv_path in ('./research.csv', './design.csv', './improvement.csv')
)

In [3]:
# Read the survey CSV from the working directory.
# NOTE(review): `survey` is not referenced by any cell visible here; the regression
# cells use `df2`, which is presumably derived from it — confirm and add that step.
survey = pd.read_csv('./survey.csv')

# Log analysis

In [4]:
def mahalanobis_distance(row, mean, inv_cov_matrix):
    """Return sqrt((row - mean)^T @ inv_cov_matrix @ (row - mean)).

    Parameters
    ----------
    row : observation vector.
    mean : vector subtracted from `row` before the quadratic form.
    inv_cov_matrix : matrix placed in the middle of the quadratic form
        (by its name, the inverse covariance matrix).
    """
    centered = row - mean
    # Evaluate left-to-right, exactly as a chained `a @ b @ c` would.
    left_product = centered.T @ inv_cov_matrix
    squared_distance = left_product @ centered
    return np.sqrt(squared_distance)

In [102]:
def get_iqr_outlier_indexes(df, column):
    """Return the index labels of rows whose `column` value is an IQR outlier.

    A value is an outlier when it lies strictly below Q1 - 1.5 * IQR or
    strictly above Q3 + 1.5 * IQR, with Q1/Q3 the 25th/75th percentiles of
    `df[column]`.

    Parameters
    ----------
    df : pandas.DataFrame holding the data.
    column : label of the column to screen.

    Returns
    -------
    pandas.Index with the labels (not positions) of the outlying rows.
    """
    values = df[column]
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    spread = third_quartile - first_quartile

    # Tukey fences.
    low_fence = first_quartile - 1.5 * spread
    high_fence = third_quartile + 1.5 * spread

    below = values < low_fence
    above = values > high_fence
    return df.loc[below | above].index

## Research

In [103]:

# Research stage: regress every column of `df2` on the outlier-filtered, min-max
# scaled total time and total clicks of the research stage. A model is printed
# only when at least one slope is significant and all four residual diagnostics
# pass at ALPHA.
ALPHA = 0.05  # significance level for the coefficient gate and every diagnostic


def linearity_test(fit):
    """Return the p-value of the RESET test (power=2, fitted values) for `fit`."""
    return linear_reset(fit, power=2, test_type='fitted').pvalue


def normality_test(resid):
    """Return the p-value of `stats.normaltest` applied to `resid`."""
    _, p_value = stats.normaltest(resid)
    return p_value


def homoscedasticity_test(fit, resid):
    """Return the second value (LM p-value) of `het_breuschpagan` for `resid` vs. the model regressors."""
    _, p_value, _, _ = het_breuschpagan(resid, fit.model.exog)
    return p_value


def independence_test(fit):
    """Return the second value (LM p-value) of the Breusch-Godfrey test with 3 lags."""
    return dg.acorr_breusch_godfrey(fit, nlags=3)[1]


# The predictors do not depend on the survey column, so they are built once
# instead of once per loop iteration.
r2 = pd.DataFrame()
r2['research_time_sum'] = research_time.sum(axis=1)
r2['research_click_sum'] = research_click.sum(axis=1)

# Step 1: drop IQR outliers of the total research time from predictors and survey.
# NOTE(review): the labels returned by get_iqr_outlier_indexes are used as row
# positions; this assumes r2 and df2 share row order and r2 has a default
# RangeIndex — confirm.
idx = get_iqr_outlier_indexes(r2, 'research_time_sum')
r3 = r2.drop(r2.index[idx]).reset_index(drop=True)
df2_filtered = df2.drop(df2.index[idx]).reset_index(drop=True)

# Step 2: drop IQR outliers of the total research clicks from what is left.
idx = get_iqr_outlier_indexes(r3, 'research_click_sum')
r3 = r3.drop(r3.index[idx]).reset_index(drop=True)
df2_filtered = df2_filtered.drop(df2_filtered.index[idx]).reset_index(drop=True)

# Step 3: min-max scale both predictors and add the intercept column.
r = pd.DataFrame(MinMaxScaler().fit_transform(r3), columns=r3.columns)
r = sm.add_constant(r[['research_time_sum', 'research_click_sum']])

results_d = []   # classical OLS fits, one per survey column
results_h3 = []  # the same fits with HC3 covariance
for j in df2.columns:
    model_filtered = sm.OLS(df2_filtered[j], r)
    results = model_filtered.fit()
    results_robust = model_filtered.fit(cov_type='HC3')

    results_d.append(results)
    results_h3.append(results_robust)

    # Gate: at least one slope (intercept excluded) must be significant.
    # NOTE(review): the gate reads the classical p-values while the summary
    # printed below is the HC3 one; the design cell gates on the robust
    # p-values — confirm which is intended.
    if not (results.pvalues.iloc[1:] <= ALPHA).any():
        continue

    residuals = results.resid

    # Run the four assumption checks.
    linearity_p_value = linearity_test(results_robust)
    normality_p_value = normality_test(residuals)
    homoscedasticity_p_value = homoscedasticity_test(results_robust, residuals)
    independence_p_value = independence_test(results_robust)

    diagnostics = (linearity_p_value, normality_p_value,
                   homoscedasticity_p_value, independence_p_value)
    if all(p_value > ALPHA for p_value in diagnostics):
        # Interpretation of the results
        print("\nResults Interpretation:")
        print(f"Linearity: {'Satisfied' if linearity_p_value > ALPHA else 'Not satisfied'}")
        print(f"Normality: {'Satisfied' if normality_p_value > ALPHA else 'Not satisfied'}")
        print(f"Homoscedasticity: {'Satisfied' if homoscedasticity_p_value > ALPHA else 'Not satisfied'}")
        print(f"Independence: {'Satisfied' if independence_p_value > ALPHA else 'Not satisfied'}")
        print(j)
        print(results_robust.summary())

        print('========================================================================')


Results Interpretation:
Linearity: Satisfied
Normality: Satisfied
Homoscedasticity: Satisfied
Independence: Satisfied
웹이 디자인 업무/공부에 효과적일 수 있다고 생각하나요?
                                   OLS Regression Results                                  
Dep. Variable:     웹이 디자인 업무/공부에 효과적일 수 있다고 생각하나요?   R-squared:                       0.245
Model:                                         OLS   Adj. R-squared:                  0.185
Method:                              Least Squares   F-statistic:                     3.344
Date:                             Tue, 12 Nov 2024   Prob (F-statistic):             0.0517
Time:                                     15:05:16   Log-Likelihood:                -44.147
No. Observations:                               28   AIC:                             94.29
Df Residuals:                                   25   BIC:                             98.29
Df Model:                                        2                                         
Covariance Type:     

In [43]:

# VIF helper
def calculate_vif(X):
    """Return a frame with one row per column of `add_constant(X)`.

    Columns of the result: "variables" (column label) and "VIF" (the value of
    `variance_inflation_factor` for that column position).
    """
    design = add_constant(X)
    factors = [
        variance_inflation_factor(design.values, position)
        for position in range(design.shape[1])
    ]
    return pd.DataFrame({"variables": design.columns, "VIF": factors})

vif = calculate_vif(r)
print("VIF 값:")
print(vif)

# Keep the variables whose VIF is below 5 (example threshold), intercept excluded.
selected_vars = [
    name
    for name in vif.loc[vif['VIF'] < 5, 'variables'].tolist()
    if name != 'const'
]
selected_vars

VIF 값:
            variables       VIF
0               const  4.637059
1   research_time_sum  2.559600
2  research_click_sum  2.559600


['research_time_sum', 'research_click_sum']

In [44]:
import warnings
# Silence every Python warning from this point on.
# NOTE(review): the 'ignore' filter is global, so it also hides warnings raised by
# pandas / statsmodels in the cells below — consider a narrower filter.
warnings.filterwarnings('ignore')

In [104]:

# Design stage: regress every column of `df2` on the outlier-filtered, min-max
# scaled total time and total clicks of the design stage. A model is printed
# only when at least one HC3 slope p-value is significant and all four residual
# diagnostics pass at ALPHA.
ALPHA = 0.05  # significance level for the coefficient gate and every diagnostic


def linearity_test(fit):
    """Return the p-value of the RESET test (power=2, fitted values) for `fit`."""
    return linear_reset(fit, power=2, test_type='fitted').pvalue


def normality_test(resid):
    """Return the p-value of `stats.normaltest` applied to `resid`."""
    _, p_value = stats.normaltest(resid)
    return p_value


def homoscedasticity_test(fit, resid):
    """Return the second value (LM p-value) of `het_breuschpagan` for `resid` vs. the model regressors."""
    _, p_value, _, _ = het_breuschpagan(resid, fit.model.exog)
    return p_value


def independence_test(fit):
    """Return the second value (LM p-value) of the Breusch-Godfrey test with 3 lags."""
    return dg.acorr_breusch_godfrey(fit, nlags=3)[1]


# The predictors do not depend on the survey column, so they are built once
# instead of once per loop iteration.
d2 = pd.DataFrame()
d2['design_time_sum'] = design_time.sum(axis=1)
d2['design_click_sum'] = design_click.sum(axis=1)

# Step 1: drop IQR outliers of the total design time from predictors and survey.
# NOTE(review): the labels returned by get_iqr_outlier_indexes are used as row
# positions; this assumes d2 and df2 share row order and d2 has a default
# RangeIndex — confirm.
idx = get_iqr_outlier_indexes(d2, 'design_time_sum')
d3 = d2.drop(d2.index[idx]).reset_index(drop=True)
df2_filtered = df2.drop(df2.index[idx]).reset_index(drop=True)

# Step 2: drop IQR outliers of the total design clicks from what is left.
idx = get_iqr_outlier_indexes(d3, 'design_click_sum')
d3 = d3.drop(d3.index[idx]).reset_index(drop=True)
df2_filtered = df2_filtered.drop(df2_filtered.index[idx]).reset_index(drop=True)

# Step 3: min-max scale both predictors and add the intercept column.
d = pd.DataFrame(MinMaxScaler().fit_transform(d3), columns=d3.columns)
d = sm.add_constant(d[['design_time_sum', 'design_click_sum']])

results_d = []   # classical OLS fits, one per survey column
results_h3 = []  # the same fits with HC3 covariance
for j in df2.columns:
    model_filtered = sm.OLS(df2_filtered[j], d)
    results = model_filtered.fit()
    results_robust = model_filtered.fit(cov_type='HC3')

    results_d.append(results)
    results_h3.append(results_robust)

    # Gate: at least one slope (intercept excluded) must be significant
    # according to the HC3 p-values.
    if not (results_robust.pvalues.iloc[1:] <= ALPHA).any():
        continue

    residuals = results_robust.resid

    # Run the four assumption checks.
    linearity_p_value = linearity_test(results_robust)
    normality_p_value = normality_test(residuals)
    homoscedasticity_p_value = homoscedasticity_test(results_robust, residuals)
    independence_p_value = independence_test(results_robust)

    diagnostics = (linearity_p_value, normality_p_value,
                   homoscedasticity_p_value, independence_p_value)
    if all(p_value > ALPHA for p_value in diagnostics):
        # Interpretation of the results
        print("\nResults Interpretation:")
        print(f"Linearity: {'Satisfied' if linearity_p_value > ALPHA else 'Not satisfied'}")
        print(f"Normality: {'Satisfied' if normality_p_value > ALPHA else 'Not satisfied'}")
        print(f"Homoscedasticity: {'Satisfied' if homoscedasticity_p_value > ALPHA else 'Not satisfied'}")
        print(f"Independence: {'Satisfied' if independence_p_value > ALPHA else 'Not satisfied'}")
        print(j)
        print(results_robust.summary())

        print('========================================================================')


Results Interpretation:
Linearity: Satisfied
Normality: Satisfied
Homoscedasticity: Satisfied
Independence: Satisfied
웹이 디자인 업무/공부를 수행하는데 성취감을 줄 수 있다고 생각하나요?
                                       OLS Regression Results                                      
Dep. Variable:     웹이 디자인 업무/공부를 수행하는데 성취감을 줄 수 있다고 생각하나요?   R-squared:                       0.184
Model:                                                 OLS   Adj. R-squared:                  0.119
Method:                                      Least Squares   F-statistic:                     3.096
Date:                                     Tue, 12 Nov 2024   Prob (F-statistic):             0.0629
Time:                                             15:07:06   Log-Likelihood:                -49.908
No. Observations:                                       28   AIC:                             105.8
Df Residuals:                                           25   BIC:                             109.8
Df Model:                                

In [46]:

# VIF helper
def calculate_vif(X):
    """Compute `variance_inflation_factor` for every column of `add_constant(X)`.

    Returns a frame with the column labels under "variables" and the matching
    factors under "VIF".
    """
    with_const = add_constant(X)
    n_columns = with_const.shape[1]
    scores = [variance_inflation_factor(with_const.values, k) for k in range(n_columns)]
    return pd.DataFrame({"variables": with_const.columns, "VIF": scores})

vif = calculate_vif(research)
print("VIF 값:")
print(vif)

# Select variables by VIF (example threshold: VIF < 5), leaving out the intercept.
below_threshold = vif['VIF'] < 5
selected_vars = [
    label
    for label in vif[below_threshold]['variables'].tolist()
    if label != 'const'
]
selected_vars

VIF 값:
                       variables        VIF
0                          const  12.246960
1          cat_attr_mission_time   2.232852
2           cluster_mission_time   7.404230
3            naming_mission_time   1.194162
4  final_catattr_mission_clicked   2.119388
5  final_cluster_mission_clicked  10.573838
6   final_naming_mission_clicked   3.188962


['cat_attr_mission_time',
 'naming_mission_time',
 'final_catattr_mission_clicked',
 'final_naming_mission_clicked']

In [106]:

# Improvement stage: regress every column of `df2` on the outlier-filtered,
# min-max scaled total time and total clicks of the improvement stage. A model
# is printed only when at least one slope is significant and all four residual
# diagnostics pass at ALPHA.
ALPHA = 0.05  # significance level for the coefficient gate and every diagnostic


def linearity_test(fit):
    """Return the p-value of the RESET test (power=2, fitted values) for `fit`."""
    return linear_reset(fit, power=2, test_type='fitted').pvalue


def normality_test(resid):
    """Return the p-value of `stats.normaltest` applied to `resid`."""
    _, p_value = stats.normaltest(resid)
    return p_value


def homoscedasticity_test(fit, resid):
    """Return the second value (LM p-value) of `het_breuschpagan` for `resid` vs. the model regressors."""
    _, p_value, _, _ = het_breuschpagan(resid, fit.model.exog)
    return p_value


def independence_test(fit):
    """Return the second value (LM p-value) of the Breusch-Godfrey test with 3 lags."""
    return dg.acorr_breusch_godfrey(fit, nlags=3)[1]


# The predictors do not depend on the survey column, so they are built once
# instead of once per loop iteration.
i2 = pd.DataFrame()
i2['improvement_time_sum'] = improvement_time.sum(axis=1)
i2['improvement_click_sum'] = improvement_click.sum(axis=1)

# Step 1: drop IQR outliers of the total improvement time from predictors and survey.
# NOTE(review): the labels returned by get_iqr_outlier_indexes are used as row
# positions; this assumes i2 and df2 share row order and i2 has a default
# RangeIndex — confirm.
idx = get_iqr_outlier_indexes(i2, 'improvement_time_sum')
i3 = i2.drop(i2.index[idx]).reset_index(drop=True)
df2_filtered = df2.drop(df2.index[idx]).reset_index(drop=True)

# Step 2: drop IQR outliers of the total improvement clicks from what is left.
idx = get_iqr_outlier_indexes(i3, 'improvement_click_sum')
i3 = i3.drop(i3.index[idx]).reset_index(drop=True)
df2_filtered = df2_filtered.drop(df2_filtered.index[idx]).reset_index(drop=True)

# Step 3: min-max scale both predictors and add the intercept column.
# (`i` is kept as the design-matrix name for compatibility with the original cell.)
i = pd.DataFrame(MinMaxScaler().fit_transform(i3), columns=i3.columns)
i = sm.add_constant(i[['improvement_time_sum', 'improvement_click_sum']])

results_d = []   # classical OLS fits, one per survey column
results_h3 = []  # the same fits with HC3 covariance
for j in df2.columns:
    model_filtered = sm.OLS(df2_filtered[j], i)
    results = model_filtered.fit()
    results_robust = model_filtered.fit(cov_type='HC3')

    results_d.append(results)
    results_h3.append(results_robust)

    # Gate: at least one slope (intercept excluded) must be significant
    # according to the classical p-values.
    if not (results.pvalues.iloc[1:] <= ALPHA).any():
        continue

    residuals = results.resid

    # Run the four assumption checks.
    linearity_p_value = linearity_test(results_robust)
    normality_p_value = normality_test(residuals)
    homoscedasticity_p_value = homoscedasticity_test(results_robust, residuals)
    independence_p_value = independence_test(results_robust)

    diagnostics = (linearity_p_value, normality_p_value,
                   homoscedasticity_p_value, independence_p_value)
    if all(p_value > ALPHA for p_value in diagnostics):
        # Interpretation of the results
        print("\nResults Interpretation:")
        print(f"Linearity: {'Satisfied' if linearity_p_value > ALPHA else 'Not satisfied'}")
        print(f"Normality: {'Satisfied' if normality_p_value > ALPHA else 'Not satisfied'}")
        print(f"Homoscedasticity: {'Satisfied' if homoscedasticity_p_value > ALPHA else 'Not satisfied'}")
        print(f"Independence: {'Satisfied' if independence_p_value > ALPHA else 'Not satisfied'}")
        print(j)
        # NOTE(review): this cell prints the classical summary, whereas the
        # research and design cells print the HC3 one — confirm which is intended.
        print(results.summary())

        print('========================================================================')

In [49]:

# VIF helper
def calculate_vif(X):
    """Tabulate `variance_inflation_factor` for each column of `add_constant(X)`.

    The returned frame has a "variables" column with the labels and a "VIF"
    column with the corresponding factors.
    """
    matrix = add_constant(X)
    column_positions = range(matrix.shape[1])
    inflation = [variance_inflation_factor(matrix.values, pos) for pos in column_positions]
    return pd.DataFrame({"variables": matrix.columns, "VIF": inflation})

# NOTE(review): this cell follows the improvement regression but analyses `r`,
# the research design matrix — confirm that is the intended input.
vif = calculate_vif(r)
print("VIF 값:")
print(vif)

# Select variables by VIF (example threshold: VIF < 5), leaving out the intercept.
kept = vif.loc[vif['VIF'] < 5, 'variables'].tolist()
selected_vars = [entry for entry in kept if entry != 'const']
selected_vars

VIF 값:
            variables       VIF
0               const  4.637059
1   research_time_sum  2.559600
2  research_click_sum  2.559600


['research_time_sum', 'research_click_sum']

In [188]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Research stage, no outlier removal: regress every column of `df2` on the
# scaled total time and total clicks of the research stage. The column name is
# printed whenever an HC3 slope p-value is significant; the full summary is
# printed only when all four residual diagnostics also pass at ALPHA.
ALPHA = 0.05  # significance level for the coefficient gate and every diagnostic


def linearity_test(res):
    """Return the p-value of the RESET test (power=2, fitted values) for `res`."""
    return linear_reset(res, power=2, test_type='fitted').pvalue


def normality_test(residuals):
    """Return the p-value of `stats.normaltest` applied to `residuals`."""
    _, p_value = stats.normaltest(residuals)
    return p_value


def homoscedasticity_test(res, residuals):
    """Return the second value (LM p-value) of `het_breuschpagan` for `residuals` vs. the model regressors."""
    _, p_value, _, _ = het_breuschpagan(residuals, res.model.exog)
    return p_value


def independence_test(res):
    """Return the second value (LM p-value) of the Breusch-Godfrey test with 3 lags."""
    return dg.acorr_breusch_godfrey(res, nlags=3)[1]


# The predictors do not depend on the survey column, so they are built and
# scaled once instead of once per loop iteration.
i2 = pd.DataFrame()
i2['research_time_sum'] = research_time.sum(axis=1)
i2['research_click_sum'] = research_click.sum(axis=1)

# Standardise, then min-max scale.
# NOTE(review): min-max scaling after standardising should give the same [0, 1]
# values as min-max scaling alone (up to rounding); the first step looks redundant.
scaler = StandardScaler()
i2 = pd.DataFrame(scaler.fit_transform(i2), columns=i2.columns)
i2 = pd.DataFrame(MinMaxScaler().fit_transform(i2), columns=i2.columns)
i3 = sm.add_constant(i2[['research_time_sum', 'research_click_sum']])

results_d = []   # classical OLS fits, one per survey column
results_h3 = []  # the same fits with HC3 covariance
for j in df2.columns:
    model_filtered = sm.OLS(df2[j], i3)
    results = model_filtered.fit()
    results_robust = model_filtered.fit(cov_type='HC3')

    # Collect the fits; the original appends were lost inside a commented-out
    # block, which left both lists empty.
    results_d.append(results)
    results_h3.append(results_robust)

    # Gate: at least one slope (intercept excluded) must be significant
    # according to the HC3 p-values.
    if not (results_robust.pvalues.iloc[1:] <= ALPHA).any():
        continue

    print(j)
    print('RESULT_ROBUST')

    residuals = results_robust.resid
    linearity_p_value = linearity_test(results_robust)
    normality_p_value = normality_test(residuals)
    homoscedasticity_p_value = homoscedasticity_test(results_robust, residuals)
    independence_p_value = independence_test(results_robust)

    diagnostics = (linearity_p_value, normality_p_value,
                   homoscedasticity_p_value, independence_p_value)
    if all(p_value > ALPHA for p_value in diagnostics):
        # Interpretation of the results
        print("\nResults Interpretation:")
        print(f"Linearity: {'Satisfied' if linearity_p_value > ALPHA else 'Not satisfied'}")
        print(f"Normality: {'Satisfied' if normality_p_value > ALPHA else 'Not satisfied'}")
        print(f"Homoscedasticity: {'Satisfied' if homoscedasticity_p_value > ALPHA else 'Not satisfied'}")
        print(f"Independence: {'Satisfied' if independence_p_value > ALPHA else 'Not satisfied'}")
        print(j)
        print(results_robust.summary())

        print('========================================================================')

웹이 친절하다고 생각하나요?
RESULT_ROBUST

Results Interpretation:
Linearity: Satisfied
Normality: Satisfied
Homoscedasticity: Satisfied
Independence: Satisfied
웹이 친절하다고 생각하나요?
                            OLS Regression Results                            
Dep. Variable:        웹이 친절하다고 생각하나요?   R-squared:                       0.124
Model:                            OLS   Adj. R-squared:                  0.060
Method:                 Least Squares   F-statistic:                     2.897
Date:                Tue, 12 Nov 2024   Prob (F-statistic):             0.0725
Time:                        16:44:49   Log-Likelihood:                -49.659
No. Observations:                  30   AIC:                             105.3
Df Residuals:                      27   BIC:                             109.5
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                         coef    std err     

In [151]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Design stage, no outlier removal: regress every column of `df2` on the scaled
# total time and total clicks of the design stage. The column name is printed
# whenever an HC2 slope p-value is significant; the full summary is printed only
# when all four residual diagnostics also pass at ALPHA.
ALPHA = 0.05  # significance level for the coefficient gate and every diagnostic


def linearity_test(res):
    """Return the p-value of the RESET test (power=2, fitted values) for `res`."""
    return linear_reset(res, power=2, test_type='fitted').pvalue


def normality_test(residuals):
    """Return the p-value of `stats.normaltest` applied to `residuals`."""
    _, p_value = stats.normaltest(residuals)
    return p_value


def homoscedasticity_test(res, residuals):
    """Return the second value (LM p-value) of `het_breuschpagan` for `residuals` vs. the model regressors."""
    _, p_value, _, _ = het_breuschpagan(residuals, res.model.exog)
    return p_value


def independence_test(res):
    """Return the second value (LM p-value) of the Breusch-Godfrey test with 3 lags."""
    return dg.acorr_breusch_godfrey(res, nlags=3)[1]


# The predictors do not depend on the survey column, so they are built and
# scaled once instead of once per loop iteration.
i2 = pd.DataFrame()
i2['design_time_sum'] = design_time.sum(axis=1)
i2['design_click_sum'] = design_click.sum(axis=1)

# Create the scaler here: the original line was commented out, so the cell only
# ran when a `scaler` object was left over in the kernel from another cell.
# NOTE(review): min-max scaling after standardising should give the same [0, 1]
# values as min-max scaling alone (up to rounding); the first step looks redundant.
scaler = StandardScaler()
i2 = pd.DataFrame(scaler.fit_transform(i2), columns=i2.columns)
i2 = pd.DataFrame(MinMaxScaler().fit_transform(i2), columns=i2.columns)
i3 = sm.add_constant(i2[['design_time_sum', 'design_click_sum']])

results_d = []   # classical OLS fits, one per survey column
results_h3 = []  # the same fits with HC2 covariance (list name kept from the original)
for j in df2.columns:
    model_filtered = sm.OLS(df2[j], i3)
    results = model_filtered.fit()
    results_robust = model_filtered.fit(cov_type='HC2')

    # Collect the fits; the original appends were lost inside a commented-out
    # block, which left both lists empty.
    results_d.append(results)
    results_h3.append(results_robust)

    # Gate: at least one slope (intercept excluded) must be significant
    # according to the HC2 p-values.
    if not (results_robust.pvalues.iloc[1:] <= ALPHA).any():
        continue

    print(j)
    print('RESULT_ROBUST')

    residuals = results_robust.resid
    linearity_p_value = linearity_test(results_robust)
    normality_p_value = normality_test(residuals)
    homoscedasticity_p_value = homoscedasticity_test(results_robust, residuals)
    independence_p_value = independence_test(results_robust)

    diagnostics = (linearity_p_value, normality_p_value,
                   homoscedasticity_p_value, independence_p_value)
    if all(p_value > ALPHA for p_value in diagnostics):
        # Interpretation of the results
        print("\nResults Interpretation:")
        print(f"Linearity: {'Satisfied' if linearity_p_value > ALPHA else 'Not satisfied'}")
        print(f"Normality: {'Satisfied' if normality_p_value > ALPHA else 'Not satisfied'}")
        print(f"Homoscedasticity: {'Satisfied' if homoscedasticity_p_value > ALPHA else 'Not satisfied'}")
        print(f"Independence: {'Satisfied' if independence_p_value > ALPHA else 'Not satisfied'}")
        print(j)
        print(results_robust.summary())

        print('========================================================================')

웹이 디자인 업무/공부를 수월하게 할 수 있다고 생각하나요?
RESULT_ROBUST
웹이 디자인 업무/공부를 수행하는데 성취감을 줄 수 있다고 생각하나요?
RESULT_ROBUST

Results Interpretation:
Linearity: Satisfied
Normality: Satisfied
Homoscedasticity: Satisfied
Independence: Satisfied
웹이 디자인 업무/공부를 수행하는데 성취감을 줄 수 있다고 생각하나요?
                                       OLS Regression Results                                      
Dep. Variable:     웹이 디자인 업무/공부를 수행하는데 성취감을 줄 수 있다고 생각하나요?   R-squared:                       0.146
Model:                                                 OLS   Adj. R-squared:                  0.083
Method:                                      Least Squares   F-statistic:                     2.707
Date:                                     Tue, 12 Nov 2024   Prob (F-statistic):             0.0848
Time:                                             16:12:48   Log-Likelihood:                -53.629
No. Observations:                                       30   AIC:                             113.3
Df Residuals:                          

In [152]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Regress every survey item in `df2` on the min-max-scaled improvement-stage
# activity totals and report the HC2-robust fit for items that have at least
# one significant slope AND pass all four assumption checks.
#
# NOTE(review): `df2`, `improvement_time` and `improvement_click` are not
# defined in this cell; they must come from earlier cells - confirm they still
# exist on a fresh kernel.

# Imported here because this cell calls them and no import of either is
# visible in the cells above it.
from scipy import stats
from statsmodels.stats.diagnostic import acorr_breusch_godfrey

results_d = []   # plain OLS fit for every survey column, in column order
results_h3 = []  # HC2-robust fit for every survey column, in column order


# The diagnostics do not depend on the loop variable, so they are defined once
# instead of being re-created on every iteration.

# 1. Linearity (RESET test)
def linearity_test(res):
    """Return the p-value of the RESET test (power=2, fitted values) on `res`."""
    reset_test = linear_reset(res, power=2, test_type='fitted')
    return reset_test.pvalue


# 2. Normality
def normality_test(residuals):
    """Return the p-value of `scipy.stats.normaltest` on `residuals`."""
    _, p_value = stats.normaltest(residuals)
    return p_value


# 3. Homoscedasticity
def homoscedasticity_test(res, residuals):
    """Return the Breusch-Pagan LM p-value for `residuals` against the model's exog."""
    _, p_value, _, _ = het_breuschpagan(residuals, res.model.exog)
    return p_value


# 4. Independence
def independence_test(res):
    """Return the Breusch-Godfrey LM p-value (3 lags) for serial correlation in `res`."""
    return acorr_breusch_godfrey(res, nlags=3)[1]


# Predictors are identical for every survey column, so build them once.
i2 = pd.DataFrame()
i2['improvement_time_sum'] = improvement_time.sum(axis=1)
i2['improvement_click_sum'] = improvement_click.sum(axis=1)

# Min-max scaling alone is sufficient: the former standard-scaling step relied
# on a `scaler` object that this cell never created, and min-max scaling gives
# the same result after any positive per-column affine rescale.
min_max_scaler = MinMaxScaler()
X_MinMax_train = min_max_scaler.fit_transform(i2)
i2 = pd.DataFrame(X_MinMax_train, columns=i2.columns)
i3 = sm.add_constant(i2[['improvement_time_sum', 'improvement_click_sum']])

for kkk, j in enumerate(df2.columns):
    model_filtered = sm.OLS(df2[j], i3)
    results = model_filtered.fit()
    results_robust = model_filtered.fit(cov_type='HC2')
    results_d.append(results)
    results_h3.append(results_robust)

    # Skip items where no slope (everything after the constant) is significant at 5 %.
    if not any(ele <= 0.05 for ele in results_robust.pvalues[1:].values):
        continue

    print(j)
    print('RESULT_ROBUST')

    residuals = results_robust.resid
    fitted_values = results_robust.fittedvalues
    linearity_p_value = linearity_test(results_robust)
    normality_p_value = normality_test(residuals)
    homoscedasticity_p_value = homoscedasticity_test(results_robust, residuals)
    bg_p_value = independence_test(results_robust)

    assumptions_hold = (
        linearity_p_value > 0.05
        and normality_p_value > 0.05
        and homoscedasticity_p_value > 0.05
        and bg_p_value > 0.05
    )
    if assumptions_hold:
        # Interpretation: every check passed, so each line reads "Satisfied".
        print("\nResults Interpretation:")
        print("Linearity: Satisfied")
        print("Normality: Satisfied")
        print("Homoscedasticity: Satisfied")
        print("Independence: Satisfied")
        print(j)
        print(results_robust.summary())

        print('========================================================================')

웹이 디자인 업무/공부를 진행함에 만족감을 줄 수 있다고 생각하나요?
RESULT_ROBUST
웹이 주는 결과를 이해하였나요?
RESULT_ROBUST
웹이 창의력을 필요로 하는 업무/공부에 도움이 된다고 생각하나요?
RESULT_ROBUST

Results Interpretation:
Linearity: Satisfied
Normality: Satisfied
Homoscedasticity: Satisfied
Independence: Satisfied
웹이 창의력을 필요로 하는 업무/공부에 도움이 된다고 생각하나요?
                                     OLS Regression Results                                     
Dep. Variable:     웹이 창의력을 필요로 하는 업무/공부에 도움이 된다고 생각하나요?   R-squared:                       0.089
Model:                                              OLS   Adj. R-squared:                  0.022
Method:                                   Least Squares   F-statistic:                     2.411
Date:                                  Tue, 12 Nov 2024   Prob (F-statistic):              0.109
Time:                                          16:12:57   Log-Likelihood:                -53.413
No. Observations:                                    30   AIC:                             112.8
Df Residuals:                

## Research

In [5]:
# from statsmodels.stats import linear_reset, het_breuschpagan, durbin_watson
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

# Regress every survey item on the research-stage activity features
# (time and click totals) and print the HC3-robust summary for each item
# whose model passes all four assumption checks.

results_i = []   # not filled in this cell; kept so the name exists afterwards
results_h3 = []  # HC3-robust fit for every survey column, in column order

# Outlier Removal using Mahalanobis Distance.
# This depends only on `research`, so it is done once instead of once per
# survey column.
data_mean = np.mean(research, axis=0)
cov_matrix = np.cov(research.values.T)
inv_cov_matrix = np.linalg.inv(cov_matrix)
mahal_distances = research.apply(mahalanobis_distance, axis=1, args=(data_mean, inv_cov_matrix))
threshold = np.percentile(mahal_distances, 97.5)  # rows at/above the 97.5th percentile are dropped
research_filtered = research[mahal_distances < threshold]

for kkk, j in enumerate(survey.columns):
    # Answers of the retained respondents; rows with a missing answer are dropped.
    survey_tmp = pd.DataFrame(survey[j][research_filtered.index]).dropna()
    # Keep exactly the same respondents in X, otherwise a dropped answer would
    # leave X and y with different lengths.
    research_kept = research_filtered.loc[survey_tmp.index]

    # Normalize Data
    min_max_scaler = MinMaxScaler()
    research_scaled = min_max_scaler.fit_transform(research_kept)
    research_scaled = pd.DataFrame(research_scaled, columns=research_kept.columns)
    survey_tmp_scaled = min_max_scaler.fit_transform(survey_tmp)

    # Model Fitting
    research_scaled = sm.add_constant(research_scaled[['research_time_sum', 'research_click_sum']])
    model_filtered = sm.OLS(survey_tmp_scaled, research_scaled)
    results_robust = model_filtered.fit(cov_type='HC3')

    # Append Results
    results_h3.append(results_robust)

    # Statistical Tests
    residuals = results_robust.resid
    linearity_p_value = linear_reset(results_robust, power=2, test_type='fitted').pvalue
    normality_p_value = stats.normaltest(residuals)[1]
    homoscedasticity_p_value = het_breuschpagan(residuals, results_robust.model.exog)[1]
    independence_statistic = durbin_watson(residuals)

    # Durbin-Watson is a statistic on [0, 4], not a p-value: values near 2
    # indicate no first-order autocorrelation. Comparing it with 0.05 would
    # pass almost every model, so use the conventional 1.5-2.5 band instead.
    independence_ok = 1.5 < independence_statistic < 2.5

    # Interpretation of Results
    if (
        linearity_p_value > 0.05 and
        normality_p_value > 0.05 and
        homoscedasticity_p_value > 0.05 and
        independence_ok
    ):
        print(f"Column: {j}")
        print("\nInterpretation:")
        print("Linearity: Satisfied")
        print("Normality: Satisfied")
        print("Homoscedasticity: Satisfied")
        print("Independence: Satisfied")
        print("=" * 80)
        print(results_robust.summary())


Column: Ease

Interpretation:
Linearity: Satisfied
Normality: Satisfied
Homoscedasticity: Satisfied
Independence: Satisfied
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.201
Model:                            OLS   Adj. R-squared:                  0.140
Method:                 Least Squares   F-statistic:                     2.669
Date:                Sun, 08 Dec 2024   Prob (F-statistic):             0.0883
Time:                        15:54:37   Log-Likelihood:                 2.5597
No. Observations:                  29   AIC:                            0.8806
Df Residuals:                      26   BIC:                             4.982
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                         coef    std err          z      P>|z|      [0.025      0.975]

In [6]:
# from statsmodels.stats import linear_reset, het_breuschpagan, durbin_watson
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Regress every survey item on the design-stage activity features
# (time and click totals) and print the HC3-robust summary for each item
# whose model passes all four assumption checks.

results_i = []   # not filled in this cell; kept so the name exists afterwards
results_h3 = []  # HC3-robust fit for every survey column, in column order

# Outlier Removal using Mahalanobis Distance.
# This depends only on `design`, so it is done once instead of once per
# survey column.
data_mean = np.mean(design, axis=0)
cov_matrix = np.cov(design.values.T)
inv_cov_matrix = np.linalg.inv(cov_matrix)
mahal_distances = design.apply(mahalanobis_distance, axis=1, args=(data_mean, inv_cov_matrix))
threshold = np.percentile(mahal_distances, 97.5)  # rows at/above the 97.5th percentile are dropped
design_filtered = design[mahal_distances < threshold]

for kkk, j in enumerate(survey.columns):
    # Answers of the retained respondents; rows with a missing answer are dropped.
    survey_tmp = pd.DataFrame(survey[j][design_filtered.index]).dropna()
    # Keep exactly the same respondents in X, otherwise a dropped answer would
    # leave X and y with different lengths.
    design_kept = design_filtered.loc[survey_tmp.index]

    # Normalize Data
    min_max_scaler = MinMaxScaler()
    design_scaled = min_max_scaler.fit_transform(design_kept)
    design_scaled = pd.DataFrame(design_scaled, columns=design_kept.columns)
    survey_tmp_scaled = min_max_scaler.fit_transform(survey_tmp)

    # Model Fitting
    design_scaled = sm.add_constant(design_scaled[['design_time_sum', 'design_click_sum']])
    model_filtered = sm.OLS(survey_tmp_scaled, design_scaled)
    results_robust = model_filtered.fit(cov_type='HC3')

    # Append Results
    results_h3.append(results_robust)

    # Statistical Tests
    residuals = results_robust.resid
    linearity_p_value = linear_reset(results_robust, power=2, test_type='fitted').pvalue
    normality_p_value = stats.normaltest(residuals)[1]
    homoscedasticity_p_value = het_breuschpagan(residuals, results_robust.model.exog)[1]
    independence_statistic = durbin_watson(residuals)

    # Durbin-Watson is a statistic on [0, 4], not a p-value: values near 2
    # indicate no first-order autocorrelation. Comparing it with 0.05 would
    # pass almost every model, so use the conventional 1.5-2.5 band instead.
    independence_ok = 1.5 < independence_statistic < 2.5

    # Interpretation of Results
    if (
        linearity_p_value > 0.05 and
        normality_p_value > 0.05 and
        homoscedasticity_p_value > 0.05 and
        independence_ok
    ):
        print(f"Column: {j}")
        print("\nInterpretation:")
        print("Linearity: Satisfied")
        print("Normality: Satisfied")
        print("Homoscedasticity: Satisfied")
        print("Independence: Satisfied")
        print("=" * 80)
        print(results_robust.summary())


Column: Effectiveness

Interpretation:
Linearity: Satisfied
Normality: Satisfied
Homoscedasticity: Satisfied
Independence: Satisfied
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.117
Model:                            OLS   Adj. R-squared:                  0.049
Method:                 Least Squares   F-statistic:                     1.129
Date:                Sun, 08 Dec 2024   Prob (F-statistic):              0.339
Time:                        15:54:38   Log-Likelihood:               -0.90917
No. Observations:                  29   AIC:                             7.818
Df Residuals:                      26   BIC:                             11.92
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                       coef    std err          z      P>|z|      [0.025     

In [7]:
# from statsmodels.stats import linear_reset, het_breuschpagan, durbin_watson
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Regress every survey item on the improvement-stage activity features
# (time and click totals) and print the HC3-robust summary for each item
# whose model passes all four assumption checks.

results_i = []   # not filled in this cell; kept so the name exists afterwards
results_h3 = []  # HC3-robust fit for every survey column, in column order

# Outlier Removal using Mahalanobis Distance.
# This depends only on `improvement`, so it is done once instead of once per
# survey column.
data_mean = np.mean(improvement, axis=0)
cov_matrix = np.cov(improvement.values.T)
inv_cov_matrix = np.linalg.inv(cov_matrix)
mahal_distances = improvement.apply(mahalanobis_distance, axis=1, args=(data_mean, inv_cov_matrix))
threshold = np.percentile(mahal_distances, 97.5)  # rows at/above the 97.5th percentile are dropped
improvement_filtered = improvement[mahal_distances < threshold]

for kkk, j in enumerate(survey.columns):
    # Answers of the retained respondents; rows with a missing answer are dropped.
    survey_tmp = pd.DataFrame(survey[j][improvement_filtered.index]).dropna()
    # Keep exactly the same respondents in X, otherwise a dropped answer would
    # leave X and y with different lengths.
    improvement_kept = improvement_filtered.loc[survey_tmp.index]

    # Normalize Data
    min_max_scaler = MinMaxScaler()
    improvement_scaled = min_max_scaler.fit_transform(improvement_kept)
    improvement_scaled = pd.DataFrame(improvement_scaled, columns=improvement_kept.columns)
    survey_tmp_scaled = min_max_scaler.fit_transform(survey_tmp)

    # Model Fitting
    improvement_scaled = sm.add_constant(improvement_scaled[['improvement_time_sum', 'improvement_click_sum']])
    model_filtered = sm.OLS(survey_tmp_scaled, improvement_scaled)
    results_robust = model_filtered.fit(cov_type='HC3')

    # Append Results
    results_h3.append(results_robust)

    # Statistical Tests
    residuals = results_robust.resid
    linearity_p_value = linear_reset(results_robust, power=2, test_type='fitted').pvalue
    normality_p_value = stats.normaltest(residuals)[1]
    homoscedasticity_p_value = het_breuschpagan(residuals, results_robust.model.exog)[1]
    independence_statistic = durbin_watson(residuals)

    # Durbin-Watson is a statistic on [0, 4], not a p-value: values near 2
    # indicate no first-order autocorrelation. Comparing it with 0.05 would
    # pass almost every model, so use the conventional 1.5-2.5 band instead.
    independence_ok = 1.5 < independence_statistic < 2.5

    # Interpretation of Results
    if (
        linearity_p_value > 0.05 and
        normality_p_value > 0.05 and
        homoscedasticity_p_value > 0.05 and
        independence_ok
    ):
        print(f"Column: {j}")
        print("\nInterpretation:")
        print("Linearity: Satisfied")
        print("Normality: Satisfied")
        print("Homoscedasticity: Satisfied")
        print("Independence: Satisfied")
        print("=" * 80)
        print(results_robust.summary())


Column: Usefulness

Interpretation:
Linearity: Satisfied
Normality: Satisfied
Homoscedasticity: Satisfied
Independence: Satisfied
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.051
Model:                            OLS   Adj. R-squared:                 -0.021
Method:                 Least Squares   F-statistic:                    0.5738
Date:                Sun, 08 Dec 2024   Prob (F-statistic):              0.570
Time:                        15:54:39   Log-Likelihood:                -5.4194
No. Observations:                  29   AIC:                             16.84
Df Residuals:                      26   BIC:                             20.94
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                            coef    std err          z      P>|z|      [0.025   

In [191]:

# Regress every survey score column on the improvement-stage activity totals
# after Mahalanobis outlier removal, and print the HC3-robust summary plus the
# four assumption diagnostics for items with at least one significant slope.
#
# NOTE(review): `df`, `col1`, `improvement_time` and `improvement_click` are
# not defined in this cell; they must come from earlier cells - confirm they
# still exist on a fresh kernel.

# Imported here because the Breusch-Godfrey test was previously reached through
# a `dg` alias that no visible cell defines.
from statsmodels.stats.diagnostic import acorr_breusch_godfrey

results_i = []   # plain OLS fit for every score column, in column order
results_h3 = []  # HC3-robust fit for every score column, in column order

# Scores. Assigned before the loop so the loop iterates over the same frame it
# indexes, rather than over whatever `df2` an earlier cell left behind.
df2 = df[col1]

# Predictors: per-respondent totals of time and clicks.
i2 = pd.DataFrame()
i2['improvement_time_sum'] = improvement_time.sum(axis=1)
i2['improvement_click_sum'] = improvement_click.sum(axis=1)

# Mahalanobis outlier removal depends only on the predictors, so it is done
# once. `mahalanobis_distance` is the helper defined near the top of the
# notebook; it is no longer re-declared on every iteration.
data_mean = np.mean(i2, axis=0)
cov_matrix = np.cov(i2.values.T)
inv_cov_matrix = np.linalg.inv(cov_matrix)
mahal_distances = i2.apply(mahalanobis_distance, axis=1, args=(data_mean, inv_cov_matrix))
threshold = np.percentile(mahal_distances, 97.5)  # rows at/above the 97.5th percentile are dropped
i_filtered = i2[mahal_distances < threshold]

for kkk, j in enumerate(df2.columns):
    # Scores of the retained respondents; rows with a missing score are dropped.
    df2_tmp = pd.DataFrame(df2[j][i_filtered.index]).dropna(axis=0)
    # Keep exactly the same respondents in X so X and y stay the same length.
    i = i_filtered.loc[df2_tmp.index]

    min_max_scaler = MinMaxScaler()
    X_MinMax_train = min_max_scaler.fit_transform(i)
    i = pd.DataFrame(X_MinMax_train, columns=i2.columns)
    df2_tmp = min_max_scaler.fit_transform(df2_tmp)
    i = sm.add_constant(i[['improvement_time_sum', 'improvement_click_sum']])

    model_filtered = sm.OLS(df2_tmp, i)

    results = model_filtered.fit()
    results_robust = model_filtered.fit(cov_type='HC3')

    results_i.append(results)
    results_h3.append(results_robust)

    # Skip items where no slope (everything after the constant) is significant at 5 %.
    if not any(ele <= 0.05 for ele in results_robust.pvalues[1:].values):
        continue

    print(j)
    print(results_robust.summary())
    print("J: "+j)
    print('KKK'+str(kkk))

    residuals = results_robust.resid
    fitted_values = results_robust.fittedvalues

    # Run the diagnostics
    # 1. Linearity (RESET test)
    print("1. Linearity Test")
    linearity_p_value = linear_reset(results_robust, power=2, test_type='fitted').pvalue
    print(f"RESET test p-value: {linearity_p_value}")

    # 2. Normality
    print("\n2. Normality Test")
    normality_p_value = stats.normaltest(residuals)[1]
    print(f"Normality test p-value: {normality_p_value}")

    # 3. Homoscedasticity (Breusch-Pagan)
    print("\n3. Homoscedasticity Test")
    homoscedasticity_p_value = het_breuschpagan(residuals, results_robust.model.exog)[1]
    print(f"Heteroscedasticity test p-value: {homoscedasticity_p_value}")

    # 4. Independence (Breusch-Godfrey, 3 lags). The value is a p-value, so it
    # is labelled as such rather than as a Durbin-Watson statistic.
    print("\n4. Independence Test")
    bg_p_value = acorr_breusch_godfrey(results_robust, nlags=3)[1]
    print(f"Breusch-Godfrey p-value: {bg_p_value}")

    assumptions_hold = (
        linearity_p_value > 0.05
        and normality_p_value > 0.05
        and homoscedasticity_p_value > 0.05
        and bg_p_value > 0.05
    )
    if assumptions_hold:
        # Interpretation: every check passed, so each line reads "Satisfied".
        print("\nResults Interpretation:")
        print("Linearity: Satisfied")
        print("Normality: Satisfied")
        print("Homoscedasticity: Satisfied")
        print("Independence: Satisfied")

        print('========================================================================')

웹이 디자인 업무/공부 효율성에 도움을 줄 수 있다고 생각하나요?
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.158
Model:                            OLS   Adj. R-squared:                  0.093
Method:                 Least Squares   F-statistic:                     3.687
Date:                Tue, 12 Nov 2024   Prob (F-statistic):             0.0389
Time:                        16:49:07   Log-Likelihood:               -0.21262
No. Observations:                  29   AIC:                             6.425
Df Residuals:                      26   BIC:                             10.53
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------