In [None]:
import pandas as pd
import numpy as np
from scipy.stats import anderson

# Labels for the estimated p-value, ordered to match the significance levels
# that scipy.stats.anderson reports alongside its critical values
# (15%, 10%, 5%, 2.5%, 1%).
P_VALUE_LABELS = ('< 0.15', '< 0.10', '< 0.05', '< 0.025', '< 0.01')


def estimate_p_value(statistic, critical_values):
    """Return a coarse p-value label for an Anderson-Darling statistic.

    Args:
        statistic: The AD test statistic.
        critical_values: Critical values in ascending order, one per entry
            of ``P_VALUE_LABELS`` (15%, 10%, 5%, 2.5%, 1%).

    Returns:
        The label of the strictest significance level whose critical value
        the statistic exceeds, or ``'>= 0.15'`` if it exceeds none of them.
    """
    label = '>= 0.15'  # Default: not significant even at the 15% level
    # Keep the label of the last (strictest) threshold that is exceeded.
    # The previous chain of independent `if` statements attached its `else`
    # only to the 1% check, so every statistic below the 1% critical value
    # was reported as '>= 0.15' regardless of the looser thresholds.
    for critical_value, bound in zip(critical_values, P_VALUE_LABELS):
        if statistic > critical_value:
            label = bound
    return label


# Run the analysis only when executed as a script / notebook cell, so the
# helper above can be imported without touching the file system. The
# statements stay at module scope so the variables remain available to
# later cells.
if __name__ == "__main__":
    # Read data from an Excel file
    file_path = '/content/size outliers removed.xlsx'  # Replace with your file path
    data = pd.read_excel(file_path)

    # Specify the dataset columns you want to analyze
    datasets = ['s1', 's2', 's3', 'w1', 'w2', 'w3']

    # Initialize a dictionary to store the results
    ad_test_results = {}

    # Perform the Anderson-Darling test for each dataset
    for ds in datasets:
        group = data[ds].dropna().values  # Remove NaN values
        result = anderson(group)

        # One decision per significance level: reject normality when the
        # AD statistic exceeds that level's critical value
        significance_results = [
            "Reject" if result.statistic > critical_value else "Fail to Reject"
            for critical_value in result.critical_values
        ]

        # Estimate a p-value category from the AD statistic
        p_value = estimate_p_value(result.statistic, result.critical_values)

        # Store the results in a format that includes significance details
        ad_test_results[ds] = {
            'AD Statistic': result.statistic,
            'Significance Levels': result.significance_level,
            'Critical Values': result.critical_values,
            'Normality Result': significance_results,
            'Estimated p-value': p_value,
        }

    # Convert the dictionary to a DataFrame and transpose it so that each
    # dataset becomes a row
    ad_test_results_df = pd.DataFrame(ad_test_results).T

    # Output the transposed results to an Excel file
    output_file_path = 'ad_test_results_transposed.xlsx'
    ad_test_results_df.to_excel(output_file_path, index=True)

    # Display the transposed results in Colab
    print(ad_test_results_df)



   AD Statistic          Significance Levels  \
s1     1.061919  [15.0, 10.0, 5.0, 2.5, 1.0]   
s2     2.706902  [15.0, 10.0, 5.0, 2.5, 1.0]   
s3     1.881243  [15.0, 10.0, 5.0, 2.5, 1.0]   
w1     0.670946  [15.0, 10.0, 5.0, 2.5, 1.0]   
w2     0.464925  [15.0, 10.0, 5.0, 2.5, 1.0]   
w3    11.731744  [15.0, 10.0, 5.0, 2.5, 1.0]   

                        Critical Values  \
s1    [0.57, 0.649, 0.779, 0.908, 1.08]   
s2  [0.573, 0.653, 0.783, 0.914, 1.087]   
s3  [0.574, 0.654, 0.784, 0.915, 1.088]   
w1  [0.573, 0.653, 0.784, 0.914, 1.087]   
w2  [0.572, 0.651, 0.782, 0.912, 1.084]   
w3  [0.573, 0.653, 0.783, 0.914, 1.087]   

                                     Normality Result Estimated p-value  
s1   [Reject, Reject, Reject, Reject, Fail to Reject]           >= 0.15  
s2           [Reject, Reject, Reject, Reject, Reject]            < 0.01  
s3           [Reject, Reject, Reject, Reject, Reject]            < 0.01  
w1  [Reject, Reject, Fail to Reject, Fail to Rejec...           >