In [2]:
import pandas as pd
from scipy.stats import anderson, pearsonr
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Load the data
data = pd.read_csv('Continuous ST R.dat', sep='\t')

# 1. Anderson-Darling Test for Normality
def check_normality(group):
    result = anderson(group)
    return result.statistic, result.significance_level

failed_normality_tests = []
for (operator, part), group in data.groupby(['Operator', 'Part']):
    statistic, critical_value = check_normality(group['Value'])
    if statistic > critical_value[2]:  # 5% significance level
        failed_normality_tests.append((operator, part))

num_failed_normality_tests = len(failed_normality_tests)
print(f'Number of failed normality tests: {num_failed_normality_tests}')

# 2. Range Chart for Each Operator
data['Range'] = data.groupby(['Operator', 'Part'])['Value'].transform(lambda x: x.max() - x.min())

# Check for stability in range
range_in_control = data.groupby('Operator')['Range'].apply(lambda x: x.mean() < 3 * x.std())
print(f'Is the range chart in control? {all(range_in_control)}')

# 3. Correlation and Significance Test
def correlation_and_pvalue(operator):
    operator_data = data[data['Operator'] == operator]
    means = operator_data.groupby('Part')['Value'].mean()
    std_devs = operator_data.groupby('Part')['Value'].std()
    correlation, p_value = pearsonr(means, std_devs)
    return correlation, p_value

corr_op1, pval_op1 = correlation_and_pvalue(1)
corr_op2, pval_op2 = correlation_and_pvalue(2)

print(f'Operator 1 - Correlation: {corr_op1}, P-value: {pval_op1}')
print(f'Operator 2 - Correlation: {corr_op2}, P-value: {pval_op2}')

# 4. ANOVA to Determine Significant Factors
model = ols('Value ~ C(Part) * C(Operator)', data=data).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

significant_factors = anova_table[anova_table['PR(>F)'] < 0.05].index.tolist()
print(f'Significant factors: {significant_factors}')

# 5. Largest and Smallest Components of Variance
anova_table['mean_sq'] = anova_table['sum_sq'] / anova_table['df']
variance_components = anova_table['mean_sq']
largest_component = variance_components.idxmax()
smallest_component = variance_components.idxmin()

print(f'Largest component of variance: {largest_component}')
print(f'Smallest component of variance: {smallest_component}')

# 6. Percentage Contribution for Repeatability and Reproducibility
total_variance = variance_components.sum()
repeatability_variance = anova_table.loc['Residual', 'mean_sq']
reproducibility_variance = anova_table.loc['C(Operator)', 'mean_sq']

repeatability_pct = (repeatability_variance / total_variance) * 100
reproducibility_pct = (reproducibility_variance / total_variance) * 100

print(f'Repeatability Percent Contribution: {repeatability_pct}')
print(f'Reproducibility Percent Contribution: {reproducibility_pct}')


Number of failed normality tests: 0
Is the range chart in control? True
Operator 1 - Correlation: -0.10168781276747861, P-value: 0.6286230095963571
Operator 2 - Correlation: 0.11299674143571033, P-value: 0.5907246893253008
                          sum_sq     df            F         PR(>F)
C(Part)              2541.985116   24.0  5328.384867  1.052296e-266
C(Operator)             3.576040    1.0   179.902083   1.113457e-29
C(Part):C(Operator)     1.128474   24.0     2.365452   6.458536e-04
Residual                3.975540  200.0          NaN            NaN
Significant factors: ['C(Part)', 'C(Operator)', 'C(Part):C(Operator)']
Largest component of variance: C(Part)
Smallest component of variance: Residual
Repeatability Percent Contribution: 0.018143379281200547
Reproducibility Percent Contribution: 3.264031729861098
