In [1]:
import numpy as np
import pandas as pd

from scipy import stats


## Quality Control Experiment: Inspection of Surface Quality of Strip

A manufacturing company ran experimental trials to measure the efficacy of a new AI camera-vision inspection system, intended for customers who have extremely low tolerances for surface defects on their brass strip.

QC inspectors were randomly assigned to two groups: a control group that performed manual inspection of the strip as usual, and a linescan camera inspection group that used the AI system to identify surface defects. The inspection results for both groups were audited over a period of two weeks, and the audit results (in % of defects missed) were recorded in the output data file.


In [2]:
# Read the audit results and index the rows by the QC_ID column in one step.
data = pd.read_csv('data_experiments_QCInsp.csv').set_index('QC_ID')
data

Unnamed: 0_level_0,Group,Audit_Results
QC_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1000,Manual Inspection,0.023413
1002,Linescan Camera Inspection,0.031036
1006,Linescan Camera Inspection,0.036586
1014,Linescan Camera Inspection,0.017999
1018,Manual Inspection,0.040784
...,...,...
1953,Manual Inspection,0.024757
1958,Linescan Camera Inspection,0.038208
1962,Manual Inspection,0.036437
1966,Linescan Camera Inspection,0.041254


In [10]:
# Mean of Audit_Results within each value of the Group column.
mean_audit_results = data["Audit_Results"].groupby(data["Group"]).mean()
mean_audit_results

Group
Linescan Camera Inspection    0.036227
Manual Inspection             0.040451
Name: Audit_Results, dtype: float64

In [11]:
# Absolute gap between the two group means; abs() drops the sign, so this
# value alone does not say which group had the lower mean.
difference_in_means = abs(
    mean_audit_results.loc["Manual Inspection"]
    - mean_audit_results.loc["Linescan Camera Inspection"]
)
difference_in_means

0.004224295298701296

## Independent Samples T-test


In [6]:
# Split the rows by experimental group: A = manual inspection, B = linescan camera.
A = data.loc[data['Group'].eq('Manual Inspection')]
B = data.loc[data['Group'].eq('Linescan Camera Inspection')]

# Independent two-sample t-test on Audit_Results, called with SciPy's defaults
# (the default assumes equal population variances in the two groups).
stats.ttest_ind(
    A['Audit_Results'],
    B['Audit_Results'],
)


TtestResult(statistic=2.739837786326116, pvalue=0.006882937710926736, df=152.0)