# QuickPrint Processing Time Analysis - Solution

This notebook provides a complete solution for analyzing the effectiveness of QuickPrint's new AI-powered pre-processing system, including:
1. Hypothesis Formation
2. Test Statistics
3. Decision and Visualization
4. Business Analysis and Recommendations

In [None]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from utils.testing.hypothesis_tests import (
    check_test_statistic,
    check_p_value,
    check_conclusion,
    check_assumptions
)

# Set plotting style.
# Matplotlib 3.6 renamed its bundled 'seaborn' style sheet to 'seaborn-v0_8'
# and 3.8 removed the old name, so use whichever one this installation ships
# instead of hard-coding a name that raises OSError on newer versions.
_seaborn_style = next(
    (name for name in ('seaborn-v0_8', 'seaborn') if name in plt.style.available),
    None,
)
if _seaborn_style is not None:
    plt.style.use(_seaborn_style)
else:
    # Neither style sheet is bundled: fall back to seaborn's own grid style.
    sns.set_style('darkgrid')
sns.set_palette('husl')

## Part 1: Hypothesis Formation

First, let's define our hypotheses and identify the appropriate test.

In [None]:
# Define data and parameters
processing_times = np.array([685, 695, 701, 688, 692, 679, 683, 698, 691, 687])
mu_0 = 720  # Standard processing time (seconds); the mean under H₀
alpha = 0.05  # Significance level

# Print hypotheses.
# The text is built from mu_0 / alpha so it cannot drift away from the values
# the test actually uses further down.
print("Hypothesis Test Setup:")
print("-" * 50)
print(f"Null Hypothesis (H₀): μ = {mu_0} seconds")
print(f"Alternative Hypothesis (H₁): μ < {mu_0} seconds")
print("Test Type: One-tailed t-test")
print(f"Significance Level (α): {alpha}")

# Check assumptions
# 1. Normality test (Shapiro-Wilk); only the p-value is kept
_, normality_p = stats.shapiro(processing_times)

# 2. Visual normality check: histogram (left) and Q-Q plot (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Histogram with a kernel density overlay
sns.histplot(processing_times, kde=True, ax=ax1)
ax1.set_title('Distribution of Processing Times')
ax1.set_xlabel('Time (seconds)')

# Q-Q plot against the normal distribution
stats.probplot(processing_times, dist="norm", plot=ax2)
ax2.set_title('Q-Q Plot')

plt.tight_layout()
plt.show()

print("\nAssumption Testing:")
print("-" * 50)
print(f"Shapiro-Wilk test p-value: {normality_p:.4f}")
# Normality is judged at a fixed 0.05 threshold
print(f"Normality assumption {'satisfied' if normality_p > 0.05 else 'violated'}")

# Check assumptions using utility function
# NOTE(review): check_assumptions is a project helper; it presumably validates
# these three keys — confirm against utils.testing.hypothesis_tests.
assumptions = {
    'normality_p': normality_p,
    'independence': True,  # Assuming independence based on problem context
    'sample_size': len(processing_times)
}

if check_assumptions(assumptions):
    print("✓ All test assumptions are satisfied")
else:
    print("✗ Some test assumptions are violated")

## Part 2: Test Statistics

Now we'll calculate the necessary test statistics and perform the hypothesis test.

In [None]:
# Calculate test statistics
sample_mean = np.mean(processing_times)
sample_std = np.std(processing_times, ddof=1)  # ddof=1 for sample standard deviation
n = len(processing_times)
df = n - 1  # degrees of freedom for a one-sample t-test

# Calculate t-statistic: distance of the sample mean from mu_0 in standard errors
t_stat = (sample_mean - mu_0) / (sample_std / np.sqrt(n))

# Calculate p-value (one-tailed, lower tail because H₁ is μ < mu_0)
p_value = stats.t.cdf(t_stat, df)

print("Test Statistics:")
print("-" * 50)
print(f"Sample Mean (x̄): {sample_mean:.2f} seconds")
print(f"Sample Standard Deviation (s): {sample_std:.2f} seconds")
print(f"Sample Size (n): {n}")
print(f"Degrees of Freedom (df): {df}")
print(f"t-statistic: {t_stat:.4f}")
# Four significant figures rather than four decimals: a very small p-value
# would otherwise be displayed as a misleading "0.0000".
print(f"p-value: {p_value:.4g}")

# Verify calculations
# NOTE(review): check_test_statistic / check_p_value are project helpers;
# presumably they compare against reference values — confirm in utils.testing.
if check_test_statistic({'t_stat': t_stat}) and check_p_value({'p_value': p_value}):
    print("\n✓ Test statistics correctly calculated")
else:
    print("\n✗ Check your test statistic calculations")

## Part 3: Decision and Visualization

Let's visualize our results and make a decision about the null hypothesis.

In [None]:
# Create visualization of test results
# Critical value of the lower-tailed test: H₀ is rejected when t <= t_crit.
t_crit = stats.t.ppf(alpha, df)

# Cover the central 99.8% of the t-distribution, widened when needed so the
# observed statistic is drawn over the density curve instead of far beyond
# the end of it.
x_lower = min(stats.t.ppf(0.001, df), t_stat - 1)
x_upper = max(stats.t.ppf(0.999, df), t_stat + 1)
x = np.linspace(x_lower, x_upper, 1000)
y = stats.t.pdf(x, df)
in_rejection = x <= t_crit  # grid points inside the rejection region

plt.figure(figsize=(12, 6))
plt.plot(x, y, 'b-', lw=2, label='t-distribution')
plt.fill_between(x[in_rejection], y[in_rejection],
                 color='red', alpha=0.3, label='Rejection Region')
plt.axvline(t_stat, color='g', linestyle='--', label='Observed t-statistic')

plt.title('t-Distribution with Test Results')
plt.xlabel('t-value')
plt.ylabel('Probability Density')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Make decision: reject H₀ only when the one-tailed p-value is below alpha
decision = "Reject H₀" if p_value < alpha else "Fail to reject H₀"
print("\nHypothesis Test Decision:")
print("-" * 50)
print(f"Decision: {decision}")

# Calculate effect size (Cohen's d): mean difference in sample standard deviations
cohens_d = (sample_mean - mu_0) / sample_std
print(f"Effect Size (Cohen's d): {cohens_d:.4f}")

# Verify conclusion
# NOTE(review): check_conclusion is a project helper; the expected schema of
# this dict is assumed from the keys used here — confirm in utils.testing.
conclusion = {
    'decision': decision,
    'effect_size': cohens_d,
    'practical_significance': abs(cohens_d) > 0.5  # Medium effect size threshold
}

if check_conclusion(conclusion):
    print("✓ Statistical conclusion correctly determined")
else:
    print("✗ Check your statistical conclusion")

## Part 4: Business Analysis

Let's analyze the business implications of our findings.

In [None]:
# Calculate improvement metrics
time_reduction = mu_0 - sample_mean
percent_improvement = (time_reduction / mu_0) * 100

# Calculate consistency metrics
# NOTE(review): 30 is presumably the original system's variance in seconds²
# (hence the square root) — confirm against the problem statement.
original_variance = 30
original_cv = np.sqrt(original_variance) / mu_0 * 100  # Original coefficient of variation
new_cv = (sample_std / sample_mean) * 100  # New coefficient of variation
cv_change = original_cv - new_cv  # positive => the new system is more consistent

print("Business Impact Analysis:")
print("-" * 50)
print(f"Average Time Reduction: {time_reduction:.1f} seconds")
print(f"Percentage Improvement: {percent_improvement:.1f}%")
print("Process Consistency (CV):")
print(f"  - Original System: {original_cv:.1f}%")
print(f"  - New System: {new_cv:.1f}%")

print("\nBusiness Recommendations:")
print("-" * 50)
if p_value < alpha and percent_improvement > 2:  # 2% threshold for practical significance
    print("1. IMPLEMENT the new AI-powered system:")
    print(f"   - Reduces processing time by {time_reduction:.1f} seconds ({percent_improvement:.1f}%)")
    # Only claim a consistency gain when the CV actually went down; otherwise
    # report the increase instead of printing a negative "improvement".
    if cv_change >= 0:
        print(f"   - Improves process consistency by {cv_change:.1f}%")
    else:
        print(f"   - Note: process variability (CV) increased by {-cv_change:.1f}%")
    print("2. Monitor system performance over time")
    print("3. Consider expanding to other document types")
else:
    print("1. CONTINUE testing the system:")
    print("   - Collect more data points")
    print("   - Investigate process variations")
    print("2. Analyze specific cases where performance varies")
    print("3. Consider system modifications before full implementation")