# Height Data Analysis with Bootstrap Confidence Interval

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the height measurements from the CSV file in the working directory,
# then preview the first five rows as the cell's output.
df = pd.read_csv('height.csv')
df.head(n=5)

In [None]:
# Histogram of the raw height observations (10 bins), drawn on an explicit
# Axes object instead of the implicit pyplot state machine.
fig, ax = plt.subplots()
ax.hist(df['height'], bins=10, edgecolor='black')
ax.set_title('Histogram of Heights')
ax.set_xlabel('Height (inches)')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Point estimate: the arithmetic mean of the 'height' column.
height_column = df['height']
mean_height = height_column.mean()
print(f'Sample mean height: {mean_height:.2f} inches')

In [None]:
# Bootstrap the sampling distribution of the mean height and report a 95%
# percentile confidence interval.
N_BOOT = 10_000  # number of bootstrap resamples

# A seeded generator makes the reported interval identical on every run;
# the unseeded version produced slightly different bounds each time.
rng = np.random.default_rng(42)

# Resample from a plain NumPy array: each draw is much cheaper than calling
# Series.sample() 10,000 times.
heights = df['height'].to_numpy(dtype=float)
n = len(df)

# Each replicate draws n observations with replacement and records their
# mean. nanmean mirrors Series.mean(), which skips missing values.
boot_means = [
    np.nanmean(rng.choice(heights, size=n, replace=True))
    for _ in range(N_BOOT)
]

# Percentile method: the middle 95% of the bootstrap means.
ci_lower, ci_upper = np.percentile(boot_means, [2.5, 97.5])
print(f'95% bootstrap CI: [{ci_lower:.2f}, {ci_upper:.2f}]')

In [None]:
# Show the bootstrap distribution of the mean and mark both ends of the
# 95% percentile interval with dashed red lines.
fig, ax = plt.subplots()
ax.hist(boot_means, bins=30, edgecolor='gray')
for pct_label, bound in (('2.5%', ci_lower), ('97.5%', ci_upper)):
    ax.axvline(bound, color='red', linestyle='--', label=f'{pct_label} ({bound:.2f})')
ax.set_title('Bootstrap Distribution of Sample Mean Heights')
ax.set_xlabel('Mean Height (inches)')
ax.set_ylabel('Frequency')
ax.legend()
plt.show()