# Assignment

# Q1

In [1]:
import scipy.stats as stats

# Summary statistics of the sample
sample_mean = 50
sample_std = 5

# Significance level and sample size (n is the number of observations,
# not a degrees-of-freedom value)
alpha = 0.05
n = 100

# Confidence level implied by alpha. It feeds both the interval and the
# printed label, so the label cannot drift from the computation if alpha
# is changed.
confidence = 1 - alpha

# Standard error of the mean
std_error = sample_std / (n ** 0.5)

# Normal-based (z) interval centred on the sample mean.
# NOTE(review): the spread is a sample standard deviation, so a t interval
# with n - 1 degrees of freedom would be marginally wider; confirm that the
# z approximation is what the exercise intends.
lower, upper = stats.norm.interval(confidence, loc=sample_mean, scale=std_error)

# Print the confidence interval
print(f"{confidence:.0%} Confidence Interval: [{lower:.2f}, {upper:.2f}]")


95% Confidence Interval: [49.02, 50.98]


# Q2

In [None]:
import numpy as np
from scipy.stats import chisquare

# Observed colour counts
observed = np.array([15, 20, 22, 8, 10, 25])

# Relative weights of the hypothesised colour distribution. Only their
# ratios matter: they are normalised to proportions below.
expected_weights = np.array([30, 30, 30, 15, 15, 30])

# Expected counts must be on the same scale as the observed counts.
# scipy.stats.chisquare rejects inputs whose totals differ, and the previous
# scaling (weights * 0.2) produced a total of 30 against 100 observations.
expected = expected_weights / expected_weights.sum() * observed.sum()

# Perform the chi-square goodness of fit test
stat, p_value = chisquare(observed, expected)

# Print the test statistic and p-value
print(f"Chi-Square Statistic: {stat:.2f}")
print(f"P-value: {p_value:.4f}")

# Determine if the null hypothesis is rejected or not
if p_value < 0.05:
    print("The null hypothesis is rejected. The distribution of colors in the bag is not as expected.")
else:
    print("The null hypothesis cannot be rejected. The distribution of colors in the bag is as expected.")

# Q3

In [6]:
import numpy as np
from scipy.stats import chi2_contingency

# Observed counts arranged as a 3x2 contingency table (one inner list per row)
observed = np.array([
    [20, 15],
    [10, 25],
    [15, 20],
])

# Run the chi-square test of independence; the call yields the statistic,
# the p-value, the degrees of freedom and the expected-count table
result = chi2_contingency(observed)
stat, p_value, dof, expected = result

# Report the statistic and p-value
print(f"Chi-Square Statistic: {stat:.2f}")
print(f"P-value: {p_value:.4f}")


Chi-Square Statistic: 5.83
P-value: 0.0541


# Q4

In [9]:
from statsmodels.stats.proportion import proportion_confint

# Sample size and the number of smokers counted in it
n, num_smokers = 500, 60

# Observed share of smokers in the sample
p = num_smokers / n

# Interval for the proportion using the 'wilson' method at alpha = 0.05
ci = proportion_confint(count=num_smokers, nobs=n, alpha=0.05, method='wilson')

# Report the lower and upper bounds
print(f"95% Confidence Interval: ({ci[0]:.4f}, {ci[1]:.4f})")


95% Confidence Interval: (0.0944, 0.1514)


# Q5

In [10]:
import scipy.stats as stats

# Sample statistics and the sample size used for the standard error.
# NOTE(review): with n = 1 the standard error equals std_dev, so the interval
# below spans the spread of individual values rather than the uncertainty of
# a mean; confirm the intended sample size.
mean, std_dev = 75, 12
n = 1

# Confidence level of the interval
conf_level = 0.9

# Standard error of the mean
std_err = std_dev / (n ** 0.5)

# Normal-based interval centred on the mean
ci = stats.norm.interval(conf_level, loc=mean, scale=std_err)

# Report the interval
print(f"{conf_level*100}% Confidence Interval: ({ci[0]:.2f}, {ci[1]:.2f})")


90.0% Confidence Interval: (55.26, 94.74)


# Q6

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Degrees of freedom of the distribution to draw
df = 10

# Horizontal range: from the 1st to the 99th percentile of the distribution
x = np.linspace(stats.chi2.ppf(0.01, df), stats.chi2.ppf(0.99, df), 100)

# Density evaluated on that grid
y = stats.chi2.pdf(x, df)

# Draw the density curve
fig, ax = plt.subplots()
ax.plot(x, y)

# Shade the region at or beyond a chi-square statistic of 15; the mask is
# built once and reused for both coordinates
tail_mask = x >= 15
ax.fill_between(x[tail_mask], y[tail_mask], color='grey', alpha=0.5)

# Axis labels and title
ax.set_xlabel('Chi-Square Statistic')
ax.set_ylabel('Probability Density')
ax.set_title(f'Chi-Square Distribution with {df} Degrees of Freedom')

# Render the figure
plt.show()


# Q7

To calculate a 99% confidence interval for the true proportion of people in the population who prefer Coke, given a sample of 1000 people and 520 who preferred Coke, we can use the following formula:

CI = (p-h, p+h)

where p is the sample proportion, h is the margin of error, and CI is the confidence interval.

The sample proportion is simply the number of people who preferred Coke divided by the sample size:

p = 520 / 1000 = 0.52

The margin of error can be calculated as:

h = z * sqrt(p * (1-p) / n)

where z is the critical value from the standard normal distribution corresponding to a 99% confidence level, p is the sample proportion, and n is the sample size.

In [11]:
import scipy.stats as stats
import math

# Survey inputs: respondents, how many preferred Coke, and the confidence level
n, x = 1000, 520
conf_level = 0.99

# Sample proportion
p = x / n

# Two-sided critical value: leaves (1 - conf_level) / 2 in each tail of the
# standard normal distribution
z = stats.norm.ppf((1 + conf_level) / 2)

# Margin of error: critical value times the standard error of the proportion
h = z * math.sqrt(p * (1 - p) / n)

# Interval centred on the sample proportion
ci = (p - h, p + h)

# Report the interval
print("99% confidence interval for the proportion of people who prefer Coke: ({:.4f}, {:.4f})".format(*ci))



99% confidence interval for the proportion of people who prefer Coke: (0.4793, 0.5607)


# Q8

In [12]:
import numpy as np
import scipy.stats as stats

# Observed counts for the two outcomes of the coin flips
observed_freq = [55, 45]

# Expected counts under the null hypothesis of a fair coin
expected_freq = [50, 50]

# Significance level used for the decision below
significance_level = 0.05

# Convert to arrays once instead of rebuilding them inside the formula
observed_arr = np.asarray(observed_freq, dtype=float)
expected_arr = np.asarray(expected_freq, dtype=float)

# Chi-square statistic: sum of squared deviations scaled by the expected counts
chi_square_stat = np.sum((observed_arr - expected_arr) ** 2 / expected_arr)

# Degrees of freedom: number of categories minus one
df = len(observed_freq) - 1

# Upper-tail probability; sf avoids the precision loss of 1 - cdf when the
# statistic is large
p_val = stats.chi2.sf(chi_square_stat, df)

# Print results
print("Chi-square test statistic: {:.2f}".format(chi_square_stat))
print("Degrees of freedom: {}".format(df))
print("p-value: {:.4f}".format(p_val))

# The statistic squares the deviations, so a rejection only says the counts
# differ from 50/50; it cannot say which side the coin favours.
if p_val <= significance_level:
    conclusion = "Reject null hypothesis: The coin is not fair"
else:
    conclusion = "Fail to reject null hypothesis: The coin is fair"
print(conclusion)


Chi-square test statistic: 1.00
Degrees of freedom: 1
p-value: 0.3173
Fail to reject null hypothesis: The coin is fair


# Q9

In [14]:
import numpy as np
from scipy.stats import chi2_contingency

# 2x2 table of observed counts (one inner list per row)
observed = np.array([
    [60, 140],
    [30, 170],
])

# Chi-square test of independence with SciPy's default settings; besides the
# statistic and p-value it returns the degrees of freedom and expected counts
test_output = chi2_contingency(observed)
chi2, pval, dof, expected = test_output

print("Chi-square statistic:", chi2)
print("P-value:", pval)


Chi-square statistic: 12.057347670250895
P-value: 0.0005158863863703744


# Q10

In [15]:
import numpy as np
from scipy.stats import chi2_contingency

# 2x3 table of observed counts (one inner list per row)
observed = np.array([
    [200, 150, 150],
    [225, 175, 100],
])

# Chi-square test of independence; the degrees of freedom and the
# expected-count table are returned alongside the statistic and p-value
test_output = chi2_contingency(observed)
chi2, pval, dof, expected = test_output

print("Chi-square statistic:", chi2)
print("P-value:", pval)
# The p-value falls below the 0.01 significance level, so the null hypothesis
# is rejected: there is a significant association between chocolate
# preference and country of origin.

Chi-square statistic: 13.393665158371041
P-value: 0.0012348168997745915


# Q11

In [17]:
import numpy as np
from scipy.stats import t

# Sample statistics, hypothesised population mean, and sample size
sample_mean, pop_mean = 72, 70
sample_std = 10
n = 30

# t-statistic: distance of the sample mean from the hypothesised mean,
# measured in standard errors
standard_error = sample_std / np.sqrt(n)
t_stat = (sample_mean - pop_mean) / standard_error

# Two-sided p-value from the t distribution with n - 1 degrees of freedom:
# double the probability of exceeding |t_stat| in one tail
one_tail_prob = 1 - t.cdf(abs(t_stat), n-1)
pval = 2 * one_tail_prob

print("t-statistic:", t_stat)
print("P-value:", pval)


t-statistic: 1.0954451150103321
P-value: 0.2823362372860698
