In [None]:
Q1. Calculate the 95% confidence interval for a sample of data with a mean of 50 and a standard deviation
of 5 using Python. Interpret the results.

In [None]:
import numpy as np
import scipy.stats as stats

# Summary statistics given in the problem statement.
mean = 50
std_dev = 5
# The problem does not state a sample size; n = 30 is an assumed value.
# Replace it with the real sample size when known (the interval narrows as n grows).
n = 30

# Standard error of the sample mean.
standard_error = std_dev / np.sqrt(n)

# A two-sided 95% interval leaves 2.5% in each tail, so the critical value is
# the 97.5th percentile of the standard normal distribution (about 1.96).
critical_value = stats.norm.ppf(0.975)

# Half-width of the interval.
margin_of_error = critical_value * standard_error

# Store plain Python floats: interpolating a tuple of NumPy scalars into an
# f-string shows their repr, e.g. "np.float64(48.21...)" under NumPy >= 2.
confidence_interval = (
    float(mean - margin_of_error),
    float(mean + margin_of_error),
)
lower_bound, upper_bound = confidence_interval

# Report the bounds rounded to two decimals, like the other interval cells.
ci_report = f"95% Confidence Interval: {lower_bound:.2f} to {upper_bound:.2f}"
print(ci_report)

# Interpretation requested by the question.
interpretation = (
    f"Interpretation: we are 95% confident that the true population mean lies "
    f"between {lower_bound:.2f} and {upper_bound:.2f}. If the sampling were "
    f"repeated many times, about 95% of the intervals built this way would "
    f"contain the true mean."
)
print(interpretation)


In [None]:
Q2. Conduct a chi-square goodness of fit test to determine if the distribution of colors of M&Ms in a bag
matches the expected distribution of 20% blue, 20% orange, 20% green, 10% yellow, 10% red, and 20%
brown. Use Python to perform the test with a significance level of 0.05.

In [None]:
import numpy as np
from scipy.stats import chisquare

# Significance threshold for the test.
alpha = 0.05

# Target colour mix from the question, in the order
# blue, orange, green, yellow, red, brown.
expected_proportions = np.array([0.20, 0.20, 0.20, 0.10, 0.10, 0.20])

# Illustrative counts for one bag (replace with real data); assumed to be
# listed in the same colour order as the proportions above.
observed_counts = np.array([18, 22, 25, 10, 5, 20])

# chisquare compares counts with counts, so scale the proportions up to the
# size of the observed sample.
total_count = observed_counts.sum()
expected_counts = total_count * expected_proportions

# Goodness-of-fit test: H0 says the bag follows the target colour mix.
chi2_stat, p_value = chisquare(f_obs=observed_counts, f_exp=expected_counts)

print(
    f"Chi-square Statistic: {chi2_stat}",
    f"P-value: {p_value}",
    sep="\n",
)

# Decision at the chosen significance level.
decision = (
    "Reject the null hypothesis: the distribution of M&M colors does not match the expected distribution."
    if p_value < alpha
    else "Fail to reject the null hypothesis: the distribution of M&M colors matches the expected distribution."
)
print(decision)


In [None]:
Q3. Use Python to calculate the chi-square statistic and p-value for a contingency table with the following
data:
Outcome 1    20    15
Outcome 2    10    25
Outcome 3    15    20
Interpret the results of the test.

In [None]:
import numpy as np
from scipy.stats import chi2_contingency

# Observed counts: one row per outcome, one column per group.
data = np.array([
    [20, 15],  # Outcome 1
    [10, 25],  # Outcome 2
    [15, 20],  # Outcome 3
])

# Significance level used to interpret the test.
alpha = 0.05

# Chi-square test of independence between the row and column variables.
chi2_stat, p_value, dof, expected = chi2_contingency(data)

# Report the statistic, p-value, degrees of freedom and the expected counts.
print(
    f"Chi-square Statistic: {chi2_stat}",
    f"P-value: {p_value}",
    f"Degrees of Freedom: {dof}",
    f"Expected Frequencies:\n{expected}",
    sep="\n",
)

# Interpretation: compare the p-value with alpha.
decision = (
    "Reject the null hypothesis: There is a significant association between the variables."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no significant association between the variables."
)
print(decision)


In [None]:
Q4. A study of the prevalence of smoking in a population of 500 individuals found that 60 individuals
smoked. Use Python to calculate the 95% confidence interval for the true proportion of individuals in the
population who smoke.

In [None]:
import numpy as np
import scipy.stats as stats

# Study data: 60 smokers observed among 500 individuals.
n, x = 500, 60

# Point estimate of the proportion of smokers.
p_hat = x / n

# Normal-approximation standard error of a sample proportion.
variance_of_p_hat = (p_hat * (1 - p_hat)) / n
standard_error = np.sqrt(variance_of_p_hat)

# Two-sided 95% interval: 2.5% in each tail, hence the 97.5th percentile
# of the standard normal.
z_score = stats.norm.ppf(0.975)

# Half-width of the interval, then the interval itself.
margin_of_error = z_score * standard_error
lower_bound = p_hat - margin_of_error
upper_bound = p_hat + margin_of_error
confidence_interval = (lower_bound, upper_bound)

# Report the estimate and its interval.
print(
    f"Sample Proportion: {p_hat:.3f}",
    f"95% Confidence Interval: {confidence_interval[0]:.3f} to {confidence_interval[1]:.3f}",
    sep="\n",
)


In [None]:
Q5. Calculate the 90% confidence interval for a sample of data with a mean of 75 and a standard deviation
of 12 using Python. Interpret the results.

In [None]:
import numpy as np
import scipy.stats as stats

# Summary statistics given in the problem statement.
mean = 75
std_dev = 12
# The problem gives no sample size; n = 30 is an assumed value.
# Replace it with the real sample size when known.
n = 30

# Standard error of the sample mean.
standard_error = std_dev / np.sqrt(n)

# A 90% confidence interval is two-sided: it leaves 5% in EACH tail, so the
# critical value is the 95th percentile of the standard normal (about 1.645).
z_score = stats.norm.ppf(0.95)

# Half-width of the interval.
margin_of_error = z_score * standard_error

# Lower and upper bounds of the interval.
confidence_interval = (mean - margin_of_error, mean + margin_of_error)
lower_bound, upper_bound = confidence_interval

# Output results
print(f"Sample Mean: {mean}")
print(f"90% Confidence Interval: {lower_bound:.2f} to {upper_bound:.2f}")

# Interpretation requested by the question.
interpretation = (
    f"Interpretation: we are 90% confident that the true population mean lies "
    f"between {lower_bound:.2f} and {upper_bound:.2f}. If the sampling were "
    f"repeated many times, about 90% of the intervals built this way would "
    f"contain the true mean."
)
print(interpretation)

In [None]:
Q6. Use Python to plot the chi-square distribution with 10 degrees of freedom. Label the axes and shade the
area corresponding to a chi-square statistic of 15.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2

# Distribution settings and the grid on which the density is evaluated.
df = 10  # degrees of freedom
x = np.linspace(0, 30, 1000)
y = chi2.pdf(x, df)

# Right-tail region beyond the chi-square statistic of 15 (to be shaded).
x_fill = np.linspace(15, 30, 500)
y_fill = chi2.pdf(x_fill, df)

# Draw everything on an explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 6))

# Density curve.
ax.plot(x, y, label=f'Chi-square Distribution (df={df})', color='blue')

# Shaded area between the curve and the x-axis for values above 15.
ax.fill_between(x_fill, y_fill, alpha=0.5, color='orange', label='Area for Chi-square > 15')

# Title and axis labels.
ax.set_title('Chi-square Distribution with 10 Degrees of Freedom')
ax.set_xlabel('Chi-square Value')
ax.set_ylabel('Probability Density')

# Dashed vertical marker at the chi-square statistic itself.
ax.axvline(15, color='red', linestyle='--', label='Chi-square = 15')

# Legend, grid, and render.
ax.legend()
ax.grid()
plt.show()

In [None]:
Q7. A random sample of 1000 people was asked if they preferred Coke or Pepsi. Of the sample, 520
preferred Coke. Calculate a 99% confidence interval for the true proportion of people in the population who
prefer Coke.

In [None]:
import numpy as np
import scipy.stats as stats

# Survey data: 520 of the 1000 respondents preferred Coke.
n, x = 1000, 520

# Point estimate of the proportion preferring Coke.
p_hat = x / n

# Normal-approximation standard error of a sample proportion.
variance_of_p_hat = (p_hat * (1 - p_hat)) / n
standard_error = np.sqrt(variance_of_p_hat)

# Two-sided 99% interval: 0.5% in each tail, hence the 99.5th percentile
# of the standard normal.
z_score = stats.norm.ppf(0.995)

# Half-width of the interval, then the interval itself.
margin_of_error = z_score * standard_error
lower_bound = p_hat - margin_of_error
upper_bound = p_hat + margin_of_error
confidence_interval = (lower_bound, upper_bound)

# Report the estimate and its interval.
print(
    f"Sample Proportion: {p_hat:.3f}",
    f"99% Confidence Interval: {confidence_interval[0]:.3f} to {confidence_interval[1]:.3f}",
    sep="\n",
)

In [None]:
Q8. A researcher hypothesizes that a coin is biased towards tails. They flip the coin 100 times and observe
45 tails. Conduct a chi-square goodness of fit test to determine if the observed frequencies match the
expected frequencies of a fair coin. Use a significance level of 0.05.

In [None]:
import numpy as np
from scipy.stats import chisquare

# Observed counts from the 100 flips, ordered [tails, heads].
observed_frequencies = np.array([45, 55])

# Counts expected from a fair coin over the same 100 flips, same order.
expected_frequencies = np.array([50, 50])

# Chi-square goodness-of-fit test.
# H0: the coin is fair (observed frequencies match the expected ones).
chi2_stat, p_value = chisquare(observed_frequencies, expected_frequencies)

# Significance level
alpha = 0.05

# Output results
print(f"Chi-square Statistic: {chi2_stat:.3f}")
print(f"P-value: {p_value:.3f}")

# Decision.
# The goodness-of-fit test is non-directional: rejecting H0 only says the
# counts differ from a fair coin, not in which direction. The conclusion is
# therefore worded in terms of fairness rather than "biased towards tails";
# here the sample actually shows fewer tails (45) than the 50 expected.
if p_value < alpha:
    conclusion = (
        "Reject the null hypothesis: The observed frequencies differ "
        "significantly from those expected for a fair coin."
    )
else:
    conclusion = (
        "Fail to reject the null hypothesis: The observed frequencies are "
        "consistent with a fair coin; there is no significant evidence of bias."
    )
print(conclusion)

In [None]:
Q9. A study was conducted to determine if there is an association between smoking status (smoker or
non-smoker) and lung cancer diagnosis (yes or no). The results are shown in the contingency table below.
Conduct a chi-square test for independence to determine if there is a significant association between
smoking status and lung cancer diagnosis.
            Lung Cancer: Yes     Lung Cancer: No
Smoker          60                     140
Non-smoker      30                     170

Use a significance level of 0.05.

In [None]:
import numpy as np
from scipy.stats import chi2_contingency

# 2x2 table of observed counts.
# Rows: smoking status (smoker, non-smoker).
# Columns: lung cancer diagnosis (yes, no).
observed = np.array([
    [60, 140],  # Smoker
    [30, 170],  # Non-smoker
])

# Threshold for declaring the association significant.
alpha = 0.05

# Chi-square test of independence. Per the SciPy documentation,
# chi2_contingency applies Yates' continuity correction by default when the
# table has one degree of freedom, as this 2x2 table does.
chi2_stat, p_value, dof, expected = chi2_contingency(observed)

# Report the statistic, p-value, degrees of freedom and the expected counts.
print(
    f"Chi-square Statistic: {chi2_stat:.3f}",
    f"P-value: {p_value:.3f}",
    f"Degrees of Freedom: {dof}",
    f"Expected Frequencies:\n{expected}",
    sep="\n",
)

# Decision at the chosen significance level.
decision = (
    "Reject the null hypothesis: There is a significant association between smoking status and lung cancer diagnosis."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no significant association between smoking status and lung cancer diagnosis."
)
print(decision)


In [None]:
Q10. A study was conducted to determine if the proportion of people who prefer milk chocolate, dark
chocolate, or white chocolate is different in the U.S. versus the U.K. A random sample of 500 people from
the U.S. and a random sample of 500 people from the U.K. were surveyed. The results are shown in the
contingency table below. Conduct a chi-square test for independence to determine if there is a significant
association between chocolate preference and country of origin.

               Milk Chocolate Dark Chocolate White Chocolate
U.S. (n=500)       200              150            150
U.K. (n=500)       225              175            100
Use a significance level of 0.01.

In [None]:
import numpy as np
from scipy.stats import chi2_contingency

# 2x3 table of observed counts.
# Rows: country (U.S., U.K.).
# Columns: preferred chocolate (milk, dark, white).
observed = np.array([
    [200, 150, 150],  # U.S.
    [225, 175, 100],  # U.K.
])

# The question asks for a stricter 1% significance level.
alpha = 0.01

# Chi-square test of independence between country and chocolate preference.
chi2_stat, p_value, dof, expected = chi2_contingency(observed)

# Report the statistic, p-value, degrees of freedom and the expected counts.
print(
    f"Chi-square Statistic: {chi2_stat:.3f}",
    f"P-value: {p_value:.3f}",
    f"Degrees of Freedom: {dof}",
    f"Expected Frequencies:\n{expected}",
    sep="\n",
)

# Decision at the chosen significance level.
decision = (
    "Reject the null hypothesis: There is a significant association between chocolate preference and country of origin."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no significant association between chocolate preference and country of origin."
)
print(decision)


In [None]:
Q11. A random sample of 30 people was selected from a population with an unknown mean and standard
deviation. The sample mean was found to be 72 and the sample standard deviation was found to be 10.
Conduct a hypothesis test to determine if the population mean is significantly different from 70. Use a
significance level of 0.05.

In [None]:
import numpy as np
from scipy import stats

# Test settings: two-sided test of H0 "population mean = 70" at the 5% level.
alpha = 0.05
population_mean = 70

# Sample summary from the question.
n = 30
sample_mean = 72
sample_std_dev = 10

# The population standard deviation is unknown, so the test uses the
# t distribution with n - 1 degrees of freedom.
df = n - 1

# t-statistic: distance of the sample mean from the hypothesised mean,
# measured in estimated standard errors.
t_statistic = (sample_mean - population_mean) / (sample_std_dev / np.sqrt(n))

# Two-sided critical value: alpha is split evenly between the two tails.
t_critical = stats.t.ppf(1 - alpha / 2, df)

# Report the statistic next to its critical value.
print(
    f"T-statistic: {t_statistic:.3f}",
    f"Critical T-value: {t_critical:.3f}",
    sep="\n",
)

# Decision: reject only when the statistic falls in either rejection tail.
decision = (
    "Reject the null hypothesis: The population mean is significantly different from 70."
    if abs(t_statistic) > t_critical
    else "Fail to reject the null hypothesis: The population mean is not significantly different from 70."
)
print(decision)