# In [None]:
# Write a Python program to perform a Z-test for comparing a sample mean to a known population mean and
# interpret the results

import numpy as np
from scipy import stats
def z_test(sample_mean, population_mean, sample_std, sample_size):
    """
    Perform a two-tailed Z-test comparing a sample mean to a known population mean.

    Parameters:
    sample_mean (float): Sample mean
    population_mean (float): Known population mean
    sample_std (float): Sample standard deviation
    sample_size (int): Sample size

    Returns:
    z_score (float): Z-score
    p_value (float): Two-tailed P-value
    """
    std_error = sample_std / np.sqrt(sample_size)
    z_score = (sample_mean - population_mean) / std_error
    # The survival function avoids the cancellation in 1 - cdf, which rounds
    # the p-value to exactly 0 once |z| is larger than about 8.
    p_value = 2 * stats.norm.sf(np.abs(z_score))
    return z_score, p_value
# Worked example: a sample of 36 with mean 25.6 tested against a population mean of 25.0
sample_mean, population_mean = 25.6, 25.0
sample_std, sample_size = 2.5, 36

z_score, p_value = z_test(sample_mean, population_mean, sample_std, sample_size)
print("Z-score:", z_score)
print("P-value:", p_value)

# Decide at the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The sample mean is significantly different from the population mean."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The sample mean is not significantly different from the population mean."
)

# Simulate random data to perform hypothesis testing and calculate the corresponding P-value using Python

import numpy as np
from scipy import stats

# Seed NumPy's global RNG so the simulated sample is identical on every run
np.random.seed(0)

# Draw the sample from the distribution assumed under the null hypothesis
n, mu0, sigma = 100, 0, 1  # sample size, null-hypothesis mean, population std
data = np.random.normal(loc=mu0, scale=sigma, size=n)

# State the two-sided hypotheses being tested
H0, H1 = "μ = 0", "μ ≠ 0"

# One-sample t-test of the sample mean against mu0
t_stat, p_value = stats.ttest_1samp(data, popmean=mu0)

print("Null Hypothesis:", H0)
print("Alternative Hypothesis:", H1)
print("T-statistic:", t_stat)
print("P-value:", p_value)

# Compare the p-value with the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The sample mean is significantly different from 0."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The sample mean is not significantly different from 0."
)


# Implement a one-sample Z-test using Python to compare the sample mean with the population mean

import numpy as np
from scipy import stats

def one_sample_z_test(sample, population_mean, population_std):
    """
    Perform a two-tailed one-sample Z-test of the sample mean against the population mean.

    Parameters:
    sample (array-like): Sample data
    population_mean (float): Population mean under the null hypothesis
    population_std (float): Known population standard deviation (must be positive)

    Returns:
    z_score (float): Z-score
    p_value (float): Two-tailed P-value

    Raises:
    ValueError: If the sample is empty or population_std is not positive
    """
    sample = np.asarray(sample, dtype=float)
    if sample.size == 0:
        raise ValueError("sample must contain at least one observation")
    if population_std <= 0:
        raise ValueError("population_std must be positive")

    # Calculate the sample mean
    sample_mean = np.mean(sample)

    # Calculate the standard error from the known population standard deviation
    std_error = population_std / np.sqrt(sample.size)

    # Calculate the Z-score
    z_score = (sample_mean - population_mean) / std_error

    # Calculate the P-value with the survival function: 1 - cdf loses all
    # precision (and returns exactly 0) for large |z|
    p_value = 2 * stats.norm.sf(np.abs(z_score))

    return z_score, p_value

# Demonstration on a seeded standard-normal sample whose true mean equals the null value
np.random.seed(0)
sample = np.random.normal(loc=0, scale=1, size=100)  # Sample data
population_mean, population_std = 0, 1  # Known population parameters

z_score, p_value = one_sample_z_test(sample, population_mean, population_std)

print("Z-score:", z_score)
print("P-value:", p_value)

# Decide at the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The sample mean is significantly different from the population mean."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The sample mean is not significantly different from the population mean."
)


#Perform a two-tailed Z-test using Python and visualize the decision region on a plot


import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Known population parameters
population_mean, population_std = 0, 1

# Summary statistics of the sample (sample_std is recorded but the Z-score
# below uses the known population standard deviation)
sample_mean, sample_std = 0.5, 1
sample_size = 100

# Z-score: distance of the sample mean from the population mean in standard errors
standard_error = population_std / np.sqrt(sample_size)
z_score = (sample_mean - population_mean) / standard_error

# Significance level, split evenly between the two tails
alpha = 0.05
critical_z_score = stats.norm.ppf(1 - alpha / 2)

# Report the statistic and the rejection threshold
print("Z-score:", z_score)
print("Critical Z-score:", critical_z_score)

# Standard normal density over the plotted range
x = np.linspace(-3, 3, 100)
y = stats.norm.pdf(x)

# Draw the curve, shade both rejection tails and mark the critical boundaries
in_rejection_region = (x < -critical_z_score) | (x > critical_z_score)
plt.plot(x, y)
plt.fill_between(x, y, where=in_rejection_region, color='red', alpha=0.5)
for boundary in (-critical_z_score, critical_z_score):
    plt.axvline(x=boundary, color='red', linestyle='--')
plt.title("Decision Region for Two-Tailed Z-Test")
plt.xlabel("Z-score")
plt.ylabel("Probability Density")
plt.show()

# Reject when the statistic falls in either tail beyond the critical value
print(
    "Reject the null hypothesis. The sample mean is significantly different from the population mean."
    if np.abs(z_score) > critical_z_score
    else "Fail to reject the null hypothesis. The sample mean is not significantly different from the population mean."
)


# Create a Python function that calculates and visualizes Type 1 and Type 2 errors during hypothesis testing

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def calculate_errors(alpha, beta, mu0, mu1, sigma):
    """
    Return the Type 1 and Type 2 error rates of an upper-tailed Z-test.

    Parameters:
    alpha (float): Significance level; returned unchanged as the Type 1 error rate
    beta (float): Nominal Type 2 error rate (accepted but not used in the calculation)
    mu0 (float): Population mean under the null hypothesis
    mu1 (float): Population mean under the alternative hypothesis
    sigma (float): Population standard deviation

    Returns:
    type1_error (float): Type 1 error rate
    type2_error (float): Type 2 error rate
    """
    # Upper-tail cutoff on the standard normal scale
    z_cutoff = stats.norm.ppf(1 - alpha)

    # Separation of the two means measured in standard deviations
    standardized_shift = (mu1 - mu0) / sigma

    # Type 2 error: probability mass of the shifted distribution below the cutoff
    return alpha, stats.norm.cdf(z_cutoff - standardized_shift)

def visualize_errors(alpha, beta, mu0, mu1, sigma):
    """
    Visualize Type 1 and Type 2 errors of an upper-tailed test.

    Parameters:
    alpha (float): Significance level (Type 1 error rate)
    beta (float): Type 2 error rate (accepted but not used for the plot)
    mu0 (float): Population mean under the null hypothesis
    mu1 (float): Population mean under the alternative hypothesis
    sigma (float): Population standard deviation

    Returns:
    None. The figure is shown with plt.show().
    """
    # The curves are drawn on the data scale, so the cutoff must be the
    # (1 - alpha) quantile of N(mu0, sigma), not of the standard normal.
    critical_value = stats.norm.ppf(1 - alpha, loc=mu0, scale=sigma)

    # Cover three standard deviations beyond whichever mean is smaller/larger
    x = np.linspace(min(mu0, mu1) - 3 * sigma, max(mu0, mu1) + 3 * sigma, 100)
    null_distribution = stats.norm.pdf(x, mu0, sigma)
    alternative_distribution = stats.norm.pdf(x, mu1, sigma)

    # Plot the critical value and shade the Type 1 and Type 2 error regions
    plt.plot(x, null_distribution, label='Null Distribution')
    plt.plot(x, alternative_distribution, label='Alternative Distribution')
    plt.axvline(x=critical_value, color='red', linestyle='--')
    # Type 1 error: null-distribution mass beyond the cutoff
    plt.fill_between(x, null_distribution, where=(x > critical_value), color='red', alpha=0.5, label='Type 1 Error')
    # Type 2 error: alternative-distribution mass short of the cutoff
    plt.fill_between(x, alternative_distribution, where=(x < critical_value), color='blue', alpha=0.5, label='Type 2 Error')
    plt.title('Type 1 and Type 2 Errors in Hypothesis Testing')
    plt.xlabel('Value')
    plt.ylabel('Probability Density')
    plt.legend()
    plt.show()

# Demonstration: null mean 0 against alternative mean 1 with unit standard deviation
alpha, beta = 0.05, 0.2  # Type 1 and Type 2 error rates
mu0, mu1 = 0, 1  # Population means under the null and alternative hypotheses
sigma = 1  # Population standard deviation

type1_error, type2_error = calculate_errors(alpha, beta, mu0, mu1, sigma)
print("Type 1 Error Rate:", type1_error)
print("Type 2 Error Rate:", type2_error)

# Draw both distributions with the error regions shaded
visualize_errors(alpha, beta, mu0, mu1, sigma)


# Write a Python program to perform an independent T-test and interpret the results

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Two seeded normal samples whose true means differ by one standard deviation
np.random.seed(0)
group1 = np.random.normal(loc=0, scale=1, size=100)
group2 = np.random.normal(loc=1, scale=1, size=100)

# Two-sample t-test on the independent groups
t_stat, p_value = stats.ttest_ind(group1, group2)

# Report the statistic and its p-value
print("T-statistic:", t_stat)
print("P-value:", p_value)

# Decide at the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The means of the two groups are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The means of the two groups are not significantly different."
)

# Overlay both groups as semi-transparent histograms on the same axes
for samples, legend_label in ((group1, 'Group 1'), (group2, 'Group 2')):
    plt.hist(samples, alpha=0.5, label=legend_label)
plt.title('Histogram of Group 1 and Group 2')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.legend()
plt.show()


# Perform a paired sample T-test using Python and visualize the comparison results

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Paired measurements: "after" is "before" plus zero-mean noise with std 0.5
np.random.seed(0)
before = np.random.normal(loc=0, scale=1, size=100)
after = before + np.random.normal(loc=0, scale=0.5, size=100)

# Paired-sample t-test on the two related samples
t_stat, p_value = stats.ttest_rel(before, after)

# Report the statistic and its p-value
print("T-statistic:", t_stat)
print("P-value:", p_value)

# Decide at the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The means of the two related samples are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The means of the two related samples are not significantly different."
)

# Two side-by-side panels in one figure
plt.figure(figsize=(8, 6))

# Left panel: overlaid histograms of the two samples
plt.subplot(1, 2, 1)
for samples, legend_label in ((before, 'Before'), (after, 'After')):
    plt.hist(samples, alpha=0.5, label=legend_label)
plt.legend()
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Histogram of Before and After Samples')

# Right panel: each pair as a point, with the dashed y = x line for reference
plt.subplot(1, 2, 2)
plt.scatter(before, after)
identity_ends = [np.min(before), np.max(before)]
plt.plot(identity_ends, identity_ends, 'r--')
plt.xlabel('Before')
plt.ylabel('After')
plt.title('Scatter Plot of Before and After Samples')

plt.tight_layout()
plt.show()

# Simulate data and perform both Z-test and T-test, then compare the results using Python
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Seed the RNG so the simulated sample is reproducible
np.random.seed(0)

# Sample of size n from a normal population with known mean and standard deviation
n, mu, sigma = 100, 0, 1
data = np.random.normal(loc=mu, scale=sigma, size=n)

# Z-test: standard error built from the known population standard deviation
z_stat = (np.mean(data) - mu) / (sigma / np.sqrt(n))
z_p_value = 2 * (1 - stats.norm.cdf(np.abs(z_stat)))

# T-test: one-sample test against the same mean
t_stat, t_p_value = stats.ttest_1samp(data, mu)

# Report both tests
print("Z-test Results:")
print("Z-statistic:", z_stat)
print("P-value:", z_p_value)

print("\nT-test Results:")
print("T-statistic:", t_stat)
print("P-value:", t_p_value)

# Compare the two decisions at the 5% significance level
alpha = 0.05
z_rejects = z_p_value < alpha
t_rejects = t_p_value < alpha
if z_rejects != t_rejects:
    print("\nZ-test and T-test results do not agree.")
elif z_rejects:
    print("\nBoth Z-test and T-test reject the null hypothesis.")
else:
    print("\nBoth Z-test and T-test fail to reject the null hypothesis.")

# Histogram of the sample with the population mean marked as a dashed line
plt.hist(data, label='Simulated Data', alpha=0.5)
plt.axvline(x=mu, linestyle='--', color='red', label='Population Mean')
plt.title('Histogram of Simulated Data')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.legend()
plt.show()


# Write a Python function to calculate the confidence interval for a sample mean and explain its significance
import numpy as np
from scipy import stats

def calculate_confidence_interval(data, confidence_level=0.95):
    """
    Compute a t-based confidence interval for the mean of a sample.

    Parameters:
    data (numpy array): Sample data
    confidence_level (float, optional): Confidence level (default is 0.95)

    Returns:
    tuple: Lower and upper bounds of the confidence interval
    """
    n_obs = len(data)
    center = np.mean(data)

    # Sample standard deviation (ddof=1 applies Bessel's correction)
    spread = np.std(data, ddof=1)

    # Two-sided critical value from the t-distribution with n - 1 degrees of freedom
    t_critical = stats.t.ppf((1 + confidence_level) / 2, n_obs - 1)

    # Half-width of the interval: critical value times the standard error
    half_width = t_critical * (spread / np.sqrt(n_obs))

    return center - half_width, center + half_width

# Demonstration on a seeded standard-normal sample of 100 observations
np.random.seed(0)
data = np.random.normal(loc=0, scale=1, size=100)
confidence_level = 0.95

lower_bound, upper_bound = calculate_confidence_interval(data, confidence_level)
print(f"Confidence Interval ({confidence_level*100}%): ({lower_bound:.4f}, {upper_bound:.4f})")


# Write a Python program to calculate the margin of error for a given confidence level using sample data
import numpy as np
from scipy import stats

def calculate_margin_of_error(data, confidence_level):
    """
    Compute the margin of error of the sample mean at a given confidence level.

    Parameters:
    data (numpy array): Sample data
    confidence_level (float): Confidence level (e.g., 0.95)

    Returns:
    float: Margin of error
    """
    n_obs = len(data)

    # Standard error from the sample standard deviation (ddof=1, Bessel's correction)
    std_err = np.std(data, ddof=1) / np.sqrt(n_obs)

    # Upper quantile that leaves (1 - confidence_level) / 2 in each tail
    upper_quantile = (1 + confidence_level) / 2

    # Samples of 30 or more use the standard normal; smaller ones use the
    # t-distribution with n - 1 degrees of freedom
    if n_obs >= 30:
        critical = stats.norm.ppf(upper_quantile)
    else:
        critical = stats.t.ppf(upper_quantile, n_obs - 1)

    return critical * std_err

# Demonstration on a seeded standard-normal sample of 100 observations
np.random.seed(0)
data = np.random.normal(loc=0, scale=1, size=100)  # Sample data
confidence_level = 0.95  # Confidence level

margin_of_error = calculate_margin_of_error(data, confidence_level)
print(f"Margin of Error ({confidence_level*100}% confidence level): {margin_of_error:.4f}")


# Implement a Bayesian inference method using Bayes' Theorem in Python and explain the process
import numpy as np

def bayes_theorem(prior, likelihood, evidence):
    """
    Compute a posterior probability with Bayes' Theorem.

    The posterior is the prior weighted by the likelihood and normalized by
    the probability of the evidence: P(H|E) = P(H) * P(E|H) / P(E).

    Parameters:
    prior (float): Prior probability of the hypothesis, P(H)
    likelihood (float): Likelihood of the evidence given the hypothesis, P(E|H)
    evidence (float): Probability of the evidence, P(E)

    Returns:
    float: Posterior probability of the hypothesis, P(H|E)
    """
    return prior * likelihood / evidence

# Perform a Chi-square test for independence between two categorical variables in Python

import numpy as np
from scipy import stats
import pandas as pd

# Two categorical variables drawn independently of each other (seeded)
np.random.seed(0)
variable1 = np.random.choice(['A', 'B', 'C'], size=100)
variable2 = np.random.choice(['X', 'Y', 'Z'], size=100)

# Cross-tabulate the joint counts of the two variables
contingency_table = pd.crosstab(variable1, variable2)

print("Contingency Table:")
print(contingency_table)

# Chi-square test of independence on the contingency table
chi2_stat, p_value, dof, expected = stats.chi2_contingency(contingency_table)

print("\nChi-square Test Results:")
print("Chi-square Statistic:", chi2_stat)
print("P-value:", p_value)
print("Degrees of Freedom:", dof)

# Decide at the 5% significance level
alpha = 0.05
print(
    "\nReject the null hypothesis. The variables are not independent."
    if p_value < alpha
    else "\nFail to reject the null hypothesis. The variables are independent."
)

# Write a Python program to calculate the expected frequencies for a Chi-square test based on observed data
import numpy as np
import pandas as pd

def calculate_expected_frequencies(observed_data):
    """
    Compute the expected cell frequencies of a contingency table under independence.

    Each expected count is (row total * column total) / grand total.

    Parameters:
    observed_data (pandas DataFrame): Observed data in the form of a contingency table

    Returns:
    pandas DataFrame: Expected frequencies, with the same index and columns as the input
    """
    # Marginal totals of the table
    row_sums = observed_data.sum(axis=1).to_numpy()
    column_sums = observed_data.sum(axis=0).to_numpy()
    n_total = observed_data.sum().sum()

    # Broadcasting a column vector against a row vector forms every row/column product
    expected = row_sums[:, None] * column_sums[None, :] / n_total

    return pd.DataFrame(expected, index=observed_data.index, columns=observed_data.columns)

# Demonstration on a fixed 3 x 2 contingency table (rows X/Y/Z, columns A/B)
np.random.seed(0)
observed_data = pd.DataFrame(
    {'A': [10, 20, 30], 'B': [40, 50, 60]},
    index=['X', 'Y', 'Z'],
)

print("Observed Data:")
print(observed_data)

expected_frequencies = calculate_expected_frequencies(observed_data)

print("\nExpected Frequencies:")
print(expected_frequencies)

# Perform a goodness-of-fit test using Python to compare the observed data to an expected distribution

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Define the observed data
np.random.seed(0)
observed_data = np.random.normal(0, 1, 100)

# Define the expected distribution
expected_distribution = stats.norm(0, 1)

# stats.chisquare compares category counts, not raw measurements, so the
# sample is first binned. The outer bins are open-ended so that every
# observation is counted and the expected probabilities sum to 1.
interior_edges = np.array([-1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5])
observed_counts = np.bincount(
    np.digitize(observed_data, interior_edges),
    minlength=len(interior_edges) + 1,
)

# Expected count per bin = sample size * probability mass of the bin under H0
bin_probabilities = np.diff(
    np.concatenate(([0.0], expected_distribution.cdf(interior_edges), [1.0]))
)
expected_counts = observed_data.size * bin_probabilities

# Perform the goodness-of-fit test (no parameters were estimated, so ddof stays 0)
chi2_stat, p_value = stats.chisquare(observed_counts, f_exp=expected_counts)

# Print the results
print("Chi-square Statistic:", chi2_stat)
print("P-value:", p_value)

# Interpret the results
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. The observed data does not fit the expected distribution.")
else:
    print("Fail to reject the null hypothesis. The observed data fits the expected distribution.")

# Visualize the data. The histogram is normalized to a density so that it is
# on the same vertical scale as the expected pdf drawn over it.
plt.hist(observed_data, density=True, alpha=0.5, label='Observed Data')
x = np.linspace(-3, 3, 100)
plt.plot(x, expected_distribution.pdf(x), 'r-', label='Expected Distribution')
plt.legend()
plt.xlabel('Value')
plt.ylabel('Probability Density')
plt.title('Goodness-of-Fit Test')
plt.show()


# Create a Python script to simulate and visualize the Chi-square distribution and discuss its characteristics

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Seed the RNG so the simulated draws are reproducible
np.random.seed(0)

# Degrees of freedom shared by the simulation and the theoretical curve
dof = 5

# Draw 1000 Chi-square random variables
chi2_rvs = np.random.chisquare(dof, size=1000)

# Theoretical density evaluated on a grid from 0 to 20
x = np.linspace(0, 20, 100)
y = stats.chi2.pdf(x, dof)

# Density-normalized histogram of the draws with the theoretical curve on top
plt.hist(chi2_rvs, bins=30, density=True, alpha=0.5, label='Simulated Chi-square Distribution')
plt.plot(x, y, 'r-', label='Theoretical Chi-square Distribution')

# Title, axis labels and legend
plt.title('Chi-square Distribution with {} Degrees of Freedom'.format(dof))
plt.xlabel('Value')
plt.ylabel('Probability Density')
plt.legend()

plt.show()

# Implement an F-test using Python to compare the variances of two random samples

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Set the seed for reproducibility
np.random.seed(0)

# Generate two random samples with different variances
sample1 = np.random.normal(0, 1, 100)
sample2 = np.random.normal(0, 2, 100)

# Calculate the sample variances (ddof=1 gives the unbiased estimator)
var1 = np.var(sample1, ddof=1)
var2 = np.var(sample2, ddof=1)

# Perform the F-test for equality of variances. Under H0 the ratio of the
# sample variances follows an F distribution with (n1 - 1, n2 - 1) degrees of
# freedom. (stats.f_oneway is a one-way ANOVA that compares *means*, so it
# cannot be used here.)
f_stat = var1 / var2
dfn, dfd = sample1.size - 1, sample2.size - 1

# Two-sided p-value: twice the smaller tail, capped at 1
p_value = min(1.0, 2 * min(stats.f.cdf(f_stat, dfn, dfd), stats.f.sf(f_stat, dfn, dfd)))

# Print the results
print("F-statistic:", f_stat)
print("P-value:", p_value)

# Interpret the results
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. The variances are not significantly different.")

# Overlay both samples as semi-transparent histograms on the same axes
for samples, legend_label in ((sample1, 'Sample 1'), (sample2, 'Sample 2')):
    plt.hist(samples, alpha=0.5, label=legend_label)
plt.title('Histogram of Sample Data')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.legend()
plt.show()


# Write a Python program to perform an ANOVA test to compare means between multiple groups and interpret the results
import numpy as np
from scipy import stats
import pandas as pd

# Seed the RNG so the simulated groups are reproducible
np.random.seed(0)

# Three groups of ten observations with means 0, 1 and 2 and unit standard deviation
group1 = np.random.normal(loc=0, scale=1, size=10)
group2 = np.random.normal(loc=1, scale=1, size=10)
group3 = np.random.normal(loc=2, scale=1, size=10)

# Long-format table: one row per observation, labelled with its group
data = pd.DataFrame({
    'Group': [label for label in ('Group1', 'Group2', 'Group3') for _ in range(10)],
    'Value': np.concatenate((group1, group2, group3)),
})

# One-way ANOVA across the three groups
anova_stat, p_value = stats.f_oneway(group1, group2, group3)

print("ANOVA Statistic:", anova_stat)
print("P-value:", p_value)

# Decide at the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The means are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The means are not significantly different."
)

# Visualize the data
import matplotlib.pyplot as plt

# One box per group, in the order the groups were generated
group_labels = ['Group1', 'Group2', 'Group3']
plt.boxplot([group1, group2, group3], labels=group_labels)
plt.title('Boxplot of Group Data')
plt.xlabel('Group')
plt.ylabel('Value')
plt.show()


# Perform a one-way ANOVA test using Python to compare the means of different groups and plot the results

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Seed the RNG so the simulated groups are reproducible
np.random.seed(0)

# Three groups of ten observations with means 0, 1 and 2 and unit standard deviation
group1 = np.random.normal(loc=0, scale=1, size=10)
group2 = np.random.normal(loc=1, scale=1, size=10)
group3 = np.random.normal(loc=2, scale=1, size=10)

# One-way ANOVA across the three groups
anova_stat, p_value = stats.f_oneway(group1, group2, group3)

print("ANOVA Statistic:", anova_stat)
print("P-value:", p_value)

# Decide at the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The means are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The means are not significantly different."
)

# Plot the results
group_samples = [group1, group2, group3]
group_labels = ['Group1', 'Group2', 'Group3']

plt.figure(figsize=(8, 6))
plt.boxplot(group_samples, labels=group_labels)
plt.xlabel('Group')
plt.ylabel('Value')
plt.title('Boxplot of Group Data')
plt.show()

# Plot the means and standard errors
means = [np.mean(samples) for samples in group_samples]
# The standard error of the mean is built from the sample standard deviation
# (ddof=1), matching how standard errors are computed elsewhere in this file.
std_errors = [np.std(samples, ddof=1) / np.sqrt(len(samples)) for samples in group_samples]
plt.figure(figsize=(8, 6))
plt.errorbar(range(len(means)), means, yerr=std_errors, fmt='o')
# Label each point with its group instead of the bare positions 0, 1, 2
plt.xticks(range(len(means)), group_labels)
plt.xlabel('Group')
plt.ylabel('Mean')
plt.title('Means and Standard Errors of Group Data')
plt.show()


# Write a Python function to check the assumptions (normality, independence, and equal variance) for ANOVA
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt

def check_anova_assumptions(data, group_variable, response_variable):
    """
    Check the assumptions (normality, independence, and equal variance) for ANOVA.

    Prints a Shapiro-Wilk test per group, the per-group variances with
    Levene's test, and shows a plot of the residuals in observation order.

    Parameters:
    data (pandas DataFrame): Data containing the group variable and response variable
    group_variable (str): Name of the group variable
    response_variable (str): Name of the response variable

    Returns:
    None
    """
    # Split the response by group once, in order of first appearance
    group_masks = {
        group: (data[group_variable] == group).to_numpy()
        for group in data[group_variable].unique()
    }
    group_values = {
        group: data.loc[mask, response_variable]
        for group, mask in group_masks.items()
    }

    # Check normality assumption
    print("Normality Assumption:")
    for group, group_data in group_values.items():
        stat, p_value = stats.shapiro(group_data)
        print(f"Group {group}: Shapiro-Wilk statistic = {stat:.4f}, p-value = {p_value:.4f}")
        if p_value < 0.05:
            print("Normality assumption not met for this group.")

    # Check independence assumption
    print("\nIndependence Assumption:")
    print("This assumption is difficult to check statistically, but it can be checked using residual plots.")

    # Check equal variance assumption
    print("\nEqual Variance Assumption:")
    for group, group_data in group_values.items():
        print(f"Group {group}: Variance = {group_data.var():.4f}")
    stat, p_value = stats.levene(*group_values.values())
    print(f"Levene's statistic = {stat:.4f}, p-value = {p_value:.4f}")
    if p_value < 0.05:
        print("Equal variance assumption not met.")

    # Plot residuals against observation order to check independence.
    # Plotting them against the response itself is uninformative: within a
    # group, residual = response - group mean, which is always a straight line.
    observation_order = np.arange(len(data))
    plt.figure(figsize=(8, 6))
    for group, group_data in group_values.items():
        residuals = group_data - group_data.mean()
        plt.scatter(observation_order[group_masks[group]], residuals, label=group)
    plt.axhline(0, color='gray', linestyle='--')
    plt.xlabel('Observation Order')
    plt.ylabel('Residuals')
    plt.title('Residual Plots')
    plt.legend()
    plt.show()


# Perform a two-way ANOVA test using Python to study the interaction between two factors and visualize the results
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# Set the seed for reproducibility
np.random.seed(0)

# Generate sample data (3 x 2 design with 5 observations in every cell)
factor1 = np.repeat(['A', 'B', 'C'], 10)
factor2 = np.tile(['X', 'Y'], 15)
response = np.random.normal(0, 1, 30)

# Create a pandas DataFrame
data = pd.DataFrame({
    'Factor1': factor1,
    'Factor2': factor2,
    'Response': response
})


def _residual_ss(design, y):
    """Return the residual sum of squares and rank of a least-squares fit of y on design."""
    coefficients, _, rank, _ = np.linalg.lstsq(design, y, rcond=None)
    residuals = y - design @ coefficients
    return float(residuals @ residuals), int(rank)


def _two_way_anova(frame, factor_a, factor_b, response_col):
    """
    Build a sequential (Type I) two-way ANOVA table with interaction.

    Terms are added to the model in the order factor_a, factor_b,
    factor_a:factor_b; each term's sum of squares is the drop in residual sum
    of squares it produces. Columns: df, sum_sq, mean_sq, F, PR(>F).
    """
    y = frame[response_col].to_numpy(dtype=float)
    n_obs = len(y)

    # Treatment (dummy) coding with the first level of each factor as baseline
    dummies_a = pd.get_dummies(frame[factor_a], drop_first=True).to_numpy(dtype=float)
    dummies_b = pd.get_dummies(frame[factor_b], drop_first=True).to_numpy(dtype=float)
    # Interaction columns: every product of an A dummy with a B dummy
    interaction = (dummies_a[:, :, None] * dummies_b[:, None, :]).reshape(n_obs, -1)

    terms = [
        (f"C({factor_a})", dummies_a),
        (f"C({factor_b})", dummies_b),
        (f"C({factor_a}):C({factor_b})", interaction),
    ]

    # Start from the intercept-only model and add one term at a time
    design = np.ones((n_obs, 1))
    rss_previous, rank_previous = _residual_ss(design, y)
    labels, dfs, sums_of_squares = [], [], []
    for label, columns in terms:
        design = np.hstack([design, columns])
        rss, rank = _residual_ss(design, y)
        labels.append(label)
        dfs.append(rank - rank_previous)
        sums_of_squares.append(rss_previous - rss)
        rss_previous, rank_previous = rss, rank

    # Whatever the full model leaves unexplained is the residual row
    residual_df = n_obs - rank_previous
    residual_ms = rss_previous / residual_df

    table = pd.DataFrame(
        {'df': dfs + [residual_df], 'sum_sq': sums_of_squares + [rss_previous]},
        index=labels + ['Residual'],
    )
    table['mean_sq'] = table['sum_sq'] / table['df']
    table['F'] = table['mean_sq'] / residual_ms
    table.loc['Residual', 'F'] = np.nan  # no F-test for the residual row
    table['PR(>F)'] = stats.f.sf(table['F'], table['df'], residual_df)
    return table


# Perform the two-way ANOVA test. scipy.stats has no anova_lm/ols (those live
# in statsmodels, which this file does not import), so the table is computed
# directly from nested least-squares fits.
anova_table = _two_way_anova(data, 'Factor1', 'Factor2', 'Response')

# Print the ANOVA table
print(anova_table)

# Interaction plot: mean response across Factor2 levels, one line per Factor1 level
sns.set()
plt.figure(figsize=(8, 6))
sns.lineplot(data=data, x='Factor2', y='Response', hue='Factor1')
plt.title('Interaction between Factor 1 and Factor 2')
plt.xlabel('Factor 2')
plt.ylabel('Response')
plt.show()

# Main effects: one boxplot figure per factor
for factor_column, axis_label, figure_title in (
    ('Factor1', 'Factor 1', 'Main Effect of Factor 1'),
    ('Factor2', 'Factor 2', 'Main Effect of Factor 2'),
):
    plt.figure(figsize=(8, 6))
    sns.boxplot(data=data, x=factor_column, y='Response')
    plt.title(figure_title)
    plt.xlabel(axis_label)
    plt.ylabel('Response')
    plt.show()


# Write a Python program to visualize the F-distribution and discuss its use in hypothesis testing
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Numerator and denominator degrees of freedom of the F-distribution
df1, df2 = 5, 10

# Density evaluated on a grid from 0 to 10
x = np.linspace(0, 10, 100)
y = stats.f.pdf(x, df1, df2)

# Draw the density curve with the degrees of freedom in the legend
plt.figure(figsize=(8, 6))
plt.plot(x, y, label=f'F-Distribution (df1={df1}, df2={df2})')
plt.title('F-Distribution')
plt.xlabel('Value')
plt.ylabel('Probability Density')
plt.legend()
plt.show()

# Perform a one-way ANOVA test in Python and visualize the results with boxplots to compare group means
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt

# Seed the RNG so the simulated groups are reproducible
np.random.seed(0)

# Three groups of ten observations with means 0, 1 and 2 and unit standard deviation
group1 = np.random.normal(loc=0, scale=1, size=10)
group2 = np.random.normal(loc=1, scale=1, size=10)
group3 = np.random.normal(loc=2, scale=1, size=10)

# Long-format table: one row per observation, labelled with its group
data = pd.DataFrame({
    'Group': np.repeat(['Group1', 'Group2', 'Group3'], 10),
    'Value': np.concatenate((group1, group2, group3)),
})

# One-way ANOVA across the three groups
anova_stat, p_value = stats.f_oneway(group1, group2, group3)

print("ANOVA Statistic:", anova_stat)
print("P-value:", p_value)

# Decide at the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The group means are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The group means are not significantly different."
)

# One box per group, in the order the groups were generated
group_labels = ['Group1', 'Group2', 'Group3']
plt.figure(figsize=(8, 6))
plt.boxplot([group1, group2, group3], labels=group_labels)
plt.title('Boxplot of Group Data')
plt.xlabel('Group')
plt.ylabel('Value')
plt.show()

# Simulate random data from a normal distribution, then perform hypothesis testing to evaluate the means
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Seed the RNG so the simulated samples are reproducible
np.random.seed(0)

# Parameters of the two normal populations
mu1, mu2 = 0, 1  # means
sigma1, sigma2 = 1, 1  # standard deviations
n1, n2 = 100, 100  # sample sizes

# Draw one sample from each population
data1 = np.random.normal(loc=mu1, scale=sigma1, size=n1)
data2 = np.random.normal(loc=mu2, scale=sigma2, size=n2)

# Significance level for the test
alpha = 0.05

# H0: mu1 = mu2   versus   H1: mu1 != mu2
t_stat, p_value = stats.ttest_ind(data1, data2)

print("T-statistic:", t_stat)
print("P-value:", p_value)

print(
    "Reject the null hypothesis. The means are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The means are not significantly different."
)

# Overlay both samples as semi-transparent histograms on the same axes
for samples, legend_label in ((data1, 'Data 1'), (data2, 'Data 2')):
    plt.hist(samples, alpha=0.5, label=legend_label)
plt.title('Histogram of Simulated Data')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.legend()
plt.show()


#  Perform a hypothesis test for population variance using a Chi-square distribution and interpret the results
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Set the seed for reproducibility
np.random.seed(0)

# Generate sample data
n = 100  # sample size
sigma = 1  # population standard deviation hypothesized under H0
data = np.random.normal(0, sigma, n)

# Define the null and alternative hypotheses
null_hypothesis = "The population variance is equal to 1."
alternative_hypothesis = "The population variance is not equal to 1."

# Test statistic: (n - 1) * s^2 / sigma0^2, where s^2 is the unbiased sample
# variance (ddof=1). Under H0 it follows a Chi-square distribution with
# n - 1 degrees of freedom.
chi2_stat = (n - 1) * np.var(data, ddof=1) / (sigma ** 2)

# Two-sided p-value: twice the smaller tail probability, capped at 1.
# Doubling only the upper tail would exceed 1 whenever the statistic falls
# below the median of the distribution.
lower_tail = stats.chi2.cdf(chi2_stat, n - 1)
upper_tail = stats.chi2.sf(chi2_stat, n - 1)
p_value = min(1.0, 2 * min(lower_tail, upper_tail))

# Print the results
print("Null Hypothesis:", null_hypothesis)
print("Alternative Hypothesis:", alternative_hypothesis)
print("Test Statistic (Chi-square):", chi2_stat)
print("P-value:", p_value)

# Interpret the results
alpha = 0.05  # significance level
if p_value < alpha:
    print("Reject the null hypothesis. The population variance is significantly different from 1.")
else:
    print("Fail to reject the null hypothesis. The population variance is not significantly different from 1.")

# Visualize the Chi-square distribution. With n - 1 degrees of freedom the
# density sits around n - 1, so the grid spans the central 99.8% of the
# distribution rather than a fixed 0-20 range.
x = np.linspace(stats.chi2.ppf(0.001, n - 1), stats.chi2.ppf(0.999, n - 1), 200)
y = stats.chi2.pdf(x, n - 1)
plt.plot(x, y)
# Mark where the observed test statistic falls on the null distribution
plt.axvline(x=chi2_stat, color='red', linestyle='--')
plt.xlabel('Value')
plt.ylabel('Probability Density')
plt.title('Chi-square Distribution')
plt.show()


# Write a Python script to perform a Z-test for comparing proportions between two datasets or groups
import numpy as np
from scipy import stats

# Sample sizes and observed proportions of the two groups
n1, n2 = 100, 150
p1, p2 = 0.4, 0.6

# Pooled proportion: the two proportions weighted by their sample sizes
p_pooled = (n1 * p1 + n2 * p2) / (n1 + n2)

# Standard error of the difference in proportions under the pooled estimate
se = np.sqrt(p_pooled * (1 - p_pooled) * (1/n1 + 1/n2))

# Z-statistic: difference in proportions in standard-error units
z_stat = (p1 - p2) / se

# Two-tailed p-value from the standard normal distribution
p_value = 2 * (1 - stats.norm.cdf(np.abs(z_stat)))

print("Z-statistic:", z_stat)
print("p-value:", p_value)

# Decide at the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The proportions are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The proportions are not significantly different."
)


# Implement an F-test for comparing the variances of two datasets, then interpret and visualize the results
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Set the seed for reproducibility
np.random.seed(0)

# Generate two datasets with different variances
dataset1 = np.random.normal(0, 1, 100)
dataset2 = np.random.normal(0, 2, 100)

# Calculate the variances (ddof=1 gives the unbiased estimator)
var1 = np.var(dataset1, ddof=1)
var2 = np.var(dataset2, ddof=1)

# Perform the F-test for equality of variances. Under H0 the ratio of the
# sample variances follows an F distribution with (n1 - 1, n2 - 1) degrees of
# freedom. (stats.f_oneway is a one-way ANOVA that compares *means*, so it
# cannot be used here.)
f_stat = var1 / var2
dfn, dfd = dataset1.size - 1, dataset2.size - 1

# Two-sided p-value: twice the smaller tail, capped at 1
p_value = min(1.0, 2 * min(stats.f.cdf(f_stat, dfn, dfd), stats.f.sf(f_stat, dfn, dfd)))

# Print the results
print("F-statistic:", f_stat)
print("P-value:", p_value)

# Interpret the results
alpha = 0.05  # significance level
if p_value < alpha:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. The variances are not significantly different.")

# Two side-by-side panels in one figure
plt.figure(figsize=(8, 6))

# Left panel: overlaid histograms of the two datasets
plt.subplot(1, 2, 1)
for samples, legend_label in ((dataset1, 'Dataset 1'), (dataset2, 'Dataset 2')):
    plt.hist(samples, alpha=0.5, label=legend_label)
plt.legend()
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Histogram of Datasets')

# Right panel: one box per dataset to compare their spread
plt.subplot(1, 2, 2)
dataset_labels = ['Dataset 1', 'Dataset 2']
plt.boxplot([dataset1, dataset2], labels=dataset_labels)
plt.xlabel('Dataset')
plt.ylabel('Value')
plt.title('Boxplot of Datasets')

plt.tight_layout()
plt.show()


# Perform a Chi-square test for goodness of fit with simulated data and analyze the results.
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Seed the RNG so the simulated sample is reproducible
np.random.seed(0)

# Sample of size n from a normal distribution
n, mu, sigma = 1000, 0, 1  # sample size, mean, standard deviation
data = np.random.normal(loc=mu, scale=sigma, size=n)

# Proportion of observations expected in each bin under the null hypothesis
expected_frequencies = np.array([0.2, 0.3, 0.3, 0.2])

# Bin edges; the outer bins are open-ended so every observation is counted
bins = np.array([-np.inf, -1, 0, 1, np.inf])

# Count the observations falling in each bin
observed_frequencies, _ = np.histogram(data, bins=bins)

# Chi-square statistic: squared deviations from the expected counts, scaled by them
chi2_stat = np.sum((observed_frequencies - n * expected_frequencies) ** 2 / (n * expected_frequencies))

# Upper-tail probability with 3 degrees of freedom (4 bins - 1)
p_value = 1 - stats.chi2.cdf(chi2_stat, df=3)

print("Chi-square Statistic:", chi2_stat)
print("P-value:", p_value)

# Decide at the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The data do not fit the expected distribution."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The data fit the expected distribution."
)

# Visualize the results. The outer bin edges are infinite, so the bins cannot
# be drawn on a numeric axis (plt.hist / plt.bar would be given infinite
# positions). Draw one pair of bars per bin on a categorical axis instead.
bin_labels = [f"[{low:g}, {high:g})" for low, high in zip(bins[:-1], bins[1:])]
bar_positions = np.arange(len(bin_labels))
bar_width = 0.4
plt.bar(bar_positions - bar_width / 2, observed_frequencies, width=bar_width, alpha=0.5, label='Observed Frequencies')
plt.bar(bar_positions + bar_width / 2, n * expected_frequencies, width=bar_width, alpha=0.5, label='Expected Frequencies')
plt.xticks(bar_positions, bin_labels)
plt.legend()
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Histogram of Simulated Data')
plt.show()




























    