In [4]:
import pandas as pd
from scipy.stats import pearsonr
import numpy as np

# Read the tab-separated CapPull file into a DataFrame.
cap_pull_data = pd.read_csv('Corr-and-Assoc/CapPull.dat', sep='\t')

# Pull out the two columns that are correlated against each other.
fid, cpf = cap_pull_data['fid'], cap_pull_data['cpf']

# pearsonr yields the sample correlation r together with its p-value.
correlation_coefficient, p_value = pearsonr(fid, cpf)

# Report both numbers to four decimal places.
print(f"Correlation coefficient (r): {correlation_coefficient:.4f}")
print(f"P-value: {p_value:.4f}")

# Decision rule at the 10% significance level: reject only when the p-value
# is strictly below alpha.
alpha = 0.10
print(
    "Reject the Null Hypothesis"
    if p_value < alpha
    else "Fail to Reject the Null Hypothesis"
)


Correlation coefficient (r): -0.0540
P-value: 0.8211
Fail to Reject the Null Hypothesis


In [5]:
import scipy.stats as stats

def two_sided_p_value(z):
    """Return the two-sided p-value of a standard-normal test statistic.

    The survival function is used instead of ``1 - cdf`` so the result does
    not collapse to exactly 0.0 for large ``|z|``, where ``cdf`` rounds to 1.0.

    Parameters
    ----------
    z : float
        Observed z statistic (either sign).

    Returns
    -------
    float
        ``2 * P(Z > |z|)`` for a standard-normal ``Z``.
    """
    return 2 * stats.norm.sf(abs(z))


# Given correlation values and sample size
old_correlation = 0.73
new_correlation = 0.84
n = 75

# Fisher's Z transformation: arctanh(r) == 0.5 * log((1 + r) / (1 - r))
z_old = np.arctanh(old_correlation)
z_new = np.arctanh(new_correlation)

# Standard error of a single Fisher-transformed correlation based on n pairs.
# NOTE(review): this treats old_correlation as a fixed reference value. If both
# correlations are estimates from independent samples of size n, the standard
# error would be sqrt(2 / (n - 3)) instead -- confirm against the problem
# statement.
se = np.sqrt(1 / (n - 3))

# Z-test statistic and its two-sided p-value
z_stat = (z_new - z_old) / se
p_value = two_sided_p_value(z_stat)

# Print the results
print(f"Z statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Test the hypothesis at the 5% significance level
alpha = 0.05
if p_value < alpha:
    print("Reject the Null Hypothesis")
else:
    print("Fail to Reject the Null Hypothesis")


Z statistic: 2.4815
P-value: 0.0131
Reject the Null Hypothesis


In [7]:
# Read the tab-separated LawnChr file into a DataFrame.
lawn_chr_data = pd.read_csv('Corr-and-Assoc/LawnChr.dat', sep='\t')


In [8]:
# Show the first rows to check the column names before selecting columns.
lawn_chr_data.head()

Unnamed: 0,Surface_Quality,Speed_FPM
0,5,1200
1,4,1300
2,7,1100
3,3,1000
4,5,1200


In [9]:
import pandas as pd
from scipy.stats import pearsonr

# Read the tab-separated LawnChr file into a DataFrame.
lawn_chr_data = pd.read_csv('Corr-and-Assoc/LawnChr.dat', sep='\t')

# Select the two columns under their exact header names.
speed = lawn_chr_data['Speed_FPM']
surface_quality = lawn_chr_data['Surface_Quality']

# Sample Pearson correlation between the two columns, plus its p-value.
correlation_coefficient, p_value = pearsonr(speed, surface_quality)

# Report both numbers to four decimal places.
print(f"Correlation coefficient (r): {correlation_coefficient:.4f}")
print(f"P-value: {p_value:.4f}")

# Decision rule at the 5% significance level: reject only when the p-value
# is strictly below alpha.
alpha = 0.05
print(
    "Reject the Null Hypothesis"
    if p_value < alpha
    else "Fail to Reject the Null Hypothesis"
)


Correlation coefficient (r): -0.0712
P-value: 0.7655
Fail to Reject the Null Hypothesis


In [12]:
# Read the tab-separated Beverage file into a DataFrame.
beverage_data = pd.read_csv('Corr-and-Assoc/Beverage.dat', sep='\t')


In [13]:
# Show the first rows to check the column names before building the table.
beverage_data.head()

Unnamed: 0,ct,region,count
0,1,1,165
1,1,2,77
2,1,3,58
3,1,4,99
4,2,1,73


In [14]:
import pandas as pd
import scipy.stats as stats

# Read the tab-separated Beverage file into a DataFrame.
beverage_data = pd.read_csv('Corr-and-Assoc/Beverage.dat', sep='\t')

# Cross-tabulate the data: one row per 'region', one column per 'ct', with each
# cell holding the summed 'count'.
contingency_table = beverage_data.pivot_table(
    values='count',
    index='region',
    columns='ct',
    aggfunc='sum',
)

# Chi-square test on the contingency table.
chi2_stat, p_value, dof, expected = stats.chi2_contingency(contingency_table)

# Report the statistic and p-value to four decimal places.
print(f"Chi-square statistic: {chi2_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Decision rule at the 5% significance level: reject only when the p-value
# is strictly below alpha.
alpha = 0.05
print(
    "Reject the Null Hypothesis"
    if p_value < alpha
    else "Fail to Reject the Null Hypothesis"
)


Chi-square statistic: 106.0855
P-value: 0.0000
Reject the Null Hypothesis


In [15]:
import pandas as pd
import scipy.stats as stats

# Read the tab-separated Beverage file into a DataFrame.
beverage_data = pd.read_csv('Corr-and-Assoc/Beverage.dat', sep='\t')

# Cross-tabulate the data: rows are 'region', columns are 'ct', and each cell
# is the summed 'count'.
contingency_table = beverage_data.pivot_table(
    values='count',
    index='region',
    columns='ct',
    aggfunc='sum',
)

# Chi-square test on the contingency table.
chi2_stat, p_value, dof, expected = stats.chi2_contingency(contingency_table)

# Effect size, Cramer's V = sqrt(chi2 / (n * (k - 1))), where n is the grand
# total of the table and k is the smaller of its two dimensions.
n = contingency_table.sum().sum()
n_rows, n_cols = contingency_table.shape
cramers_v = (chi2_stat / (n * (min(n_rows, n_cols) - 1))) ** 0.5

# Report all three numbers to four decimal places.
print(f"Chi-square statistic: {chi2_stat:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Cramer's V: {cramers_v:.4f}")

# Decision rule at the 5% significance level: reject only when the p-value
# is strictly below alpha.
alpha = 0.05
conclusion = (
    "Reject the Null Hypothesis"
    if p_value < alpha
    else "Fail to Reject the Null Hypothesis"
)

print(f"Conclusion: {conclusion}")


Chi-square statistic: 106.0855
P-value: 0.0000
Cramer's V: 0.2102
Conclusion: Reject the Null Hypothesis


In [18]:
# Read the tab-separated Sales file into a DataFrame.
sales_data = pd.read_csv('Corr-and-Assoc/Sales.dat', sep='\t')


In [19]:
# Show the first rows to check the column names before grouping.
sales_data.head()

Unnamed: 0,clerk,time
0,1,473
1,1,189
2,1,140
3,1,125
4,1,46


In [20]:
import pandas as pd
import scipy.stats as stats

# Read the tab-separated Sales file into a DataFrame.
sales_data = pd.read_csv('Corr-and-Assoc/Sales.dat', sep='\t')

# Quick look at the first rows to confirm the file parsed as expected.
print(sales_data.head())

# Build one Series of 'time' values for each distinct clerk.
clerks = sales_data['clerk'].unique()
groups = [sales_data.loc[sales_data['clerk'] == clerk, 'time'] for clerk in clerks]

# One-way ANOVA across the per-clerk groups.
f_statistic, p_value = stats.f_oneway(*groups)

# Effect size, eta squared = SS_between / SS_total, with both sums of squares
# taken around the grand mean of 'time'.
grand_mean = sales_data['time'].mean()
ss_between = sum(len(group) * (group.mean() - grand_mean) ** 2 for group in groups)
ss_total = sum((sales_data['time'] - grand_mean) ** 2)
eta_squared = ss_between / ss_total

# Report all three numbers to three decimal places.
print(f"F-statistic: {f_statistic:.3f}")
print(f"P-value: {p_value:.3f}")
print(f"Eta Squared: {eta_squared:.3f}")

# Decision rule at the 5% significance level: reject only when the p-value
# is strictly below alpha.
alpha = 0.05
conclusion = (
    "Reject the Null Hypothesis"
    if p_value < alpha
    else "Fail to Reject the Null Hypothesis"
)

print(f"Conclusion: {conclusion}")


   clerk  time
0      1   473
1      1   189
2      1   140
3      1   125
4      1    46
F-statistic: 9.440
P-value: 0.005
Eta Squared: 0.252
Conclusion: Reject the Null Hypothesis
