In [None]:
import numpy as np
import csv

def one_sample_ttest(data, popmean, alpha):
    """Performs a one-sample t-test."""
    t_statistic = (np.mean(data) - popmean) / (np.std(data, ddof=1) / np.sqrt(len(data)))
    df = len(data) - 1  # Degrees of freedom
    critical_value = critical_values[alpha][df] if df <= 10 and alpha in critical_values else "dof/alpha not in dictionary"
    return t_statistic, critical_value

def unpaired_ttest(data1, data2, alpha):
    """Performs an unpaired t-test."""
    n1, n2 = len(data1), len(data2)
    x1_bar, x2_bar = np.mean(data1), np.mean(data2)
    s1_sq, s2_sq = np.var(data1, ddof=1), np.var(data2, ddof=1)

    sp = np.sqrt(((n1 - 1) * s1_sq + (n2 - 1) * s2_sq) / (n1 + n2 - 2))
    t_statistic = (x1_bar - x2_bar) / (sp * np.sqrt(1/n1 + 1/n2))

    df = n1 + n2 - 2
    critical_value = critical_values[alpha][df] if df <= 10 and alpha in critical_values else "dof/alpha not in dictionary"
    return t_statistic, critical_value

def paired_ttest(data1, data2, alpha):
    """Performs a paired t-test."""
    diff = np.array(data1) - np.array(data2)
    t_statistic = np.mean(diff) / (np.std(diff, ddof=1) / np.sqrt(len(diff)))
    df = len(diff) - 1  # Degrees of freedom
    critical_value = critical_values[alpha][df] if df <= 10 and alpha in critical_values else "dof/alpha not in dictionary"
    return t_statistic, critical_value

# Manually defined dictionary of critical values
critical_values = {
    0.05: {
        1: 12.706, 2: 4.303, 3: 3.182, 4: 2.776, 5: 2.571,
        6: 2.447, 7: 2.365, 8: 2.306, 9: 2.262, 10: 2.228
    },
    0.025: {
        1: 63.657, 2: 9.925, 3: 5.841, 4: 4.604, 5: 4.032,
        6: 3.707, 7: 3.499, 8: 3.355, 9: 3.250, 10: 3.169
    },
    0.01: {
        1: 318.313, 2: 22.327, 3: 10.215, 4: 7.173, 5: 5.893,
        6: 5.208, 7: 4.782, 8: 4.499, 9: 4.296, 10: 4.143
    }
}

# User input
test_type = input("Enter the type of t-test (one-sample, unpaired, paired): ")
file_path1 = input("Enter the file path for dataset 1 (CSV): ")
alpha = float(input("Enter the significance level (alpha): "))

# Read data from CSV file 1
with open(file_path1, 'r') as file1:
    reader1 = csv.reader(file1)
    data1 = [float(row[0]) for row in reader1]  # Assuming data is in the first column

if test_type in ("unpaired", "paired"):
    file_path2 = input("Enter the file path for dataset 2 (CSV): ")
    with open(file_path2, 'r') as file2:
        reader2 = csv.reader(file2)
        data2 = [float(row[0]) for row in reader2]  # Assuming data is in the first column

# Get population mean if needed
if test_type == "one-sample":
    popmean = float(input("Enter the population mean: "))

# Perform the selected t-test
if test_type == "one-sample":
    t_statistic, critical_value = one_sample_ttest(data1, popmean, alpha)
elif test_type == "unpaired":
    t_statistic, critical_value = unpaired_ttest(data1, data2, alpha)
elif test_type == "paired":
    t_statistic, critical_value = paired_ttest(data1, data2, alpha)
else:
    print("Invalid test type.")
    exit()

# Print results
print("T-statistic:", t_statistic)
print("Critical value:", critical_value)

Enter the type of t-test (one-sample, unpaired, paired): one-sample


KeyboardInterrupt: Interrupted by user

In [None]:

def one_sample_ttest(data, popmean, alpha):
    """Performs a one-sample t-test."""
    t_statistic = (np.mean(data) - popmean) / (np.std(data, ddof=1) / np.sqrt(len(data)))
    df = len(data) - 1  # Degrees of freedom
    critical_value = critical_values[alpha][df] if df <= 10 and alpha in critical_values else "dof/alpha not in dictionary"
    return t_statistic, critical_value

critical_values = {
    0.05: {
        1: 12.71, 2: 4.303, 3: 3.182, 4: 2.776, 5: 2.571,
        6: 2.447, 7: 2.365, 8: 2.306, 9: 2.262, 10: 2.228
    },
    0.20: {
        1: 3.078, 2: 1.886, 3: 1.638, 4:1.533, 5: 1.476,
        6: 1.440, 7: 1.415, 8:1.397, 9: 1.383, 10: 1.372
    },
    0.5: {
        1: 1.00, 2:0.816, 3: 0.765, 4: 0.741, 5: 0.727,
        6: 0.718, 7:0.711 ,8:0.706, 9: 0.703, 10: 0.700
    }
}
test_type = input("Enter the type of t-test (one-sample, unpaired, paired): ")
alpha = float(input("Enter the significance level (alpha): "))


if test_type in ("unpaired", "paired"):
    file_path1 = input("Enter the file path for dataset 1 (CSV): ")
    with open(file_path1, 'r') as file1:
      reader1 = csv.reader(file1)
      data1 = [float(row[0]) for row in reader1]
    file_path2 = input("Enter the file path for dataset 2 (CSV): ")
    with open(file_path2, 'r') as file2:
        reader2 = csv.reader(file2)
        data2 = [float(row[0]) for row in reader2]

if test_type == "one-sample":
    popmean = float(input("Enter the population mean: "))
    t_statistic, critical_value = one_sample_ttest(data1, popmean, alpha)
data1 = [float(row[0]) for row in reader1]

Enter the type of t-test (one-sample, unpaired, paired): one-sample
Enter the significance level (alpha): 0.05
Enter the population mean: 75


NameError: name 'data1' is not defined

In [None]:
import math

def get_t_distribution():
    return {
        # Degrees of Freedom: {Significance Levels (Two-Tailed)}
        1: {0.05: 12.71, 0.20: 3.078, 0.50: 1.000},
        2: {0.05: 4.303, 0.20: 1.886, 0.50: 0.816},
        3: {0.05: 3.182, 0.20: 1.638, 0.50: 0.765},
        4: {0.05: 2.776, 0.20: 1.533, 0.50: 0.741 },
        5: {0.05: 2.571, 0.20: 1.476, 0.50: 0.727},
        6: {0.05: 2.447, 0.20: 1.440, 0.50: 0.718},
        7: {0.05: 2.365, 0.20: 1.415, 0.50: 0.711},
        8: {0.05: 2.306, 0.20: 1.397, 0.50: 0.706},
        9: {0.05: 2.262, 0.20: 1.383, 0.50:  0.703},
        10: {0.05: 2.228, 0.20: 1.372, 0.50: 0.700},
        11: {0.05: 2.201, 0.20: 1.363, 0.50: 0.697},
        12: {0.05: 2.179, 0.20: 1.356, 0.50: 0.695},
        13: {0.05: 2.160, 0.20: 1.350, 0.50: 0.694},
        14: {0.05: 2.145, 0.20: 1.345, 0.50: 0.692},
        15: {0.05: 2.131, 0.20: 1.341, 0.50: 0.691},
    }

def one_sample_test():
    population_mean = float(input("Enter the population mean: "))
    sample_mean = float(input("Enter the sample mean: "))
    std_dev = float(input("Enter the standard deviation: "))
    sample_size = int(input("Enter the sample size: "))
    alpha = float(input("Enter the significance level (e.g., 0.05, 0.20, 0.50): "))

    t_calculated = abs((sample_mean - population_mean) / (std_dev / math.sqrt(sample_size)))
    degrees_of_freedom = sample_size - 1

    t_distribution = get_t_distribution()

    if degrees_of_freedom in t_distribution and alpha in t_distribution[degrees_of_freedom]:
        t_critical = t_distribution[degrees_of_freedom][alpha]
        print(f"Calculated t-value: {t_calculated:.3f}")
        print(f"Critical t-value: {t_critical}")
        if t_calculated > t_critical:
            print("Result: Reject the null hypothesis.")
        else:
            print("Result: Accept the null hypothesis.")
    else:
        print("Critical t-value not available for the given degrees of freedom and alpha.")

one_sample_test()

Enter the population mean: 75
Enter the sample mean: 72
Enter the standard deviation: 5
Enter the sample size: 10
Enter the significance level (e.g., 0.05, 0.20, 0.50): 0.05
Calculated t-value: 1.897
Critical t-value: 2.262
Result: Fail to reject the null hypothesis.


In [None]:
def unpaired_test():
  file_path1 = input("Enter the file path for dataset 1 (CSV): ")
  with open(file_path1, 'r') as file1:
      reader1 = csv.reader(file1)
      data1 = [float(row[0]) for row in reader1]
  mean1 = sum(data1) / len(data1)
  variance1 = sum([(x - mean1)**2 for x in data1]) / (len(data1) - 1)  # Sample variance


  file_path2 = input("Enter the file path for dataset 2 (CSV): ")
  with open(file_path2, 'r') as file2:
      reader2 = csv.reader(file2)
      data2 = [float(row[0]) for row in reader2]
  mean2 = sum(data2) / len(data2)
  variance2 = sum([(x - mean2)**2 for x in data2]) / (len(data2) - 1)  # Sample variance
  alpha = float(input("Enter the significance level (e.g., 0.05, 0.20, 0.50): "))
  n1,n2=len(data1),len(data2)
  t_statistic = (mean1 - mean2) / math.sqrt((variance1 / n1) + (variance2 / n2))
  t_distribution = get_t_distribution()
  degrees_of_freedom = n1+n2-2

  if degrees_of_freedom in t_distribution and alpha in t_distribution[degrees_of_freedom]:
        t_critical = t_distribution[degrees_of_freedom][alpha]
        print(f"Calculated t-value: {t_statistic:.3f}")
        print(f"Critical t-value: {t_critical}")
        if t_statistic > t_critical:
            print("Result: Reject the null hypothesis.")
        else:
            print("Result: Accept the null hypothesis.")
  else:
        print("Critical t-value not available for the given degrees of freedom and alpha.")
unpaired_test()

Enter the file path for dataset 1 (CSV): /content/drive/MyDrive/DA/A - Sheet1.csv
Enter the file path for dataset 2 (CSV): /content/drive/MyDrive/DA/B - Sheet1.csv
Enter the significance level (e.g., 0.05, 0.20, 0.50): 0.05
Calculated t-value: 4.801
Critical t-value: 2.306
Result: Reject the null hypothesis.


In [None]:
import math
import csv

def paired_test():
    file_path = input("Enter the file path for the dataset (CSV): ")
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        next(reader)  # Skip the header row if present
        data = []
        for row in reader:
            try:
                x1 = float(row[0])
                x2 = float(row[1])
                data.append((x1, x2))  # Store as tuples (x1, x2)
            except (IndexError, ValueError):
                print(f"Skipping invalid row: {row}")

    alpha = float(input("Enter the significance level (e.g., 0.05, 0.20, 0.50): "))

    # Calculate differences
    differences = [x1 - x2 for x1, x2 in data]
     # Calculate mean and variance of differences
    mean_diff = sum(differences) / len(differences)
    variance_diff = sum([(x - mean_diff)**2 for x in differences]) / (len(differences) - 1)

    # Calculate t-statistic
    t_statistic = mean_diff / (math.sqrt(variance_diff / len(differences)))

    # Degrees of freedom
    degrees_of_freedom = len(differences) - 1

    t_distribution = get_t_distribution()

    # Check if degrees of freedom and alpha are in the t_distribution dictionary
    if degrees_of_freedom in t_distribution and alpha in t_distribution[degrees_of_freedom]:
        t_critical = t_distribution[degrees_of_freedom][alpha]
        print(f"Calculated t-value: {t_statistic:.3f}")
        print(f"Critical t-value: {t_critical:.3f}")

        # Check if calculated t-statistic exceeds the critical value
        if abs(t_statistic) > t_critical:  # Use abs() for two-tailed test
            print("Result: Reject the null hypothesis.")
        else:
            print("Result: Fail to reject the null hypothesis.")
    else:
        print("Critical t-value not available for the given degrees of freedom and alpha.")
paired_test()

Enter the file path for the dataset (CSV): /content/drive/MyDrive/DA/k1 - Sheet1.csv
Enter the significance level (e.g., 0.05, 0.20, 0.50): 0.50
Calculated t-value: -4.264
Critical t-value: 0.694
Result: Reject the null hypothesis.


**CHI-SQAURE-TEST**

In [None]:
import math

def get_chi_square_critical_values():
    return {
        # Degrees of Freedom: {Significance Levels}
        1: {0.05: 3.841, 0.20:1.642, 0.50:0.455},
        2: {0.05: 5.991, 0.20:3.219, 0.50:1.386},
        3: {0.05: 7.815, 0.20:4.642, 0.50:2.366},
        4: {0.05: 9.488, 0.20:5.989, 0.50:3.357},
        5: {0.05: 11.070, 0.20:7.289, 0.50:4.351},
        6: {0.05: 12.592, 0.20:8.558, 0.50:5.348},
        7: {0.05: 14.067, 0.20:9.803, 0.50:6.346},
        8: {0.05: 15.507, 0.20:11.030, 0.50:7.344},
        9: {0.05: 16.919, 0.20:12.242, 0.50:8.343},
        10: {0.05: 18.307, 0.20:13.442, 0.50:9.342}
    }

def chi_square_test():
    file_path = input("Enter the file path for the dataset (CSV): ")

    # Read data from CSV
    observed_frequencies = []
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        next(reader)  # Skip header if present
        for row in reader:
            try:
                observed_frequencies.append([int(x) for x in row])
            except ValueError:
                print(f"Skipping invalid row: {row}")

    # Get total observations
    total_observations = sum(sum(row) for row in observed_frequencies)

    # Calculate expected frequencies (assuming independence)
    num_rows = len(observed_frequencies)
    num_cols = len(observed_frequencies[0])
    expected_frequencies = [[0] * num_cols for _ in range(num_rows)]

    for i in range(num_rows):
        for j in range(num_cols):
            row_sum = sum(observed_frequencies[i])
            col_sum = sum(observed_frequencies[k][j] for k in range(num_rows))
            expected_frequencies[i][j] = (row_sum * col_sum) / total_observations

    # Calculate Chi-square statistic
    chi_square_statistic = 0
    for i in range(num_rows):
        for j in range(num_cols):
            chi_square_statistic += ((observed_frequencies[i][j] - expected_frequencies[i][j])**2) / expected_frequencies[i][j]

    # Get degrees of freedom
    degrees_of_freedom = (num_rows - 1) * (num_cols - 1)

    alpha = float(input("Enter the significance level (e.g., 0.05, 0.20, 0.50): "))
    chi_square_distribution = get_chi_square_critical_values()

    if degrees_of_freedom in chi_square_distribution and alpha in chi_square_distribution[degrees_of_freedom]:
        critical_value = chi_square_distribution[degrees_of_freedom][alpha]
        print(f"Calculated Chi-square value: {chi_square_statistic:.3f}")
        print(f"Critical Chi-square value: {critical_value:.3f}")
        if chi_square_statistic > critical_value:
            print("Result: Reject the null hypothesis.")
        else:
            print("Result: Fail to reject the null hypothesis.")
    else:
        print("Critical Chi-square value not available for the given degrees of freedom and alpha.")

chi_square_test()

Enter the file path for the dataset (CSV): /content/drive/MyDrive/DA/k1 - xy.csv
Enter the significance level (e.g., 0.05, 0.20, 0.50): 0.05
Calculated Chi-square value: 18.177
Critical Chi-square value: 7.815
Result: Reject the null hypothesis.


**T-TEST (ONE SAMPLE,PAIRED,UNPAIRED)**

In [None]:
import math
import csv

def get_t_distribution():
    return {
        1: {0.05: 12.71, 0.20: 3.078, 0.50: 1.000},
        2: {0.05: 4.303, 0.20: 1.886, 0.50: 0.816},
        3: {0.05: 3.182, 0.20: 1.638, 0.50: 0.765},
        4: {0.05: 2.776, 0.20: 1.533, 0.50: 0.741},
        5: {0.05: 2.571, 0.20: 1.476, 0.50: 0.727},
        6: {0.05: 2.447, 0.20: 1.440, 0.50: 0.718},
        7: {0.05: 2.365, 0.20: 1.415, 0.50: 0.711},
        8: {0.05: 2.306, 0.20: 1.397, 0.50: 0.706},
        9: {0.05: 2.262, 0.20: 1.383, 0.50: 0.703},
        10: {0.05: 2.228, 0.20: 1.372, 0.50: 0.700},
        11: {0.05: 2.201, 0.20: 1.363, 0.50: 0.697},
        12: {0.05: 2.179, 0.20: 1.356, 0.50: 0.695},
        13: {0.05: 2.160, 0.20: 1.350, 0.50: 0.694},
        14: {0.05: 2.145, 0.20: 1.345, 0.50: 0.692},
        15: {0.05: 2.131, 0.20: 1.341, 0.50: 0.691},
    }

def one_sample_test():
    population_mean = float(input("Enter the population mean: "))
    sample_mean = float(input("Enter the sample mean: "))
    std_dev = float(input("Enter the standard deviation: "))
    sample_size = int(input("Enter the sample size: "))
    alpha = float(input("Enter the significance level (e.g., 0.05, 0.20, 0.50): "))

    t_calculated = abs((sample_mean - population_mean) / (std_dev / math.sqrt(sample_size)))
    degrees_of_freedom = sample_size - 1

    t_distribution = get_t_distribution()

    if degrees_of_freedom in t_distribution and alpha in t_distribution[degrees_of_freedom]:
        t_critical = t_distribution[degrees_of_freedom][alpha]
        print(f"Calculated t-value: {t_calculated:.3f}")
        print(f"Critical t-value: {t_critical}")
        if t_calculated > t_critical:
            print("Result: Reject the null hypothesis.")
        else:
            print("Result: Accept the null hypothesis.")
    else:
        print("Critical t-value not available for the given degrees of freedom and alpha.")

def unpaired_test():
    file_path1 = input("Enter the file path for dataset 1 (CSV): ")
    with open(file_path1, 'r') as file1:
        reader1 = csv.reader(file1)
        data1 = [float(row[0]) for row in reader1]
    mean1 = sum(data1) / len(data1)
    variance1 = sum([(x - mean1)**2 for x in data1]) / (len(data1) - 1)

    file_path2 = input("Enter the file path for dataset 2 (CSV): ")
    with open(file_path2, 'r') as file2:
        reader2 = csv.reader(file2)
        data2 = [float(row[0]) for row in reader2]
    mean2 = sum(data2) / len(data2)
    variance2 = sum([(x - mean2)**2 for x in data2]) / (len(data2) - 1)

    alpha = float(input("Enter the significance level (e.g., 0.05, 0.20, 0.50): "))
    n1, n2 = len(data1), len(data2)
    t_statistic = (mean1 - mean2) / math.sqrt((variance1 / n1) + (variance2 / n2))
    t_distribution = get_t_distribution()
    degrees_of_freedom = n1 + n2 - 2

    if degrees_of_freedom in t_distribution and alpha in t_distribution[degrees_of_freedom]:
        t_critical = t_distribution[degrees_of_freedom][alpha]
        print(f"Calculated t-value: {t_statistic:.3f}")
        print(f"Critical t-value: {t_critical}")
        if t_statistic > t_critical:
            print("Result: Reject the null hypothesis.")
        else:
            print("Result: Accept the null hypothesis.")
    else:
        print("Critical t-value not available for the given degrees of freedom and alpha.")

def paired_test():
    file_path = input("Enter the file path for the dataset (CSV): ")
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        next(reader)  # Skip the header row if present
        data = []
        for row in reader:
            try:
                x1 = float(row[0])
                x2 = float(row[1])
                data.append((x1, x2))  # Store as tuples (x1, x2)
            except (IndexError, ValueError):
                print(f"Skipping invalid row: {row}")

    alpha = float(input("Enter the significance level (e.g., 0.05, 0.20, 0.50): "))

    # Calculate differences
    differences = [x1 - x2 for x1, x2 in data]
    # Calculate mean and variance of differences
    mean_diff = sum(differences) / len(differences)
    variance_diff = sum([(x - mean_diff)**2 for x in differences]) / (len(differences) - 1)

    # Calculate t-statistic
    t_statistic = mean_diff / (math.sqrt(variance_diff / len(differences)))

    # Degrees of freedom
    degrees_of_freedom = len(differences) - 1

    t_distribution = get_t_distribution()

    # Check if degrees of freedom and alpha are in the t_distribution dictionary
    if degrees_of_freedom in t_distribution and alpha in t_distribution[degrees_of_freedom]:
        t_critical = t_distribution[degrees_of_freedom][alpha]
        print(f"Calculated t-value: {t_statistic:.3f}")
        print(f"Critical t-value: {t_critical:.3f}")

        # Check if calculated t-statistic exceeds the critical value
        if abs(t_statistic) > t_critical:  # Use abs() for two-tailed test
            print("Result: Reject the null hypothesis.")
        else:
            print("Result: Fail to reject the null hypothesis.")
    else:
        print("Critical t-value not available for the given degrees of freedom and alpha.")

def main():
    print("Choose the type of t-test you would like to perform:")
    print("1. One-Sample t-test")
    print("2. Unpaired t-test")
    print("3. Paired t-test")

    choice = int(input("Enter the number corresponding to your choice: "))

    if choice == 1:
        one_sample_test()
    elif choice == 2:
        unpaired_test()
    elif choice == 3:
        paired_test()
    else:
        print("Invalid choice. Please select 1, 2, or 3.")

# Run the main function to let the user choose the test
main()


**ANOVA TEST**

In [3]:
import pandas as pd
def get_f_distribution():
    return {
       0.05: {
        1: {
            1: 161.4, 2: 18.51, 3: 10.13, 4: 7.71, 5: 6.61, 6: 5.99,
            7: 5.59, 8: 5.32, 9: 5.12, 10: 4.96, 11: 4.84, 12: 4.75,
            13: 4.67, 14: 4.60, 15: 4.54, 16: 4.49, 17: 4.45, 18: 4.41,
            19: 4.3807, 20: 4.3512
        },
        2: {
            1: 199.5, 2: 19.0, 3: 9.5521, 4: 6.9443, 5: 5.7861, 6: 5.1433,
            7: 4.7374, 8: 4.459, 9: 4.2565, 10: 4.1028, 11: 3.9823, 12: 3.8853,
            13: 3.8056, 14: 3.7389, 15: 3.6823, 16: 3.6337, 17: 3.5915, 18: 3.5546,
            19: 3.5219, 20: 3.4928
        },
        3: {
            1: 215.7073, 2: 19.1643, 3: 9.2766, 4: 6.5914, 5: 5.4095, 6: 4.7571,
            7: 4.3468, 8: 4.0662, 9: 3.8625, 10: 3.7083, 11: 3.5874, 12: 3.4903,
            13: 3.4105, 14: 3.3439, 15: 3.2874, 16: 3.2389, 17: 3.1968, 18: 3.1599,
            19: 3.1274, 20: 3.0984
        }
     },
     0.005 : {
          1: {
        1: 16211, 2: 20000, 3: 21615, 4: 31.33, 5: 22.78, 6: 18.63,
        7: 16.24, 8: 14.69, 9: 13.61, 10: 12.83, 11: 12.21, 12: 11.75,
        13: 11.36, 14: 11.06, 15: 10.80, 16: 10.58, 17: 10.40, 18: 10.22,
        19: 10.07, 20: 9.94
    },
    2: {
        1: 198.5, 2: 199.0, 3: 199.2, 4: 26.28, 5: 18.31, 6: 14.54,
        7: 12.40, 8: 11.04, 9: 10.11, 10: 9.43, 11: 8.91, 12: 8.51,
        13: 8.14, 14: 7.92, 15: 7.70, 16: 7.51, 17: 7.35, 18: 7.21,
        19: 7.09, 20: 6.99
    },
    3: {
        1: 55.55, 2: 49.80, 3: 47.47, 4: 24.26, 5: 16.53, 6: 12.92,
        7: 10.80, 8: 9.60, 9: 8.72, 10: 8.08, 11: 7.53, 12: 7.13,
        13: 6.82, 14: 6.58, 15: 6.36, 16: 6.16, 17: 6.03, 18: 5.92,
        19: 5.82, 20: 5.74
    }


       },
     0.01 :
       {
           1: {
        1: 4052, 2: 4999.5, 3: 5403, 4: 21.2, 5: 16.26, 6: 13.75,
        7: 12.25, 8: 11.26, 9: 10.56, 10: 10.04, 11: 9.65, 12: 9.33,
        13: 9.07, 14: 8.86, 15: 8.68, 16: 8.53, 17: 8.4, 18: 8.29,
        19: 8.18, 20: 8.10
    },
    2: {
        1: 98.5, 2: 99.0, 3: 99.17, 4: 30.82, 5: 18.0, 6: 13.27,
        7: 10.92, 8: 8.65, 9: 8.02, 10: 7.56, 11: 7.21, 12: 6.93,
        13: 6.71, 14: 6.51, 15: 6.33, 16: 6.23, 17: 6.11, 18: 6.01,
        19: 5.93, 20: 5.85
    },
    3: {
        1: 34.12, 2: 29.46, 3: 18.0, 4: 12.06, 5: 9.78, 6: 8.59,
        7: 7.74, 8: 7.15, 9: 6.55, 10: 6.22, 11: 5.95, 12: 5.75,
        13: 5.56, 14: 5.4, 15: 5.29, 16: 5.18, 17: 5.09, 18: 5.01,
        19: 4.94, 20: 4.88
    }
       } ,
    0.025 :
       {
            1: {
        1: 647.8, 2: 799.5, 3: 864.2, 4: 12.22, 5: 10.01, 6: 8.81,
        7: 8.07, 8: 7.57, 9: 7.21, 10: 6.94, 11: 6.72, 12: 6.55,
        13: 6.40, 14: 6.30, 15: 6.20, 16: 6.12, 17: 6.04, 18: 5.98,
        19: 5.92, 20: 5.87
    },
    2: {
        1: 38.51, 2: 39.00, 3: 39.17, 4: 10.65, 5: 8.63, 6: 7.26,
        7: 6.54, 8: 6.06, 9: 5.71, 10: 5.46, 11: 5.26, 12: 5.10,
        13: 4.97, 14: 4.86, 15: 4.77, 16: 4.69, 17: 4.62, 18: 4.56,
        19: 4.51, 20: 4.46
    },
    3: {
        1: 17.44, 2: 16.04, 3: 15.44, 4: 9.98, 5: 7.76, 6: 6.60,
        7: 5.89, 8: 5.42, 9: 5.08, 10: 4.83, 11: 4.63, 12: 4.47,
        13: 4.34, 14: 4.24, 15: 4.14, 16: 4.08, 17: 4.02, 18: 3.95,
        19: 3.90, 20: 3.86
    }
       } ,
      0.10 :
       {
        1: {
        1: 39.86, 2: 8.53, 3: 5.54, 4: 4.54, 5: 4.06, 6: 3.78, 7: 3.59, 8: 3.46, 9: 3.36,
        10: 3.29, 11: 3.23, 12: 3.18, 13: 3.14, 14: 3.10, 15: 3.07, 16: 3.05, 17: 3.03,
        18: 3.01, 19: 2.99, 20: 2.97
    },
    2: {
        1: 49.50, 2: 9.00, 3: 5.46, 4: 4.32, 5: 3.78, 6: 3.46, 7: 3.29, 8: 3.11, 9: 3.01,
        10: 2.92, 11: 2.86, 12: 2.81, 13: 2.76, 14: 2.73, 15: 2.70, 16: 2.67, 17: 2.64,
        18: 2.62, 19: 2.61, 20: 2.59
    },
    3: {
        1: 53.59, 2: 9.16, 3: 5.39, 4: 4.19, 5: 3.62, 6: 3.29, 7: 3.07, 8: 2.92, 9: 2.81,
        10: 2.73, 11: 2.66, 12: 2.61, 13: 2.57, 14: 2.52, 15: 2.49, 16: 2.46, 17: 2.44,
        18: 2.42, 19: 2.40, 20: 2.38
    }
       }
    }

file_path = "/content/plant - Sheet1.csv"
data = pd.read_csv(file_path, header=None, names=["Group", "Value"])


group_means = data.groupby("Group")["Value"].mean().round(2)

overall_mean = data["Value"].mean().round(2)

SSB = sum(data.groupby("Group").size() * (group_means - overall_mean) ** 2)
SSW = sum((data["Value"] - data.groupby("Group")["Value"].transform("mean")) ** 2)

df_between = len(group_means) - 1
df_within = len(data) - len(group_means)

MSB = SSB / df_between
MSW = SSW / df_within

F_calculated = MSB / MSW
alpha = float(input("Enter the significance level (e.g., 0.005, 0.01, 0.025, 0.05, 0.10): "))
f_distribution = get_f_distribution()
critical_value = f_distribution[alpha][df_between][df_within]

# Print results
print("ANOVA Results:")
print(f"SSB (Sum of Squares Between): {SSB:.2f}")
print(f"SSW (Sum of Squares Within): {SSW:.2f}")
print(f"F-calculated: {F_calculated:.2f}")
print(f"Degrees of Freedom Between: {df_between}")
print(f"Degrees of Freedom Within: {df_within}")
print(f"Critical Value (F-table): {critical_value}")

# Hypothesis testing
if F_calculated > critical_value:
    print(" Reject the null hypothesis. There is a significant difference between the group means.")
else:
    print("Accept the null hypothesis. There is no significant difference between the group means.")

FileNotFoundError: [Errno 2] No such file or directory: '/content/plant - Sheet1.csv'

In [18]:
def get_f_distribution():
    return {
       0.05: {
        1: {
            1: 161.4, 2: 18.51, 3: 10.13, 4: 7.71, 5: 6.61, 6: 5.99,
            7: 5.59, 8: 5.32, 9: 5.12, 10: 4.96, 11: 4.84, 12: 4.75,
            13: 4.67, 14: 4.60, 15: 4.54, 16: 4.49, 17: 4.45, 18: 4.41,
            19: 4.3807, 20: 4.3512
        },
        2: {
            1: 199.5, 2: 19.0, 3: 9.5521, 4: 6.9443, 5: 5.7861, 6: 5.1433,
            7: 4.7374, 8: 4.459, 9: 4.2565, 10: 4.1028, 11: 3.9823, 12: 3.8853,
            13: 3.8056, 14: 3.7389, 15: 3.6823, 16: 3.6337, 17: 3.5915, 18: 3.5546,
            19: 3.5219, 20: 3.4928
        },
        3: {
            1: 215.7073, 2: 19.1643, 3: 9.2766, 4: 6.5914, 5: 5.4095, 6: 4.7571,
            7: 4.3468, 8: 4.0662, 9: 3.8625, 10: 3.7083, 11: 3.5874, 12: 3.4903,
            13: 3.4105, 14: 3.3439, 15: 3.2874, 16: 3.2389, 17: 3.1968, 18: 3.1599,
            19: 3.1274, 20: 3.0984
        }
     },
     0.005 : {
          1: {
        1: 16211, 2: 20000, 3: 21615, 4: 31.33, 5: 22.78, 6: 18.63,
        7: 16.24, 8: 14.69, 9: 13.61, 10: 12.83, 11: 12.21, 12: 11.75,
        13: 11.36, 14: 11.06, 15: 10.80, 16: 10.58, 17: 10.40, 18: 10.22,
        19: 10.07, 20: 9.94
    },
    2: {
        1: 198.5, 2: 199.0, 3: 199.2, 4: 26.28, 5: 18.31, 6: 14.54,
        7: 12.40, 8: 11.04, 9: 10.11, 10: 9.43, 11: 8.91, 12: 8.51,
        13: 8.14, 14: 7.92, 15: 7.70, 16: 7.51, 17: 7.35, 18: 7.21,
        19: 7.09, 20: 6.99
    },
    3: {
        1: 55.55, 2: 49.80, 3: 47.47, 4: 24.26, 5: 16.53, 6: 12.92,
        7: 10.80, 8: 9.60, 9: 8.72, 10: 8.08, 11: 7.53, 12: 7.13,
        13: 6.82, 14: 6.58, 15: 6.36, 16: 6.16, 17: 6.03, 18: 5.92,
        19: 5.82, 20: 5.74
    }


       },
     0.01 :
       {
           1: {
        1: 4052, 2: 4999.5, 3: 5403, 4: 21.2, 5: 16.26, 6: 13.75,
        7: 12.25, 8: 11.26, 9: 10.56, 10: 10.04, 11: 9.65, 12: 9.33,
        13: 9.07, 14: 8.86, 15: 8.68, 16: 8.53, 17: 8.4, 18: 8.29,
        19: 8.18, 20: 8.10
    },
    2: {
        1: 98.5, 2: 99.0, 3: 99.17, 4: 30.82, 5: 18.0, 6: 13.27,
        7: 10.92, 8: 8.65, 9: 8.02, 10: 7.56, 11: 7.21, 12: 6.93,
        13: 6.71, 14: 6.51, 15: 6.33, 16: 6.23, 17: 6.11, 18: 6.01,
        19: 5.93, 20: 5.85
    },
    3: {
        1: 34.12, 2: 29.46, 3: 18.0, 4: 12.06, 5: 9.78, 6: 8.59,
        7: 7.74, 8: 7.15, 9: 6.55, 10: 6.22, 11: 5.95, 12: 5.75,
        13: 5.56, 14: 5.4, 15: 5.29, 16: 5.18, 17: 5.09, 18: 5.01,
        19: 4.94, 20: 4.88
    }
       } ,
    0.025 :
       {
            1: {
        1: 647.8, 2: 799.5, 3: 864.2, 4: 12.22, 5: 10.01, 6: 8.81,
        7: 8.07, 8: 7.57, 9: 7.21, 10: 6.94, 11: 6.72, 12: 6.55,
        13: 6.40, 14: 6.30, 15: 6.20, 16: 6.12, 17: 6.04, 18: 5.98,
        19: 5.92, 20: 5.87
    },
    2: {
        1: 38.51, 2: 39.00, 3: 39.17, 4: 10.65, 5: 8.63, 6: 7.26,
        7: 6.54, 8: 6.06, 9: 5.71, 10: 5.46, 11: 5.26, 12: 5.10,
        13: 4.97, 14: 4.86, 15: 4.77, 16: 4.69, 17: 4.62, 18: 4.56,
        19: 4.51, 20: 4.46
    },
    3: {
        1: 17.44, 2: 16.04, 3: 15.44, 4: 9.98, 5: 7.76, 6: 6.60,
        7: 5.89, 8: 5.42, 9: 5.08, 10: 4.83, 11: 4.63, 12: 4.47,
        13: 4.34, 14: 4.24, 15: 4.14, 16: 4.08, 17: 4.02, 18: 3.95,
        19: 3.90, 20: 3.86
    }
       } ,
      0.10 :
       {
        1: {
        1: 39.86, 2: 8.53, 3: 5.54, 4: 4.54, 5: 4.06, 6: 3.78, 7: 3.59, 8: 3.46, 9: 3.36,
        10: 3.29, 11: 3.23, 12: 3.18, 13: 3.14, 14: 3.10, 15: 3.07, 16: 3.05, 17: 3.03,
        18: 3.01, 19: 2.99, 20: 2.97
    },
    2: {
        1: 49.50, 2: 9.00, 3: 5.46, 4: 4.32, 5: 3.78, 6: 3.46, 7: 3.29, 8: 3.11, 9: 3.01,
        10: 2.92, 11: 2.86, 12: 2.81, 13: 2.76, 14: 2.73, 15: 2.70, 16: 2.67, 17: 2.64,
        18: 2.62, 19: 2.61, 20: 2.59
    },
    3: {
        1: 53.59, 2: 9.16, 3: 5.39, 4: 4.19, 5: 3.62, 6: 3.29, 7: 3.07, 8: 2.92, 9: 2.81,
        10: 2.73, 11: 2.66, 12: 2.61, 13: 2.57, 14: 2.52, 15: 2.49, 16: 2.46, 17: 2.44,
        18: 2.42, 19: 2.40, 20: 2.38
    }
       }
    }

In [9]:
import pandas as pd

file_path = "/content/drive/MyDrive/DA/anova_2_way - Sheet1.csv"
data = pd.read_csv(file_path, header=None, names=["Group", "Level", "Value"])


overall_mean = data["Value"].mean()

mean_group = data.groupby("Group")["Value"].mean()
mean_level = data.groupby("Level")["Value"].mean()

mean_group_level = data.groupby(["Group", "Level"])["Value"].mean()

SST =sum((data["Value"] - overall_mean) ** 2)

SSA = sum([len(data[data["Group"] == group]) * (mean_group[group] - overall_mean) ** 2 for group in mean_group.index])

SSB = sum([len(data[data["Level"] == level]) * (mean_level[level] - overall_mean) ** 2 for level in mean_level.index])

SSAB = sum([len(data[(data["Group"] == group) & (data["Level"] == level)]) *
            (mean_group_level[(group, level)] - mean_group[group] - mean_level[level] + overall_mean) ** 2
            for group in mean_group.index for level in mean_level.index])

SSE = SST - SSA - SSB - SSAB


df_group = len(mean_group) - 1
df_level = len(mean_level) - 1
df_interaction = df_group * df_level
df_error = len(data) - (len(mean_group) * len(mean_level))

# Mean Squares
MS_group = SSA / df_group
MS_level = SSB / df_level
MS_interaction = SSAB / df_interaction
MS_error = SSE / df_error

# F-statistics
F_group = MS_group / MS_error
F_level = MS_level / MS_error
F_interaction = MS_interaction / MS_error

alpha = float(input("Enter the significance level (e.g., 0.005, 0.01, 0.025, 0.05, 0.10): "))
f_distribution = get_f_distribution()

critical_value_group = f_distribution[alpha][df_group][df_error]
critical_value_level = f_distribution[alpha][df_level][df_error]
critical_value_interaction = f_distribution[alpha][df_interaction][df_error]

# Print results
print("Two-Way ANOVA Results:")
print(f"SST (Sum of Squares Total): {SST:.2f}")
print(f"SSA (Sum of Squares Group): {SSA:.2f}")
print(f"SSB (Sum of Squares Level): {SSB:.2f}")
print(f"SSAB (Sum of Squares Interaction): {SSAB:.2f}")
print(f"SSE (Sum of Squares Error): {SSE:.2f}")
print(f"F-statistic Group: {F_group:.2f}")
print(f"F-statistic Level: {F_level:.2f}")
print(f"F-statistic Interaction: {F_interaction:.2f}")
print(f"Critical Value Group: {critical_value_group}")
print(f"Critical Value Level: {critical_value_level}")
print(f"Critical Value Interaction: {critical_value_interaction}")

#Group
if F_group > critical_value_group:
    print("Reject the null hypothesis for Group.")
else:
    print("Accept the null hypothesis for Group.")

#Level
if F_level > critical_value_level:
    print("Reject the null hypothesis for Level.")
else:
    print("Accept the null hypothesis for Level.")

#Interaction
if F_interaction > critical_value_interaction:
    print("Reject the null hypothesis for Interaction.")
else:
    print("Accept the null hypothesis for Interaction.")

Enter the significance level (e.g., 0.005, 0.01, 0.025, 0.05, 0.10): 0.05
Two-Way ANOVA Results:
SST (Sum of Squares Total): 1415.67
SSA (Sum of Squares Group): 85.33
SSB (Sum of Squares Level): 1287.17
SSAB (Sum of Squares Interaction): 5.17
SSE (Sum of Squares Error): 38.00
F-statistic Group: 13.47
F-statistic Level: 101.62
F-statistic Interaction: 0.41
Critical Value Group: 5.99
Critical Value Level: 5.1433
Critical Value Interaction: 5.1433
Reject the null hypothesis for Group.
Reject the null hypothesis for Level.
Accept the null hypothesis for Interaction.


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [20]:
import pandas as pd

# Load the dataset from the CSV file
file_path = "/content/drive/MyDrive/DA/anova_2_way - Sheet1.csv"
data = pd.read_csv(file_path, header=None, names=["Group", "Level", "Value"])

# Calculate overall mean
#overall_mean = round(data["Value"].mean(), 2)

# Calculate means for each factor level
mean_group = data.groupby("Group")["Value"].mean().round(2)
mean_level = data.groupby("Level")["Value"].mean().round(2)
overall_mean = round(mean_group.mean(), 2)

# Calculate means for each combination of Group and Level
mean_group_level = data.groupby(["Group", "Level"])["Value"].mean().round(2)

# Calculate Sum of Squares Total (SST)
SST = round(sum((data["Value"] - overall_mean) ** 2), 2)

# Calculate Sum of Squares for Group (SSA)
SSA = round(sum([len(data[data["Group"] == group]) * (mean_group[group] - overall_mean) ** 2 for group in mean_group.index]), 2)

# Calculate Sum of Squares for Level (SSB)
SSB = round(sum([len(data[data["Level"] == level]) * (mean_level[level] - overall_mean) ** 2 for level in mean_level.index]), 2)

# Calculate Sum of Squares for Interaction (SSAB)
SSAB = round(sum([len(data[(data["Group"] == group) & (data["Level"] == level)]) *
            (mean_group_level[(group, level)] - mean_group[group] - mean_level[level] + overall_mean) ** 2
            for group in mean_group.index for level in mean_level.index]), 2)

# Calculate Sum of Squares Error (SSE)
SSE = round(SST - SSA - SSB - SSAB, 2)

# Degrees of Freedom
df_group = len(mean_group) - 1  # Degrees of freedom for Group
df_level = len(mean_level) - 1  # Degrees of freedom for Level
df_interaction = df_group * df_level  # Degrees of freedom for Interaction
df_error = len(data) - (len(mean_group) * len(mean_level))  # Degrees of freedom for Error

# Mean Squares
MS_group = round(SSA / df_group, 2)  # Mean square for Group
MS_level = round(SSB / df_level, 2)  # Mean square for Level
MS_interaction = round(SSAB / df_interaction, 2)  # Mean square for Interaction
MS_error = round(SSE / df_error, 2)  # Mean square for Error

# F-statistics
F_group = round(MS_group / MS_error, 2)  # F-statistic for Group
F_level = round(MS_level / MS_error, 2)  # F-statistic for Level
F_interaction = round(MS_interaction / MS_error, 2)  # F-statistic for Interaction

# Critical F-values (from F-distribution table or function)
alpha = float(input("Enter the significance level (e.g., 0.005, 0.01, 0.025, 0.05, 0.10): "))
f_distribution = get_f_distribution()

critical_value_group = round(f_distribution[alpha][df_group][df_error], 2)
critical_value_level = round(f_distribution[alpha][df_level][df_error], 2)
critical_value_interaction = round(f_distribution[alpha][df_interaction][df_error], 2)

# Print results
print("Two-Way ANOVA Results:")
print(f"SST (Sum of Squares Total): {SST}")
print(f"SSA (Sum of Squares Group): {SSA}")
print(f"SSB (Sum of Squares Level): {SSB}")
print(f"SSAB (Sum of Squares Interaction): {SSAB}")
print(f"SSE (Sum of Squares Error): {SSE}")
print(f"F-statistic Group: {F_group}")
print(f"F-statistic Level: {F_level}")
print(f"F-statistic Interaction: {F_interaction}")
print(f"Critical Value Group: {critical_value_group}")
print(f"Critical Value Level: {critical_value_level}")
print(f"Critical Value Interaction: {critical_value_interaction}")

# Hypothesis testing for Group
if F_group > critical_value_group:
    print("Reject the null hypothesis for Group. There is a significant effect.")
else:
    print("Accept the null hypothesis for Group. There is no significant effect.")

# Hypothesis testing for Level
if F_level > critical_value_level:
    print("Reject the null hypothesis for Level. There is a significant effect.")
else:
    print("Accept the null hypothesis for Level. There is no significant effect.")

# Hypothesis testing for Interaction
if F_interaction > critical_value_interaction:
    print("Reject the null hypothesis for Interaction. There is a significant interaction effect.")
else:
    print("Accept the null hypothesis for Interaction. There is no significant interaction effect.")

Enter the significance level (e.g., 0.005, 0.01, 0.025, 0.05, 0.10): 0.05
Two-Way ANOVA Results:
SST (Sum of Squares Total): 1415.67
SSA (Sum of Squares Group): 85.23
SSB (Sum of Squares Level): 1287.17
SSAB (Sum of Squares Interaction): 5.17
SSE (Sum of Squares Error): 38.1
F-statistic Group: 13.42
F-statistic Level: 101.35
F-statistic Interaction: 0.41
Critical Value Group: 5.99
Critical Value Level: 5.14
Critical Value Interaction: 5.14
Reject the null hypothesis for Group. There is a significant effect.
Reject the null hypothesis for Level. There is a significant effect.
Accept the null hypothesis for Interaction. There is no significant interaction effect.
