In [10]:
import pandas as pd
import scipy.stats as stats

# Define the data: two parallel lists, one entry per car.
data = {
    'TotalEngineCylinders': [4, 4, 4, 6, 4, 4, 6, 4, 6, 4, 6, 8, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 6, 3, 4, 4, 8, 4, 4, 6, 4, 4, 6, 4, 6, 6, 4, 4, 4, 4, 4, 4, 4, 6, 6, 4, 4, 6, 4, 6, 4, 4, 4, 4, 4, 6, 4, 4, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 6, 8, 4, 6, 4, 4, 6, 4, 6, 8, 4, 6, 4, 4, 6, 4, 4, 6, 6, 6, 4, 4, 6, 8, 4, 6, 4, 4, 4],
    'HorsePower': [119, 174, 168, 301, 192, 201, 335, 255, 335, 255, 362, 463, 147, 174, 147, 184, 184, 261, 335, 188, 221, 255, 228, 248, 335, 123, 160, 187, 450, 247, 247, 296, 184, 261, 335, 340, 252, 450, 330, 150, 250, 320, 187, 250, 320, 147, 181, 235, 201, 290, 147, 185, 365, 190, 141, 280, 400, 271, 180, 295, 285, 186, 227, 148, 187, 227, 138, 169, 115, 260, 284, 170, 147, 180, 120, 147, 120, 368, 382, 106, 203, 381, 121, 182, 120, 208, 301, 134, 302, 416, 241, 181, 311, 300, 400, 268, 295, 400, 420, 177, 187, 316, 316, 316, 316, 316, 252, 375, 420, 152, 78, 148, 179, 181]
}

# Trim the HorsePower array to match the length of TotalEngineCylinders.
# NOTE(review): HorsePower holds more entries than TotalEngineCylinders, so the
# surplus trailing values are dropped here. This assumes the extra values all
# sit at the end and that every remaining pair is row-aligned -- confirm against
# the original data source, because a misalignment would invalidate the test.
data['HorsePower'] = data['HorsePower'][:len(data['TotalEngineCylinders'])]

# Convert dictionary to DataFrame
df = pd.DataFrame(data)

# Step 1: State the null and alternate hypothesis
# Null Hypothesis (H0): There is no difference in performance (HorsePower) among cars with different number of cylinders.
# Alternative Hypothesis (H1): There is a difference in performance (HorsePower) among cars with different number of cylinders.

# Step 2: Select the level of significance
alpha = 0.05

# Step 3: Determine the test statistic
# One-way ANOVA on mean HorsePower across cylinder counts. The groups are
# derived from the data (sorted by cylinder count) rather than hard-coded, so
# every cylinder count present in the frame takes part in the test.
# Note: the 3-cylinder group contains a single car, so it contributes nothing
# to the within-group variance estimate.
horsepower_by_cylinders = [
    group.to_numpy()
    for _, group in df.groupby('TotalEngineCylinders')['HorsePower']
]
f_stat, p_value = stats.f_oneway(*horsepower_by_cylinders)

# Step 4: Formulate the decision rule, and
# Step 6: Interpret the result.
# Both depend on the same comparison, so they are decided together.
if p_value < alpha:
    decision = "Reject null hypothesis (H0)"
    interpretation = "There is sufficient evidence to reject the null hypothesis. There is a difference in performance (HorsePower) among cars with different number of cylinders."
else:
    decision = "Fail to reject null hypothesis (H0)"
    # Failing to reject H0 does not show that H0 is true; it only means the
    # sample gives no significant evidence of a difference at this alpha.
    interpretation = "There is not enough evidence to reject the null hypothesis. A difference in performance (HorsePower) among cars with different number of cylinders cannot be concluded from this sample."

# Step 5: Make the decision regarding H0
print(f"Test Statistic: {f_stat}")
print(f"P-value: {p_value}")
print(f"Decision: {decision}")

# Step 6 (output): report the interpretation chosen above
print(interpretation)


Test Statistic: 3.2643011472911376
P-value: 0.024503054690601045
Decision: Reject null hypothesis (H0)
There is sufficient evidence to reject the null hypothesis. There is a difference in performance (HorsePower) among cars with different number of cylinders.


In [8]:
import pandas as pd

# Significance level used for hypothesis testing
alpha = 0.05

# Raw observations: cylinder count and horsepower as parallel lists, one entry per car
data = {
    'TotalEngineCylinders': [4, 4, 4, 6, 4, 4, 6, 4, 6, 4, 6, 8, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 6, 3, 4, 4, 8, 4, 4, 6, 4, 4, 6, 4, 6, 6, 4, 4, 4, 4, 4, 4, 4, 6, 6, 4, 4, 6, 4, 6, 4, 4, 4, 4, 4, 6, 4, 4, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 6, 8, 4, 6, 4, 4, 6, 4, 6, 8, 4, 6, 4, 4, 6, 4, 4, 6, 6, 6, 4, 4, 6, 8, 4, 6, 4, 4, 4],
    'HorsePower': [119, 174, 168, 301, 192, 201, 335, 255, 335, 255, 362, 463, 147, 174, 147, 184, 184, 261, 335, 188, 221, 255, 228, 248, 335, 123, 160, 187, 450, 247, 247, 296, 184, 261, 335, 340, 252, 450, 330, 150, 250, 320, 187, 250, 320, 147, 181, 235, 201, 290, 147, 185, 365, 190, 141, 280, 400, 271, 180, 295, 285, 186, 227, 148, 187, 227, 138, 169, 115, 260, 284, 170, 147, 180, 120, 147, 120, 368, 382, 106, 203, 381, 121, 182, 120, 208, 301, 134, 302, 416, 241, 181, 311, 300, 400, 268, 295, 400, 420, 177, 187, 316, 316, 316, 316, 316, 252, 375, 420, 152, 78, 148, 179, 181]
}

# HorsePower is the longer list: cut off its surplus tail so both columns
# end up with the same number of entries as TotalEngineCylinders.
del data['HorsePower'][len(data['TotalEngineCylinders']):]

# Build the two-column DataFrame from the equal-length lists
df = pd.DataFrame.from_dict(data)

# Show the first five rows as plain text
print(df.head(5))


   TotalEngineCylinders  HorsePower
0                     4         119
1                     4         174
2                     4         168
3                     6         301
4                     4         192
