In [2]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

data = pd.DataFrame({'group': ['A', 'A', 'B', 'B', 'C', 'C'],
                     'dependent_var': [10, 12, 8, 9, 15, 13]})

# Fit the one-way ANOVA model
model = ols('dependent_var ~ group', data=data).fit()

# Calculate the total sum of squares (SST)
SST = sm.stats.anova_lm(model)['sum_sq'][0]

# Calculate the explained sum of squares (SSE)
SSE = sm.stats.anova_lm(model)['sum_sq'][1]

# Calculate the residual sum of squares (SSR)
SSR = SST - SSE

# Print the results
print("SST:", SST)
print("SSE:", SSE)
print("SSR:", SSR)


SST: 30.333333333333336
SSE: 4.5
SSR: 25.833333333333336


In [3]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols


import pandas as pd

data = pd.DataFrame({
    'factor1': ['A', 'A', 'B', 'B', 'A', 'A', 'B', 'B'],
    'factor2': ['X', 'Y', 'X', 'Y', 'Y', 'X', 'Y', 'X'],
    'dependent_var': [10, 12, 8, 9, 15, 13, 11, 14]
})

# Fit the two-way ANOVA model

model = ols('dependent_var ~ factor1 + factor2 + factor1 * factor2', data=data).fit()

# Calculate the main effects
main_effect_factor1 = sm.stats.anova_lm(model)['sum_sq'][1]
main_effect_factor2 = sm.stats.anova_lm(model)['sum_sq'][2]

# Calculate the interaction effect
interaction_effect = sm.stats.anova_lm(model)['sum_sq'][3]

print("Main Effect of Factor 1:", main_effect_factor1)
print("Main Effect of Factor 2:", main_effect_factor2)
print("Interaction Effect:", interaction_effect)


Main Effect of Factor 1: 0.5000000000000012
Main Effect of Factor 2: 4.500000000000016
Interaction Effect: 29.0


In [None]:
Q9. A researcher wants to compare the mean weight loss of three diets: A, B, and C. They collect data from 
50 participants who were randomly assigned to one of the diets. Conduct a one-way ANOVA using Python 
to determine if there are any significant differences between the mean weight loss of the three diets. 
Report the F-statistic and p-value, and interpret the results.

In [1]:
import pandas as pd
import scipy.stats as stats

data = {
    'Diet': ['A'] * 17 + ['B'] * 18 + ['C'] * 15,
    'WeightLoss': [3.2, 2.8, 4.1, 3.9, 2.6, 3.3, 2.4, 3.7, 3.1, 2.9, 3.5, 2.3, 3.6, 3.4, 2.7, 3.8, 3.0, 4.0, 2.5, 3.2, 2.7, 3.1, 2.8, 4.2, 3.3, 2.9, 3.7, 3.2, 2.6, 3.4, 2.4, 3.8, 3.1, 2.9, 3.6, 3.0, 2.3, 3.5, 2.7, 3.9, 3.1, 2.8, 4.0, 3.2, 2.6, 3.3, 2.4, 3.7, 3.1, 2.9]
}
df = pd.DataFrame(data)


f_statistic, p_value = stats.f_oneway(
    df[df['Diet'] == 'A']['WeightLoss'],
    df[df['Diet'] == 'B']['WeightLoss'],
    df[df['Diet'] == 'C']['WeightLoss']
)

print("F-statistic:", f_statistic)
print("p-value:", p_value)


F-statistic: 0.16123919778248813
p-value: 0.8515572350367615


In [2]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

#
data = {
    'Software': ['A']*10 + ['B']*10 + ['C']*10,
    'Experience': ['Novice']*15 + ['Experienced']*15,
    'CompletionTime': [15, 16, 14, 12, 15, 17, 13, 14, 16, 15, 18, 20, 16, 14, 13, 
                       16, 17, 19, 18, 17, 19, 20, 18, 17, 15, 13, 16, 14, 15, 13]
}
df = pd.DataFrame(data)

# Perform two-way ANOVA
model = ols('CompletionTime ~ Software + Experience + Software:Experience', data=df).fit()
anova_table = sm.stats.anova_lm(model)


print(anova_table)


                       df      sum_sq    mean_sq         F    PR(>F)
Software              2.0   22.466667  11.233333  2.605412  0.093043
Experience            1.0    3.600000   3.600000  0.834969  0.369236
Software:Experience   2.0    6.008537   3.004268  0.696797  0.507243
Residual             26.0  112.100000   4.311538       NaN       NaN


In [3]:
import numpy as np
import scipy.stats as stats

control_scores = np.array([82, 75, 88, 79, 92, 85, 78, 86, 80, 84, 89, 81, 76, 83, 87, 79, 82, 84, 88, 80,
                           85, 77, 82, 86, 81, 79, 83, 87, 85, 88, 79, 84, 81, 86, 83, 90, 85, 78, 82,
                           87, 83, 81, 84, 88, 80, 86, 82, 79, 85, 88, 83, 90])
experimental_scores = np.array([88, 79, 92, 85, 78, 86, 80, 84, 89, 81, 76, 83, 87, 79, 82, 84, 88, 80, 85, 77,
                                82, 86, 81, 79, 83, 87, 85, 88, 79, 84, 81, 86, 83, 90, 85, 78, 82, 87, 83,
                                81, 84, 88, 80, 86, 82, 79, 85, 88, 83, 90, 83, 82, 84])

# Perform two-sample t-test
t_statistic, p_value = stats.ttest_ind(control_scores, experimental_scores)

# Report the results
print("t-statistic:", t_statistic)
print("p-value:", p_value)


t-statistic: -0.22129549541485583
p-value: 0.8253004765198882


In [7]:
import pandas as pd
from statsmodels.stats.anova import AnovaRM

# Create a DataFrame with daily sales data
data = {
    'Day': range(30),
    'Store_A': [100, 95, 102, 110, 108, 103, 105, 100, 98, 102, 104, 98, 100, 102, 105, 98, 103, 105, 102, 100,
                99, 98, 102, 100, 96, 103, 105, 108, 104, 106],
    'Store_B': [98, 105, 110, 108, 104, 106, 103, 100, 98, 95, 103, 108, 105, 103, 102, 105, 108, 100, 102, 98,
                99, 100, 98, 100, 102, 105, 103, 100, 96, 104],
    'Store_C': [100, 98, 105, 102, 105, 108, 106, 103, 102, 100, 102, 105, 103, 102, 100, 98, 105, 100, 102, 99,
                98, 102, 100, 96, 100, 102, 104, 105, 103, 100]
}
df = pd.DataFrame(data)

# Reshape the data into long format
df_long = pd.melt(df, id_vars='Day', value_vars=['Store_A', 'Store_B', 'Store_C'],
                  var_name='Store', value_name='Sales')

# Perform repeated measures ANOVA
rm_anova = AnovaRM(df_long, 'Sales', 'Day', within=['Store'])

# Fit the ANOVA model
results = rm_anova.fit()

# Report the results
print(results.summary())


               Anova
      F Value Num DF  Den DF Pr > F
-----------------------------------
Store  0.1722 2.0000 58.0000 0.8422

