#### 1. Marketing Campaign Effectiveness


In [4]:
#Import Libraries
import pandas as pd
import numpy as np
from scipy.stats import f_oneway


In [5]:
#Sample Data
campaign_A = [12, 15, 14, 10, 13, 15, 11, 14, 13, 16]
campaign_B = [18, 17, 16, 15, 20, 19, 18, 16, 17, 19]
campaign_C = [10, 9, 11, 10, 12, 9, 11, 8, 10, 9]


In [6]:
#Define the null and alt hypothesis
alternative_hypothesis = "There is a statistically significant difference in the mean conversion rates among the campaigns and we can conclude that at least one campaign performed significantly differently from the others."
null_hypothesis = "There is no significant difference in the mean conversion rates among the campaigns and we can conclude that no campaign performed significantly differently from the others."

In [7]:
# Significance level
alpha = 0.05

In [8]:
# Perform one-way ANOVA
f_stat, p_val = f_oneway(campaign_A, campaign_B, campaign_C)


In [9]:
# Display results
print(f"F-statistic: {f_stat:.2f}")
print(f"p-value: {p_val:.10f}")

F-statistic: 57.97
p-value: 0.0000000002


In [10]:
if p_val<alpha:
   print(f"Based on the current evidence and testing, {alternative_hypothesis}")
else:
    print(f"Based on the current evidence and testing, {null_hypothesis}")

Based on the current evidence and testing, There is a statistically significant difference in the mean conversion rates among the campaigns and we can conclude that at least one campaign performed significantly differently from the others.


In [11]:
#Question:

#Is there a statistically significant difference in conversion rates among the three campaigns? (Use α = 0.05)

#Answer:

#Based on the current evidence and testing, There is a statistically significant difference in the mean conversion rates among the campaigns and we can conclude that at least one campaign performed significantly differently from the others.


#### 2. Fertilizer Type vs Crop Yield


In [12]:
#Import Libraries
#import pandas as pd
#from scipy.stats import f_oneway

In [13]:
#Sample Data
fertilizer_A = [25, 27, 26, 30, 29, 28, 30, 27]
fertilizer_B = [32, 35, 34, 33, 36, 34, 35, 32]
fertilizer_C = [22, 20, 24, 23, 25, 21, 22, 23]

In [14]:
#Define the null and alt hypothesis
alternative_hypothesis1 = "There is a significant difference in the yield among the fertilizers and we can conclude that at least one fertilizer type performed significantly differently from the others."
null_hypothesis1 = "There is no significant difference in the yield among the fertilizers and we can conclude that no fertilizer type performed significantly differently from the others."

In [15]:
# Perform one-way ANOVA
f_stat1, p_val1 = f_oneway(fertilizer_A, fertilizer_B, fertilizer_C)

In [16]:
# Display results
print(f"F-statistic: {f_stat1:.2f}")
print(f"P-value: {p_val1:.14f}")

F-statistic: 96.59
P-value: 0.00000000002572


In [17]:
if p_val<alpha:
   print(f"Based on the current evidence and testing, {alternative_hypothesis}")
else:
    print(f"Based on the current evidence and testing, {null_hypothesis}")

Based on the current evidence and testing, There is a statistically significant difference in the mean conversion rates among the campaigns and we can conclude that at least one campaign performed significantly differently from the others.


In [18]:
#Performing the comparison of fertilizers
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Combining data into a single array
yields = fertilizer_A + fertilizer_B + fertilizer_C
groups = (['A'] * 8) + (['B'] * 8) + (['C'] * 8)
print(groups)

# Tukey HSD test
tukey_test = pairwise_tukeyhsd(yields, groups, alpha)
print(tukey_test)

['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C']
Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj  lower    upper  reject
----------------------------------------------------
     A      B    6.125   0.0   4.0601  8.1899   True
     A      C    -5.25   0.0  -7.3149 -3.1851   True
     B      C  -11.375   0.0 -13.4399 -9.3101   True
----------------------------------------------------


In [19]:
#Question:

#Which fertilizer(s) seem to produce significantly different yields?

#Answer:

#Fertilizer B produces significantly higher crop output than A and C


#### 3. Teaching Method vs Student Performance


In [20]:
#Import Libraries
#import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [21]:
#Sample Data
data = {
    'score': [78, 85, 80, 90, 88, 82, 84, 86, 92],
    'method': ['Lecture', 'Lecture', 'Lecture',
               'Discussion', 'Discussion', 'Discussion',
               'Project', 'Project', 'Project']
}

#Converting sample into dataframe
df = pd.DataFrame(data)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   score   9 non-null      int64 
 1   method  9 non-null      object
dtypes: int64(1), object(1)
memory usage: 276.0+ bytes


In [22]:
#Performing ANOVA
formula = 'score ~ C(method)'
model = ols(formula, data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

              sum_sq   df         F    PR(>F)
C(method)  72.666667  2.0  2.286713  0.182729
Residual   95.333333  6.0       NaN       NaN


In [23]:
#Question:

#Does the teaching method have a statistically significant effect on student performance?

#Answer:

#The teaching method does not have a statistically significant  effect on student performance


####  4. Machine Type vs Production Quality

In [24]:
#Import Libraries
#import pandas as pd
#from scipy.stats import f_oneway

In [25]:
#Sample Data
machine_A = [88, 85, 87, 90, 89, 88, 86, 87]
machine_B = [82, 84, 83, 81, 80, 83, 82, 84]
machine_C = [91, 90, 92, 93, 94, 92, 91, 93]

In [26]:
#Define the null and alt hypothesis
alternative_hypothesis3 = "There is a statistically significant difference in the product quality among the machines and we can conclude that at least one machine produces significantly different quality of products from the others."
null_hypothesis3 = "There is no significant difference in the product quality among the machines and we can conclude that no machine produces significantly different quality of products from the others."

In [27]:
# Perform one-way ANOVA
f_stat, p_val = f_oneway(machine_A, machine_B, machine_C)

In [28]:
# Display results
print(f"F-statistic: {f_stat:.2f}")
print(f"p-value: {p_val:.12f}")

F-statistic: 88.81
p-value: 0.000000000057


In [29]:
if p_val<alpha:
   print(f"Based on the current evidence and testing, {alternative_hypothesis}")
else:
    print(f"Based on the current evidence and testing, {null_hypothesis}")

Based on the current evidence and testing, There is a statistically significant difference in the mean conversion rates among the campaigns and we can conclude that at least one campaign performed significantly differently from the others.


In [30]:
#Performing the comparison of machines
#from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Combining data into a single array
produce = machine_A+ machine_B + machine_C
bands = (['A1'] * 8) + (['B1'] * 8) + (['C1'] * 8)
print(bands)

# Tukey HSD test
tukey_test = pairwise_tukeyhsd(produce, bands, alpha)
print(tukey_test)

['A1', 'A1', 'A1', 'A1', 'A1', 'A1', 'A1', 'A1', 'B1', 'B1', 'B1', 'B1', 'B1', 'B1', 'B1', 'B1', 'C1', 'C1', 'C1', 'C1', 'C1', 'C1', 'C1', 'C1']
Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj  lower   upper  reject
---------------------------------------------------
    A1     B1   -5.125   0.0 -6.9467 -3.3033   True
    A1     C1      4.5   0.0  2.6783  6.3217   True
    B1     C1    9.625   0.0  7.8033 11.4467   True
---------------------------------------------------


In [31]:
#Question:

#Which machine produces items with significantly different average quality?

#Answer:

#Machine A produces items with significantly different average quality.

####  5. Diet Plan vs Weight Loss

In [32]:
#Import Libraries
#import pandas as pd
#import statsmodels.api as sm
#from statsmodels.formula.api import ols

In [33]:
#Sample Data
data1 = {
    'diet': ['Keto']*6 + ['Paleo']*6 + ['Vegan']*6 + ['Mediterranean']*6,
    'weight_loss': [6, 5, 7, 8, 6, 7, 5, 6, 4, 5, 6, 5, 3, 4, 2, 4, 3, 2, 5, 6, 7, 6, 8, 7]
}

#Converting sample into dataframe
ds = pd.DataFrame(data1)
print(ds)

             diet  weight_loss
0            Keto            6
1            Keto            5
2            Keto            7
3            Keto            8
4            Keto            6
5            Keto            7
6           Paleo            5
7           Paleo            6
8           Paleo            4
9           Paleo            5
10          Paleo            6
11          Paleo            5
12          Vegan            3
13          Vegan            4
14          Vegan            2
15          Vegan            4
16          Vegan            3
17          Vegan            2
18  Mediterranean            5
19  Mediterranean            6
20  Mediterranean            7
21  Mediterranean            6
22  Mediterranean            8
23  Mediterranean            7


In [34]:
#Performing ANOVA
formula1 = 'weight_loss ~ C(diet)'
model1 = ols(formula1, data=ds).fit()
anova_diet_table = sm.stats.anova_lm(model1, typ=2)
print(anova_diet_table)

             sum_sq    df          F    PR(>F)
C(diet)   49.125000   3.0  18.364486  0.000006
Residual  17.833333  20.0        NaN       NaN


In [35]:
#Performing the comparison of diets
mean_scores = ds.groupby('diet')['weight_loss'].mean()

print(mean_scores)


diet
Keto             6.500000
Mediterranean    6.500000
Paleo            5.166667
Vegan            3.000000
Name: weight_loss, dtype: float64


In [36]:
#Question:

#Is there a significant difference in mean weight loss among the four diets?

#Answer:

#There is a significant difference in mean weight loss among the four diets