In [1]:
import pandas as pd
from scipy import stats as st


In [2]:
def p_value_reader(p_value,alpha):
    """Print the hypothesis-test decision for a p-value.

    Parameters
    ----------
    p_value : the p-value produced by a statistical test.
    alpha : the significance level to compare against.

    Prints "Reject the null hypothesis" when ``p_value`` is strictly below
    ``alpha``; otherwise prints "Fail to reject null hypothesis". Nothing is
    returned.
    """
    print("Reject the null hypothesis" if p_value<alpha
          else "Fail to reject null hypothesis")

In [3]:
# Load the chocolate weight data; the path is relative to the working directory.
chocolate_weight = pd.read_csv(filepath_or_buffer="chocolate_weight.csv")

In [4]:
# Preview the first five rows of the weight data.
chocolate_weight.head(n=5)

Unnamed: 0,weight
0,101.5
1,98.3
2,99.7
3,100.2
4,102.1


In [5]:
# One-sample t-test of the mean of the 'weight' column against a reference
# value of 100 (scipy's default alternative, i.e. two-sided).
t_value, p_value = st.ttest_1samp(chocolate_weight['weight'], popmean=100)

In [9]:
# Report the decision at the 5% significance level. `p_value` is reassigned by
# every later test, so this reads whichever test ran most recently.
p_value_reader(p_value, alpha=0.05)

Fail to reject null hypothesis


In [10]:
# Load the before/after thickness measurements; path is relative to the working directory.
chocolate_thickness = pd.read_csv(filepath_or_buffer="chocolate_thickness.csv")

In [12]:
# Preview the first five rows of the thickness data.
chocolate_thickness.head(n=5)

Unnamed: 0,before,after
0,9.4,9.6
1,8.8,8.9
2,10.1,10.3
3,9.3,9.2
4,9.9,10.0


In [14]:
# Column-wise means, to eyeball the direction of the before/after difference.
chocolate_thickness.mean(axis=0)

before    9.470000
after     9.526667
dtype: float64

In [15]:
# Paired t-test on the 'after' and 'before' columns. The one-sided
# alternative 'greater' tests whether the mean of (after - before) is positive.
t_value, p_value = st.ttest_rel(
    chocolate_thickness['after'],
    chocolate_thickness['before'],
    alternative='greater',
)

In [16]:
# Report the decision at the 10% significance level for the most recently run test.
p_value_reader(p_value, alpha=0.1)

Reject the null hypothesis


In [17]:
# Load the sweetness measurements; path is relative to the working directory.
chocolate_sweetness = pd.read_csv(filepath_or_buffer="chocolate_sweetness.csv")

In [18]:
# Preview the first five rows of the sweetness data.
chocolate_sweetness.head(n=5)

Unnamed: 0,regular_sugar,sugar_substitute
0,35.4,34.0
1,33.8,32.5
2,37.1,35.8
3,36.3,35.2
4,34.9,34.3


In [19]:
# Column-wise means of the two sweetness columns.
chocolate_sweetness.mean(axis=0)

regular_sugar       35.02
sugar_substitute    34.02
dtype: float64

In [20]:
# Levene's test for equality of variances between the two sweetness columns;
# the outcome indicates which two-sample t-test variant to run next.
levene_stat, levene_p_value = st.levene(
    chocolate_sweetness['sugar_substitute'],
    chocolate_sweetness['regular_sugar'],
)
if levene_p_value < 0.05:
    print("Reject the null hypothesis. Variances are unequal.",
          "Perform Welch's Test",
          sep="\n")
else:
    print("Fail to reject the null hypothesis. Variances can be assumed equal.",
          "Perform T-Test for 2 samples",
          sep="\n")

Fail to reject the null hypothesis. Variances can be assumed equal.
Perform T-Test for 2 samples


In [21]:
# Independent two-sample, two-sided t-test comparing the 'sugar_substitute'
# and 'regular_sugar' columns.
# The variance assumption follows the Levene test run in the previous cell
# instead of being hard-coded: pooled-variance Student's t-test when equal
# variances were not rejected at 0.05, Welch's t-test (equal_var=False)
# otherwise. The comparison mirrors that cell's `levene_p_value < 0.05` check.
t_statistic, p_value = st.ttest_ind(
    chocolate_sweetness['sugar_substitute'],
    chocolate_sweetness['regular_sugar'],
    equal_var=not (levene_p_value < 0.05),
    alternative='two-sided',
)

# Report the decision at the 5% significance level.
p_value_reader(p_value, 0.05)

Reject the null hypothesis


In [22]:
# Load the flavour preference data (path relative to the working directory)
# and preview the first five rows.
df4 = pd.read_csv(filepath_or_buffer="chocolate_preferences.csv")
df4.head(n=5)

Unnamed: 0,Preference
0,Spicy Cinnamon
1,Cool Mint
2,Spicy Cinnamon
3,Spicy Cinnamon
4,Cool Mint


In [23]:
# Chi-square goodness-of-fit test of the null hypothesis that every observed
# preference category is equally popular.
observed_frequencies = df4['Preference'].value_counts()
if len(observed_frequencies) < 2:
    raise ValueError(
        "Goodness-of-fit test needs at least two observed preference categories"
    )
# value_counts() drops missing answers by default, so the expected counts are
# sized from the observed total (not len(df4)) to keep both sums equal, and
# from the observed number of categories rather than a hard-coded 2.
expected_frequencies = (
    [observed_frequencies.sum() / len(observed_frequencies)]
    * len(observed_frequencies)
)
chi_square_stat, p_value = st.chisquare(observed_frequencies, expected_frequencies)

In [24]:
# Report the decision at the 1% significance level for the most recently run test.
p_value_reader(p_value, alpha=0.01)

Fail to reject null hypothesis


In [25]:
# Load the monthly production data (path relative to the working directory)
# and preview the first five rows.
df5 = pd.read_csv(filepath_or_buffer="chocolate_production.csv")
df5.head(n=5)

Unnamed: 0,Month,Chocolate_A,Chocolate_B,Chocolate_C
0,1,100,110,120
1,2,105,115,125
2,3,95,105,115
3,4,90,100,110
4,5,105,115,125


In [26]:
# Column-wise means of every column except the first one.
df5.iloc[:, 1:].mean(axis=0)

Chocolate_A     97.5
Chocolate_B    107.5
Chocolate_C    117.5
dtype: float64

In [27]:
# One-way ANOVA across the three chocolate production columns.
F_statistic, p_value = st.f_oneway(
    *(df5[column] for column in ('Chocolate_A', 'Chocolate_B', 'Chocolate_C'))
)
# Report the decision at the 5% significance level.
p_value_reader(p_value, alpha=0.05)

Reject the null hypothesis


In [28]:
from statsmodels.multivariate.manova import MANOVA

In [29]:
# Load the per-region sales and satisfaction data (path relative to the
# working directory) and preview the first five rows.
df6 = pd.read_csv(filepath_or_buffer='chocolate_satisfaction.csv')
df6.head(n=5)

Unnamed: 0,Region,Chocolate_A_Sales,Chocolate_B_Sales,Chocolate_C_Sales,Chocolate_A_Satisfaction,Chocolate_B_Satisfaction,Chocolate_C_Satisfaction
0,1,100,110,120,90,85,95
1,2,105,115,125,95,88,98
2,3,95,105,115,88,83,93
3,4,90,100,110,80,75,85
4,5,105,115,125,92,87,97


In [30]:
# MANOVA with the six sales/satisfaction columns as dependent variables and
# Region as the single predictor. Adjacent string literals are concatenated by
# Python, so the formula passed to statsmodels is one unbroken string.
# NOTE(review): Region enters the formula as a plain numeric column — confirm
# that treating it as numeric rather than categorical is intended.
manova = MANOVA.from_formula(
    'Chocolate_A_Sales + Chocolate_B_Sales + Chocolate_C_Sales + '
    'Chocolate_A_Satisfaction + Chocolate_B_Satisfaction + Chocolate_C_Satisfaction'
    ' ~ Region',
    data=df6,
)
result = manova.mv_test()
print(result)

                             Multivariate linear model
                                                                                    
------------------------------------------------------------------------------------
       Intercept               Value        Num DF Den DF       F Value       Pr > F
------------------------------------------------------------------------------------
          Wilks' lambda              0.0000 5.0000 6.0000 57800209121331.3594 0.0000
         Pillai's trace              1.0000 5.0000 6.0000 57800209121331.3672 0.0000
 Hotelling-Lawley trace 48166840934442.8047 5.0000 6.0000 57800209121331.3672 0.0000
    Roy's greatest root 48166840934442.8047 5.0000 6.0000 57800209121331.3672 0.0000
------------------------------------------------------------------------------------
                                                                                    
-----------------------------------------------------------------------------------------
     

In [31]:
# Pull the Region effect's p-value for Pillai's trace out of the MANOVA
# statistics table (second row, last column of the printed layout), selected
# by label rather than by position.
p_value = result.results['Region']['stat'].loc["Pillai's trace", "Pr > F"]
# Report the decision at the 5% significance level.
p_value_reader(p_value, alpha=0.05)

Fail to reject null hypothesis
