# Import Libraries

In [120]:
import pandas as pd
import numpy as np
import statsmodels.stats.multicomp as multi 
import statsmodels.formula.api as smf


# Load data

In [129]:
# Read the ramen reviews; the "Review #" column becomes the row index.
df = pd.read_csv(
    "ramen-ratings.csv",
    index_col="Review #",
)

In [130]:
# Preview the first five reviews (five is also the default row count).
df.head(5)

Unnamed: 0_level_0,Brand,Variety,Style,Country,Stars,Top Ten
Review #,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2580,New Touch,T's Restaurant Tantanmen,Cup,Japan,3.75,
2579,Just Way,Noodles Spicy Hot Sesame Spicy Hot Sesame Guan...,Pack,Taiwan,1.0,
2578,Nissin,Cup Noodles Chicken Vegetable,Cup,USA,2.25,
2577,Wei Lih,GGE Ramen Snack Tomato Flavor,Pack,Taiwan,2.75,
2576,Ching's Secret,Singapore Curry,Pack,India,3.75,


# Prepare data

In [131]:
# Keep only the reviews whose Stars value is not the text label "Unrated".
df = df.loc[df["Stars"].ne("Unrated")]

In [192]:
# Continent assigned to each label that can appear in the "Country" column.
# Labels are matched exactly as written, so spelling variants such as
# "USA"/"United States" and "Netherlands"/"Holland" are listed separately.
_CONTINENT_COUNTRIES = {
    "Asia": ['Japan', 'Taiwan', 'India', 'South Korea', 'Singapore', 'Thailand', 'Hong Kong', 'Vietnam', 'Malaysia',
             'Indonesia', 'China', 'Pakistan', 'Bangladesh', 'Nepal', 'Myanmar', 'Cambodia', 'Philippines', 'Sarawak'],
    "Oceania": ['Fiji', 'Australia'],
    "Europe": ['Hungary', 'Germany', 'UK', 'Netherlands', 'Finland', 'Sweden', 'Estonia', 'Holland', 'Poland'],
    "Africa": ['Ghana', 'Nigeria'],
    "America": ['USA', 'Mexico', 'Canada', 'Brazil', 'United States', 'Colombia'],
    "Middle_East": ['Dubai'],
}

# Flattened once to country -> continent so each row costs a single dict lookup
# instead of rebuilding and scanning six lists.
_COUNTRY_TO_CONTINENT = {
    country: continent_name
    for continent_name, countries in _CONTINENT_COUNTRIES.items()
    for country in countries
}


def continent(df):
    """Return a copy of one review row with a 'Continent' entry added.

    Despite its name, ``df`` is a single row (a ``pandas.Series`` or any
    dict-like object with a ``copy()`` method), as passed by
    ``DataFrame.apply(continent, axis=1)``. The parameter name is kept so
    existing callers continue to work.

    Parameters
    ----------
    df : row-like
        Must contain a 'Country' entry.

    Returns
    -------
    row-like
        A copy of ``df`` whose 'Continent' entry is one of "Asia", "Oceania",
        "Europe", "Africa", "America" or "Middle_East", or ``None`` when the
        country is not in the lookup table.

    Raises
    ------
    KeyError
        If ``df`` has no 'Country' entry.
    """
    # Work on a copy: pandas does not support functions that mutate the
    # object handed to them by DataFrame.apply.
    row = df.copy()
    # Always set the key (None for unknown countries) so every returned row
    # has the same set of entries.
    row['Continent'] = _COUNTRY_TO_CONTINENT.get(row['Country'])
    return row

# Tag every review row with its continent, then cast the Stars column to float.
df = df.apply(continent, axis="columns")
df = df.astype({'Stars': 'float'})

# Apply ANOVA

In [195]:
# One-way ANOVA written as an OLS regression: Stars explained by the
# categorical factor Continent. The F-test in the summary is the ANOVA test.
model1 = smf.ols('Stars ~ C(Continent)', data=df)
results1 = model1.fit()
print(results1.summary())

                            OLS Regression Results                            
Dep. Variable:                  Stars   R-squared:                       0.035
Model:                            OLS   Adj. R-squared:                  0.033
Method:                 Least Squares   F-statistic:                     18.59
Date:                Sun, 23 Feb 2020   Prob (F-statistic):           3.47e-18
Time:                        11:43:34   Log-Likelihood:                -3649.5
No. Observations:                2577   AIC:                             7311.
Df Residuals:                    2571   BIC:                             7346.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                                  coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

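The p-value of the F-test, Prob (F-statistic) = 3.47e-18, is far below 0.05, so we reject the null hypothesis H0 that all continents have the same mean rating. There is therefore a relationship between the continent and the ramen rating: at least one continent's mean differs from the others.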

In [196]:
# Mean rating per continent. Stars is selected explicitly because the frame
# also holds text columns; recent pandas versions raise a TypeError when asked
# to average those instead of silently dropping them.
m2 = df.groupby('Continent')[['Stars']].mean()
print(m2)

                Stars
Continent            
Africa       2.833333
America      3.359414
Asia         3.753050
Europe       3.169485
Middle_East  3.583333
Oceania      3.251923


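Because there are more than two categories, the ANOVA only tells us that at least one mean differs, not which ones. To find the continent pairs that differ, we need a post hoc test (here, Tukey's HSD).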

In [197]:
# Tukey HSD pairwise comparison of mean Stars between continents,
# at the default family-wise error rate of 0.05.
mc1 = multi.MultiComparison(data=df['Stars'], groups=df['Continent'])
res1 = mc1.tukeyhsd(alpha=0.05)
print(res1.summary())

     Multiple Comparison of Means - Tukey HSD, FWER=0.05      
   group1      group2   meandiff p-adj   lower   upper  reject
--------------------------------------------------------------
     Africa     America   0.5261    0.9  -1.124  2.1761  False
     Africa        Asia   0.9197 0.5882 -0.7254  2.5649  False
     Africa      Europe   0.3362    0.9 -1.3258  1.9981  False
     Africa Middle_East     0.75    0.9 -1.5748  3.0748  False
     Africa     Oceania   0.4186    0.9 -1.3176  2.1548  False
    America        Asia   0.3936  0.001  0.2379  0.5494   True
    America      Europe  -0.1899 0.3933 -0.4725  0.0926  False
    America Middle_East   0.2239    0.9 -1.4261   1.874  False
    America     Oceania  -0.1075    0.9 -0.6837  0.4687  False
       Asia      Europe  -0.5836  0.001 -0.8359 -0.3313   True
       Asia Middle_East  -0.1697    0.9 -1.8149  1.4754  False
       Asia     Oceania  -0.5011 0.1124 -1.0631  0.0609  False
     Europe Middle_East   0.4138    0.9 -1.2481  2.0758

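Only the America vs Asia and Asia vs Europe comparisons have reject = True. This means the difference in mean rating is statistically significant for those two pairs (Asia is rated higher than both America and Europe), while the other comparisons show no significant difference.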