In [77]:
import pandas as pd
import pycountry

class DataLoader:
    """
    Load the ramen-ratings CSV and answer ranking / counting questions about it.

    The methods below read the columns 'Review #', 'Brand', 'Variety',
    'Style', 'Country' and 'Stars' from ``self.data``.
    """
    # Flavour keywords searched for in the 'Variety' text. The zero values make
    # this a counter template that countFlavour copies before counting.
    flavours = {
        'beef': 0,
        'chicken': 0,
        'mushroom': 0,
        'laksa': 0,
        'crab': 0,
        'chilli': 0,
        'pepper': 0,
        'tom yam': 0,
        'seafood': 0,
        'spicy': 0,
        'curry': 0,
        'kyushu white': 0,
        'thai': 0,
        'china': 0,
        'japan': 0,
        'tokyo': 0,
        'cream': 0,
        # NOTE(review): possibly meant 'sriracha' - confirm before renaming,
        # because callers receive this key in the returned dictionary.
        'sriacha': 0,
        'lime': 0,
        'hot': 0,
        'shrimp': 0,
        'tonkotsu': 0,
        'pork': 0,
        'lamb': 0,
        'oriental': 0,
        'tomato': 0
    }

    def __init__(self, filePath):
        """
        init and filter invalid data
        :param filePath: the path of the csv file
        """
        self.filePath = filePath
        self.data = pd.read_csv(filePath)
        # 'Unrated' entries become 0 so the whole column is numeric.
        self.data['Stars'] = self.data['Stars'].apply(lambda x: 0 if x=='Unrated' else float(x))

    def topReviewForEachItem(self):
        """
        All items sorted by review number, highest first
        :return: DataFrame
        """
        return self.data.sort_values(by='Review #', ascending=False)

    def topReviewForEachBrand(self):
        """
        Brands sorted by the sum of their 'Review #' values, highest first
        :return: DataFrame indexed by brand
        """
        totals = self.data.groupby('Brand').agg({'Review #': 'sum'})
        return totals.sort_values(by='Review #', ascending=False)

    def topStarForEachItem(self):
        """
        Top items sorted by stars
        :return: DataFrame
        """
        return self.data.sort_values(by='Stars', ascending=False)

    def topMeanStarsForEachBrand(self):
        """
        Brands sorted by their mean star rating, highest first
        :return: DataFrame indexed by brand
        """
        means = self.data.groupby('Brand').agg({'Stars': 'mean'})
        return means.sort_values(by='Stars', ascending=False)

    def topCountryForMeanStars(self):
        """
        Countries sorted by their mean star rating, highest first
        :return: DataFrame indexed by country
        """
        means = self.data.groupby('Country').agg({'Stars': 'mean'})
        return means.sort_values(by='Stars', ascending=False)

    def styleInCountry(self):
        """
        Record the number of each style consumed in each country
        dic[country][style] = a number
        :return: a dictionary
        """
        dic = {}
        for country, style in zip(self.data['Country'], self.data['Style']):
            # setdefault creates the per-country dict on first sight AND lets
            # that same row be counted; previously the first row of every
            # country was silently dropped.
            styles = dic.setdefault(country, {})
            styles[style] = styles.get(style, 0) + 1
        return dic

    def topStyleInCountry(self):
        """
        The most popular styles in each country
        :return: a dictionary mapping country to a list of (style, number)
        tuples; several tuples are returned when styles tie for first place
        """
        res = {}
        for country, styles in self.styleInCountry().items():
            theMax = max(styles.values(), default=0)
            res[country] = [(style, number) for style, number in styles.items()
                            if number == theMax]
        return res

    def countFlavour(self, brand=None, country=None):
        """
        count the number of each similar flavour
        :param brand: default is None, meaning count all the brands in.
        It can receive a brand (string) to specify a brand
        :param country: default is None, meaning count all the countries in.
        It can receive a country (string) to specify a country
        :return: dictionary with the same keys as DataLoader.flavours
        """
        flavours = dict(DataLoader.flavours)
        # Some keys span several words ('tom yam', 'kyushu white'); matching
        # word windows of every key length lets those be counted as well.
        lengths = sorted({len(key.split()) for key in flavours})

        rows = self.data
        if brand:
            rows = rows[rows['Brand'] == brand]
        if country:
            rows = rows[rows['Country'] == country]

        for variety in rows['Variety']:
            if not isinstance(variety, str):
                # Missing variety text (NaN / None) has nothing to count.
                continue
            words = [word.lower() for word in variety.split()]
            for size in lengths:
                for start in range(len(words) - size + 1):
                    phrase = ' '.join(words[start:start + size])
                    if phrase in flavours:
                        flavours[phrase] += 1
        return flavours

    def getTotalCount(self, country=None):
        """
        Total number of flavour keyword hits
        :param country: default is None, meaning count all the countries in.
        It can receive a country (string) to specify a country
        :return: int, the sum of every value returned by countFlavour
        """
        return sum(self.countFlavour(country=country).values())
        

# Build the shared loader (`test`) that the later cells query, then smoke-test
# it by printing the flavour counts for a single country.
path = './ramen-ratings.csv'
test = DataLoader(path)
print(test.countFlavour(country='Afghanistan'))

{'beef': 0, 'chicken': 0, 'mushroom': 0, 'laksa': 0, 'crab': 0, 'chilli': 0, 'pepper': 0, 'tom yam': 0, 'seafood': 0, 'spicy': 0, 'curry': 0, 'kyushu white': 0, 'thai': 0, 'china': 0, 'japan': 0, 'tokyo': 0, 'cream': 0, 'sriacha': 0, 'lime': 0, 'hot': 0, 'shrimp': 0, 'tonkotsu': 0, 'pork': 0, 'lamb': 0, 'oriental': 0, 'tomato': 0}


In [84]:
# Read the ratings CSV into a plain DataFrame for direct inspection.
data = pd.read_csv('./ramen-ratings.csv')


Unnamed: 0_level_0,Stars
Country,Unnamed: 1_level_1
Brazil,4.35
Sarawak,4.333333
Cambodia,4.2
Malaysia,4.127564
Singapore,4.126147
Indonesia,4.06746
Japan,3.981605
Myanmar,3.946429
Fiji,3.875
Hong Kong,3.801825


In [80]:
# Display the ratings frame as the cell output.
data

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
0,2580,New Touch,T's Restaurant Tantanmen,Cup,Japan,3.75,
1,2579,Just Way,Noodles Spicy Hot Sesame Spicy Hot Sesame Guan...,Pack,Taiwan,1,
2,2578,Nissin,Cup Noodles Chicken Vegetable,Cup,USA,2.25,
3,2577,Wei Lih,GGE Ramen Snack Tomato Flavor,Pack,Taiwan,2.75,
4,2576,Ching's Secret,Singapore Curry,Pack,India,3.75,
...,...,...,...,...,...,...,...
2575,5,Vifon,"Hu Tiu Nam Vang [""Phnom Penh"" style] Asian Sty...",Bowl,Vietnam,3.5,
2576,4,Wai Wai,Oriental Style Instant Noodles,Pack,Thailand,1,
2577,3,Wai Wai,Tom Yum Shrimp,Pack,Thailand,2,
2578,2,Wai Wai,Tom Yum Chili Flavor,Pack,Thailand,2,


In [81]:

# pycountry's official names that are spelled differently in the ratings data,
# mapped to the spelling the ratings data uses.
datasetNames = {
    'Viet Nam': 'Vietnam',
    'Taiwan, Province of China': 'Taiwan',
    'United States': 'USA',
    'Korea, Republic of': 'South Korea',
    'United Kingdom': 'UK',
    'Netherlands': 'Holland',
}

# ISO alpha-3 code -> country name, preferring the override when one exists.
codeToName = {}
for country in pycountry.countries:
    codeToName[country.alpha_3] = datasetNames.get(country.name, country.name)
    
def normalize_row(row):
    """
    Total flavour keyword count for the country of one row
    :param row: a row exposing the ISO alpha-3 code under 'iso_alpha'
    :return: result of test.getTotalCount for the country name that
    codeToName holds for that code
    """
    isoCode = row['iso_alpha']
    return test.getTotalCount(country=codeToName[isoCode])


In [82]:
import plotly.express as px
import numpy as np


# Take an explicit copy of the 2007 slice so the new column below is written
# to an independent frame instead of to the filtered result of query(), which
# is the pattern that triggers pandas' SettingWithCopyWarning.
gapminder = px.data.gapminder().query("year==2007").copy()

# One flavour-keyword total per row, computed by normalize_row from the
# row's 'iso_alpha' code.
gapminder['counts'] = gapminder.apply(normalize_row, axis=1)

fig = px.choropleth(gapminder, locations="iso_alpha",
                    color="counts",
                    hover_name="country", # column to add to hover information
                    color_continuous_scale=px.colors.sequential.Plasma)

fig.show()

In [103]:
# Country name -> mean star rating, in the ranking order returned by the loader.
countryToRating = {}
meanStars = test.topCountryForMeanStars()['Stars']
for countryLabel, stars in meanStars.items():
    countryToRating[countryLabel] = stars


{'Brazil': 4.35,
 'Sarawak': 4.333333333333333,
 'Cambodia': 4.2,
 'Malaysia': 4.1275641025641026,
 'Singapore': 4.126146788990826,
 'Indonesia': 4.067460317460317,
 'Japan': 3.981605113636364,
 'Myanmar': 3.9464285714285716,
 'Fiji': 3.875,
 'Hong Kong': 3.8018248175182485,
 'South Korea': 3.7660194174757278,
 'United States': 3.75,
 'Mexico': 3.73,
 'Bangladesh': 3.7142857142857144,
 'Taiwan': 3.665401785714286,
 'Germany': 3.638888888888889,
 'Poland': 3.625,
 'Hungary': 3.611111111111111,
 'Dubai': 3.5833333333333335,
 'Finland': 3.5833333333333335,
 'Holland': 3.5625,
 'Nepal': 3.5535714285714284,
 'Estonia': 3.5,
 'Ghana': 3.5,
 'USA': 3.457043343653251,
 'China': 3.4218934911242602,
 'India': 3.3951612903225805,
 'Thailand': 3.3848167539267022,
 'Philippines': 3.3297872340425534,
 'Colombia': 3.2916666666666665,
 'Sweden': 3.25,
 'Vietnam': 3.187962962962963,
 'Australia': 3.1386363636363637,
 'Pakistan': 3.0,
 'UK': 2.9971014492753625,
 'Netherlands': 2.4833333333333334,
 'Cana

In [106]:
def normalize_rowII(row):
    """
    Mean star rating for the country of one row
    :param row: a row exposing the ISO alpha-3 code under 'iso_alpha'
    :return: the countryToRating entry for the name that codeToName holds
    for that code, or 0 when that name has no entry
    """
    isoCode = row['iso_alpha']
    ratedName = codeToName[isoCode]
    if ratedName in countryToRating:
        return countryToRating[ratedName]
    return 0

import plotly.express as px
import numpy as np


# Take an explicit copy of the 2007 slice so the new column below is written
# to an independent frame instead of to the filtered result of query(), which
# is the pattern that triggers pandas' SettingWithCopyWarning.
gapminder = px.data.gapminder().query("year==2007").copy()

# One mean rating per row, computed by normalize_rowII from the row's
# 'iso_alpha' code (0 when the country has no rating entry).
gapminder['average rating'] = gapminder.apply(normalize_rowII, axis=1)

fig = px.choropleth(gapminder, locations="iso_alpha",
                    color='average rating',
                    hover_name="country", # column to add to hover information
                    color_continuous_scale=px.colors.sequential.Plasma)

fig.show()

Unnamed: 0_level_0,Stars
Country,Unnamed: 1_level_1
Brazil,4.35
Sarawak,4.333333
Cambodia,4.2
Malaysia,4.127564
Singapore,4.126147
Indonesia,4.06746
Japan,3.981605
Myanmar,3.946429
Fiji,3.875
Hong Kong,3.801825


In [76]:
# List the country names from the ratings data that pycountry does not know
# under that exact spelling, i.e. the names that need a manual mapping.

input_countries = list(test.styleInCountry())

# Official pycountry name -> ISO alpha-3 code.
countries = {}
for country in pycountry.countries:
    countries[country.name] = country.alpha_3

# Names without a match receive a placeholder that is not three characters
# long, which is what the filter below keys on.
codes = [countries.get(name, 'Unknown code') for name in input_countries]

ret = [name for name, code in zip(input_countries, codes) if len(code) != 3]
print(ret)

['Taiwan', 'USA', 'South Korea', 'Vietnam', 'UK', 'Sarawak', 'Holland', 'Dubai']


In [None]:
# Display the 2007 gapminder slice (the same query the choropleth cells run)
# so its rows and columns can be inspected.
gapminderII = px.data.gapminder().query("year==2007")
gapminderII