In [3]:
import pandas as pd

# Load the ramen-ratings.csv dataset
df = pd.read_csv('ramen-ratings.csv')
df.head()

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
0,2580,New Touch,T's Restaurant Tantanmen,Cup,Japan,3.75,
1,2579,Just Way,Noodles Spicy Hot Sesame Spicy Hot Sesame Guan...,Pack,Taiwan,1.0,
2,2578,Nissin,Cup Noodles Chicken Vegetable,Cup,USA,2.25,
3,2577,Wei Lih,GGE Ramen Snack Tomato Flavor,Pack,Taiwan,2.75,
4,2576,Ching's Secret,Singapore Curry,Pack,India,3.75,


In [4]:
# Check how large the resulting DataFrame is
df.shape

(2580, 7)

In [5]:
# Get a list of brands of each ramen reviewed
df['Brand']

0            New Touch
1             Just Way
2               Nissin
3              Wei Lih
4       Ching's Secret
             ...      
2575             Vifon
2576           Wai Wai
2577           Wai Wai
2578           Wai Wai
2579          Westbrae
Name: Brand, Length: 2580, dtype: object

In [6]:
# Indexing a location in pandas, get the first 3 rows
df.iloc[[0, 1, 2], 0]

0    2580
1    2579
2    2578
Name: Review #, dtype: int64

In [7]:
# Getting the last 5 elements of the dataset
df.iloc[-5:]

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
2575,5,Vifon,"Hu Tiu Nam Vang [""Phnom Penh"" style] Asian Sty...",Bowl,Vietnam,3.5,
2576,4,Wai Wai,Oriental Style Instant Noodles,Pack,Thailand,1.0,
2577,3,Wai Wai,Tom Yum Shrimp,Pack,Thailand,2.0,
2578,2,Wai Wai,Tom Yum Chili Flavor,Pack,Thailand,2.0,
2579,1,Westbrae,Miso Ramen,Pack,USA,0.5,


In [8]:
# Label based selection in pandas
df.loc[:, 'Country']

0          Japan
1         Taiwan
2            USA
3         Taiwan
4          India
          ...   
2575     Vietnam
2576    Thailand
2577    Thailand
2578    Thailand
2579         USA
Name: Country, Length: 2580, dtype: object

In [9]:
# Manipulating the index with set_index()
df.set_index("Brand")

Unnamed: 0_level_0,Review #,Variety,Style,Country,Stars,Top Ten
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
New Touch,2580,T's Restaurant Tantanmen,Cup,Japan,3.75,
Just Way,2579,Noodles Spicy Hot Sesame Spicy Hot Sesame Guan...,Pack,Taiwan,1,
Nissin,2578,Cup Noodles Chicken Vegetable,Cup,USA,2.25,
Wei Lih,2577,GGE Ramen Snack Tomato Flavor,Pack,Taiwan,2.75,
Ching's Secret,2576,Singapore Curry,Pack,India,3.75,
...,...,...,...,...,...,...
Vifon,5,"Hu Tiu Nam Vang [""Phnom Penh"" style] Asian Sty...",Bowl,Vietnam,3.5,
Wai Wai,4,Oriental Style Instant Noodles,Pack,Thailand,1,
Wai Wai,3,Tom Yum Shrimp,Pack,Thailand,2,
Wai Wai,2,Tom Yum Chili Flavor,Pack,Thailand,2,


In [10]:
# Conditional selection in pandas demo
df.loc[df.Country == 'Japan']

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
0,2580,New Touch,T's Restaurant Tantanmen,Cup,Japan,3.75,
6,2574,Acecook,Spice Deli Tantan Men With Cilantro,Cup,Japan,4,
7,2573,Ikeda Shoku,Nabeyaki Kitsune Udon,Tray,Japan,3.75,
8,2572,Ripe'n'Dry,Hokkaido Soy Sauce Ramen,Pack,Japan,0.25,
13,2567,Nissin,Deka Buto Kimchi Pork Flavor,Bowl,Japan,4.5,
...,...,...,...,...,...,...,...
2554,26,Nissin,Soba Noodles With Mayo Mustard Squirt,Pack,Japan,4,
2555,25,Nissin,Demae Ramen Spicy Flavor,Pack,Japan,3,
2556,24,Nissin,Demae Ramen Spicy Seafood With Chili Pepper,Pack,Japan,2,
2567,13,Sapporo Ichiban,Chow Mein,Pack,Japan,5,2012 #4


In [11]:
# Conditional selection for Japan and USA as country and style is cup
df.loc[(df.Country.isin(['Japan', 'USA'])) & (df.Style == 'Cup')]

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
0,2580,New Touch,T's Restaurant Tantanmen,Cup,Japan,3.75,
2,2578,Nissin,Cup Noodles Chicken Vegetable,Cup,USA,2.25,
6,2574,Acecook,Spice Deli Tantan Men With Cilantro,Cup,Japan,4,
39,2541,Nissin,Cup Noodles Very Veggie Spicy Chicken Flavor R...,Cup,USA,5,
42,2538,Nissin,Cup Noodles Very Veggie Beef Flavor Ramen Nood...,Cup,USA,5,
...,...,...,...,...,...,...,...
2324,256,Nissin,Cup Noodles Shrimp,Cup,USA,3.5,
2364,216,Maruchan,Instant Lunch Roast Chicken Flavor,Cup,USA,2.75,
2424,156,Maruchan,Instant Lunch Cajun Style With chili Piquin Sh...,Cup,USA,3,
2426,154,Nissin,Creamy Chicken,Cup,USA,1.75,


In [12]:
# Conditional selection that finds all rows where the 'Top Ten' columns has actual values
top_ten = df.loc[df['Top Ten'].notna()]

In [13]:
df['Top Ten'] = df['Stars'] == 5.0

In [21]:
df['Top Ten'] = ''
df.loc[df['Stars'] == 5.0, 'Top Ten'] = 'Yes'
df['Top Ten'].value_counts()

Top Ten
    2580
Name: count, dtype: int64

In [22]:
# Using pandas' describe() function to get a summary of Brand column
df.Brand.describe()

count       2580
unique       355
top       Nissin
freq         381
Name: Brand, dtype: object

In [23]:
# Convert 'Stars' to numeric and then get the mean 
df['Stars'] = pd.to_numeric(df['Stars'], errors = 'coerce')
df.Stars.mean()

3.6546759798214974

In [24]:
# Get a list of unique values for Variety
df.Variety.unique()

array(["T's Restaurant Tantanmen ",
       'Noodles Spicy Hot Sesame Spicy Hot Sesame Guan-miao Noodles',
       'Cup Noodles Chicken Vegetable', ...,
       'Hu Tiu Nam Vang ["Phnom Penh" style] Asian Style Instant\xa0Rice\xa0Noodles',
       'Oriental Style Instant Noodles', 'Tom Yum Chili Flavor'],
      dtype=object)

In [25]:
# Get a list of occurences for ramen in each country in the dataset
df.Country.value_counts()

Country
Japan            352
USA              323
South Korea      309
Taiwan           224
Thailand         191
China            169
Malaysia         156
Hong Kong        137
Indonesia        126
Singapore        109
Vietnam          108
UK                69
Philippines       47
Canada            41
India             31
Germany           27
Mexico            25
Australia         22
Netherlands       15
Myanmar           14
Nepal             14
Pakistan           9
Hungary            9
Bangladesh         7
Colombia           6
Brazil             5
Cambodia           5
Fiji               4
Holland            4
Poland             4
Finland            3
Sarawak            3
Sweden             3
Dubai              3
Ghana              2
Estonia            2
Nigeria            1
United States      1
Name: count, dtype: int64

In [26]:
# Using the map() in pandas, Example 1: Clean the Stars column 

In [27]:
# Method 1: Using lambda
df['Stars_Clean'] = df['Stars'].map(lambda x: float(x) if x != 'Unrated' else 0)

In [28]:
# Method 2: Using a function (more readable)
def clean_star_rating(rating):
    """Convert star ratings to numbers, handle 'Unrated'"""
    if rating == 'Unrated':
        return 0
    else:
        return float(rating)

df['Stars_Clean'] = df['Stars'].map(clean_star_rating)


In [29]:
# Example 2: Create categories from countries
def categorize_country(country):
    """Map countries to broader regions"""
    if country in ['Japan', 'South Korea', 'Taiwan', 'China', 'Hong Kong']:
        return 'East Asia'
    elif country in ['Thailand', 'Vietnam', 'Singapore', 'Malaysia']:
        return 'Southeast Asia'
    elif country in ['USA', 'Canada']:
        return 'North America'
    elif country in ['Germany', 'UK', 'Netherlands']:
        return 'Europe'
    else:
        return 'Other'

df['Region'] = df['Country'].map(categorize_country)

In [30]:
# Example 3: Create a dictionary of brand categories
brand_categories = {
    'Nissin': 'Major Brand',
    'Maruchan': 'Major Brand',
    'Top Ramen': 'Major Brand',
    'Nongshim': 'Major Brand'
}

# Use map with a dictionary
df['Brand_Category'] = df['Brand'].map(brand_categories).fillna('Independent')

In [31]:
# Example 4: Transform style descriptions
def style_description(style):
    """Add descriptive text to style"""
    descriptions = {
    'Cup': 'Instant Cup Noodles',
    'Pack': 'Dry Noodle Pack',
    'Bowl': 'Bowl-style Noodles',
    'Tray': 'Tray-style Noodles'
    }
    return descriptions.get(style, f'{style} Style')

df['Style_Description'] = df['Style'].map(style_description)

In [32]:
# Example 5: Cerate rating categories using lambda
df['Rating_Category'] = df['Stars_Clean'].map(
    lambda rating: 'Excellent' if rating >= 4.5
    else 'Good' if rating >= 3.5
    else 'Average' if rating >= 2.5
    else 'Poor' if rating > 0
    else 'Unrated'
)

In [33]:
# Show results
print("Original vs Cleaned Stars:")
print(df[['Stars', 'Stars_Clean']].head())

print("\nCountries and Regions:")
print(df[['Country', 'Region']].head())

print("\nBrand Categories:")
print(df['Brand_Category'].value_counts())

print("\nRating Categories:")
print(df['Rating_Category'].value_counts())

Original vs Cleaned Stars:
   Stars  Stars_Clean
0   3.75         3.75
1   1.00         1.00
2   2.25         2.25
3   2.75         2.75
4   3.75         3.75

Countries and Regions:
  Country         Region
0   Japan      East Asia
1  Taiwan      East Asia
2     USA  North America
3  Taiwan      East Asia
4   India          Other

Brand Categories:
Brand_Category
Independent    2025
Major Brand     555
Name: count, dtype: int64

Rating Categories:
Rating_Category
Good         1233
Excellent     585
Average       509
Poor          224
Unrated        29
Name: count, dtype: int64


In [34]:
# Using groupby() to transform a column of the ramen dataset

In [35]:
# count the number of different ramen brands in the dataset
brands_written = df.groupby('Brand').size()
print(brands_written)

Brand
1 To 3 Noodles       1
7 Select             2
7 Select/Nissin      1
A-One                4
A-Sha Dry Noodle    26
                    ..
Yum Yum             12
Yum-Mie              1
Zow Zow              1
iMee                 4
iNoodle              2
Length: 355, dtype: int64


In [40]:
# sort by #reviews per brand
num_reviews = df.groupby('Review #')['Brand'].max().sort_index()
print(num_reviews)

Review #
1             Westbrae
2              Wai Wai
3              Wai Wai
4              Wai Wai
5                Vifon
             ...      
2576    Ching's Secret
2577           Wei Lih
2578            Nissin
2579          Just Way
2580         New Touch
Name: Brand, Length: 2580, dtype: object


In [58]:
# Get the average review # of ramen per stars
review_mean = df.groupby('Review #')['Stars'].mean()
print(review_mean)

Review #
1       0.50
2       2.00
3       2.00
4       1.00
5       3.50
        ... 
2576    3.75
2577    2.75
2578    2.25
2579    1.00
2580    3.75
Name: Stars, Length: 2580, dtype: float64


In [None]:
# Get the count of country and brand counts
country_brand_counts = df.groupby(['Country', 'Brand']).size().sort_values(