In [30]:
# @title Import library
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
import pandas as pd
import os
import numpy as np
import re

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [78]:
# @title Load data


import os
import pandas as pd

# Path to your folder (replace with your folder path)
folder_path = '/content/drive/MyDrive/Grab - Bootcamp - G2/data'

# Initialize the dictionary
dfs = {}

# Iterate through all CSV files in folder and subfolders
for root, dirs, files in os.walk(folder_path):
    for file in files:
        if file.endswith('.csv'):
            file_path = os.path.join(root, file)
            try:
                df = pd.read_csv(file_path)
                # Use relative path (excluding base) as key to avoid duplicates
                relative_path = os.path.relpath(file_path, folder_path)
                key = os.path.splitext(relative_path)[0]  # Remove '.csv' extension
                dfs[key] = df
                print(f"✅ Loaded: {relative_path}")
            except Exception as e:
                print(f"⚠️ Skipped {file_path} due to error: {e}")



✅ Loaded: hanoi_hotel_elderly.csv
✅ Loaded: restaurant_or_local_foods_from_foody.csv
✅ Loaded: hanoi_hotel.csv
✅ Loaded: hanoi_attraction_trivisor_web_data.csv
✅ Loaded: data_gg_maps/bệnh viện/bệnh viện.csv
✅ Loaded: data_gg_maps/địa điểm vui chơi/địa điểm vui chơi.csv
✅ Loaded: data_gg_maps/khách sạn/khách sạn.csv
✅ Loaded: data_gg_maps/cafe/cafe.csv
✅ Loaded: data_gg_maps/cửa hàng tiện lợi/cửa hàng tiện lợi.csv
✅ Loaded: data_gg_maps/nhà hàng/nhà hàng.csv
✅ Loaded: data_gg_maps/siêu thị/siêu thị.csv
✅ Loaded: data_gg_maps/quán ăn/quán ăn.csv
✅ Loaded: data_gg_maps/bar/bar.csv
✅ Loaded: foody/binhthuan_food_foody.csv
✅ Loaded: foody/cantho_food_foody.csv
✅ Loaded: foody/danang_food_foody.csv
✅ Loaded: foody/haiphong_food_foody.csv
✅ Loaded: foody/hochiminh_food_foody.csv
✅ Loaded: foody/khanhhoa_food_foody.csv
✅ Loaded: foody/lamdong_food_foody.csv
✅ Loaded: foody/nhatrang_food_foody.csv
✅ Loaded: foody/vungtau_food_foody.csv
✅ Loaded: boo

# Hotel data processing + EDA

In [79]:
# @title Load hotel data
# Filter dfs dictionary for keys containing 'hotel' or 'bookingweb'
hotel_dfs = {
    name: df
    for name, df in dfs.items()
    if 'hotel' in name.lower() or 'bookingweb' in name.lower()
}

# Print out matched keys
print("Matched files:")
for key in hotel_dfs:
    print(f"  - {key}")


Matched files:
  - hanoi_hotel_elderly
  - hanoi_hotel
  - bookingweb/dalat_hotel
  - bookingweb/vungtau_hotel
  - bookingweb/nhatrang_hotel
  - bookingweb/danang_hotel_1
  - bookingweb/danang_hotel
  - bookingweb/hochiminh_hotel


In [None]:
# @title preprocess before concat
for name, df in hotel_dfs.items():
    print(f"\n📝 Data Preview for: {name}")
    print("=" * (20 + len(name)))

    df = df.copy()

    # Replace '[]' strings with NaN
    df.replace('[]', np.nan, inplace=True)

    # Drop columns that are entirely NaN
    df.dropna(axis=1, how='all', inplace=True)

    # Drop 'price_source' if it exists
    if 'price_source' in df.columns:
        df.drop(columns=['price_source'], inplace=True)

    # Extract city from filename pattern like 'bookingweb/nhatrang_final'
    city_match = re.search(r'([a-zA-Z]+)_hotel', name)
    if city_match:
        city = city_match.group(1)
        df['city'] = city
    else:
        df['city'] = np.nan  # or 'unknown'

    # Update dictionary
    hotel_dfs[name] = df

    # Print preview
    print("\n" + "-"*80)
    print("\n📌 Columns:")
    print(df.columns)

In [86]:
# @title Concat data
# 📌 Start with an empty list to store standardized DataFrames
standardized_dfs = []

# Loop through all dfs in your hotel_dfs dictionary
for name, df in hotel_dfs.items():
    df = df.copy()

    # Add 'elderly_friendly' column with default False if not present
    if 'elderly_friendly' not in df.columns:
        df['elderly_friendly'] = False

    # Optional: Ensure consistent column order if needed
    standardized_dfs.append(df)

# 🧩 Concatenate all into one big DataFrame
all_hotels = pd.concat(standardized_dfs, ignore_index=True)

# ✅ Done
print(all_hotels.shape)
all_hotels.head()

(20643, 13)


Unnamed: 0,name,link,price,discounted_price,tax_info,rating,location,description,images,room_types,elderly_friendly,city,room_info
0,NT Elysian hotel,https://www.booking.com/hotel/vn/nt-elysian.vi...,VND 744.480,VND 744.480,Đã bao gồm thuế và phí,"Đạt điểm 9,39,3Tuyệt hảo 792 đánh giá","Quận Hoàn Kiếm, Hà Nội",Nằm tại vị trí thuận tiện ở trung tâm Hà Nội...,[{'url': 'https://cf.bstatic.com/xdata/images/...,"[{'name': 'Phòng Superior Giường Đôi', 'bed_ty...",True,hanoi,
1,Eco Nest Hotel & Apartment,https://www.booking.com/hotel/vn/eco-nest-amp-...,VND 394.181,VND 394.181,Đã bao gồm thuế và phí,"Đạt điểm 7,47,4Tốt 671 đánh giá","Quận Hoàn Kiếm, Hà Nội",Eco Nest Hotel & Apartment cung cấp chỗ nghỉ t...,[{'url': 'https://cf.bstatic.com/xdata/images/...,"[{'name': 'Studio Tiêu Chuẩn', 'bed_type': 'N/...",True,hanoi,
2,VVIP Suite,https://www.booking.com/hotel/vn/vvip-suite.vi...,VND 663.200,VND 663.200,Đã bao gồm thuế và phí,,"Quận Đống Đa, Hà Nội","Nằm ở Hà Nội, cách Bảo tàng mỹ thuật Việt Na...",[{'url': 'https://cf.bstatic.com/xdata/images/...,"[{'name': 'Căn Hộ 1 Phòng Ngủ', 'bed_type': 'N...",True,hanoi,
3,Apartment 1BR-FL24th-R105 Building-Vinhomes Oc...,https://www.booking.com/hotel/vn/1brfl16th-rub...,VND 494.250,VND 494.250,Đã bao gồm thuế và phí,"Đạt điểm 9,79,7Xuất sắc 4 đánh giá",Hà Nội,"Nằm ở Hà Nội, cách Trung tâm mua sắm AEON MA...",[{'url': 'https://cf.bstatic.com/xdata/images/...,"[{'name': 'Căn Hộ Có Tầm Nhìn Và Ban Công', 'b...",True,hanoi,
4,Apartment 1BR-FL22th-R103 Building-Vinhome Oce...,https://www.booking.com/hotel/vn/1br-fl22th-ru...,VND 494.250,VND 494.250,Đã bao gồm thuế và phí,"Đạt điểm 9,79,7Xuất sắc 3 đánh giá",Hà Nội,"Tọa lạc ở Hà Nội, cách Trung tâm mua sắm AEO...",[{'url': 'https://cf.bstatic.com/xdata/images/...,"[{'name': 'Căn Hộ Có Ban Công', 'bed_type': 'N...",True,hanoi,


In [87]:
# @title function
def extract_rating(text):
    if isinstance(text, str):
        match = re.search(r'(\d+,\d+)', text)
        if match:
            return float(match.group(1).replace(',', '.'))
    return None  # or np.nan

def split_price(price_str):
    if pd.isna(price_str): return pd.Series([np.nan, np.nan])

    # Regex to extract currency and numeric value
    match = re.match(r'([A-Z]+)\s*([\d.,]+)', price_str.strip())
    if match:
        currency = match.group(1)
        number_str = match.group(2).replace('.', '').replace(',', '.')
        try:
            value = float(number_str)
        except:
            value = np.nan
        return pd.Series([currency, value])
    return pd.Series([np.nan, np.nan])

In [89]:
# @title reformat
if all_hotels['price'].equals(all_hotels['discounted_price']):
  all_hotels.drop(columns=['discounted_price'], inplace=True)
# Convert numerical value
all_hotels['rating'] = all_hotels['rating'].apply(extract_rating)
all_hotels[['unit', 'price']] = all_hotels['price'].apply(split_price)

# drop duplicates
all_hotels.drop_duplicates(inplace=True)

# fill missing value = avg technique
for col in all_hotels.select_dtypes(include=[np.number]).columns:
    all_hotels[col] = all_hotels[col].fillna(round(all_hotels[col].mean(),2))

In [90]:
all_hotels.head(3)

Unnamed: 0,name,link,price,tax_info,rating,location,description,images,room_types,elderly_friendly,city,room_info,unit
0,NT Elysian hotel,https://www.booking.com/hotel/vn/nt-elysian.vi...,744480.0,Đã bao gồm thuế và phí,9.39,"Quận Hoàn Kiếm, Hà Nội",Nằm tại vị trí thuận tiện ở trung tâm Hà Nội...,[{'url': 'https://cf.bstatic.com/xdata/images/...,"[{'name': 'Phòng Superior Giường Đôi', 'bed_ty...",True,hanoi,,VND
1,Eco Nest Hotel & Apartment,https://www.booking.com/hotel/vn/eco-nest-amp-...,394181.0,Đã bao gồm thuế và phí,7.47,"Quận Hoàn Kiếm, Hà Nội",Eco Nest Hotel & Apartment cung cấp chỗ nghỉ t...,[{'url': 'https://cf.bstatic.com/xdata/images/...,"[{'name': 'Studio Tiêu Chuẩn', 'bed_type': 'N/...",True,hanoi,,VND
2,VVIP Suite,https://www.booking.com/hotel/vn/vvip-suite.vi...,663200.0,Đã bao gồm thuế và phí,8.42,"Quận Đống Đa, Hà Nội","Nằm ở Hà Nội, cách Bảo tàng mỹ thuật Việt Na...",[{'url': 'https://cf.bstatic.com/xdata/images/...,"[{'name': 'Căn Hộ 1 Phòng Ngủ', 'bed_type': 'N...",True,hanoi,,VND


In [91]:
all_hotels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20643 entries, 0 to 20642
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              20643 non-null  object 
 1   link              20643 non-null  object 
 2   price             20643 non-null  float64
 3   tax_info          20643 non-null  object 
 4   rating            20643 non-null  float64
 5   location          20643 non-null  object 
 6   description       20638 non-null  object 
 7   images            20638 non-null  object 
 8   room_types        10190 non-null  object 
 9   elderly_friendly  20643 non-null  bool   
 10  city              20643 non-null  object 
 11  room_info         80 non-null     object 
 12  unit              20643 non-null  object 
dtypes: bool(1), float64(2), object(10)
memory usage: 1.9+ MB


In [92]:
all_hotels.to_csv('/content/drive/MyDrive/Grab - Bootcamp - G2/processed_data/hotel_processed.csv', index=False)

## EDA

In [None]:
hotel.info()
hotel.describe(include='all')

# FnB data

## Processing

In [29]:
# @title Read data
# ggmap data: bar, cafe, convenience store, nhà hàng, quán ăn
base_path = '/content/drive/MyDrive/Grab - Bootcamp - G2/data/data_gg_maps'

# Folders to include
target_folders = ['bar', 'cafe', 'cửa hàng tiện lợi', 'nhà hàng', 'quán ăn']

# Dictionary to store DataFrames
ggmap = {}

# Loop through folders and read CSVs
for folder in target_folders:
    folder_path = os.path.join(base_path, folder)
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            file_path = os.path.join(folder_path, filename)
            key = f"{folder}/{filename}"
            ggmap[key] = pd.read_csv(file_path)
            print(f"{key}")

# foody
food = dfs['restaurant_or_local_foods_from_foody']
bar = ggmap['bar/bar.csv']
cafe = ggmap['cafe/cafe.csv']
convenience_store = ggmap["cửa hàng tiện lợi/cửa hàng tiện lợi.csv"]
restaurant = ggmap["nhà hàng/nhà hàng.csv"]
food_spot = ggmap["quán ăn/quán ăn.csv"]

bar/bar.csv
cafe/cafe.csv
cửa hàng tiện lợi/cửa hàng tiện lợi.csv
nhà hàng/nhà hàng.csv
quán ăn/quán ăn.csv


In [33]:
restaurant.head(3)

Unnamed: 0,name,address,vicinity,phone,international_phone,rating,total_ratings,price_level,price_description,website,...,photos,editorial_summary,plus_code,utc_offset,opening_hours,open_now,current_opening_hours,current_open_now,features,reviews
0,Nhà Hàng Ngói Vàng,"D04_L19, Đường Chùa Võ/An Phú Shop Villa/D04 K...","D04_L19, Đường Chùa Võ/An Phú Shop Villa/D04 K...",0989 855 433,+84 989 855 433,4.7,147.0,,,,...,[{'url': 'https://maps.googleapis.com/maps/api...,,"{'compound_code': 'XQJ3+H2 Hà Đông, Hà Nội, Vi...",420.0,"['Thứ Hai: 09:00–22:00', 'Thứ Ba: 09:00–22:00'...",True,"['Thứ Hai: 09:00–22:00', 'Thứ Ba: 09:00–22:00'...",True,"{'delivery': True, 'dine_in': True, 'takeout':...",[]
1,Nhà Hàng Mai Châu 2,"Liền kề 9, Ô số 18, Khu đô thị Văn Phú, Hà Đôn...","Liền kề 9, Ô số 18, Khu đô thị Văn Phú",0986 638 289,+84 986 638 289,4.3,449.0,,,https://nhahangmaichau.com/,...,[{'url': 'https://maps.googleapis.com/maps/api...,,"{'compound_code': 'XQ57+8M Hà Đông, Hà Nội, Vi...",420.0,"['Thứ Hai: 09:00–22:00', 'Thứ Ba: 09:00–22:00'...",True,"['Thứ Hai: 09:00–22:00', 'Thứ Ba: 09:00–22:00'...",True,"{'delivery': True, 'dine_in': True, 'takeout':...",[]
2,Nhà Hàng Hà Nội Phố,"BT9-12 Khu Đô Thị mới An Hưng, Khu đô thị An H...","Dương Nội, BT9-12 Khu Đô Thị mới An Hưng",0814 660 999,+84 814 660 999,4.2,68.0,,,,...,[{'url': 'https://maps.googleapis.com/maps/api...,,"{'compound_code': 'XQG4+4J Hà Đông, Hà Nội, Vi...",420.0,"['Thứ Hai: 06:00–23:00', 'Thứ Ba: 06:00–23:00'...",True,"['Thứ Hai: 06:00–23:00', 'Thứ Ba: 06:00–23:00'...",True,"{'delivery': True, 'dine_in': True, 'takeout':...",[]


In [42]:
restaurant.reviews.unique()

array(['[]', nan], dtype=object)

In [47]:
restaurant[(restaurant['reviews'] != '[]') & (~restaurant['reviews'].isna())]
# -> there's no review

Unnamed: 0,name,address,vicinity,phone,international_phone,rating,total_ratings,price_level,price_description,website,...,photos,editorial_summary,plus_code,utc_offset,opening_hours,open_now,current_opening_hours,current_open_now,features,reviews


In [57]:
bar.price_description.unique(), bar.price_level.unique(), cafe.price_description.unique(), cafe.price_level.unique(), food_spot.price_description.unique(), food_spot.price_level.unique()

(array([nan, 'Trung bình', 'Rẻ', 'Đắt'], dtype=object),
 array([nan,  2.,  1.,  3.]),
 array([nan, 'Trung bình', 'Rẻ', 'Đắt'], dtype=object),
 array([nan,  2.,  1.,  3.]),
 array([nan, 'Trung bình', 'Rẻ', 'Đắt'], dtype=object),
 array([nan,  2.,  1.,  3.]))

In [58]:
food_spot.head(3)

Unnamed: 0,name,address,phone,rating,total_ratings,price_level,website,type,place_id,opening_hours,...,location,photos,editorial_summary,plus_code,utc_offset,open_now,current_opening_hours,current_open_now,features,reviews
0,Nem nướng Mỗ Lao,"54 Ng. 140 Đ. Trần Phú, P. Mộ Lao, Hà Đông, Hà...",0943 363 663,4.2,211.0,,https://shopeefood.vn/ha-noi/nem-nuong-mo-lao,quán ăn,ChIJs0GvsdKsNTERXzY6BZuV5eE,"['Thứ Hai: 16:00–23:00', 'Thứ Ba: 16:00–22:30'...",...,,,,,,,,,,
1,Nem Nướng Xuân Dần 3,"Unnamed Road, XQ79+VX4, Khu đô thị Văn Phú, Hà...",0982 401 986,3.9,217.0,2.0,,quán ăn,ChIJL4Pk39lSNDER0fiVm1co4MI,"['Thứ Hai: 09:30–22:30', 'Thứ Ba: 09:30–22:30'...",...,,,,,,,,,,
2,Quán Ăn X76,"172 QL21B, Phú Lương, Hà Đông, Hà Nội, Việt Nam",0985 666 018,4.0,142.0,,,quán ăn,ChIJ68rv3pVSNDERJP7ppwAiYow,"['Thứ Hai: 08:00–22:30', 'Thứ Ba: 08:00–22:30'...",...,,,,,,,,,,


In [51]:
restaurant.price_description.unique(), restaurant.price_level.unique()
# -> There's no price

(array([], dtype=object), array([], dtype=float64))

In [31]:
restaurant.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1022 entries, 0 to 1021
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   name                   1022 non-null   object 
 1   address                1022 non-null   object 
 2   vicinity               465 non-null    object 
 3   phone                  971 non-null    object 
 4   international_phone    439 non-null    object 
 5   rating                 1021 non-null   float64
 6   total_ratings          1021 non-null   float64
 7   price_level            386 non-null    float64
 8   price_description      173 non-null    object 
 9   website                324 non-null    object 
 10  google_maps_url        465 non-null    object 
 11  business_status        465 non-null    object 
 12  permanently_closed     465 non-null    object 
 13  type                   1022 non-null   object 
 14  place_id               1022 non-null   object 
 15  loca

In [53]:
food.price_range.unique()

array(["{'min': None, 'max': None}", nan], dtype=object)

In [32]:
food.head(3)

Unnamed: 0,id,name,address,rating,total_reviews,total_pictures,phone,photo_url,url,location,reviews,services,is_delivery,is_booking,is_opening,price_range,crawled_at,last_page,items_collected,last_update
0,10337.0,Phở Cao Vân,"25 Mạc Đĩnh Chi, Quận 1, TP. HCM",6.574,32.0,132.0,(028) 38 220 509 - 0908 357 209,https://images.foody.vn/res/g2/10337/prof/s640...,https://www.foody.vn/ho-chi-minh/pho-cao-van,"{'lat': 10.784871, 'lon': 106.699262}","[{'user': 'Huy', 'rating': 5.0, 'comment': 'Mớ...","[{'name': 'ShopeeFood', 'url': 'https://www.de...",True,False,True,"{'min': None, 'max': None}",2025-04-19T20:34:48.035913,,,
1,719632.0,Texas Chicken - Vạn Hạnh Mall,"16 Vạn Hạnh Mall, 11 Sư Vạn Hạnh, P. 12, Quận ...",6.38,21.0,113.0,Đang cập nhật,https://images.foody.vn/res/g72/719632/prof/s6...,https://www.foody.vn/ho-chi-minh/texas-chicken...,"{'lat': 10.770823, 'lon': 106.669862}","[{'user': 'Nga Trần', 'rating': 8.0, 'comment'...","[{'name': 'ShopeeFood', 'url': 'https://www.de...",True,False,True,"{'min': None, 'max': None}",2025-04-19T20:55:15.340429,,,
2,1052688.0,Long Bánh Tráng Trộn - Bánh Tráng Cuốn,"240 Bùi Đình Túy, P. 12, Quận Bình Thạnh, TP. HCM",3.934,3.0,2.0,Đang cập nhật,https://images.foody.vn/res/g106/1052688/prof/...,https://www.foody.vn/ho-chi-minh/long-banh-tra...,"{'lat': 10.8075456, 'lon': 106.7014461}","[{'user': 'Nị', 'rating': 7.4, 'comment': 'Cảm...","[{'name': 'ShopeeFood', 'url': 'https://www.de...",True,False,True,"{'min': None, 'max': None}",2025-04-19T20:53:13.310726,,,


## FnB EDA

# Attraction data processing + EDA

In [11]:
coffee= pd.read_csv('/content/drive/MyDrive/Grab - Bootcamp - G2/data/data_gg_maps/địa điểm vui chơi/địa điểm vui chơi.csv')
coffee.head(3)

Unnamed: 0,name,address,vicinity,phone,international_phone,rating,total_ratings,price_level,price_description,website,...,photos,editorial_summary,plus_code,utc_offset,opening_hours,open_now,current_opening_hours,current_open_now,features,reviews
0,Khu vui chơi trẻ em Phú Huyền,"Cạnh nhà Văn Hóa, thôn Lưu Xá, Chương Mỹ, Hà N...","Cạnh nhà Văn Hóa, thôn Lưu Xá",0857 991 888,+84 857 991 888,5.0,1.0,,,,...,[{'url': 'https://maps.googleapis.com/maps/api...,,"{'compound_code': 'RP66+R8 Chương Mỹ, Hà Nội, ...",420.0,"['Thứ Hai: 08:30–21:00', 'Thứ Ba: 08:30–21:00'...",True,"['Thứ Hai: 08:30–21:00', 'Thứ Ba: 08:30–21:00'...",True,"{'delivery': False, 'dine_in': False, 'takeout...",[]
1,Khu vui chơi Wolfoo World Aeon Mall Hà Đông,"Dương Nội, Hà Nội, Việt Nam",Dương Nội,0967 464 463,+84 967 464 463,3.0,14.0,,,https://wolfoocity.vn/,...,[{'url': 'https://maps.googleapis.com/maps/api...,,"{'compound_code': 'XQQ2+XJ Hà Đông, Hà Nội, Vi...",420.0,"['Thứ Hai: 10:00–22:00', 'Thứ Ba: 10:00–22:00'...",True,"['Thứ Hai: 10:00–22:00', 'Thứ Ba: 10:00–22:00'...",True,"{'delivery': False, 'dine_in': False, 'takeout...",[]
2,Khu vui chơi Wolfoo World Vincom Mega Mall Sma...,"L3-38, tầng 3, Trung tâm thương mại Vincom Meg...","Đại Mỗ, L3-38, tầng 3, Trung tâm thương mại Vi...",0967 464 463,+84 967 464 463,3.7,67.0,,,https://wolfoocity.vn/,...,[{'url': 'https://maps.googleapis.com/maps/api...,,"{'compound_code': '2Q43+FC Nam Từ Liêm, Hà Nội...",420.0,"['Thứ Hai: 04:15–23:58', 'Thứ Ba: 04:15–23:58'...",True,"['Thứ Hai: 04:15–23:58', 'Thứ Ba: 04:15–23:58'...",True,"{'delivery': False, 'dine_in': False, 'takeout...",[]


In [12]:
coffee.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 558 entries, 0 to 557
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   name                   558 non-null    object 
 1   address                558 non-null    object 
 2   vicinity               277 non-null    object 
 3   phone                  314 non-null    object 
 4   international_phone    157 non-null    object 
 5   rating                 490 non-null    float64
 6   total_ratings          490 non-null    float64
 7   price_level            4 non-null      float64
 8   price_description      2 non-null      object 
 9   website                157 non-null    object 
 10  google_maps_url        278 non-null    object 
 11  business_status        278 non-null    object 
 12  permanently_closed     278 non-null    object 
 13  type                   558 non-null    object 
 14  place_id               558 non-null    object 
 15  locati

In [59]:
attraction.loc[0,'url']

'https://www.tripadvisor.com/Attraction_Review-g293924-d4971485-Reviews-Gallery27-Hanoi.html'

In [6]:
attraction = dfs['hanoi_attraction_trivisor_web_data']
attraction.head(3)

Unnamed: 0,name,url,address,duration,type,categories,image_urls,main_image,price,rating,description,opening_hours,reviews
0,Gallery27,https://www.tripadvisor.com/Attraction_Review-...,Reach out directlyVisit websiteCall,9 min,Attraction,"['Ancient Ruins in Hanoi', 'Also popular with ...",['https://dynamic-media-cdn.tripadvisor.com/me...,https://dynamic-media-cdn.tripadvisor.com/medi...,$86.00,5.0,,,
1,1960 Cafe,https://www.tripadvisor.com/Attraction_Review-...,,2 miN,Attraction,"['Cigar Bars in Hanoi', 'Bars & Clubs in Hanoi...",['https://dynamic-media-cdn.tripadvisor.com/me...,https://dynamic-media-cdn.tripadvisor.com/medi...,$86.00,5.0,Live music cafe & bar in industrial vintage st...,,
2,Grandpa's Garden,https://www.tripadvisor.com/Attraction_Review-...,,1 hour,Attraction,"['Bus Tours in Hanoi', 'Sightseeing Tours in H...",,,$86.00,5.0,Join us to make natural soap from Vietnamese H...,,


In [7]:
attraction.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 995 entries, 0 to 994
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   name           995 non-null    object 
 1   url            995 non-null    object 
 2   address        54 non-null     object 
 3   duration       577 non-null    object 
 4   type           995 non-null    object 
 5   categories     538 non-null    object 
 6   image_urls     680 non-null    object 
 7   main_image     680 non-null    object 
 8   price          917 non-null    object 
 9   rating         964 non-null    float64
 10  description    671 non-null    object 
 11  opening_hours  580 non-null    object 
 12  reviews        390 non-null    object 
dtypes: float64(1), object(12)
memory usage: 101.2+ KB
