In [42]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')


# pandas 출력 옵션
pd.set_option('display.float_format', '{:.2f}'.format)

In [43]:
df_listings = pd.read_csv('./data/listings.csv')

In [44]:
import ast # 리스트형 문자열 파싱용 라이브러리
amenities_ser = df_listings['amenities'].apply(ast.literal_eval)

In [45]:
amenity_keywords = {
    "self_checkin": ["Self check-in"],
    "instant_book": ["Instant book", "immediate booking"],
    "kitchen": [
        "kitchen", "refrigerator", "fridge", "stove", "oven", "microwave", "freezer",
        "induction", "gas stove", "electric stove", "dishwasher", 
        "single oven", "double oven", "bistro",
        "kitchenette", "outdoor kitchen"
    ],
    "hair_dryer": ["Hair dryer", "hair dryers", "ドライヤー"],  # "dryer" 제거
    "free_parking": [
        "Free parking", "parking", "garage", "carport", "driveway parking",
        "residential garage", "street parking"
    ],
    "wifi": ["Wifi", "Fast wifi", "internet"],
    "private_bathroom": ["private bathroom", "bedroom bathroom", "en-suite"],
    "bbq_grill": [
        "BBQ grill", "Barbecue", "Private BBQ", "Shared BBQ", "charcoal", 
        "electric", "gas", "barbecue utensils"
    ],
    "washer": ["Washer", "washing machine", "laundromat", "Free washer", "Paid washer"],
    "pets_allowed": ["Pets allowed"],
    "clothes_dryer": [
        "Dryer", "Free dryer", "Paid dryer",
        "Dryer – In building", "Dryer – In unit",
        "Free dryer – In building", "Free dryer – In unit", 
        "Paid dryer – In building", "Paid dryer – In unit",
        "clothes dryer", "laundry dryer"
    ],
    "heating": [
        "Heating", "heater", "Central heating", "Radiant heating", 
        "Portable heater", "heated"
    ],
    "air_conditioning": [
        "AC", "Air conditioning", "Central air conditioning", "Portable air conditioning",
        "split type ductless system", "air conditioner", "cooling"
    ],
    "workspace": ["Dedicated workspace", "workspace", "work space"],
    "iron": ["Iron", "ironing"],
    "pool": [
        "Pool", "swimming", "Private pool", "Shared pool", "indoor pool", 
        "outdoor pool", "pool toys", "Pool view", "pool table"
    ],
    "bathtub": ["Bathtub", "Hot tub", "Private hot tub", "Shared hot tub"],
    "ev_charger": ["EV charger", "electric vehicle", "electric car"],
    "crib": [
        "Crib", "baby bed", "Pack 'n play", "Travel crib", "baby bath",
        "changing table", "baby monitor", "babysitter", "high chair"
    ],
    "king_bed": ["king size", "king bed"],
    "gym": [
        "Gym", "fitness", "exercise equipment", "workout", "stationary bike",
        "treadmill", "yoga mat", "free weights", "elliptical", "workout bench"
    ],
    "breakfast": ["Breakfast", "morning meal"],
    "fireplace": [
        "Indoor fireplace", "fireplace", "electric", "gas", "wood-burning",
        "ethanol", "fireplace guards"
    ],
    "smoking_allowed": ["Smoking allowed"],
    "waterfront": [
        "Waterfront", "beach", "lake", "river", "Beach access", "Lake access",
        "Beach view", "Lake view", "Marina view", "Sea view", "Canal view"
    ],
    "smoke_alarm": ["Smoke alarm", "fire alarm"],
    "carbon_monoxide_alarm": ["Carbon monoxide alarm"],
    # "other": [
    #     "TV", "HDTV", "Netflix", "Amazon Prime", "Disney+", "Hulu", "DVD player",
    #     "sound system", "Bluetooth", "Game console", "Nintendo", "PS4", "Xbox",
    #     "shampoo", "conditioner", "body soap", "clothing storage", "closet",
    #     "wardrobe", "dresser", "hangers", "bed linens", "housekeeping",
    #     "long term stays", "mountain view", "valley view", "resort view",
    #     "ping pong", "climbing wall", "kayak", "skate ramp", "children's books",
    #     "children's toys", "movie theater", "fire pit", "outdoor shower",
    #     "trash compactor", "table corner guards", "keypad", "hot water",
    #     "toaster", "coffee maker", "Nespresso"
    # ]
}

In [46]:
# 영어-한글 매핑 딕셔너리 (나중에 한글로 변환용)
amenity_eng_to_kor = {
    "self_checkin": "셀프 체크인",
    "instant_book": "즉시예약",
    "kitchen": "주방",
    "hair_dryer": "헤어드라이어",
    "free_parking": "무료 주차 공간",
    "wifi": "와이파이",
    "private_bathroom": "침실에 딸린 개인 욕실",
    "bbq_grill": "바베큐 그릴",
    "washer": "세탁기",
    "pets_allowed": "반려동물 동반 가능",
    "clothes_dryer": "건조기",
    "heating": "난방",
    "air_conditioning": "냉방",
    "workspace": "업무 전용 공간",
    "iron": "다리미",
    "pool": "수영장",
    "bathtub": "대형 욕조",
    "ev_charger": "전기차 충전시설",
    "crib": "아기 침대",
    "king_bed": "킹사이즈 침대",
    "gym": "헬스장",
    "breakfast": "조식",
    "fireplace": "실내 벽난로",
    "smoking_allowed": "흡연 가능",
    "waterfront": "수변",
    "smoke_alarm": "화재경보기",
    "carbon_monoxide_alarm": "일산화탄소 경보기",
    "other": "기타"
}

# 한글-영어 매핑 딕셔너리 (반대 변환용)
amenity_kor_to_eng = {v: k for k, v in amenity_eng_to_kor.items()}

In [47]:
amenities_ser2 = amenities_ser.copy()


def classify_amenities(amenities_list):
    new_list = []

    # Series 객체의 반복 가능 값 반복
    for amnt in amenities_list:
        is_changed = False

        for key,value in amenity_keywords.items():
            for key_word in value:
                if key_word.lower() in amnt.lower():
                    new_list.append(key)
                    is_changed = True
                    break
        
        if not is_changed:
            new_list.append('기타')
        
    return list(set(new_list))


amenities_ser2 = amenities_ser2.apply(classify_amenities)

amenities_ser2

0        [clothes_dryer, hair_dryer, fireplace, wifi, h...
1        [clothes_dryer, hair_dryer, fireplace, wifi, h...
2        [clothes_dryer, hair_dryer, wifi, kitchen, was...
3        [clothes_dryer, iron, hair_dryer, wifi, free_p...
4        [clothes_dryer, hair_dryer, iron, wifi, free_p...
                               ...                        
23002    [clothes_dryer, hair_dryer, wifi, heating, was...
23003    [clothes_dryer, hair_dryer, wifi, heating, was...
23004    [clothes_dryer, hair_dryer, wifi, washer, work...
23005    [clothes_dryer, iron, hair_dryer, wifi, heatin...
23006    [clothes_dryer, hair_dryer, iron, wifi, heatin...
Name: amenities, Length: 23007, dtype: object

In [48]:
def has_amenity(amenity_list, target):
    return 1 if target in amenity_list else 0

for k in amenity_keywords.keys():
    df_listings['amnt_'+ k] = amenities_ser2.apply(lambda x: has_amenity(x, k))

In [None]:
df_listings = df_listings.drop('amenities', axis=1)



In [53]:
df_listings

Unnamed: 0,neighbourhood_cleansed,property_type,room_type,accommodates,bathrooms,bedrooms,beds,price,number_of_reviews,first_review,...,amnt_ev_charger,amnt_crib,amnt_king_bed,amnt_gym,amnt_breakfast,amnt_fireplace,amnt_smoking_allowed,amnt_waterfront,amnt_smoke_alarm,amnt_carbon_monoxide_alarm
0,Bunkyo Ku,Entire rental unit,Entire home/apt,3,1.00,0.00,2.00,100000.00,0,,...,0,0,0,0,0,1,0,0,1,0
1,Bunkyo Ku,Entire rental unit,Entire home/apt,8,1.00,2.00,5.00,100000.00,0,,...,0,0,0,0,0,1,0,0,1,0
2,Taito Ku,Entire serviced apartment,Entire home/apt,4,2.00,2.00,2.00,14550.00,24,2023-12-04,...,0,0,0,0,0,0,0,0,1,1
3,Kita Ku,Entire home,Entire home/apt,8,1.00,3.00,6.00,22012.00,15,2024-01-10,...,0,0,0,0,0,0,0,0,1,1
4,Sumida Ku,Entire rental unit,Entire home/apt,3,1.00,1.00,1.00,15429.00,17,2023-12-26,...,0,1,0,0,0,1,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23002,Shinjuku Ku,Entire rental unit,Entire home/apt,4,1.00,1.00,2.00,12306.00,0,,...,0,0,0,0,0,0,0,0,1,0
23003,Shibuya Ku,Entire home,Entire home/apt,11,1.00,4.00,7.00,40732.00,0,,...,0,0,0,0,0,0,0,0,0,0
23004,Minato Ku,Entire home,Entire home/apt,7,1.00,3.00,4.00,64436.00,0,,...,0,0,0,0,0,0,0,0,1,1
23005,Shibuya Ku,Entire rental unit,Entire home/apt,2,1.00,1.00,1.00,17046.00,0,,...,0,0,0,0,0,0,0,0,1,0


In [50]:
# def has_amenity(amenity_list, target):
#     return 1 if target in amenity_list else 0

# # has_kitchen = amenities_ser2.apply(has_amenity)
# # 또는 다른 값으로
# has_wifi = amenities_ser2.apply(lambda x: has_amenity(x, '대형 욕조'))

# has_wifi

In [51]:
# # 방법 1: apply() 메서드 사용
# def is_contained(category_list, target_value='주방'):
#     return target_value in category_list

# # 사용 예시
# kitchen_mask = amenities_ser2.apply(lambda x: '주방' in x)
# print("주방이 있는 숙소:")
# print(kitchen_mask.sum(), "개")

# # 방법 2: str.contains() 사용 (문자열로 변환 후)
# # amenities_ser2.astype(str).str.contains('주방')

# # 방법 3: 리스트 컴프리헨션
# # [True if '주방' in item else False for item in amenities_ser2]

In [52]:
# # 여러 조건을 동시에 확인
# def check_multiple_amenities(amenity_list, target_amenities=['주방', '와이파이', '무료 주차 공간']):
#     """여러 편의시설이 모두 있는지 확인"""
#     return all(amenity in amenity_list for amenity in target_amenities)

# # 주방, 와이파이, 무료 주차가 모두 있는 숙소
# all_three_mask = amenities_ser2.apply(lambda x: check_multiple_amenities(x))
# print(f"주방, 와이파이, 무료 주차가 모두 있는 숙소: {all_three_mask.sum()}개")

# # 특정 편의시설별 개수 확인
# amenity_counts = {}
# for amenity in amenity_keywords.keys():
#     count = amenities_ser2.apply(lambda x: amenity in x).sum()
#     amenity_counts[amenity] = count

# print("\n편의시설별 개수:")
# for amenity, count in sorted(amenity_counts.items(), key=lambda x: x[1], reverse=True):
#     print(f"{amenity}: {count}개")