In [1]:
# Import dependencies
import pandas as pd
import ast                          # importing abstract syntax tree to allow the parsing of string based lists as lists  

### **Import and Limit CSV**

In [2]:
# Location of the cleaned listings export
csv_filepath = "../../data/processed/cleaned_listings.csv"

# Load the full cleaned listings table
listings_cleaned = pd.read_csv(csv_filepath)

# Keep only the listing id and its amenities, as an independent copy so that
# later edits to listings_amenities never write back into listings_cleaned
listings_amenities = listings_cleaned.loc[:, ['id', 'amenities']].copy()

# Preview the trimmed table
listings_amenities

Unnamed: 0,id,amenities
0,696407278180533419,"[""Hair dryer"", ""Central heating"", ""Paid parkin..."
1,696457318817239920,"[""Hair dryer"", ""Central heating"", ""Shampoo"", ""..."
2,696460661218975906,"[""Hair dryer"", ""Central heating"", ""Shampoo"", ""..."
3,696602542310304703,"[""Hair dryer"", ""Room-darkening shades"", ""Keypa..."
4,696638535490228478,"[""Pool table"", ""Dedicated workspace"", ""Exercis..."
...,...,...
15106,1366954101885217722,"[""Heating"", ""Shampoo"", ""Iron"", ""Body soap"", ""B..."
15107,1366970333908472070,"[""Hair dryer"", ""Room-darkening shades"", ""Eleva..."
15108,1366999672978459092,"[""Pool table"", ""Fire extinguisher"", ""Outdoor d..."
15109,1367290343089381102,"[""Keypad"", ""Heating"", ""Iron"", ""Body soap"", ""Be..."


### **Apply Abstract Syntax Tree**
Allows the stringified list in amenities to be turned into an accessible list

In [3]:
# Apply AST
# Parse each stringified list (e.g. '["Hair dryer", "Iron"]') into a real Python list.
# Only str values are parsed: the column is overwritten in place, so on a second
# run of this cell the values are already lists, and ast.literal_eval raises on
# anything that is not a string. Non-string values (already-parsed lists, missing
# values) are passed through unchanged, which makes the cell safe to re-run.
listings_amenities['amenities'] = listings_amenities['amenities'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

### **Explode Dataset**

In [4]:
# Expand the amenities lists: every list element gets its own row, and the
# listing's id and index label are repeated on each of those rows
exploded_amenities = listings_amenities.explode(column='amenities')

# Preview the long-format (one amenity per row) table
exploded_amenities

Unnamed: 0,id,amenities
0,696407278180533419,Hair dryer
0,696407278180533419,Central heating
0,696407278180533419,Paid parking garage on premises
0,696407278180533419,Shampoo
0,696407278180533419,Iron
...,...,...
15110,1367402901303182875,Fire pit
15110,1367402901303182875,BBQ grill
15110,1367402901303182875,Washer
15110,1367402901303182875,Pool


In [None]:
# List of Amenities (to KEEP)
# Amenity names are grouped into three tiers; each name must match the text in
# the exploded 'amenities' column exactly (comparison is by string equality).

# Tier 1: basic amenities
basic_amenities = [
    "Smoke alarm",
    "Carbon monoxide alarm",
    "Hot water",
    "Heating",
    "Essentials",
    "Bed linens",
    "Shampoo",
    "Shower gel",
    "Body soap",
    "Hangers",
    "Hair dryer",
    "Conditioner",
    "Fire extinguisher",
    "Cleaning products",
]

# Tier 2: convenience amenities
convenience_amenities = [
    "Wifi",
    "Kitchen",
    "Microwave",
    "Refrigerator",
    "Freezer",
    "Cooking basics",
    "Stove",
    "Oven",
    "Iron",
    "Air conditioning",
    "TV",
    "Washer",
    "Dishwasher",
    "Coffee maker",
    "Hot water kettle",
    "Self check-in",
    "Elevator",
    "Dedicated workspace",
    "Room-darkening shades",
    "Free parking on premises",
    "Dishes and silverware",
    "Dining table",
    "Toaster",
    "Extra pillows and blankets",
    "First aid kit",
    "Wine glasses",
    "Bathtub",
]

# Tier 3: special amenities
special_amenities = [
    "Hot tub",
    "Pool",
    "Private entrance",
    "BBQ grill",
    "Fire pit",
    "Outdoor dining area",
    "Patio or balcony",
    "Gym",
    "Lake access",
    "Indoor fireplace",
    "Sound system",
    "Game console",
    "EV charger",
]

# Single flat list holding all three tiers, in tier order
complete_amenities = [*basic_amenities, *convenience_amenities, *special_amenities]

In [6]:
# Standardized Amenities
# Retain every exploded row whose amenity is NOT one of the names in
# complete_amenities (same selection as the previous row-by-row loop, missing
# values included), using one vectorized membership test instead of iterrows().
# Benefits over the loop:
#   * no Python-level iteration over every exploded row
#   * the 'id' / 'amenities' columns and their dtypes are kept as they are in
#     exploded_amenities (iterrows() does not preserve dtypes across rows)
#   * when no row qualifies, the result is an empty frame that still has its
#     columns, so amenities_cleaned['amenities'] keeps working
# NOTE(review): the list cell labels complete_amenities "to KEEP", yet this
# cell retains the rows that are absent from it — confirm that this is the
# intended direction of the filter.
amenities_cleaned = exploded_amenities.loc[
    ~exploded_amenities['amenities'].isin(complete_amenities)
].copy()

In [9]:
# Count how often each remaining amenity occurs (value_counts sorts the counts
# in descending order) and keep the first 20 entries
top_amenities = (
    amenities_cleaned['amenities']
    .value_counts()
    .iloc[:20]
)

top_amenities

amenities
Dishes and silverware                    11548
Fire extinguisher                         9219
Cleaning products                         8268
Long term stays allowed                   8171
Dining table                              8133
Freezer                                   7751
Toaster                                   7602
Extra pillows and blankets                7307
First aid kit                             6984
Shower gel                                6980
Oven                                      6726
Wine glasses                              6674
Bathtub                                   6360
Stove                                     6029
Conditioner                               6016
Coffee                                    5694
Central heating                           5556
Exterior security cameras on property     4809
Free dryer – In unit                      4541
Dryer                                     4369
Name: count, dtype: int64