**FEATURE ENGINEERING:**

IMPORTING LIBRARIES:

In [171]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [173]:
# Notebook display settings: never truncate columns, print at most 100 rows,
# and allow 1000 characters per output line before wrapping.
pd.set_option(
    "display.max_columns", None,
    "display.max_rows", 100,
    "display.width", 1000,
)

In [174]:
# Load the filtered Geneva Airbnb listings produced by an earlier step.
# NOTE(review): "/content/..." is an absolute path (looks like a Colab mount) — confirm before running elsewhere.
data_filter = pd.read_csv(filepath_or_buffer="/content/geneva_airbnb_filtered.csv")

In [175]:
# (rows, columns) of the freshly loaded frame.
data_filter.shape

(1957, 67)

In [176]:
# Sub-frame holding only the object-dtype (string-like) columns, used to
# decide which features need encoding and which can be dropped.
data_obj = data_filter.select_dtypes(include="object")

In [177]:
# Names of the object-dtype columns selected above.
data_obj.columns


Index(['listing_url', 'last_scraped', 'source', 'name', 'description', 'picture_url', 'host_url', 'host_since', 'host_location', 'host_response_time', 'host_is_superhost', 'host_thumbnail_url', 'host_picture_url', 'host_verifications', 'host_has_profile_pic', 'host_identity_verified', 'neighbourhood_cleansed', 'property_type', 'room_type', 'bathrooms_text', 'amenities', 'calendar_last_scraped', 'instant_bookable'], dtype='object')

The next step is to encode the categorical features. Encoding every categorical feature is unnecessary, so we drop the following features:

listing_url, picture_url, host_url, host_thumbnail_url, host_picture_url — no predictive value  
name, description — too textual and unstructured (could be used for NLP)  
host_verifications  
last_scraped, calendar_last_scraped, host_since — only required to extract “host experience in years”  
source and bathrooms_text  
id, host_id and host_location are dropped as well (see `columns_to_drop` below).

The features to be encoded are host_response_time, host_is_superhost, host_has_profile_pic, host_identity_verified, neighbourhood_cleansed, property_type, room_type and instant_bookable.

In [178]:
# Categorical columns that are kept and encoded later in this notebook.
# The order is the order in which their values are printed in the next cell.
categorical_features = [
    "host_response_time",
    # 't'/'f' host flags
    "host_is_superhost",
    "host_has_profile_pic",
    "host_identity_verified",
    # listing descriptors
    "neighbourhood_cleansed",
    "property_type",
    "room_type",
    # 't'/'f' listing flag
    "instant_bookable",
]


In [179]:
# Show the distinct values of each categorical feature to decide between
# binary mapping and one-hot encoding.
for col in categorical_features:
    distinct_values = data_filter[col].unique()
    print(f"{col}: {distinct_values} unique values")


host_response_time: ['within a few hours' 'within an hour' 'within a day' 'Unknown'
 'a few days or more'] unique values
host_is_superhost: ['f' 't'] unique values
host_has_profile_pic: ['t' 'f'] unique values
host_identity_verified: ['t' 'f'] unique values
neighbourhood_cleansed: ['Commune de Genève' 'Versoix' 'Genthod' 'Meinier' 'Chêne-Bougeries'
 'Carouge' 'Pregny-Chambésy' 'Plan-les-Ouates' 'Vernier' 'Veyrier'
 'Vandoeuvres' 'Confignon' 'Thônex' 'Meyrin' 'Collonge-Bellerive'
 'Cologny' 'Bellevue' 'Onex' 'Lancy' 'Grand-Saconnex' 'Soral'
 'Chêne-Bourg' 'Bardonnex' 'Hermance' 'Anières' 'Céligny' 'Russin'
 'Troinex' 'Presinge' 'Bernex' 'Laconnex' 'Corsier' 'Satigny' 'Avully'
 'Collex-Bossy' 'Jussy' 'Chancy' 'Dardagny' 'Choulex' 'Cartigny'
 'Puplinge'] unique values
property_type: ['Private room in rental unit' 'Entire rental unit' 'Entire loft'
 'Entire condo' 'Private room in home' 'Private room in condo'
 'Entire guesthouse' 'Entire home' 'Shared room in loft'
 'Entire serviced apart

In [180]:
columns_to_drop = [
    'id','host_id','listing_url', 'last_scraped', 'source', 'name', 'description', 'picture_url',
    'host_url', 'host_since', 'host_location', 'host_thumbnail_url', 'host_picture_url',
    'host_verifications', 'calendar_last_scraped', 'bathrooms_text'
]


In [181]:
# Remove the columns listed in `columns_to_drop`.
# `columns=` already selects the column axis, so the redundant `axis=1` is
# omitted; rebinding the result (instead of `inplace=True`) keeps the step
# chainable. A missing column still raises KeyError, so typos in the list
# are not silently ignored.
data_filter = data_filter.drop(columns=columns_to_drop)

In [182]:
# Sub-frame (not a list of names) containing only the int64/float64 columns
# that remain after the drop above.
numeric_cols = data_filter.select_dtypes(include=["int64", "float64"])

**ENCODING CATEGORICAL VARIABLES:**

In [184]:
# Two-valued flag columns that are mapped to 1/0 in the next cell.
binary_cols = [
    "host_is_superhost",
    "host_has_profile_pic",
    "host_identity_verified",
    "instant_bookable",
]


In [185]:
# Lower-cased string value -> integer flag. Any value not listed here
# (including missing values, which become the string 'nan') maps to NaN.
flag_to_int = {'t': 1, 'true': 1, 'f': 0, 'false': 0}

for col in binary_cols:
    # Skip flags that are not present in the frame.
    if col not in data_filter.columns:
        continue
    lowered = data_filter[col].astype(str).str.lower()
    data_filter[col] = lowered.map(flag_to_int)


**One-Hot Encoding:**

In [186]:
# Nominal (unordered) categorical features that get one indicator column per level.
one_hot_cols = ['room_type', 'property_type', 'neighbourhood_cleansed', 'host_response_time']

# drop_first=True omits the first level of every encoded feature, so each
# feature contributes (number of levels - 1) indicator columns.
data_filter = pd.get_dummies(
    data=data_filter,
    columns=one_hot_cols,
    drop_first=True,
)


In [187]:
# Cast every boolean column to int (True -> 1, False -> 0); all other
# columns keep their dtype. The result is a new frame, `data_final`.
bool_columns = data_filter.select_dtypes("bool").columns
data_final = data_filter.astype(dict.fromkeys(bool_columns, int))


In [188]:
# Remove the two remaining columns that are not used as features.
# errors='ignore' means a column that is already absent is skipped silently.
data_final.drop(
    columns=['amenities', 'has_availability'],
    errors='ignore',
    inplace=True,
)


In [189]:
# (rows, columns) of the fully encoded frame.
data_final.shape


(1957, 124)

In [191]:
# Preview the first five rows of the encoded frame (head() defaults to n=5).
data_final.head()

Unnamed: 0,host_response_rate,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,host_has_profile_pic,host_identity_verified,latitude,longitude,accommodates,bathrooms,bedrooms,beds,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,availability_eoy,number_of_reviews_ly,estimated_occupancy_l365d,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,room_type_Private room,room_type_Shared room,property_type_Camper/RV,property_type_Casa particular,property_type_Entire cabin,property_type_Entire condo,property_type_Entire cottage,property_type_Entire guest suite,property_type_Entire guesthouse,property_type_Entire home,property_type_Entire loft,property_type_Entire place,property_type_Entire rental unit,property_type_Entire serviced apartment,property_type_Entire townhouse,property_type_Entire vacation home,property_type_Entire villa,property_type_Houseboat,property_type_Private room in bed and breakfast,property_type_Private room in casa particular,property_type_Private room in chalet,property_type_Private room in condo,property_type_Private room in guest suite,property_type_Private room in home,property_type_Private room in loft,property_type_Private room in rental unit,property_type_Private room in serviced apartment,property_type_Private room in townhouse,property_type_Private room in villa,property_type_Room in aparthotel,property_type_Room in boutique hotel,property_type_Room in 
hotel,property_type_Shared room in loft,property_type_Shared room in rental unit,property_type_Yurt,neighbourhood_cleansed_Avully,neighbourhood_cleansed_Bardonnex,neighbourhood_cleansed_Bellevue,neighbourhood_cleansed_Bernex,neighbourhood_cleansed_Carouge,neighbourhood_cleansed_Cartigny,neighbourhood_cleansed_Chancy,neighbourhood_cleansed_Choulex,neighbourhood_cleansed_Chêne-Bougeries,neighbourhood_cleansed_Chêne-Bourg,neighbourhood_cleansed_Collex-Bossy,neighbourhood_cleansed_Collonge-Bellerive,neighbourhood_cleansed_Cologny,neighbourhood_cleansed_Commune de Genève,neighbourhood_cleansed_Confignon,neighbourhood_cleansed_Corsier,neighbourhood_cleansed_Céligny,neighbourhood_cleansed_Dardagny,neighbourhood_cleansed_Genthod,neighbourhood_cleansed_Grand-Saconnex,neighbourhood_cleansed_Hermance,neighbourhood_cleansed_Jussy,neighbourhood_cleansed_Laconnex,neighbourhood_cleansed_Lancy,neighbourhood_cleansed_Meinier,neighbourhood_cleansed_Meyrin,neighbourhood_cleansed_Onex,neighbourhood_cleansed_Plan-les-Ouates,neighbourhood_cleansed_Pregny-Chambésy,neighbourhood_cleansed_Presinge,neighbourhood_cleansed_Puplinge,neighbourhood_cleansed_Russin,neighbourhood_cleansed_Satigny,neighbourhood_cleansed_Soral,neighbourhood_cleansed_Thônex,neighbourhood_cleansed_Troinex,neighbourhood_cleansed_Vandoeuvres,neighbourhood_cleansed_Vernier,neighbourhood_cleansed_Versoix,neighbourhood_cleansed_Veyrier,host_response_time_a few days or more,host_response_time_within a day,host_response_time_within a few hours,host_response_time_within an hour
0,100.0,88.0,0,1,1,1,1,46.20198,6.15672,1,1.5,1.0,1.0,89.0,3,1125,3,3,1125,1125,3.0,1125.0,23,53,83,358,79,6,0,277,6,36,4.74,4.73,4.78,4.86,4.84,4.85,4.55,0,1,0,1,0,0.48,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
1,100.0,100.0,1,6,12,1,1,46.19964,6.1558,2,1.0,1.0,0.0,128.0,5,730,5,5,1125,1125,5.0,1125.0,20,40,70,78,92,8,0,78,7,80,4.91,4.98,4.89,4.97,4.99,4.96,4.81,1,2,2,0,0,0.61,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,100.0,100.0,1,2,2,1,1,46.28031,6.16833,3,1.0,1.0,4.0,70.0,7,360,7,7,1125,1125,7.0,1125.0,0,0,0,238,122,4,0,157,4,56,4.64,4.7,4.74,4.88,4.89,4.77,4.68,0,1,1,0,0,0.77,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1
3,100.0,90.0,1,2,3,1,1,46.18904,6.13908,2,1.0,1.0,1.0,55.0,2,1125,2,2,1125,1125,2.0,1125.0,4,5,22,223,58,8,0,142,7,48,4.85,4.81,4.88,4.95,4.9,4.71,4.71,0,2,0,2,0,0.38,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,100.0,92.0,0,2,2,1,1,46.1995,6.17308,2,1.5,1.0,1.0,100.0,2,14,2,2,1125,1125,2.0,1125.0,21,37,67,67,81,3,0,67,4,18,4.94,4.98,5.0,4.99,4.99,4.86,4.91,1,2,1,1,0,0.56,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [193]:
# Persist the encoded dataset to the working directory; index=False leaves
# the row index out of the file.
data_final.to_csv(path_or_buf="geneva_airbnb_final.csv", index=False)