In [29]:
import pandas as pd

In [30]:
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
pd.set_option('display.max_colwidth', 100)
pd.options.mode.chained_assignment = None  # default='warn'

## Read file

In [31]:
listing_file = pd.read_csv('listings.csv', encoding='iso-8859-1', low_memory=False)

## Delete useless columns

In [56]:
cols = [c for c in listing_file.columns if c.lower()[-3:] != 'url']
listing = listing_file[cols].drop(
    columns=[
        'name', 'host_name', 'scrape_id', 'market', 'host_location', 'smart_location', 
        'country_code', 'country', 'city', 'state', 'neighbourhood_cleansed', 'notes',
        'neighbourhood_group_cleansed', 'neighbourhood', 'transit', 'calendar_last_scraped',
        'calendar_updated'
    ]
)

listing.shape

(24194, 73)

## Replace type with numbers

In [33]:
def make_dict(table, col_name: str) -> dict:
    a_dict = {}
    for b in range(len(table)):
        a_dict.update({table[col_name][b]:table.Num_Represent[b]})
        
    return a_dict

In [34]:
def replacing(target_col, target_dict):
    for i in range(len(target_col)):
        target_col[i] = target_dict[target_col[i]]

In [35]:
def make_table(col_name: str) -> pd.core.frame.DataFrame:
    grouping = listing.groupby([col_name])
    grouping_list = grouping.size().reset_index(name='counts')
    gp_col = grouping_list.sort_values(['counts'], ascending=False)
    
    replace_num = [i + 1 for i in range(len(grouping_list))]
    replace_num.sort(reverse=True)
    gp_col['Num_Represent'] = replace_num
    gp_col
    return gp_col

### Replace bed type

In [36]:
bed_gp_table = make_table('bed_type')
bed_gp_table

Unnamed: 0,bed_type,counts,Num_Represent
4,Real Bed,24051,5
2,Futon,69,4
3,Pull-out Sofa,54,3
0,Airbed,11,2
1,Couch,9,1


In [37]:
bed_dict = make_dict(bed_gp_table, 'bed_type')
bed_dict

{'Airbed': 2, 'Couch': 1, 'Futon': 4, 'Pull-out Sofa': 3, 'Real Bed': 5}

In [38]:
replacing(listing['bed_type'], bed_dict)

In [39]:
listing['bed_type']

0        5
1        4
2        5
3        5
4        5
        ..
24189    5
24190    5
24191    5
24192    5
24193    5
Name: bed_type, Length: 24194, dtype: object

### Replace room type

In [40]:
room_gp_table = make_table('room_type')
room_gp_table

Unnamed: 0,room_type,counts,Num_Represent
0,Entire home/apt,14868,4
2,Private room,8454,3
1,Hotel room,436,2
3,Shared room,436,1


In [41]:
room_dict = make_dict(room_gp_table, 'room_type')
room_dict

{'Entire home/apt': 4, 'Hotel room': 2, 'Private room': 3, 'Shared room': 1}

In [42]:
replacing(listing['room_type'], room_dict)

In [43]:
listing['room_type']

0        3
1        3
2        4
3        3
4        3
        ..
24189    1
24190    1
24191    1
24192    3
24193    4
Name: room_type, Length: 24194, dtype: object

### Replace property type

In [44]:
property_gp_table = make_table('property_type')
property_gp_table

Unnamed: 0,property_type,counts,Num_Represent
1,Apartment,13049,37
22,House,6352,36
33,Townhouse,1530,35
13,Condominium,559,34
30,Serviced apartment,516,33
36,Villa,381,32
19,Guesthouse,323,31
18,Guest suite,305,30
14,Cottage,214,29
3,Bed and breakfast,199,28


In [45]:
property_dict = make_dict(property_gp_table, 'property_type')
property_dict

{'Aparthotel': 17,
 'Apartment': 37,
 'Barn': 9,
 'Bed and breakfast': 28,
 'Boat': 13,
 'Boutique hotel': 25,
 'Bungalow': 27,
 'Cabin': 21,
 'Camper/RV': 18,
 'Campsite': 14,
 'Casa particular (Cuba)': 2,
 'Castle': 6,
 'Chalet': 8,
 'Condominium': 34,
 'Cottage': 29,
 'Dome house': 10,
 'Earth house': 16,
 'Farm stay': 24,
 'Guest suite': 30,
 'Guesthouse': 31,
 'Hostel': 23,
 'Hotel': 19,
 'House': 36,
 'Houseboat': 5,
 'Hut': 11,
 'Kezhan (China)': 4,
 'Loft': 26,
 'Minsu (Taiwan)': 3,
 'Nature lodge': 12,
 'Other': 22,
 'Serviced apartment': 33,
 'Tent': 15,
 'Tiny house': 20,
 'Townhouse': 35,
 'Train': 1,
 'Treehouse': 7,
 'Villa': 32}

In [46]:
replacing(listing['property_type'], property_dict)

In [47]:
listing['property_type']

0        36
1        37
2        37
3        36
4        36
         ..
24189    36
24190    36
24191    36
24192    35
24193    37
Name: property_type, Length: 24194, dtype: object

## Convert string

### Delete '$'

In [53]:
for c in listing:
    try: 
        listing[c] = listing[c].str.replace('$', '')
    except AttributeError:
        pass

### Convert f to 0 and t to 1

In [54]:
amount = len(listing)
for tf in listing:
    if 'is' in tf:
        for i in range(amount):
            if listing[tf][i] == 'f':
                listing[tf][i] = 0
            elif listing[tf][i] == 't':
                listing[tf][i] = 1
            else:
                listing[tf][i] = 0


In [55]:
listing

Unnamed: 0,id,last_scraped,experiences_offered,host_id,host_since,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,street,zipcode,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,square_feet,price,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,9835,2019/09/15,none,33057,2009/08/21,0,0,0.0,0,,0.0,0.0,"['email', 'phone', 'reviews']",f,f,"Bulleen, VIC, Australia",3105,-37.77268,145.09213,0,36,3,2,1.0,1.0,2.0,5,"{Internet,Wifi,""Pets live on this property"",Dog(s)}",,60.00,,,,,1,22.00,1,365,1,1,365,365,1.0,365.0,26 months ago,t,30,60,90,365,2019/09/15,4,0,2011/05/24,2015/09/12,90.0,9.0,10.0,10.0,10.0,9.0,9.0,f,,0.0,f,0,strict_14_with_grace_period,f,f,0,0,0,0,0.04
1,10803,2019/09/15,none,38901,2009/09/16,within an hour,100%,0.0,0,Brunswick,0.0,0.0,"['email', 'phone', 'reviews', 'jumio', 'government_id']",t,t,"Brunswick East, VIC, Australia",3057,-37.76651,144.98074,0,37,3,2,1.0,1.0,1.0,4,"{TV,Internet,Wifi,""Air conditioning"",Kitchen,Heating,""Family/kid friendly"",Washer,""Smoke detecto...",226.0,35.00,200.00,803.00,0.00,11.00,1,15.00,3,30,3,3,30,30,3.0,30.0,2 months ago,t,12,19,28,204,2019/09/15,129,30,2013/01/12,2019/08/05,89.0,10.0,9.0,10.0,9.0,9.0,9.0,f,,0.0,t,0,strict_14_with_grace_period,t,t,0,0,0,0,1.59
2,12936,2019/09/14,none,50121,2009/10/31,within an hour,100%,0.0,0,St Kilda,0.0,0.0,"['email', 'phone', 'google', 'reviews', 'jumio', 'government_id', 'work_email']",t,t,"St Kilda, VIC, Australia",3182,-37.85976,144.97737,0,37,4,2,1.0,1.0,1.0,5,"{TV,Internet,Wifi,""Air conditioning"",Kitchen,""Free parking on premises"",Elevator,""Buzzer/wireles...",0.0,159.00,1253.00,4452.00,450.00,100.00,2,0.00,3,27,3,3,27,27,3.0,27.0,4 days ago,t,15,23,48,49,2019/09/14,30,17,2010/08/04,2019/07/17,92.0,9.0,9.0,10.0,10.0,9.0,9.0,f,,0.0,f,0,strict_14_with_grace_period,f,f,0,0,0,0,0.27
3,15246,2019/09/14,none,59786,2009/12/03,0,0,0.0,0,Thornbury,0.0,0.0,"['email', 'phone', 'facebook', 'reviews', 'jumio', 'offline_government_id', 'government_id', 'wo...",t,t,"Thornbury, VIC, Australia",3071,-37.75897,144.98923,0,36,3,1,1.5,1.0,1.0,5,"{TV,Internet,Wifi,Kitchen,""Free parking on premises"",Breakfast,Heating,Washer,""Smoke detector"",""...",0.0,49.00,250.00,920.00,200.00,15.00,1,20.00,2,200,2,2,200,200,2.0,200.0,20 months ago,t,0,0,0,0,2019/09/14,29,0,2010/12/31,2017/05/15,94.0,9.0,9.0,10.0,10.0,9.0,9.0,f,,0.0,f,0,moderate,f,f,0,0,0,0,0.27
4,16760,2019/09/14,none,65090,2009/12/22,within a few hours,100%,0.0,0,St Kilda East,0.0,0.0,"['email', 'phone', 'facebook', 'reviews', 'jumio', 'offline_government_id', 'government_id']",t,t,"St Kilda East, VIC, Australia",3183,-37.86453,144.99224,0,36,3,2,1.0,1.0,1.0,5,"{Internet,Wifi,Heating,Washer,""Smoke detector"",""Carbon monoxide detector"",Essentials,Shampoo,Han...",,68.00,400.00,1900.00,,15.00,1,20.00,1,90,1,1,90,90,1.0,90.0,6 weeks ago,t,23,53,83,302,2019/09/14,75,15,2011/12/10,2019/05/29,90.0,9.0,10.0,10.0,10.0,10.0,9.0,f,,0.0,f,0,moderate,f,f,0,0,0,0,0.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24189,38610335,2019/09/14,none,285240939,2019/08/13,within a few hours,100%,0.0,0,Yarraville/Seddon,0.0,0.0,"['email', 'phone', 'jumio', 'offline_government_id', 'selfie', 'government_id', 'identity_manual']",t,f,"Yarraville, VIC, Australia",3013,-37.81227,144.89830,0,36,1,1,1.5,1.0,1.0,5,"{TV,Wifi,Kitchen,""Free parking on premises"",Heating,""Suitable for events"",Washer,Dryer,""Smoke de...",,35.00,,,,,1,0.00,5,1125,5,5,1125,1125,5.0,1125.0,today,t,30,60,90,365,2019/09/14,0,0,,,,,,,,,,f,,0.0,f,0,flexible,f,f,0,0,0,0,
24190,38610345,2019/09/14,none,160772288,2017/11/30,within a few hours,96%,0.0,0,Footscray,0.0,0.0,"['email', 'phone']",t,f,"Flemington, VIC, Australia",3031,-37.78645,144.93335,0,36,1,4,2.0,1.0,1.0,5,"{TV,Wifi,Kitchen,""Free parking on premises"",""Smoking allowed"",Heating,Washer,""Smoke detector"",""F...",,25.00,,,200.00,25.00,1,24.00,14,1125,14,14,1125,1125,14.0,1125.0,today,t,30,60,90,365,2019/09/14,0,0,,,,,,,,,,f,,0.0,f,0,flexible,f,f,0,0,0,0,
24191,38610653,2019/09/14,none,285240939,2019/08/13,within a few hours,100%,0.0,0,Yarraville/Seddon,0.0,0.0,"['email', 'phone', 'jumio', 'offline_government_id', 'selfie', 'government_id', 'identity_manual']",t,f,"Yarraville, VIC, Australia",3013,-37.81227,144.89830,0,36,1,1,1.5,1.0,1.0,5,"{TV,Wifi,Kitchen,""Free parking on premises"",Heating,""Suitable for events"",Washer,Dryer,""Smoke de...",,35.00,,,,,1,0.00,5,1125,5,5,1125,1125,5.0,1125.0,today,t,26,56,86,361,2019/09/14,0,0,,,,,,,,,,f,,0.0,f,0,flexible,f,f,0,0,0,0,
24192,38612012,2019/09/15,none,50268144,2015/12/01,0,0,0.0,0,,0.0,0.0,"['email', 'phone', 'facebook', 'reviews', 'work_email']",t,f,"Richmond, VIC, Australia",3121,-37.81287,145.00256,0,35,3,1,1.0,1.0,1.0,5,"{TV,Wifi,Kitchen,""Free parking on premises"",""Free street parking"",Heating,Washer,""Smoke detector...",,65.00,,,200.00,70.00,1,0.00,2,1125,2,2,1125,1125,2.0,1125.0,today,t,28,58,88,116,2019/09/15,0,0,,,,,,,,,,f,,0.0,f,0,flexible,f,f,0,0,0,0,
