In [1]:
# Import Dependencies
import pandas as pd
import json

### **Import and Display CSV**

In [2]:
# Location of the cleaned listings export, relative to this notebook
csv_filepath = "../../data/processed/cleaned_listings.csv"

# Load the listings into a DataFrame, then show it as the cell's output
listings_csv = pd.read_csv(filepath_or_buffer=csv_filepath)
listings_csv

Unnamed: 0,id,latitude,longitude,downtown_distance,downtown_zone,neighbourhood,neighbourhood_freq,luxury_neighborhood_flag,room_type,property_type,...,bathrooms_count,amenities,min_stay,allow_long_term,price,number_of_reviews_ly,host_is_superhost,host_listings_count,estimated_occupancy_l365d,estimated_revenue_l365d
0,696407278180533419,43.670920,-79.395190,3.215592,Downtown,Annex,288,0,Entire home/apt,condo,...,1.0,"[""Hair dryer"", ""Central heating"", ""Paid parkin...",Short-term,0,450.0,2,0,7,56,25200.0
1,696457318817239920,43.655704,-79.399910,1.784751,City Center,Kensington-Chinatown,219,0,Private room,rental unit,...,1.0,"[""Hair dryer"", ""Central heating"", ""Shampoo"", ""...",Short-term,0,78.0,2,1,8,112,8736.0
2,696602542310304703,43.648778,-79.401183,1.325140,City Center,Kensington-Chinatown,219,0,Entire home/apt,rental unit,...,1.0,"[""Hair dryer"", ""Room-darkening shades"", ""Keypa...",Short-term,0,132.0,4,1,1,224,29568.0
3,696973520016945803,43.612110,-79.539850,12.753377,Outer City,Alderwood,37,0,Entire home/apt,rental unit,...,1.0,"[""Hair dryer"", ""Heating"", ""Shampoo"", ""Body soa...",Short-term,0,81.0,12,0,1,60,4860.0
4,697004718610327555,43.735735,-79.480703,12.802003,Outer City,Downsview-Roding-CFB,80,0,Entire home/apt,townhouse,...,2.5,"[""Room-darkening shades"", ""Keypad"", ""Elevator""...",Short-term,0,236.0,15,0,1,90,21240.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9609,1355385238383046824,43.645749,-79.393083,0.595242,City Center,Waterfront Communities-The Island,1730,1,Entire home/apt,rental unit,...,1.0,"[""Hair dryer"", ""Elevator"", ""Central heating"", ...",Short-term,0,170.0,0,1,1,24,4080.0
9610,1362331480938692703,43.649540,-79.388640,0.781578,City Center,Waterfront Communities-The Island,1730,1,Entire home/apt,home,...,1.0,"[""Hair dryer"", ""Cleaning available during stay...",Short-term,0,145.0,0,0,1,6,870.0
9611,1363197896119450651,43.653270,-79.396790,1.419694,City Center,Kensington-Chinatown,219,0,Entire home/apt,home,...,1.0,"[""Hair dryer"", ""Keypad"", ""Mini fridge"", ""Heati...",Short-term,0,114.0,0,1,5,6,684.0
9612,1364280421231205552,43.705119,-79.272289,11.558048,Outer City,Clairlea-Birchmount,51,0,Private room,home,...,2.0,"[""Hair dryer"", ""Room-darkening shades"", ""Firep...",Short-term,1,55.0,0,0,2,6,330.0


### **Calculate Lookup Values**

In [3]:
# Number of listings in every (neighbourhood, property_type, room_type) group
counts = (
    listings_csv
    .groupby(['neighbourhood', 'property_type', 'room_type'])
    .size()
    .reset_index(name='count')
)

# Attach each group's size to its listings (merge on the three group keys)
listing_base_categories = listings_csv.merge(
    counts,
    on=['neighbourhood', 'property_type', 'room_type'],
)

# Keep only the listings whose group contains at least 10 listings
filtered_base_categories = listing_base_categories.loc[
    listing_base_categories['count'].ge(10)
]

In [4]:
# 10th-percentile price per neighbourhood / room type / property type,
# computed from the count-filtered listings only
neighbourhood_min_baseline = (
    filtered_base_categories
    .groupby(['neighbourhood', 'room_type', 'property_type'])['price']
    .quantile(0.10)
    .reset_index()
)

# 10th-percentile price per room type / property type across all
# neighbourhoods, computed from the unfiltered merged listings
global_min_baseline = (
    listing_base_categories
    .groupby(['room_type', 'property_type'])['price']
    .quantile(0.10)
    .reset_index()
)

In [5]:
# Baselines are set 10% below the 10th-percentile price so that they do not
# affect the prediction model. Defined once so both loops stay in sync.
BASELINE_DISCOUNT = 0.9

# Nested output: {neighbourhood: {"room_type|property_type": baseline price}}
lookup_dict = {}

# Neighbourhood-specific baselines
for _, row in neighbourhood_min_baseline.iterrows():
    key = f"{row['room_type']}|{row['property_type']}"
    adjusted_price = round(row['price'] * BASELINE_DISCOUNT, 2)
    lookup_dict.setdefault(row['neighbourhood'], {})[key] = adjusted_price

# The set of neighbourhoods does not change while filling, so compute it once
# instead of rescanning the column for every global row.
all_neighbourhoods = listing_base_categories['neighbourhood'].unique()

# Fill every room/property combination a neighbourhood is missing with the
# global baseline; existing neighbourhood-specific values are never overwritten.
for _, row in global_min_baseline.iterrows():
    global_key = f"{row['room_type']}|{row['property_type']}"
    adjusted_price = round(row['price'] * BASELINE_DISCOUNT, 2)

    for neighbourhood in all_neighbourhoods:
        lookup_dict.setdefault(neighbourhood, {}).setdefault(global_key, adjusted_price)

### **Check and Verify Price Lookup Dict**

In [6]:
# Display the nested {neighbourhood: {"room_type|property_type": price}} lookup for inspection
lookup_dict

{'Agincourt North': {'Private room|home': 35.1,
  'Entire home/apt|aparthotel': 240.3,
  'Entire home/apt|bungalow': 76.5,
  'Entire home/apt|cave': 55.8,
  'Entire home/apt|condo': 82.8,
  'Entire home/apt|cottage': 236.88,
  'Entire home/apt|farm stay': 54.0,
  'Entire home/apt|guest suite': 63.0,
  'Entire home/apt|guesthouse': 72.0,
  'Entire home/apt|home': 73.8,
  'Entire home/apt|island': 178.2,
  'Entire home/apt|loft': 97.29,
  'Entire home/apt|place': 102.96,
  'Entire home/apt|rental unit': 70.2,
  'Entire home/apt|serviced apartment': 85.86,
  'Entire home/apt|shipping container': 409.5,
  'Entire home/apt|tiny home': 43.2,
  'Entire home/apt|townhouse': 95.76,
  'Entire home/apt|vacation home': 92.52,
  'Entire home/apt|villa': 408.42,
  'Private room|barn': 70.2,
  'Private room|bed and breakfast': 50.58,
  'Private room|boutique hotel': 148.14,
  'Private room|bungalow': 37.8,
  'Private room|casa particular': 32.22,
  'Private room|condo': 45.9,
  'Private room|cottage'

In [7]:
# Write the price lookup to the results folder as JSON with 2-space indentation
with open('../../results/baseline_pricing.json', mode='w') as json_file:
    json.dump(obj=lookup_dict, fp=json_file, indent=2)