# Economic Drivers of Airbnb Pricing in Los Angeles: An Examination of Crime, Demographics, and Geography

---

## Libraries

In [1]:
# Uncomment the line below to install the required packages.
# The %pip magic installs into the environment of the running kernel.
# %pip install -q pandas numpy matplotlib scipy shapely

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point
from functions import *

---

## Data Loading and Cleaning

In [3]:
# Inside Airbnb data for Los Angeles; the most recent snapshot used here is 2023-12-03.
base_url = "http://data.insideairbnb.com/united-states/ca/los-angeles"
latest_listings_url = f"{base_url}/2023-12-03/visualisations/listings.csv"

# merge_past_data comes from the local `functions` module
# (presumably it appends earlier snapshots -- confirm against its definition).
df = merge_past_data(pd.read_csv(latest_listings_url))

# Column names, non-null counts and dtypes of the combined frame
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 61654 entries, 0 to 42448
Data columns (total 19 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              61654 non-null  int64  
 1   name                            61654 non-null  object 
 2   host_id                         61654 non-null  int64  
 3   host_name                       61634 non-null  object 
 4   neighbourhood_group             61654 non-null  object 
 5   neighbourhood                   61654 non-null  object 
 6   latitude                        61654 non-null  float64
 7   longitude                       61654 non-null  float64
 8   room_type                       61654 non-null  object 
 9   price                           61654 non-null  int64  
 10  minimum_nights                  61654 non-null  int64  
 11  number_of_reviews               61654 non-null  int64  
 12  last_review                     

In [4]:
# Removing price anomalies
# Listings with a price below this threshold are treated as anomalies and dropped.
MIN_PRICE = 30

# Show how many listings fall under the threshold before removing them
display(len(df[df.price < MIN_PRICE]))

# .copy() makes the filtered frame independent of the original, so the in-place
# edits and column assignments in later cells do not raise SettingWithCopyWarning.
df = df[df.price >= MIN_PRICE].copy()

702

In [5]:
# Dropping columns that are not used in the rest of the analysis
unused_columns = [
    "id",
    "name",
    "host_id",
    "host_name",
    "last_review",
    "reviews_per_month",
    "number_of_reviews_ltm",
    "calculated_host_listings_count",
]
df.drop(columns=unused_columns, inplace=True)

In [6]:
# Creating new columns
# Natural log of the listing price
df["log_price"] = np.log(df["price"])

# 0/1 indicator columns, added in the order listed here
indicator_masks = {
    "has_reviews": df["number_of_reviews"].ne(0),
    "has_license": df["license"].notna(),
    "is_short_term": df["minimum_nights"].lt(30),
    "is_hol_hills_w": df["neighbourhood"].eq("Hollywood Hills West"),
}
for indicator_name, indicator_mask in indicator_masks.items():
    df[indicator_name] = np.where(indicator_mask, 1, 0)

# Creating coordinates: (longitude, latitude) tuples first, then shapely Points
df["coordinates"] = list(zip(df["longitude"], df["latitude"]))
df["coordinates"] = df["coordinates"].apply(Point)

# Renaming columns (done last because the indicators above read the original name)
df.rename(columns={"number_of_reviews": "n_reviews"}, inplace=True)