In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Global seaborn look applied to every figure drawn later in the notebook
plot_theme = {'style': 'whitegrid', 'palette': 'muted', 'font_scale': 1.1}
sns.set(**plot_theme)

# Read the rentals CSV into a DataFrame and preview the first rows
data_file = 'cleaned_property_rentals.csv'
df = pd.read_csv(data_file)
df.head()

Unnamed: 0,latitude,longitude,property_type,room_type,bathrooms,bedrooms,minimum_nights,price
0,37.76931,-122.43386,Apartment,Entire home/apt,1,1,1,170.0
1,37.75402,-122.45805,House,Private room,1,1,1,99.0
2,37.74511,-122.42102,Apartment,Entire home/apt,1,2,30,235.0
3,37.76669,-122.4525,Apartment,Private room,4,1,32,65.0
4,37.76487,-122.45183,Apartment,Private room,4,1,32,65.0


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [3]:
# Split the frame into predictors (X) and target (y).
# 'Unnamed: 0' is the row index that was saved into the CSV and read back as
# an ordinary column (visible in the df.head() preview). It carries no signal
# and is absent from any new listing passed to predict(), so it is dropped
# together with the target. errors='ignore' keeps this cell working if the
# index column has already been removed upstream.
X = df.drop(columns=['price', 'Unnamed: 0'], errors='ignore')
y = df['price']


In [4]:
# Partition the predictor columns by dtype: object columns are treated as
# categorical, int64/float64 columns as numerical.
object_columns = X.select_dtypes(include='object').columns
numeric_columns = X.select_dtypes(include=['int64', 'float64']).columns

categorical_features = list(object_columns)
numerical_features = list(numeric_columns)


In [5]:
# Defensive guard: make sure the target never appears among the numeric
# predictors. list.remove raises ValueError when the item is absent, which
# simply means there is nothing to remove.
try:
    numerical_features.remove('price')
except ValueError:
    pass


In [6]:
# One-hot encode the categorical columns; handle_unknown='ignore' makes the
# encoder tolerate categories at predict time that were not seen during fit.
categorical_encoder = OneHotEncoder(handle_unknown='ignore')

# Every column not listed in categorical_features is passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=[('cat', categorical_encoder, categorical_features)],
    remainder='passthrough',
)


In [7]:
# Chain preprocessing and the regressor so that fit/predict always apply the
# same encoding. random_state is fixed so the forest is reproducible.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42)),
]
model = Pipeline(steps=pipeline_steps)


In [9]:
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the whole pipeline on the training split, then predict the held-out rows
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluation metrics: RMSE is in the same units as the target, and R² is
# relative to always predicting the mean (negative means worse than the mean)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"R² Score: {r2}")
print(f"RMSE: {rmse}")


R² Score: -0.7000393851592674
RMSE: 398.8517044311588


In [10]:
# A single hypothetical listing to price with the fitted pipeline.
sample = pd.DataFrame([{
    'latitude': 17.42,
    'longitude': 78.45,
    'property_type': 'Apartment',
    'room_type': 'Entire home/apt',
    'bathrooms': 2,
    'bedrooms': 2,
    'minimum_nights': 5
}])

# A random forest cannot extrapolate beyond the values it was trained on, so
# flag any numeric input that falls outside the training range. The previewed
# training rows sit near latitude 37.7 / longitude -122.4, far from the
# coordinates used here, which makes the prediction below unreliable.
numeric_cols = X_train.select_dtypes(include='number').columns.intersection(sample.columns)
train_min = X_train[numeric_cols].min()
train_max = X_train[numeric_cols].max()
out_of_range = [
    col for col in numeric_cols
    if not train_min[col] <= sample.at[0, col] <= train_max[col]
]
if out_of_range:
    print(
        f"Warning: sample is outside the training range for {out_of_range}; "
        "the prediction is an extrapolation and may be unreliable."
    )

predicted_rent = model.predict(sample)
print(f"Predicted Rent: ${predicted_rent[0]:.2f}")


Predicted Rent: $459.36
