# Rent Price Prediction using Multiple Regression


In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

In [35]:
# Read the listings CSV into a DataFrame, then show its first five rows
# as this cell's displayed output.
df = pd.read_csv(filepath_or_buffer='data/combined_geocoded_data.csv')
df.head(n=5)

Unnamed: 0,id,bathrooms,bedrooms,fee,has_photo,pets_allowed,price,square_feet,cityname,state,...,View,Washer Dryer,Wood Floors,no amenities available,address,neighborhood,county,postcode,place_importance,place_rank
0,5668640009,1.0,1.0,No,Thumbnail,Cats,2195.0,542,Redondo Beach,CA,...,0,0,0,1,"19715, Redbeam Avenue, Torrance, Los Angeles C...",,Los Angeles County,90503.0,6.3e-05,30
1,5668639818,1.5,3.0,No,Thumbnail,"Cats,Dogs",1250.0,1500,Newport News,VA,...,0,0,0,1,"City Center Boulevard, Mammoth Oak, Port Warwi...",Mammoth Oak,Newport News,23606.0,5.9e-05,30
2,5668639686,2.0,3.0,No,Thumbnail,,1395.0,1650,Raleigh,NC,...,0,0,0,1,"Crabtree Creek Trail, Green Acres, Raleigh, Wa...",Green Acres,Wake County,27608.0,0.04007,27
3,5668639659,1.0,2.0,No,Thumbnail,"Cats,Dogs",1600.0,820,Vacaville,CA,...,0,0,0,1,"Noodle or Rice, 1347, East Monte Vista Avenue,...",,Solano County,95688.0,5.5e-05,30
4,5668639374,1.0,1.0,No,Thumbnail,"Cats,Dogs",975.0,624,Albuquerque,NM,...,0,0,0,1,"1805, Richmond Drive Northeast, Netherwood Par...",Netherwood Park,Bernalillo County,87106.0,6.9e-05,30


In [36]:
# Summary statistics for every column (numeric and non-numeric alike),
# transposed so each original column becomes one row of the table.
df.describe(include='all').transpose()

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
id,91720.0,,,,5354721218.922176,183519623.695781,5121046168.0,5197939517.5,5502517074.0,5509001395.75,5669438542.0
bathrooms,91720.0,,,,1.398463,0.498396,1.0,1.0,1.0,2.0,3.5
bedrooms,91720.0,,,,1.654437,0.656452,0.0,1.0,2.0,2.0,3.0
fee,91720.0,2.0,No,91571.0,,,,,,,
has_photo,91720.0,3.0,Yes,51468.0,,,,,,,
pets_allowed,36577.0,4.0,"Cats,Dogs",34777.0,,,,,,,
price,91720.0,,,,1390.143066,522.249449,100.0,995.0,1305.0,1690.0,2966.0
square_feet,91720.0,,,,906.74481,253.074398,175.0,720.0,881.0,1089.0,1691.0
cityname,91720.0,2814.0,Dallas,2771.0,,,,,,,
state,91720.0,51.0,TX,10889.0,,,,,,,


In [37]:
# Predictor columns: bedrooms, bathrooms and square_feet first, then one
# column per amenity.
features = [
    'bedrooms', 'bathrooms', 'square_feet',
    'AC', 'Alarm', 'Basketball', 'Cable or Satellite', 'Clubhouse',
    'Dishwasher', 'Doorman', 'Elevator', 'Fireplace', 'Garbage Disposal',
    'Gated', 'Golf', 'Gym', 'Hot Tub', 'Internet Access', 'Luxury',
    'Parking', 'Patio/Deck', 'Playground', 'Pool', 'Refrigerator',
    'Storage', 'TV', 'Tennis', 'View', 'Washer Dryer', 'Wood Floors',
    'no amenities available',
]

# Keep only the rows in which every predictor and the target ('price') are present.
df_model = df.dropna(subset=[*features, 'price'])

# Predictor matrix and target series.
X, y = df_model[features], df_model['price']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Predict prices for the held-out rows.
y_pred = model.predict(X_test)

# Report the evaluation metrics on the held-out rows. The MAPE value is
# multiplied by 100 so that it is printed as a percentage.
print('MAE:', mean_absolute_error(y_true=y_test, y_pred=y_pred))
print('MSE:', mean_squared_error(y_true=y_test, y_pred=y_pred))
print('MAPE', mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred) * 100 , '%')
print('R²:', r2_score(y_true=y_test, y_pred=y_pred))

MAE: 376.48349841826604
MSE: 226332.76076077682
MAPE 30.73917644455627 %
R²: 0.16182202202902318
