# Predicting the rent using Neural Networks

In this project, you will use PyTorch to train a neural network to predict the rent of apartments based on different types of features.

In [1]:
import pandas as pd
import numpy as np

# Task 1: Import and clean the data

In [2]:
# Read the rental listings from the CSV file that sits next to the notebook,
# then preview the first rows.
DATA_PATH = 'streeteasy.csv'
data_street = pd.read_csv(DATA_PATH)
data_street.head()

Unnamed: 0,rental_id,building_id,rent,bedrooms,bathrooms,size_sqft,min_to_subway,floor,building_age_yrs,no_fee,has_roofdeck,has_washer_dryer,has_doorman,has_elevator,has_dishwasher,has_patio,has_gym,neighborhood,submarket,borough
0,1545,44518357,2550,0.0,1,480,9,2.0,17,1,1,0,0,1,1,0,1,Upper East Side,All Upper East Side,Manhattan
1,2472,94441623,11500,2.0,2,2000,4,1.0,96,0,0,0,0,0,0,0,0,Greenwich Village,All Downtown,Manhattan
2,10234,87632265,3000,3.0,1,1000,4,1.0,106,0,0,0,0,0,0,0,0,Astoria,Northwest Queens,Queens
3,2919,76909719,4500,1.0,1,916,2,51.0,29,0,1,0,1,1,1,0,0,Midtown,All Midtown,Manhattan
4,2790,92953520,4795,1.0,1,975,3,8.0,31,0,0,0,1,1,1,0,1,Greenwich Village,All Downtown,Manhattan


In [3]:
# Summarise column names, non-null counts and dtypes of the freshly loaded frame
data_street.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   rental_id         5000 non-null   int64  
 1   building_id       5000 non-null   int64  
 2   rent              5000 non-null   int64  
 3   bedrooms          5000 non-null   float64
 4   bathrooms         5000 non-null   int64  
 5   size_sqft         5000 non-null   int64  
 6   min_to_subway     5000 non-null   int64  
 7   floor             5000 non-null   float64
 8   building_age_yrs  5000 non-null   int64  
 9   no_fee            5000 non-null   int64  
 10  has_roofdeck      5000 non-null   int64  
 11  has_washer_dryer  5000 non-null   int64  
 12  has_doorman       5000 non-null   int64  
 13  has_elevator      5000 non-null   int64  
 14  has_dishwasher    5000 non-null   int64  
 15  has_patio         5000 non-null   int64  
 16  has_gym           5000 non-null   int64  


#### Notice that we have non-numeric features such as neighborhood, submarket and borough. So we need to clean the data and convert every integer column to floating point.

In [4]:
# Remove the identifier columns and the text-valued location columns so that
# only numeric columns remain.
# The result is rebound instead of mutated with inplace=True, and
# errors='ignore' skips labels that are already gone, so this cell can be
# re-run without raising a KeyError.
columns_to_drop = ['rental_id', 'building_id', 'neighborhood', 'submarket', 'borough']
data_street = data_street.drop(columns=columns_to_drop, errors='ignore')
data_street.head()

Unnamed: 0,rent,bedrooms,bathrooms,size_sqft,min_to_subway,floor,building_age_yrs,no_fee,has_roofdeck,has_washer_dryer,has_doorman,has_elevator,has_dishwasher,has_patio,has_gym
0,2550,0.0,1,480,9,2.0,17,1,1,0,0,1,1,0,1
1,11500,2.0,2,2000,4,1.0,96,0,0,0,0,0,0,0,0
2,3000,3.0,1,1000,4,1.0,106,0,0,0,0,0,0,0,0
3,4500,1.0,1,916,2,51.0,29,0,1,0,1,1,1,0,0
4,4795,1.0,1,975,3,8.0,31,0,0,0,1,1,1,0,1


In [5]:
# Cast every column to float in a single vectorised call instead of looping
# over the columns one at a time.
data_street = data_street.astype(float)

data_street.head()

Unnamed: 0,rent,bedrooms,bathrooms,size_sqft,min_to_subway,floor,building_age_yrs,no_fee,has_roofdeck,has_washer_dryer,has_doorman,has_elevator,has_dishwasher,has_patio,has_gym
0,2550.0,0.0,1.0,480.0,9.0,2.0,17.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0
1,11500.0,2.0,2.0,2000.0,4.0,1.0,96.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3000.0,3.0,1.0,1000.0,4.0,1.0,106.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4500.0,1.0,1.0,916.0,2.0,51.0,29.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0
4,4795.0,1.0,1.0,975.0,3.0,8.0,31.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0


In [6]:
# Re-check the dtypes after the float conversion
data_street.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   rent              5000 non-null   float64
 1   bedrooms          5000 non-null   float64
 2   bathrooms         5000 non-null   float64
 3   size_sqft         5000 non-null   float64
 4   min_to_subway     5000 non-null   float64
 5   floor             5000 non-null   float64
 6   building_age_yrs  5000 non-null   float64
 7   no_fee            5000 non-null   float64
 8   has_roofdeck      5000 non-null   float64
 9   has_washer_dryer  5000 non-null   float64
 10  has_doorman       5000 non-null   float64
 11  has_elevator      5000 non-null   float64
 12  has_dishwasher    5000 non-null   float64
 13  has_patio         5000 non-null   float64
 14  has_gym           5000 non-null   float64
dtypes: float64(15)
memory usage: 586.1 KB


# Task 2: Selecting features

In [7]:
import torch
from torch import nn
from torch import optim

# 'rent' is the value to predict; every other column is a model input.
TARGET = 'rent'
numerical_features = data_street.columns.drop(TARGET)

y = data_street[TARGET]
X = data_street.loc[:, numerical_features]

# Task 3: Train-Test-Split

In [8]:
from sklearn.model_selection import train_test_split

# 80/20 train/test split; the fixed random_state keeps the partition the same
# from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=2,
)

In [9]:
def _as_float_tensor(pandas_obj):
    """Copy the values of a DataFrame/Series into a torch tensor of dtype torch.float."""
    return torch.tensor(pandas_obj.values, dtype=torch.float)


# Feature matrices keep their (rows, columns) layout.
X_train_tensor = _as_float_tensor(X_train)
X_test_tensor = _as_float_tensor(X_test)

# Targets are reshaped into a single column, i.e. shape (rows, 1).
y_train_tensor = _as_float_tensor(y_train).view(-1, 1)
y_test_tensor = _as_float_tensor(y_test).view(-1, 1)

# Task 4: Design the neural network, select the loss function and the optimizer

In [10]:
# Seed PyTorch's random number generator before the layers are created so the
# randomly initialised weights start from the same values on every run
# (same seed value as the random_state used for the train/test split).
torch.manual_seed(2)

# Fully connected regression network: 14 inputs -> 26 -> 13 -> 1 output,
# with ReLU activations between the linear layers.
model = nn.Sequential(
    nn.Linear(14, 26),
    nn.ReLU(),
    nn.Linear(26, 13),
    nn.ReLU(),
    nn.Linear(13, 1),
)

# Mean squared error between predictions and targets
loss = nn.MSELoss()

# Adam updates all of the model's parameters
optimizer = optim.Adam(model.parameters(), lr=0.0007)

# Task 5: Training loop

In [11]:
num_epochs = 5000
LOG_EVERY = 500  # report the training loss once per this many epochs

# Each epoch passes the whole training tensor through the model in one
# forward/backward step.
for epochs in range(num_epochs):
    # Forward pass and training loss
    predictions = model(X_train_tensor)
    MSE = loss(predictions, y_train_tensor)

    # Backward pass, parameter update, then clear the gradients for the next epoch
    MSE.backward()
    optimizer.step()
    optimizer.zero_grad()

    epoch_number = epochs + 1  # 1-based counter used only for display
    if epoch_number % LOG_EVERY == 0:
        print(f'Epoch [{epoch_number}/{num_epochs}], MSE Loss: {MSE.item()}')


Epoch [500/5000], MSE Loss: 3114045.25
Epoch [1000/5000], MSE Loss: 2985033.75
Epoch [1500/5000], MSE Loss: 2875991.5
Epoch [2000/5000], MSE Loss: 2792925.75
Epoch [2500/5000], MSE Loss: 2736123.5
Epoch [3000/5000], MSE Loss: 2700254.75
Epoch [3500/5000], MSE Loss: 2671397.75
Epoch [4000/5000], MSE Loss: 2638499.25
Epoch [4500/5000], MSE Loss: 2614407.75
Epoch [5000/5000], MSE Loss: 2591374.5


# Task 6: Save and evaluate

In [12]:
# Persist the trained network to disk. The model object itself is passed to
# torch.save (not model.state_dict()).
# NOTE(review): loading a fully pickled model presumably requires the same
# class definitions and, on recent PyTorch versions, weights_only=False --
# confirm against whatever code loads 'model.pth'.
torch.save(model, 'model.pth') 

# Put the model in evaluation mode and compute the loss on the held-out test
# tensors without tracking gradients.
model.eval() 
with torch.no_grad(): 
    predictions = model(X_test_tensor) 
    test_loss = loss(predictions, y_test_tensor) 
    
# Report the test-set mean squared error
print('Neural Network - Test Set MSE:', test_loss.item()) 

Neural Network - Test Set MSE: 2353260.75
