## Example of user_input

In [1]:
user_input = {
    "borough": "Manhattan",
    "bedrooms": 2,
    "bathrooms": 1,
    "size_sqft": 1000,
    "min_to_subway": 5,
    "building_age_yrs": 30,    
    "floor": 5,
    "no_fee": 0,
    "has_roofdeck": 0,
    "has_patio": 0,
    "has_gym": 0,
    "has_washer_dryer": 1,
    "has_doorman": 0,
    "has_elevator": 1,
    "has_dishwasher": 0
}

## import dependencies

In [2]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [3]:
import tensorflow
tensorflow.keras.__version__

'2.4.0'

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

## Import Source Data

In [5]:
df=pd.read_csv('../input_data/streeteasy.csv')
df.shape

(5000, 20)

In [6]:
df.head(3)

Unnamed: 0,rental_id,building_id,rent,bedrooms,bathrooms,size_sqft,min_to_subway,floor,building_age_yrs,no_fee,has_roofdeck,has_washer_dryer,has_doorman,has_elevator,has_dishwasher,has_patio,has_gym,neighborhood,submarket,borough
0,1545,44518357,2550,0.0,1,480,9,2.0,17,1,1,0,0,1,1,0,1,Upper East Side,All Upper East Side,Manhattan
1,2472,94441623,11500,2.0,2,2000,4,1.0,96,0,0,0,0,0,0,0,0,Greenwich Village,All Downtown,Manhattan
2,10234,87632265,3000,3.0,1,1000,4,1.0,106,0,0,0,0,0,0,0,0,Astoria,Northwest Queens,Queens


## "Dummify" Source Data

In [8]:
# Use Pandas get_dummies to convert categorical data

trimmed_df = df.drop(columns=["rental_id","building_id"])
### BEGIN SOLUTION
dummied_df = pd.get_dummies(df)
dummied_df.head(3)
### END SOLUTION

Unnamed: 0,rental_id,building_id,rent,bedrooms,bathrooms,size_sqft,min_to_subway,floor,building_age_yrs,no_fee,...,submarket_Northeast Queens,submarket_Northwest Brooklyn,submarket_Northwest Queens,submarket_Prospect Park,submarket_South Brooklyn,submarket_South Queens,submarket_The Rockaways,borough_Brooklyn,borough_Manhattan,borough_Queens
0,1545,44518357,2550,0.0,1,480,9,2.0,17,1,...,0,0,0,0,0,0,0,0,1,0
1,2472,94441623,11500,2.0,2,2000,4,1.0,96,0,...,0,0,0,0,0,0,0,0,1,0
2,10234,87632265,3000,3.0,1,1000,4,1.0,106,0,...,0,0,1,0,0,0,0,0,0,1


## Dummify User Input

In [9]:
new_input_dict = {}

for feature in user_input:
    if feature == "borough":
        boroughs = sorted(list(df["borough"].unique()))
        for borough in boroughs:
            if user_input["borough"] == borough:
                new_input_dict[f"borough_{borough}"] = 1
            else:
                new_input_dict[f"borough_{borough}"] = 0
    elif feature == "neighborhood":
        neighborhoods = sorted(list(df["neighborhood"].unique()))
        for neighborhood in neighborhoods:
            if user_input["neighborhood"] == neighborhood:
                new_input_dict[f"neighborhood_{neighborhood}"] = 1
            else:
                new_input_dict[f"neighborhood_{neighborhood}"] = 0
    elif feature == "submarket":
        submarkets = sorted(list(df["submarket"].unique()))
        for submarket in submarkets:
            if user_input["submarket"] == submarket:
                new_input_dict[f"submarket_{submarket}"] = 1
            else:
                new_input_dict[f"submarket_{submarket}"] = 0
        
    else:
        new_input_dict[feature] = user_input[feature]

new_input_dict    

{'borough_Brooklyn': 0,
 'borough_Manhattan': 1,
 'borough_Queens': 0,
 'bedrooms': 2,
 'bathrooms': 1,
 'size_sqft': 1000,
 'min_to_subway': 5,
 'building_age_yrs': 30,
 'floor': 5,
 'no_fee': 0,
 'has_roofdeck': 0,
 'has_patio': 0,
 'has_gym': 0,
 'has_washer_dryer': 1,
 'has_doorman': 0,
 'has_elevator': 1,
 'has_dishwasher': 0}

## Select Features

In [11]:
selected_features = list(new_input_dict.keys())
selection_df = dummied_df[selected_features]
selection_df.head(3)

Unnamed: 0,borough_Brooklyn,borough_Manhattan,borough_Queens,bedrooms,bathrooms,size_sqft,min_to_subway,building_age_yrs,floor,no_fee,has_roofdeck,has_patio,has_gym,has_washer_dryer,has_doorman,has_elevator,has_dishwasher
0,0,1,0,0.0,1,480,9,17,2.0,1,1,0,1,0,0,1,1
1,0,1,0,2.0,2,2000,4,96,1.0,0,0,0,0,0,0,0,0
2,0,0,1,3.0,1,1000,4,106,1.0,0,0,0,0,0,0,0,0


In [12]:
# Assign X (data) and y (target)

### BEGIN SOLUTION
X = selection_df
y = df["rent"].values.reshape(-1, 1)
print(X.shape, y.shape)
### END SOLUTION

(5000, 17) (5000, 1)


## Train/Test Split Data

In [13]:
# Split the data into training and testing

### BEGIN SOLUTION
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
### END SOLUTION

In [14]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(4000, 17)
(1000, 17)
(4000, 1)
(1000, 1)


## Scale Data

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [14]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data

### BEGIN SOLUTION
X_scaler = StandardScaler().fit(X_train)
y_scaler = StandardScaler().fit(y_train)
### END SOLUTION

In [15]:
# Transform the training and testing data using the X_scaler and y_scaler models

### BEGIN SOLUTION
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
y_train_scaled = y_scaler.transform(y_train)
y_test_scaled = y_scaler.transform(y_test)
### END SOLUTION

In [16]:
print(X_train_scaled.shape)
print(X_test_scaled.shape)
print(y_train_scaled.shape)
print(y_test_scaled.shape)

(4000, 17)
(1000, 17)
(4000, 1)
(1000, 1)


# Model Variables

In [17]:
all_models = {"models":[],
             "mse":[],
             "r2":[]
             }

# LinearRegression 

In [18]:
# train_scaled
from sklearn.linear_model import LinearRegression 
lm_model = LinearRegression()
lm_model.fit(X_train_scaled, y_train_scaled)
lm_predictions = lm_model.predict(X_train_scaled)

In [19]:
#pred_train_scaled= model.predict(X_train_scaled)
from sklearn.metrics import mean_squared_error
print(np.sqrt(mean_squared_error(y_train_scaled,lm_predictions)))

lm_pred= lm_model.predict(X_test_scaled)
print(np.sqrt(mean_squared_error(y_test_scaled,lm_pred))) 

0.48021902868070415
0.4843363415274868


In [20]:
from sklearn.metrics import mean_squared_error

MSE = mean_squared_error(y_train_scaled, lm_predictions)
r2 = lm_model.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.23061031550703898, R2: 0.7804376129722346


In [21]:
all_models["models"].append(lm_model)
all_models["mse"].append(MSE)
all_models["r2"].append(r2)

# Lasso

In [22]:
# LASSO model
# Note: Use an alpha of .01 when creating the model for this activity
from sklearn.linear_model import Lasso

### BEGIN SOLUTION
lasso = Lasso(alpha=.01).fit(X_train_scaled, y_train_scaled)

lasso_predictions = lasso.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, lasso_predictions)
r2 = lasso.score(X_test_scaled, y_test_scaled)
### END SOLUTION
print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.23624135377250588, R2: 0.7788842122856748


In [23]:
all_models["models"].append(lasso)
all_models["mse"].append(MSE)
all_models["r2"].append(r2)

# Ridge

In [24]:
# Ridge model
# Note: Use an alpha of .01 when creating the model for this activity
from sklearn.linear_model import Ridge

### BEGIN SOLUTION
ridge = Ridge(alpha=.01).fit(X_train_scaled, y_train_scaled)

ridge_predictions = ridge.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, ridge_predictions)
r2 = ridge.score(X_test_scaled, y_test_scaled)
### END SOLUTION

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.23465920726355552, R2: 0.7803650604353299


In [25]:
all_models["models"].append(ridge)
all_models["mse"].append(MSE)
all_models["r2"].append(r2)

# ElasticNet model

In [26]:
# ElasticNet model
# Note: Use an alpha of .01 when creating the model for this activity
from sklearn.linear_model import ElasticNet

### BEGIN SOLUTION
elasticnet = ElasticNet(alpha=.01).fit(X_train_scaled, y_train_scaled)

el_predictions = elasticnet.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, el_predictions)
r2 = elasticnet.score(X_test_scaled, y_test_scaled)
### END SOLUTION

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.23521413555947124, R2: 0.7798456619247942


In [27]:
all_models["models"].append(elasticnet)
all_models["mse"].append(MSE)
all_models["r2"].append(r2)

# XGBoost Regressor

In [28]:
from xgboost import XGBRegressor
XGBModel = XGBRegressor()
XGBModel.fit(X_train_scaled, y_train_scaled , verbose=False)

# Get the mean absolute error on the validation data :
XGBpredictions = XGBModel.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, XGBpredictions)
r2 = XGBModel.score(X_test_scaled, y_test_scaled)
### END SOLUTION

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.19080492190149234, R2: 0.8214115355660948


In [29]:
all_models["models"].append(XGBModel)
all_models["mse"].append(MSE)
all_models["r2"].append(r2)

# Neural Network

In [30]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [31]:
# Define model
model = Sequential()
model.add(Dense(500, input_dim=len(new_input_dict.values()), activation= "relu"))
model.add(Dense(100, activation= "relu"))
model.add(Dense(50, activation= "relu"))
model.add(Dense(1))

In [32]:
# Compile and fit the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])   

In [33]:
# Fit the model to the training data
model.fit(
    X_train_scaled,
    y_train_scaled,
    epochs=100,
    shuffle=True,
    verbose=2
)

Epoch 1/100
125/125 - 0s - loss: 0.2590 - mean_squared_error: 0.2590
Epoch 2/100
125/125 - 0s - loss: 0.2003 - mean_squared_error: 0.2003
Epoch 3/100
125/125 - 0s - loss: 0.1907 - mean_squared_error: 0.1907
Epoch 4/100
125/125 - 0s - loss: 0.1822 - mean_squared_error: 0.1822
Epoch 5/100
125/125 - 0s - loss: 0.1659 - mean_squared_error: 0.1659
Epoch 6/100
125/125 - 0s - loss: 0.1640 - mean_squared_error: 0.1640
Epoch 7/100
125/125 - 0s - loss: 0.1574 - mean_squared_error: 0.1574
Epoch 8/100
125/125 - 0s - loss: 0.1611 - mean_squared_error: 0.1611
Epoch 9/100
125/125 - 0s - loss: 0.1483 - mean_squared_error: 0.1483
Epoch 10/100
125/125 - 0s - loss: 0.1421 - mean_squared_error: 0.1421
Epoch 11/100
125/125 - 0s - loss: 0.1429 - mean_squared_error: 0.1429
Epoch 12/100
125/125 - 0s - loss: 0.1387 - mean_squared_error: 0.1387
Epoch 13/100
125/125 - 0s - loss: 0.1354 - mean_squared_error: 0.1354
Epoch 14/100
125/125 - 0s - loss: 0.1254 - mean_squared_error: 0.1254
Epoch 15/100
125/125 - 0s - l

<tensorflow.python.keras.callbacks.History at 0x257bb490220>

In [34]:
pred_train_scaled= model.predict(X_train_scaled)
MSE = np.sqrt(mean_squared_error(y_train_scaled,pred_train_scaled))
print(MSE)

pred = model.predict(X_test_scaled)
r2 = np.sqrt(mean_squared_error(y_test_scaled, pred))
print(r2) 

0.21091259093753198
0.4804766653616795


In [35]:
all_models["models"].append(model)
all_models["mse"].append(MSE)
all_models["r2"].append(r2)

In [36]:
all_models

{'models': [LinearRegression(),
  Lasso(alpha=0.01),
  Ridge(alpha=0.01),
  ElasticNet(alpha=0.01),
  XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
               importance_type='gain', interaction_constraints='',
               learning_rate=0.300000012, max_delta_step=0, max_depth=6,
               min_child_weight=1, missing=nan, monotone_constraints='()',
               n_estimators=100, n_jobs=8, num_parallel_tree=1, random_state=0,
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
               tree_method='exact', validate_parameters=1, verbosity=None),
  <tensorflow.python.keras.engine.sequential.Sequential at 0x257bb319070>],
 'mse': [0.23061031550703898,
  0.23624135377250588,
  0.23465920726355552,
  0.23521413555947124,
  0.19080492190149234,
  0.21091259093753198],
 'r2': [0.7804376129722346,
  0.7788842122856748,
  0.7803650604353299,
  0.77984566192

## Scale User Input Data

In [38]:
input_values = list(new_input_dict.values())
input_values_scaled = X_scaler.transform(pd.DataFrame(new_input_dict, index=[0]))
input_values_scaled

array([[-0.50156202,  0.64143012, -0.31544448,  0.65852899, -0.56368355,
         0.19680243, -0.01438414, -0.54614348, -0.4898286 , -0.86589911,
        -0.39081678, -0.21197188, -0.41305871,  2.53399887, -0.54884987,
         1.76740243, -0.432777  ]])

## Get Predictions

In [None]:
lm_pred_array = y_scaler.inverse_transform([lm_model.predict(input_values_scaled),])
lm_pred = lm_pred_array[0][0][0]

lasso_pred_array = y_scaler.inverse_transform([lasso.predict(input_values_scaled),])
lasso_pred = lasso_pred_array[0][0]

ridge_pred_array = y_scaler.inverse_transform([ridge.predict(input_values_scaled),])
ridge_pred = ridge_pred_array[0][0][0]

elas_pred_array = y_scaler.inverse_transform([elasticnet.predict(input_values_scaled),])
elasticnet_pred = elas_pred_array[0][0]

XGB_pred_array = y_scaler.inverse_transform([XGBModel.predict(input_values_scaled),])
XGB_pred = XGB_pred_array[0][0]

nn_pred_array = y_scaler.inverse_transform([model.predict(input_values_scaled),])
nn_pred = nn_pred_array[0][0][0]

## Package Results

In [None]:
results = {
    "lm": {"model":"Linear Regression",
          "r2": all_models["r2"][0],
          "prediction": lm_pred},
    "lasso": {"model":"Linear Regression",
          "r2": all_models["r2"][1],
          "prediction": lasso_pred},
    "ridge": {"model":"Ridge",
          "r2": all_models["r2"][2],
          "prediction": ridge_pred},
    "elas": {"model":"ElasticNet",
          "r2": all_models["r2"][3],
          "prediction": elasticnet_pred},
    "xgb": {"model":"XGBoost Regressor",
          "r2": all_models["r2"][4],
          "prediction": XGB_pred},
    "nn": {"model":"Neural Network",
          "r2": all_models["r2"][5],
          "prediction": nn_pred},
}

In [None]:
results