In [45]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import (StandardScaler,
    LabelEncoder, OneHotEncoder)

data_dir = '../data'
filename = 'AB_NYC_2019.csv'
data_path = os.path.join(data_dir, filename)

df = pd.read_csv(data_path)

#### Data preparation

In [46]:
df['last_review'] = pd.to_datetime(df['last_review'])

In [47]:
no_hostname = df[df['host_name'].isnull()]
df.drop(index=no_hostname.index, inplace=True)

In [48]:
no_name = df[df['name'].isnull()]
df.drop(index=no_name.index, inplace=True)

In [49]:
no_info_cond = df['number_of_reviews'] == 0 & \
               df['last_review'].isnull() & \
               df['reviews_per_month'].isnull()
no_info_sample = df[no_info_cond]

df.drop(index=no_info_sample.index, inplace=True)

df.index = range(len(df))

In [50]:
def extract_date(data):
    return data.dt.year, data.dt.month, data.dt.day

year, month, day = extract_date(df['last_review'])

df[['review_year', 'review_month',
    'review_day']] = pd.DataFrame({'year': year,
                        'month': month,
                        'day': day})

df.drop(columns='last_review', inplace=True)

In [51]:
# FE
df['year_available'] = df['availability_365'] == 365
df['review_period'] = df['number_of_reviews'] / df['reviews_per_month']
df['is_rare_type'] = df['room_type'] == 'Shared room'
df['reviews_per_host'] = df['number_of_reviews'] / df['calculated_host_listings_count']
df['min_available'] = df['minimum_nights'] * df['availability_365']

In [52]:
target = df['price']
df.drop(columns=['price'], inplace=True)

target.replace(to_replace=0, value=target.mean(), inplace=True)
target = np.log(target)

In [53]:
%%time
to_encode = ['room_type', 'neighbourhood_group']

for col in to_encode:
    le = LabelEncoder()
    ohe = OneHotEncoder(categories='auto')
    labeled = le.fit_transform(df[col])
    labeled = labeled.reshape(len(labeled), 1)
    encoded = ohe.fit_transform(labeled).toarray()
    encoded_df = pd.DataFrame(
        encoded, columns=['is_'+cat for cat in le.classes_]
    )
    df = df.join(encoded_df)

df.drop(columns=to_encode, inplace=True)

Wall time: 113 ms


In [54]:
%%time
# Useless unique identifier
df.drop(columns='id', inplace=True)
# Have to make specific FE for sentences
df.drop(columns='name', inplace=True)

to_label = ['host_name', 'neighbourhood',
            'year_available', 'is_rare_type']

for col in to_label:
    le = LabelEncoder()
    labeled = le.fit_transform(df[col])
    df[col+'_label'] = labeled

df.drop(columns=to_label, inplace=True)

Wall time: 102 ms


In [55]:
num_df = df.select_dtypes(include=np.number)

scaler = StandardScaler()
scaled = scaler.fit_transform(df[num_df.columns])
df[num_df.columns] = pd.DataFrame(scaled)

#### Neural network implementation

In [56]:
# Activations and their derivatives
def linear(x):
    return x

def linear_backward(da, x):
    return np.array(da, copy=True)

def relu(x):
    return np.maximum(0, x)

def relu_backward(da, x):
    dx = np.array(da, copy = True)
    dx[x <= 0] = 0
    return dx

In [57]:
architecture_list = (
    {'input': df.shape[1], 'output': 40, 'activation': relu},
    {'input': 40, 'output': 50, 'activation': relu},
    {'input': 50, 'output': 50, 'activation': relu},
    {'input': 50, 'output': 20, 'activation': relu},
    {'input': 20, 'output': 1, 'activation': linear}
)

backwards = {
    'relu': relu_backward,
    'linear': linear_backward
}

In [58]:
def init_layers(architecture, seed=1):
    np.random.seed(seed)
    params = {}
    
    for idx, layer in enumerate(architecture):
        layer_idx = idx + 1
        layer_input = layer['input']
        layer_output = layer['output']
        # Weight matrix W and bias vector b initialization
        params['W_' + str(layer_idx)] = np.random.randn(
            layer_output, layer_input) * 0.1
        params['b_' + str(layer_idx)] = np.random.randn(
            layer_output, 1) * 0.1
        
    return params

In [59]:
def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation):
    # calculation of the input value for the activation function
    # W * A + b
    Z_curr = np.dot(W_curr, A_prev) + b_curr
    # return of calculated activation A and the intermediate Z matrix
    return activation(Z_curr), Z_curr

In [60]:
def full_forward_propagation(X, params, architecture):
    # memory for information needed for a backward step
    memory = {}
    # X vector is the activation for layer 0 
    A_curr = X
    
    # iteration over network layers
    for idx, layer in enumerate(architecture):
        # we number network layers from 1
        layer_idx = idx + 1
        # transfer the activation from the previous iteration
        A_prev = A_curr
        
        # extraction of the matrix W, vector b and
        # activation function for the current layer
        W_curr = params['W_' + str(layer_idx)]
        b_curr = params['b_' + str(layer_idx)]
        activation = layer['activation']
                
        # calculation of activation for the current layer
        A_curr, Z_curr = single_layer_forward_propagation(
            A_prev, W_curr, b_curr, activation
        )
        
        # saving calculated values in the memory
        memory['A_' + str(idx)] = A_prev
        memory['Z_' + str(layer_idx)] = Z_curr
    
    # return of prediction vector and a dictionary for backward
    return A_curr, memory

In [61]:
# Mean squared error
def mse(y_hat, y):
    m = y_hat.shape[1]
    result = 1 / m * np.sum((y_hat - y)**2)
    return np.squeeze(result)

In [62]:
def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr,
                                      A_prev, activation):
    # number of examples
    m = A_prev.shape[1]
    # selection of activation function
    act_name = activation.__name__
    backward_activation = backwards[act_name]
    
    # calculation of the activation function derivative
    dZ_curr = backward_activation(dA_curr, Z_curr)
    
    # derivative of the matrix W, vector b
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    
    # derivative of the matrix A_prev
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dA_prev, dW_curr, db_curr

In [63]:
def full_backward_propagation(y_hat, y, memory, params, architecture):
    grads_values = {}
    # number of examples
    m = y.shape[1]
    # a hack ensuring the same shape of the prediction vector and labels vector
    y = y.reshape(y_hat.shape)
    
    # initiation of gradient descent algorithm
    dA_prev = -(np.divide(y, y_hat) - np.divide(1 - y, 1 - y_hat))
    
    for layer_idx_prev, layer in reversed(list(enumerate(architecture))):
        # we number network layers from 1
        layer_idx_curr = layer_idx_prev + 1
        # extraction of the activation function for the current layer
        activation = layer['activation']
        
        dA_curr = dA_prev
        A_prev = memory['A_' + str(layer_idx_prev)]
        Z_curr = memory['Z_' + str(layer_idx_curr)]
        
        W_curr = params['W_' + str(layer_idx_curr)]
        b_curr = params['b_' + str(layer_idx_curr)]
        
        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activation
        )
        
        grads_values['dW_' + str(layer_idx_curr)] = dW_curr
        grads_values['db_' + str(layer_idx_curr)] = db_curr
    
    return grads_values

In [64]:
def update_params(params, grads_values, architecture, learning_rate):
    for layer_idx, layer in enumerate(architecture, 1):
        w_updating = learning_rate * grads_values['dW_' + str(layer_idx)]
        params['W_' + str(layer_idx)] -= w_updating
        b_updating = learning_rate * grads_values['db_' + str(layer_idx)]
        params['b_' + str(layer_idx)] -= b_updating
    return params

In [65]:
def train(x, y, architecture, epochs, learning_rate):
    # initialization of neural net parameters
    params = init_layers(architecture, 2)
    cost_history = []

    for i in range(epochs):
        # step forward
        y_hat, memory = full_forward_propagation(x, params, architecture)
        
        cost = mse(y_hat, y)
        cost_history.append(cost)
        
        # step backward - calculating gradient
        grads_values = full_backward_propagation(
            y_hat, y, memory, params, architecture
        )

        # updating model state
        params = update_params(params, grads_values, architecture,
                               learning_rate)
            
    return params

#### Custom dense network training

In [66]:
# Train-test split
x, x_test, y, y_test = train_test_split(
    df, target, test_size=0.2, random_state=0, shuffle=True
)
# Train-val split
x_train, x_val, y_train, y_val = train_test_split(
    x, y, train_size=0.8, random_state=0, shuffle=True
)

In [79]:
num_epochs = 25
lr = 0.001
params_values = train(np.transpose(x_train),
    np.transpose(y_train.to_numpy().reshape((y_train.shape[0], 1))),
    architecture_list, num_epochs, lr)

In [80]:
# Prediction
y_test_hat, _ = full_forward_propagation(np.transpose(x_test),
                                         params_values, architecture_list)

In [81]:
mse_test = mse(y_test_hat,
    np.transpose(y_test.to_numpy().reshape((y_test.shape[0], 1))))
print('Test set MSE: {:.3f}'.format(mse_test))

Test set MSE: 34.734
