# Ultra-basic real estate estimator with PyTorch
In `pytorch`, we use `torch.Tensor` objects to represent data matrices. The great thing about `torch.Tensor` is the `backward` API, which computes the partial derivatives for each weight of the model. 

Let's see how it works.

In [19]:
from __future__ import print_function
import torch
import random

A `torch.Tensor` looks like a regular Matrix. By setting the `requires_grad` to `True`, we can get a built-in gradient calculation. 

In [20]:
# A 2x2 matrix whose entries are tracked by autograd, and a fixed 1x2 row vector
X = torch.tensor([[1., -1.], [1., 1.]], requires_grad=True)
Y = torch.tensor([[2., -2.]])

# Scalar error: sum of the squared entries of the matrix product Y.X
YX = Y.mm(X)
error = (YX ** 2).sum()
print(error)  # tensor(16., grad_fn=<SumBackward0>) -> grad_fn shows the result is tracked for differentiation

# Fill X.grad with the partial derivative of the error w.r.t. each entry of X
error.backward()

print(X.grad)
# tensor([[  0., -16.],
#         [ -0.,  16.]])

# One gradient-descent step: move each of the 4 entries of X against its gradient
learning_rate = 0.1
newX = X - learning_rate * X.grad
YX = Y.mm(newX)
error = (YX ** 2).sum()
print(error)  # tensor(5.76, grad_fn=<SumBackward0>) -> the error drops from 16.0 to 5.76


tensor(16., grad_fn=<SumBackward0>)
tensor([[  0., -16.],
        [ -0.,  16.]])
tensor(5.7600, grad_fn=<SumBackward0>)


### Now, let's try to build a simple model.

First, we need some data. We generate a fictitious set of prices, with 3 boolean features: 
- the age of the house (0=old, 1=recent)
- the location (0=city, 1=suburb)
- the size (0=small, 1=big)

We also add a 4th feature constant (=1).

We'll need the data to be normalized : `mean = 0`, `range = [-1, 1]`

In [21]:
def normalize(data):
  """Center the prices on their mean and scale them by their range.

  Args:
    data: non-empty list of dicts, each holding a numeric 'price' and a
      'features' entry.

  Returns:
    A tuple (normalized, mean_price, price_range). normalized is a new list
    of dicts whose 'price' is (price - mean_price) / price_range and whose
    'features' is the original object (not copied). price_range is the
    divisor that was actually applied, so callers can reuse it to scale
    other data or to convert normalized values back.

  Raises:
    ValueError: if data is empty.
  """
  prices = [estimation['price'] for estimation in data]
  if not prices:
    raise ValueError("normalize() needs at least one estimation")

  mean_price = sum(prices) / len(prices)
  # When every price is identical the range is 0; fall back to a unit scale
  # so the normalized prices are all 0 instead of raising ZeroDivisionError.
  price_range = (max(prices) - min(prices)) or 1

  normalized = [
    {
      'price': (estimation['price'] - mean_price) / price_range,
      'features': estimation['features'],
    }
    for estimation in data
  ]
  return normalized, mean_price, price_range



This is our fictitious training dataset.

In [22]:
# Training set: one dict per house, with its raw 'price' and a 4-element
# 'features' vector of 0./1. values whose last element is always 1.
# NOTE(review): the features are presumably ordered [age, location, size,
# constant], following the list in the notebook text — confirm.
data = [
    {'price': 203602, 'features': [1., 1., 1., 1.]},
    {'price': 185488, 'features': [0., 1., 1., 1.]},
    {'price': 103203, 'features': [1., 1., 0., 1.]},
    {'price': 81608, 'features': [0., 1., 0., 1.]},
    {'price': 204089, 'features': [1., 1., 1., 1.]},
    {'price': 185425, 'features': [0., 1., 1., 1.]},
    {'price': 108829, 'features': [1., 1., 0., 1.]},
    {'price': 85070, 'features': [0., 1., 0., 1.]},
    {'price': 164995, 'features': [1., 0., 1., 1.]},
    {'price': 145936, 'features': [0., 0., 1., 1.]},
    {'price': 63804, 'features': [1., 0., 0., 1.]},
    {'price': 41816, 'features': [0., 0., 0., 1.]},
    {'price': 169410, 'features': [1., 0., 1., 1.]},
    {'price': 143767, 'features': [0., 0., 1., 1.]},
    {'price': 68952, 'features': [1., 0., 0., 1.]},
    {'price': 49133, 'features': [0., 0., 0., 1.]},
    {'price': 208421, 'features': [1., 1., 1., 1.]},
    {'price': 185919, 'features': [0., 1., 1., 1.]},
    {'price': 104752, 'features': [1., 1., 0., 1.]},
    {'price': 87972, 'features': [0., 1., 0., 1.]},
    {'price': 203095, 'features': [1., 1., 1., 1.]},
    {'price': 182987, 'features': [0., 1., 1., 1.]},
    {'price': 105929, 'features': [1., 1., 0., 1.]},
    {'price': 81803, 'features': [0., 1., 0., 1.]},
    {'price': 164606, 'features': [1., 0., 1., 1.]},
    {'price': 149610, 'features': [0., 0., 1., 1.]},
    {'price': 69360, 'features': [1., 0., 0., 1.]},
    {'price': 48567, 'features': [0., 0., 0., 1.]},
    {'price': 167751, 'features': [1., 0., 1., 1.]},
    {'price': 145253, 'features': [0., 0., 1., 1.]},
    {'price': 62292, 'features': [1., 0., 0., 1.]},
    {'price': 40270, 'features': [0., 0., 0., 1.]},
    {'price': 203670, 'features': [1., 1., 1., 1.]},
    {'price': 189057, 'features': [0., 1., 1., 1.]},
    {'price': 109181, 'features': [1., 1., 0., 1.]},
    {'price': 83718, 'features': [0., 1., 0., 1.]},
    {'price': 209023, 'features': [1., 1., 1., 1.]},
    {'price': 181662, 'features': [0., 1., 1., 1.]},
    {'price': 105240, 'features': [1., 1., 0., 1.]},
    {'price': 89964, 'features': [0., 1., 0., 1.]},
    {'price': 167683, 'features': [1., 0., 1., 1.]},
    {'price': 146370, 'features': [0., 0., 1., 1.]},
    {'price': 60447, 'features': [1., 0., 0., 1.]},
    {'price': 44079, 'features': [0., 0., 0., 1.]},
    {'price': 163198, 'features': [1., 0., 1., 1.]},
    {'price': 140351, 'features': [0., 0., 1., 1.]},
    {'price': 69402, 'features': [1., 0., 0., 1.]},
    {'price': 41489, 'features': [0., 0., 0., 1.]}
]
# Shuffle in place (unseeded, so the order differs on every run)
random.shuffle(data)
# Keep the mean and the price range returned by normalize(): the validation
# set is scaled with the same two values, and the final error is multiplied
# by amplitude to express it in price units again.
training_data, mean, amplitude = normalize(data)

And an additional data set, to measure the performance of the trained model.

In [23]:
# Validation set: same record layout as the training data ('price' plus a
# 4-element 'features' vector), used only to measure the trained model.
validation = [
    {'price': 204336, 'features': [1., 1., 1., 1.]},
    {'price': 187985, 'features': [0., 1., 1., 1.]},
    {'price': 102359, 'features': [1., 1., 0., 1.]},
    {'price': 80877, 'features': [0., 1., 0., 1.]},
    {'price': 205677, 'features': [1., 1., 1., 1.]},
    {'price': 189538, 'features': [0., 1., 1., 1.]},
    {'price': 107926, 'features': [1., 1., 0., 1.]},
    {'price': 84547, 'features': [0., 1., 0., 1.]},
    {'price': 165947, 'features': [1., 0., 1., 1.]},
    {'price': 140893, 'features': [0., 0., 1., 1.]},
    {'price': 67607, 'features': [1., 0., 0., 1.]},
    {'price': 43645, 'features': [0., 0., 0., 1.]},
    {'price': 163268, 'features': [1., 0., 1., 1.]},
    {'price': 148483, 'features': [0., 0., 1., 1.]},
    {'price': 66367, 'features': [1., 0., 0., 1.]},
    {'price': 47951, 'features': [0., 0., 0., 1.]},
    {'price': 206812, 'features': [1., 1., 1., 1.]},
    {'price': 182590, 'features': [0., 1., 1., 1.]},
    {'price': 105576, 'features': [1., 1., 0., 1.]},
    {'price': 81945, 'features': [0., 1., 0., 1.]},
    {'price': 206268, 'features': [1., 1., 1., 1.]},
    {'price': 181688, 'features': [0., 1., 1., 1.]},
    {'price': 101628, 'features': [1., 1., 0., 1.]},
    {'price': 87166, 'features': [0., 1., 0., 1.]},
    {'price': 161408, 'features': [1., 0., 1., 1.]},
    {'price': 145788, 'features': [0., 0., 1., 1.]},
    {'price': 68108, 'features': [1., 0., 0., 1.]},
    {'price': 42171, 'features': [0., 0., 0., 1.]},
    {'price': 160948, 'features': [1., 0., 1., 1.]},
    {'price': 146560, 'features': [0., 0., 1., 1.]},
    {'price': 63783, 'features': [1., 0., 0., 1.]},
    {'price': 48589, 'features': [0., 0., 0., 1.]},
    {'price': 207613, 'features': [1., 1., 1., 1.]},
    {'price': 185696, 'features': [0., 1., 1., 1.]},
    {'price': 102860, 'features': [1., 1., 0., 1.]},
    {'price': 86879, 'features': [0., 1., 0., 1.]},
    {'price': 205130, 'features': [1., 1., 1., 1.]},
    {'price': 189379, 'features': [0., 1., 1., 1.]},
    {'price': 100105, 'features': [1., 1., 0., 1.]},
    {'price': 80001, 'features': [0., 1., 0., 1.]},
    {'price': 168345, 'features': [1., 0., 1., 1.]},
    {'price': 144731, 'features': [0., 0., 1., 1.]},
    {'price': 67754, 'features': [1., 0., 0., 1.]},
    {'price': 46734, 'features': [0., 0., 0., 1.]},
    {'price': 162555, 'features': [1., 0., 1., 1.]},
    {'price': 140001, 'features': [0., 0., 1., 1.]},
    {'price': 67056, 'features': [1., 0., 0., 1.]},
    {'price': 41378, 'features': [0., 0., 0., 1.]}
]
# Shuffle in place (unseeded, so the order differs on every run)
random.shuffle(validation)
# Scale with the mean and amplitude computed on the training data (not
# recomputed on this set), so both sets share the same price scale.
validation = [ {'price': (estimation['price']-mean)/amplitude, 'features':estimation['features'] } for estimation in validation ]

This is our simple Model.

In [24]:
# Hyper-parameters
nb_features = 4      # length of each 'features' vector fed to the model
nb_agents = 4        # output width of the first weight matrix
nb_supervisors = 2   # output width of the second weight matrix
learning_rate = 0.05
nb_iterations = 40   # number of passes over the training data

# Trainable weights, initialised by torch.rand (uniform on [0, 1)).
# requires_grad=True makes backward() fill their .grad attribute.
flair_agents = torch.rand(nb_features, nb_agents, requires_grad=True)
flair_supervisor = torch.rand(nb_agents, nb_supervisors, requires_grad=True)
weighted_mean = torch.rand(nb_supervisors, 1, requires_grad=True)
# Activation function: identity (X -> X)

def forward(X):
  """Return the model's estimate for the feature matrix X.

  X is multiplied in turn by the module-level tensors flair_agents,
  flair_supervisor and weighted_mean (looked up at call time), and the
  product is divided by nb_supervisors.
  """
  agents_out = X.mm(flair_agents)
  supervisors_out = agents_out.mm(flair_supervisor)
  return supervisors_out.mm(weighted_mean) / nb_supervisors

# training
# training: gradient descent, one sample at a time
for epoch in range(nb_iterations):
  for estimation in training_data:
    # Step 1 : compute the squared error on this sample
    X = torch.tensor([estimation['features']])
    target = torch.tensor([[estimation['price']]])
    estimation_finale = forward(X)
    erreur = (estimation_finale - target).pow(2).sum()

    # Step 2 : back-propagation fills the .grad of every weight tensor
    erreur.backward()

    # Step 3 : update the parameters in place. no_grad() keeps the update
    # itself out of the autograd graph; the gradients are then reset because
    # backward() accumulates into .grad instead of overwriting it.
    with torch.no_grad():
      for weights in (weighted_mean, flair_supervisor, flair_agents):
        weights -= weights.grad * learning_rate
        weights.grad.zero_()

# validation
# validation: mean absolute error of the model on the validation set
errors = []
# Evaluation only: no_grad() avoids recording an autograd graph per sample
with torch.no_grad():
  for v in validation:
    X = torch.tensor([v['features']])
    estimation_finale = forward(X)
    erreur = (estimation_finale - torch.tensor([[v['price']]])).abs().sum()
    errors.append(erreur.item())

# The prices were divided by amplitude during normalization, so multiplying
# the mean error by amplitude expresses it in the original price unit.
print("Mean error for the validation dataset (€):", amplitude * sum(errors) / len(errors) )


Mean error for the validation dataset (€): 2491.212074656971
