<a href="https://colab.research.google.com/github/maslovalyudmila/ds_school_2020/blob/master/Maslova_3dTask_MistakesCorrection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# import libraries

import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

In [0]:
# import our data

df = pd.read_csv('/content/houses_to_rent_v2.csv')

**Preprocessing the data**

In [0]:
# create a binary column with target variable 'best demand'

df['best demand'] = df['total (R$)']
df_1 = df['best demand']
df_1.loc[df_1 <= 3580] = 1
df_1.loc[df_1 > 3580] = 0

In [0]:
# make categorical features as quantitive

from sklearn.preprocessing import LabelEncoder

df['animal'] = df['animal'].factorize()[0]
df['furniture'] = df['furniture'].factorize()[0]

labelencoder = LabelEncoder()
df.city = labelencoder.fit_transform(df.city)
df.floor = labelencoder.fit_transform(df.floor)

In [0]:
# delete hierarchy

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

transformer = ColumnTransformer(transformers=[('city', OneHotEncoder(), [0,4])])
transformer = ColumnTransformer(transformers=[('floor', OneHotEncoder(), [0,34])])

In [0]:
# read first 5 rows of data

df.head()

Unnamed: 0,city,area,rooms,bathroom,parking spaces,floor,animal,furniture,hoa (R$),rent amount (R$),property tax (R$),fire insurance (R$),total (R$),best demand
0,4,70,2,1,1,32,0,0,2065,3300,211,42,5618,0
1,4,320,4,4,0,13,0,1,1200,4960,1750,63,7973,0
2,2,80,1,1,1,31,0,1,1000,2800,0,41,3841,0
3,2,51,2,1,0,12,0,1,270,1112,22,17,1421,1
4,4,25,1,1,0,1,1,1,0,800,25,11,836,1


In [0]:
dataset = df.drop(['total (R$)'], axis=1).values

In [0]:
dataset

array([[   4,   70,    2, ...,  211,   42,    0],
       [   4,  320,    4, ..., 1750,   63,    0],
       [   2,   80,    1, ...,    0,   41,    0],
       ...,
       [   3,   70,    3, ...,  332,   78,    0],
       [   3,  120,    2, ...,  279,  155,    0],
       [   4,   80,    2, ...,  165,   22,    1]])

In [0]:
X = dataset[:, 0:12] # first 11 rows 
Y = dataset[:, 12] # 12th column

In [0]:
from sklearn.preprocessing import StandardScaler

standard_scaler = StandardScaler()
X_scale = standard_scaler.fit_transform(X)

In [0]:
X_scale

array([[ 0.75256677, -0.14752165, -0.432099  , ..., -0.17493534,
        -0.05010297, -0.23658936],
       [ 0.75256677,  0.31803478,  1.27553453, ...,  0.31209868,
         0.44512065,  0.20305577],
       [-0.64949698, -0.12889939, -1.28591577, ..., -0.32163234,
        -0.11799913, -0.25752484],
       ...,
       [ 0.05153489, -0.14752165,  0.42171777, ...,  0.61722843,
        -0.01116726,  0.51708801],
       [ 0.05153489, -0.05441036, -0.432099  , ...,  2.37759236,
        -0.02822175,  2.12912015],
       [ 0.75256677, -0.12889939, -0.432099  , ..., -0.73238392,
        -0.06490498, -0.655299  ]])

In [0]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X_scale, Y, test_size=0.3)

In [0]:
Y_train = Y_train.reshape(7484, 1)
Y_test = Y_test.reshape(3208, 1)

**Building and training the Neural Network**

In [0]:
# make random weights more certain

np.random.seed(1)

In [0]:
from math import log

In [0]:
# define input and output values

x = X_train 
y = Y_train 

In [0]:
class Neural_Network(object):
  def __init__(self):
    self.inputSize = 12
    self.outputSize = 1
    self.hiddenSize = 64

    self.syn0 = np.random.randn(self.inputSize, self.hiddenSize) # (64x12) matrix from input to hidden 
    self.syn1 = np.random.randn(self.hiddenSize, self.outputSize) # (64x1) matrix from hidden to output 

  def sigmoid(self, x):
    return 1/(1+np.exp(-x))

  def deriv(self, x):
    return x*(1-x)

  def forward(self, x):
    self.l0 = x # input 
    self.l1 = self.sigmoid(np.dot(self.l0, self.syn0)) # hidden 
    l2 = self.sigmoid(np.dot(self.l1, self.syn1)) # output
    return l2

  def backward(self, x, y, l2):
    self.l2_error = y - l2 # output error
    self.l2_delta = self.l2_error * self.deriv(l2)

    self.l1_error = self.l2_delta.dot(self.syn1.T) # how much l1's values have an influence on l2's errors
    self.l1_delta = self.l1_error * self.deriv(self.l1)
    
    # update weights
    self.syn1 += self.l1.T.dot(self.l2_delta) # input --> hidden weights
    self.syn0 += self.l0.T.dot(self.l1_delta) # hidden --> output weights

  def train(self, x, y):
    for i in range(1000):
      l2 = self.forward(x)
      self.backward(x, y, l2)
    i += 1

**Let's train our Neural Network**

In [0]:
NN = Neural_Network()

# define testing dataset
x = X_test
y = Y_test

#begin training
NN.train(x, y)
print('Input: \n' + str(x)) 
print('Actual output: \n' + str(y))
print('Predicted output: \n' + str(NN.forward(x)))

Input: 
[[ 0.75256677 -0.13634829 -0.432099   ...  0.10378895 -0.04817228
   0.01463643]
 [ 0.75256677 -0.19407729 -0.432099   ... -0.70304452 -0.10190999
  -0.69716997]
 [ 0.75256677 -0.19966397 -1.28591577 ... -0.26295354 -0.05621685
  -0.2993958 ]
 ...
 [ 0.75256677  0.19326566  1.27553453 ...  0.05977985 -0.11799913
  -0.02723453]
 [-2.05156074 -0.158695    0.42171777 ... -0.73238392 -0.109311
  -0.71810545]
 [ 0.75256677  0.12808776  0.42171777 ...  1.14533761 -0.0105237
   0.95673314]]
Actual output: 
[[0]
 [1]
 [0]
 ...
 [0]
 [1]
 [0]]
Predicted output: 
[[1.]
 [1.]
 [1.]
 ...
 [1.]
 [1.]
 [1.]]


In [0]:
from sklearn.metrics import log_loss

l2 = NN.forward(x)

log_loss(y, l2, eps=1e-15, normalize=True, sample_weight=None, labels=None)

17.398988405119344