In [87]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor

### **loading data**

In [88]:
# Load the housing table: whitespace-separated values with no header row.
# `sep=r"\s+"` is the documented equivalent of `delim_whitespace=True`,
# which pandas has deprecated (2.2+) and will remove in a future release.
data = pd.read_csv("housing.csv", sep=r"\s+", header=None)
# Preview the first rows; columns are still the default integer labels here.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [89]:
# Attach readable names to the 14 positional columns (13 predictors + MEDV target).
data.columns = [
    "CRIM",
    "ZN",
    "INDUS",
    "CHAS",
    "NOX",
    "RM",
    "AGE",
    "DIS",
    "RAD",
    "TAX",
    "PTRATIO",
    "B",
    "LSTAT",
    "MEDV",
]

***data description***

**1-CRIM:** per capita crime rate by town.

**2-ZN:** percentage of residential land allocated to plots of more than 25,000 square feet.

**3-INDUS:** percentage of non-retail business acres per town.

**4-CHAS:** Charles River dummy variable (= 1 if tract bounds river; 0 otherwise).

**5-NOX:** nitric oxides concentration (parts per 10 million).

**6-RM:** average number of rooms per dwelling.

**7-AGE:** proportion of owner-occupied units built prior to 1940.

**8-DIS:** weighted distances to five Boston employment centers.

**9-RAD:** index of accessibility to radial highways.

**10-TAX:** full-value property-tax rate per $10,000.

**11-PTRATIO:** pupil-teacher ratio by town.

**12-B:** $1000(B_k - 0.63)^2$, where $B_k$ is the proportion of Black residents by town.

**13-LSTAT:** % lower status of the population.

**14-MEDV:** (target) median value of owner-occupied homes in $1000s.

In [90]:
# Preview the first rows again, now that the columns carry descriptive names.
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [91]:
# Inspect the dtype pandas inferred for each column.
data.dtypes

CRIM       float64
ZN         float64
INDUS      float64
CHAS         int64
NOX        float64
RM         float64
AGE        float64
DIS        float64
RAD          int64
TAX        float64
PTRATIO    float64
B          float64
LSTAT      float64
MEDV       float64
dtype: object

In [92]:
# Separate the MEDV target from the predictors, then hold out 25% of the rows
# for testing. The fixed random_state keeps the split identical across runs.
Y = data["MEDV"]
X = data.drop(columns="MEDV")
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=46,
    test_size=0.25,
)

In [93]:
# Standardise the features. The scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits, so no statistics
# from the test rows influence the scaling.
scaler = preprocessing.StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [94]:
# Baseline: ordinary least squares on the standardised features,
# scored by mean squared error on the held-out split.
linear_model = LinearRegression().fit(x_train, y_train)
y_predection = linear_model.predict(x_test)
print(
    f"mean squared error of linear regression model = {mean_squared_error(y_test, y_predection)}"
)

mean squared error of linear regression model = 15.650696020264427


In [95]:
# Degree-2 polynomial expansion of the standardised features. The expansion is
# fitted on the training split and reused unchanged for the test split.
polynomial_model = preprocessing.PolynomialFeatures(degree=2)
polynomial_model.fit(x_train)
poly_features = polynomial_model.transform(x_train)
x_test_poly = polynomial_model.transform(x_test)

# Linear regression on the expanded features.
poly = LinearRegression().fit(poly_features, y_train)
y2_predection = poly.predict(x_test_poly)

# Report test and train error together so the gap between them is visible.
print(
    f"mean squared error of polynomial model = {mean_squared_error(y_test, y2_predection)}"
)
print(
    f"mean squared error of polynomial model (train) = {mean_squared_error(y_train, poly.predict(poly_features))}"
)

mean squared error of polynomial model = 10.699552073328515
mean squared error of polynomial model (train) = 5.746578191654034


## **NEURAL NETWORKS**

In [96]:
# Fully connected regressor: two ReLU hidden layers (128 and 64 units)
# feeding a single linear output unit for the MEDV prediction.
NN_model = Sequential()
# Take the input width from the training matrix instead of hard-coding 13,
# so the network still builds correctly if the feature set changes.
NN_model.add(Dense(128, input_dim=x_train.shape[1], activation='relu'))
NN_model.add(Dense(64, activation='relu'))
NN_model.add(Dense(1, activation='linear'))
# Optimise mean squared error with Adam; mean absolute error is tracked as an extra metric.
NN_model.compile(loss='mean_squared_error',optimizer='adam',metrics=['mae'])
NN_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 128)               1792      
                                                                 
 dense_10 (Dense)            (None, 64)                8256      
                                                                 
 dense_11 (Dense)            (None, 1)                 65        
                                                                 
Total params: 10,113
Trainable params: 10,113
Non-trainable params: 0
_________________________________________________________________


In [97]:
# Train for 150 epochs, with 25% of the training rows held back by Keras
# for validation. `results` keeps the object returned by fit().
results = NN_model.fit(
    x_train,
    y_train,
    epochs=150,
    validation_split=0.25,
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

### **Model Prediction (test)**

In [98]:
# Score the trained network on the held-out split with the same
# metric used for the linear and polynomial models.
model_predection = NN_model.predict(x_test)
print(
    f"mean squared error of sequential model = {mean_squared_error(y_test, model_predection)}"
)

mean squared error of sequential model = 7.442311628062688


### **predicted output & real**

In [99]:
# Print the first ten network predictions, a separator line, and then
# the first ten entries of y_test for an eyeball comparison.
print(f"predicted values is  {model_predection[:10]}")
print("////////////////////////////////////////////////")
print(f"real values is  {y_test[:10]}")

predicted values is  [[17.623072]
 [26.127672]
 [15.432506]
 [12.477351]
 [23.102737]
 [18.30456 ]
 [26.465038]
 [19.792452]
 [13.47441 ]
 [20.491379]]
////////////////////////////////////////////////
real values is  113    18.7
249    26.2
433    14.3
423    13.4
165    25.0
346    17.2
305    28.4
106    19.5
142    13.4
337    18.5
Name: MEDV, dtype: float64
