In [1]:
import pandas as pd
import matplotlib.pyplot as plt

### Load Dataset

In [2]:
df = pd.read_csv('/content/Boston.csv')
df.head(10)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV,CAT. MEDV,Unnamed: 15,Unnamed: 16
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0,0,,
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6,0,,
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7,1,,
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4,1,,
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2,1,,
5,0.02985,0.0,2.18,0,0.458,6.43,58.7,6.0622,3,222,18.7,394.12,5.21,28.7,0,,
6,0.08829,12.5,7.87,0,0.524,6.012,66.6,5.5605,5,311,15.2,395.6,12.43,22.9,0,,
7,0.14455,12.5,7.87,0,0.524,6.172,96.1,5.9505,5,311,15.2,396.9,19.15,27.1,0,,
8,0.21124,12.5,7.87,0,0.524,5.631,100.0,6.0821,5,311,15.2,386.63,29.93,16.5,0,,
9,0.17004,12.5,7.87,0,0.524,6.004,85.9,6.5921,5,311,15.2,386.71,17.1,18.9,0,,


In [3]:
df.drop(columns=['Unnamed: 15','Unnamed: 16'],inplace=True)

In [4]:
df.drop(columns=['CAT. MEDV'],inplace=True)

#### Checking for null values

In [5]:
df.isnull().sum()

Unnamed: 0,0
CRIM,0
ZN,0
INDUS,0
CHAS,0
NOX,0
RM,0
AGE,0
DIS,0
RAD,0
TAX,0


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   CRIM     506 non-null    float64
 1   ZN       506 non-null    float64
 2   INDUS    506 non-null    float64
 3   CHAS     506 non-null    int64  
 4   NOX      506 non-null    float64
 5   RM       506 non-null    float64
 6   AGE      506 non-null    float64
 7   DIS      506 non-null    float64
 8   RAD      506 non-null    int64  
 9   TAX      506 non-null    int64  
 10  PTRATIO  506 non-null    float64
 11  B        506 non-null    float64
 12  LSTAT    506 non-null    float64
 13  MEDV     506 non-null    float64
dtypes: float64(11), int64(3)
memory usage: 55.5 KB


In [7]:
df.describe()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063,22.532806
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062,9.197104
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73,5.0
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95,17.025
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36,21.2
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955,25.0
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97,50.0


#### Checking correlation with target variable MEDV

In [8]:
df.corr()['MEDV'].sort_values()

Unnamed: 0,MEDV
LSTAT,-0.737663
PTRATIO,-0.507787
INDUS,-0.483725
TAX,-0.468536
NOX,-0.427321
CRIM,-0.388305
RAD,-0.381626
AGE,-0.376955
CHAS,0.17526
DIS,0.249929


In [9]:
X = df.loc[:,['LSTAT','PTRATIO','RM']]
Y = df.loc[:,"MEDV"]
X.shape,Y.shape

((506, 3), (506,))

### Preparing training and testing data set

In [10]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.25,random_state=10)

### Normalizing training and testing dataset

In [11]:
from sklearn.preprocessing import StandardScaler

In [12]:
scaler = StandardScaler()

In [13]:
scaler.fit(x_train)

In [14]:
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

### Preparing model

In [15]:
from keras.models import Sequential
from keras.layers import Dense

In [16]:
model = Sequential()

In [17]:
model.add(Dense(128,input_shape=(3,),activation='relu',name='input'))
model.add(Dense(64,activation='relu',name='layer_1'))
model.add(Dense(1,activation='linear',name='output'))
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
model.fit(x_train,y_train,epochs=100,validation_split=0.05)

Epoch 1/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - loss: 519.8208 - mae: 21.1766 - val_loss: 695.2317 - val_mae: 23.6122
Epoch 2/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 502.2825 - mae: 20.6687 - val_loss: 647.9392 - val_mae: 22.5809
Epoch 3/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 469.1874 - mae: 19.8983 - val_loss: 583.8881 - val_mae: 21.1167
Epoch 4/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 387.9230 - mae: 17.9412 - val_loss: 496.7190 - val_mae: 19.1382
Epoch 5/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 296.9182 - mae: 15.3858 - val_loss: 391.8758 - val_mae: 16.6870
Epoch 6/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 202.1886 - mae: 12.9407 - val_loss: 283.3829 - val_mae: 13.5937
Epoch 7/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x7a88e65f1c50>

In [19]:
output = model.evaluate(x_test,y_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 20.2445 - mae: 3.1180 


In [20]:
print(f"Mean Squared Error: {output[0]}"
      ,f"Mean Absolute Error: {output[1]}",sep="\n")

Mean Squared Error: 22.505544662475586
Mean Absolute Error: 3.1634976863861084


In [21]:
y_pred = model.predict(x=x_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


In [22]:
print(*zip(y_pred,y_test))

(array([25.147537], dtype=float32), 28.4) (array([31.20865], dtype=float32), 31.1) (array([26.781433], dtype=float32), 23.5) (array([27.70317], dtype=float32), 26.6) (array([19.896172], dtype=float32), 19.6) (array([16.58222], dtype=float32), 14.3) (array([42.318863], dtype=float32), 50.0) (array([14.446366], dtype=float32), 14.3) (array([19.94313], dtype=float32), 20.7) (array([42.87922], dtype=float32), 37.6) (array([17.967922], dtype=float32), 20.4) (array([26.830574], dtype=float32), 27.5) (array([22.85928], dtype=float32), 36.2) (array([32.715813], dtype=float32), 32.0) (array([31.411808], dtype=float32), 33.1) (array([51.62595], dtype=float32), 48.8) (array([26.058653], dtype=float32), 24.6) (array([19.789602], dtype=float32), 26.4) (array([21.72386], dtype=float32), 23.2) (array([20.431475], dtype=float32), 17.0) (array([33.984432], dtype=float32), 41.3) (array([15.885105], dtype=float32), 14.9) (array([22.5943], dtype=float32), 18.5) (array([25.063211], dtype=float32), 25.0) (a