Import necessary plugins

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


read and standardize the data

In [2]:
column_names = [
    "CRIM",    # per capita crime rate by town
    "ZN",      # proportion of residential land zoned for lots over 25,000 sq.ft.
    "INDUS",   # proportion of non-retail business acres per town
    "CHAS",    # Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
    "NOX",     # nitric oxides concentration (parts per 10 million)
    "RM",      # average number of rooms per dwelling
    "AGE",     # proportion of owner-occupied units built before 1940
    "DIS",     # weighted distances to five Boston employment centers
    "RAD",     # index of accessibility to radial highways
    "TAX",     # full-value property-tax rate per $10,000
    "PTRATIO", # pupil-teacher ratio by town
    "B",       # 1000(Bk - 0.63)^2 where Bk is proportion of Black population
    "LSTAT",   # % lower status of the population
    "MEDV"     # Median value of owner-occupied homes in $1000s (target variable)
]
df = pd.read_csv(
    r"C:\Users\Ondra\.vscode\python\Linear regression\archive\housing.csv",
    header=None,
    names=column_names,
    delim_whitespace=True
)
print(df.head())

from sklearn.preprocessing import StandardScaler
X_raw = df.iloc[:, :-1].values  # all columns except MEDV
y = df.iloc[:, -1].values.reshape(-1, 1)  # MEDV column

# Initialize scaler and fit-transform the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_raw)
X = np.hstack((np.ones((X_scaled.shape[0], 1)), X_scaled))
print(X)

  df = pd.read_csv(


      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296.0   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242.0   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242.0   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222.0   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222.0   

   PTRATIO       B  LSTAT  MEDV  
0     15.3  396.90   4.98  24.0  
1     17.8  396.90   9.14  21.6  
2     17.8  392.83   4.03  34.7  
3     18.7  394.63   2.94  33.4  
4     18.7  396.90   5.33  36.2  
[[ 1.         -0.42023335  0.28464542 ... -1.46382318  0.44157346
  -1.07357933]
 [ 1.         -0.4177927  -0.48714485 ... -0.30646048  0.44157346
  -0.49055393]
 [ 1.         -0.41779502 -0.48714485 ... -0.30646048  0.39698405
  -1.20672215]
 ...
 [ 1.         -0.41390323 -0.48714485 ...  1.17496378  0.44157346
  -0.98108011]
 [ 1.         -0.40822543 -0

Set the training algorithm and initial weight vector to zero with the bias term

In [3]:
iterations=5000
learning_rate=0.005
# initial weight vector
w= np.zeros((X.shape[1], 1))
#Training algorithm
for i in range(iterations):
    y_pred=X@w
    error=y_pred-y
    gradient=(1/X.shape[0])*X.T@error
    w=w-learning_rate*gradient
#print w
print("Final weights:", w.flatten())

Final weights: [22.52376238 -0.8867289   1.04378964 -0.03461132  0.71341981 -1.97523641
  2.71174791 -0.06806462 -3.15647392  2.20382541 -1.58671573 -1.99149039
  0.84061584 -3.75650464]


Set the testing data (standardized)

In [4]:
x_test=np.array([
    1.0,       # bias
   -0.417,     # CRIM
   -0.487,     # ZN
   -0.593,     # INDUS
   -0.273,     # CHAS
   -0.739,     # NOX
    0.194,     # RM
    0.368,     # AGE
    0.558,     # DIS
   -0.867,     # RAD
   -0.985,     # TAX
   -0.303,     # PTRATIO
    0.441,     # B
   -0.493      # LSTAT
]).reshape(-1, 1)
y_pred = x_test.T @ w
print("Predicted value for the test data:", y_pred[0, 0])

Predicted value for the test data: 24.888670580493567


 0.14455  12.50   7.870  0  0.5240  6.1720  96.10  5.9505   5  311.0  15.20 396.90  19.15  27.10

 -2.30297
-1.85747
-2.02188
-2.30426
-2.28394
-2.08859
1.12586
-2.09633
-2.12521
8.79092
-1.76271
11.85557
-1.62047
-1.33607


In [5]:
x_test=np.array([
    1,
 -2.30297,
-1.85747,
-2.02188,
-2.30426,
-2.28394,
-2.08859,
1.12586,
-2.09633,
-2.12521,
8.79092,
-1.76271,
11.85557,
-1.62047
]).reshape(-1, 1)
y_pred = x_test.T @ w
print("Predicted value for the test data:", y_pred[0, 0])

Predicted value for the test data: 27.372531559443146
