In [20]:
from matplotlib import pyplot as plt     # Data Visualisation
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import warnings
warnings.filterwarnings('ignore')

In [21]:
# Load the housing dataset from the CSV file hosted in the GitHub repository
HOUSING_CSV_URL = "https://raw.githubusercontent.com/Tareq-BD/ECGR-5105/main/Housing.csv"
housing = pd.read_csv(HOUSING_CSV_URL)
housing

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished


In [22]:
# Columns holding 'yes'/'no' strings that are converted to numeric codes
varlist = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
]

# Helper that converts yes/no strings to 1/0
def binary_map(x):
    """Return ``x.map(...)`` with 'yes' translated to 1 and 'no' to 0.

    In this notebook it is passed to ``DataFrame.apply``, so ``x`` is one
    column of the selected frame per call.
    """
    yes_no_codes = {'yes': 1, "no": 0}
    return x.map(yes_no_codes)

# Run binary_map over every yes/no column and write the result back into housing.
# NOTE(review): this mutates `housing` in place; re-running the cell without
# reloading the CSV would feed already-converted 1/0 values to binary_map,
# which its yes/no mapping does not cover.
mapped_columns = housing[varlist].apply(binary_map)
housing[varlist] = mapped_columns

# Preview the first rows of the converted dataframe
housing.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,furnished
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,furnished
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,semi-furnished
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,furnished
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,furnished


In [23]:
# Selected columns: the target (price) followed by the five input variables
# area, bedrooms, bathrooms, stories, parking
num_vars = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']

# Take an explicit copy so that later column assignments on new_df operate on
# an independent frame instead of on a selection derived from `housing`
# (which is what triggers pandas' chained-assignment warning).
new_df = housing[num_vars].copy()
new_df

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
0,13300000,7420,4,2,3,2
1,12250000,8960,4,4,4,3
2,12250000,9960,3,2,2,2
3,12215000,7500,4,2,2,3
4,11410000,7420,4,1,2,2
...,...,...,...,...,...,...
540,1820000,3000,2,1,1,2
541,1767150,2400,3,1,1,0
542,1750000,3620,2,1,1,0
543,1750000,2910,3,1,1,0


In [24]:
# Sanity check on the selected frame; the recorded output below is (545, 6)
new_df.shape

(545, 6)

In [25]:
# Min-max normalisation of every selected column (the price target included).
# NOTE(review): the scaler is fitted on all rows here, i.e. before the
# train/validation split performed further down the notebook.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(new_df[num_vars])
new_df[num_vars] = scaled_values

# Show the first ten normalised rows
new_df.head(10)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
0,1.0,0.396564,0.6,0.333333,0.666667,0.666667
1,0.909091,0.502405,0.6,1.0,1.0,1.0
2,0.909091,0.571134,0.4,0.333333,0.333333,0.666667
3,0.906061,0.402062,0.6,0.333333,0.333333,1.0
4,0.836364,0.396564,0.6,0.0,0.333333,0.666667
5,0.787879,0.402062,0.4,0.666667,0.0,0.666667
6,0.727273,0.476289,0.6,0.666667,1.0,0.666667
7,0.727273,1.0,0.8,0.666667,0.333333,0.0
8,0.70303,0.443299,0.6,0.0,0.333333,0.666667
9,0.69697,0.281787,0.4,0.333333,1.0,0.333333


In [26]:
# Separate the input features from the regression target:
# column 0 is price (target), columns 1-5 are the five input variables.
feature_frame = new_df.iloc[:, 1:6]
target_series = new_df.iloc[:, 0]
X, Y = feature_frame.values, target_series.values

In [27]:
# Wrap the feature and target arrays in torch tensors (X and Y are rebound)
X, Y = (torch.tensor(array) for array in (X, Y))

In [28]:
# Linear model with one weight per input column plus a bias term
def model(X, W1, W2, W3, W4, W5, B):
    """Return W1*X[:,0] + W2*X[:,1] + W3*X[:,2] + W4*X[:,3] + W5*X[:,4] + B.

    ``X`` is indexed as ``X[:, k]`` for k in 0..4, so it needs at least five
    columns; the result has one entry per row of ``X``.
    """
    # Terms are added from column 4 down to column 0, then the bias.
    weighted_sum = W5 * X[:, 4]
    for weight, column in ((W4, 3), (W3, 2), (W2, 1), (W1, 0)):
        weighted_sum = weighted_sum + weight * X[:, column]
    return weighted_sum + B

In [29]:
# Mean squared error between predictions and targets
def loss_fn(U, Y):
    """Return the mean of the element-wise squared difference ``(U - Y)**2``."""
    residual = U - Y
    return (residual ** 2).mean()

In [30]:
# Stand-alone initial values: five weights of 1 and a bias of 0, each a
# separate 0-dimensional tensor.
# NOTE(review): the training cells visible in this notebook build their own
# `params` tensor and do not reference these names.
W1, W2, W3, W4, W5 = (torch.ones(()) for _ in range(5))
B = torch.zeros(())

In [31]:
# 80% (training) / 20% (validation) split
n_samples = X.shape[0]
n_val = int(0.2 * n_samples)
n_train = n_samples - n_val

# Shuffle the row indices with a dedicated, seeded generator so that the split
# (and therefore every loss reported below) is identical on each
# Restart & Run All, without touching torch's global RNG state.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(n_samples, generator=split_generator)

# Slice by the training count instead of by -n_val: when n_val is 0 the
# negative-index form would yield an empty training set and put every row
# into validation.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

In [32]:
# Gather the rows of each subset using the shuffled index tensors
train_X, train_Y = X[train_indices], Y[train_indices]
val_X, val_Y = X[val_indices], Y[val_indices]

In [33]:
# Dimensions of the training feature tensor (rows, input variables)
train_X.shape

torch.Size([436, 5])

In [34]:
# Definition of the training loop
def training_loop(n_epochs, optimizer, params, train_X, val_X, train_Y, val_Y,
                  log_every=500):
    """Optimise ``params`` on the training set while monitoring validation loss.

    Every epoch evaluates ``model`` and ``loss_fn`` on the full training set,
    back-propagates the training loss and lets ``optimizer`` take one step.
    The validation loss is computed with the same (pre-step) parameters and is
    used for reporting only.

    Args:
        n_epochs: Number of epochs to run; epochs are numbered from 1.
        optimizer: Object providing ``zero_grad()`` and ``step()`` that
            updates ``params``.
        params: Iterable unpacked into ``model(X, *params)``.
        train_X, train_Y: Training inputs and targets.
        val_X, val_Y: Validation inputs and targets.
        log_every: Losses are printed on epoch 1 and on every epoch that is a
            multiple of this value (default 500). A falsy value limits the
            output to epoch 1.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        train_U = model(train_X, *params)
        train_loss = loss_fn(train_U, train_Y)

        # The validation loss is never back-propagated, so evaluate it without
        # recording an autograd graph.
        with torch.no_grad():
            val_U = model(val_X, *params)
            val_loss = loss_fn(val_U, val_Y)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        if epoch <= 1 or (log_every and epoch % log_every == 0):
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

    return params

In [35]:
# Experiment: learning rate 0.1
learning_rate = 0.1

# Fresh parameter vector: five weights start at 1.0, the bias (last entry) at 0.0
params = torch.tensor([1.0] * 5 + [0.0], requires_grad=True)

# SGD optimizer acting on that single parameter tensor
optimizer = optim.SGD([params], lr=learning_rate)

# Train for 5000 epochs; the returned parameters are the cell's output
training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    train_X=train_X,
    val_X=val_X,
    train_Y=train_Y,
    val_Y=val_Y,
)

Epoch 1, Training loss 1.2347, Validation loss 1.2044
Epoch 500, Training loss 0.0111, Validation loss 0.0143
Epoch 1000, Training loss 0.0109, Validation loss 0.0142
Epoch 1500, Training loss 0.0109, Validation loss 0.0142
Epoch 2000, Training loss 0.0109, Validation loss 0.0141
Epoch 2500, Training loss 0.0109, Validation loss 0.0141
Epoch 3000, Training loss 0.0109, Validation loss 0.0141
Epoch 3500, Training loss 0.0109, Validation loss 0.0141
Epoch 4000, Training loss 0.0109, Validation loss 0.0141
Epoch 4500, Training loss 0.0109, Validation loss 0.0141
Epoch 5000, Training loss 0.0109, Validation loss 0.0141


tensor([0.4167, 0.0809, 0.2541, 0.1504, 0.1046, 0.0394], requires_grad=True)

In [36]:
# Experiment: learning rate 0.01
learning_rate = 0.01

# Fresh parameter vector: five weights start at 1.0, the bias (last entry) at 0.0
params = torch.tensor([1.0] * 5 + [0.0], requires_grad=True)

# SGD optimizer acting on that single parameter tensor
optimizer = optim.SGD([params], lr=learning_rate)

# Train for 5000 epochs; the returned parameters are the cell's output
training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    train_X=train_X,
    val_X=val_X,
    train_Y=train_Y,
    val_Y=val_Y,
)

Epoch 1, Training loss 1.2347, Validation loss 1.2044
Epoch 500, Training loss 0.0495, Validation loss 0.0449
Epoch 1000, Training loss 0.0217, Validation loss 0.0214
Epoch 1500, Training loss 0.0152, Validation loss 0.0164
Epoch 2000, Training loss 0.0132, Validation loss 0.0151
Epoch 2500, Training loss 0.0123, Validation loss 0.0147
Epoch 3000, Training loss 0.0118, Validation loss 0.0145
Epoch 3500, Training loss 0.0115, Validation loss 0.0144
Epoch 4000, Training loss 0.0113, Validation loss 0.0143
Epoch 4500, Training loss 0.0112, Validation loss 0.0143
Epoch 5000, Training loss 0.0111, Validation loss 0.0143


tensor([4.5568e-01, 1.8097e-01, 2.4421e-01, 1.2514e-01, 9.1959e-02, 4.2686e-04],
       requires_grad=True)

In [37]:
# Experiment: learning rate 0.001
learning_rate = 0.001

# Fresh parameter vector: five weights start at 1.0, the bias (last entry) at 0.0
params = torch.tensor([1.0] * 5 + [0.0], requires_grad=True)

# SGD optimizer acting on that single parameter tensor
optimizer = optim.SGD([params], lr=learning_rate)

# Train for 5000 epochs; the returned parameters are the cell's output
training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    train_X=train_X,
    val_X=val_X,
    train_Y=train_Y,
    val_Y=val_Y,
)

Epoch 1, Training loss 1.2347, Validation loss 1.2044
Epoch 500, Training loss 0.2259, Validation loss 0.2054
Epoch 1000, Training loss 0.1408, Validation loss 0.1253
Epoch 1500, Training loss 0.1189, Validation loss 0.1060
Epoch 2000, Training loss 0.1035, Validation loss 0.0925
Epoch 2500, Training loss 0.0905, Validation loss 0.0810
Epoch 3000, Training loss 0.0795, Validation loss 0.0713
Epoch 3500, Training loss 0.0701, Validation loss 0.0630
Epoch 4000, Training loss 0.0621, Validation loss 0.0560
Epoch 4500, Training loss 0.0553, Validation loss 0.0500
Epoch 5000, Training loss 0.0494, Validation loss 0.0449


tensor([ 0.6994,  0.5557,  0.6248,  0.3442,  0.3773, -0.3903],
       requires_grad=True)

In [38]:
# Experiment: learning rate 0.0001
learning_rate = 0.0001

# Fresh parameter vector: five weights start at 1.0, the bias (last entry) at 0.0
params = torch.tensor([1.0] * 5 + [0.0], requires_grad=True)

# SGD optimizer acting on that single parameter tensor
optimizer = optim.SGD([params], lr=learning_rate)

# Train for 5000 epochs; the returned parameters are the cell's output
training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    train_X=train_X,
    val_X=val_X,
    train_Y=train_Y,
    val_Y=val_Y,
)

Epoch 1, Training loss 1.2347, Validation loss 1.2044
Epoch 500, Training loss 0.9807, Validation loss 0.9517
Epoch 1000, Training loss 0.7864, Validation loss 0.7586
Epoch 1500, Training loss 0.6380, Validation loss 0.6113
Epoch 2000, Training loss 0.5246, Validation loss 0.4989
Epoch 2500, Training loss 0.4376, Validation loss 0.4130
Epoch 3000, Training loss 0.3710, Validation loss 0.3473
Epoch 3500, Training loss 0.3197, Validation loss 0.2969
Epoch 4000, Training loss 0.2801, Validation loss 0.2582
Epoch 4500, Training loss 0.2495, Validation loss 0.2284
Epoch 5000, Training loss 0.2257, Validation loss 0.2053


tensor([ 0.8484,  0.7557,  0.8991,  0.7743,  0.8007, -0.5064],
       requires_grad=True)