In [151]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import wandb

In [152]:
housing_dataset = pd.read_csv('HousingData.csv')
housing_dataset.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,,36.2


In [153]:
housing_dataset.describe()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
count,486.0,486.0,486.0,486.0,506.0,506.0,486.0,506.0,506.0,506.0,506.0,506.0,486.0,506.0
mean,3.611874,11.211934,11.083992,0.069959,0.554695,6.284634,68.518519,3.795043,9.549407,408.237154,18.455534,356.674032,12.715432,22.532806
std,8.720192,23.388876,6.835896,0.25534,0.115878,0.702617,27.999513,2.10571,8.707259,168.537116,2.164946,91.294864,7.155871,9.197104
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73,5.0
25%,0.0819,0.0,5.19,0.0,0.449,5.8855,45.175,2.100175,4.0,279.0,17.4,375.3775,7.125,17.025
50%,0.253715,0.0,9.69,0.0,0.538,6.2085,76.8,3.20745,5.0,330.0,19.05,391.44,11.43,21.2
75%,3.560263,12.5,18.1,0.0,0.624,6.6235,93.975,5.188425,24.0,666.0,20.2,396.225,16.955,25.0
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97,50.0


In [154]:
fig = px.scatter(housing_dataset, x='RM', y='MEDV', title='MEDV vs RM')
fig.show()

In [155]:
fig = px.histogram(housing_dataset, x='MEDV', title='Histogram of MEDV')
fig.show()

In [156]:
fig = px.imshow(housing_dataset.corr(), title='Correlation Heatmap',
                labels=dict(color='Correlation Coefficient'))
fig.show()

In [157]:
fig = px.box(housing_dataset, x='CHAS', y='MEDV', points="all", title='Boxplot of MEDV for CHAS')
fig.show()

In [158]:
fig = px.scatter_matrix(housing_dataset[['CRIM', 'RM', 'AGE', 'MEDV']], title='Scatter Matrix')
fig.show()

In [159]:
fig = px.scatter_3d(housing_dataset, x='RM', y='TAX', z='MEDV', color='MEDV', title='3D Scatter Plot')
fig.show()

## Splitting the dataset into train, validation and test sets

In [160]:
X = housing_dataset.drop('MEDV', axis=1).to_numpy()
y = housing_dataset['MEDV'].to_numpy()

# split into train, val and test sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2)

print(f"Size of training set: {len(X_train)}")
print(f"Size of validation set: {len(X_val)}")
print(f"Size of test set: {len(X_test)}")

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.fit_transform(X_val)
X_test = scaler.fit_transform(X_test)

imputer = SimpleImputer(strategy='median')
X_train = imputer.fit_transform(X_train)
X_val = imputer.fit_transform(X_val)
X_test = imputer.fit_transform(X_test)

print("Number of features = ", X_train.shape[1])

Size of training set: 323
Size of validation set: 102
Size of test set: 81
Number of features =  13


In [173]:
print(f"X_train values: {X_train[0]}\nPredicted output for the above input features: {y_train[0]}")

X_train values: [-0.42338288  3.67090421 -1.30987167 -0.25421615 -1.24529189  2.19662606
 -1.35550106  0.75534013 -0.68433293 -1.16987204 -1.72908567  0.39068921
 -1.25677502]
Predicted output for the above input features: 48.5


In [162]:
X_train.shape, y_train.shape

((323, 13), (323,))

In [163]:
wandb.login()

True

In [164]:
class MultiLayerPerceptronRegressionModel():
    def __init__(self, X_train, y_train, X_val, y_val, hidden_layers, activation_function="sigmoid", learning_rate=0.01, optimizer="sgd", log_to_wandb=False):
        self.log_to_wandb = log_to_wandb
        self.X_train = X_train
        self.y_train = y_train.reshape(-1, 1)
        self.X_val = X_val
        self.y_val = y_val.reshape(-1, 1)

        self.input_size = X_train.shape[1]
        self.output_size = self.y_train.shape[1]
        self.learning_rate = learning_rate
        self.layers = [self.input_size] + hidden_layers + [self.output_size]
        self.weights = [np.random.randn(self.layers[i], self.layers[i+1]) for i in range(len(self.layers) - 1)]
        self.biases = [np.random.randn(1, self.layers[i+1]) for i in range(len(self.layers) - 1)]
        
        if activation_function == "linear":
            self.activation = self.linear
            self.activation_derivative = self.linear_derivative
        elif activation_function == "sigmoid":
            self.activation = self.sigmoid
            self.activation_derivative = self.sigmoid_derivative
        elif activation_function == "tanh":
            self.activation = self.tanh
            self.activation_derivative = self.tanh_derivative
        elif activation_function == "relu":
            self.activation = self.relu
            self.activation_derivative = self.relu_derivative
        
        self.optimizer = optimizer

    def linear(self, x):
        return x
    
    def linear_derivative(self, x):
        return 1

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def tanh(self, x):
        return np.tanh(x)

    def tanh_derivative(self, x):
        return 1.0 - np.tanh(x)**2

    def relu(self, x):
        return np.maximum(0, x)

    def relu_derivative(self, x):
        return np.where(x > 0, 1, 0)

    def one_hot_encode(self, y):
        n_values = int(np.max(y)) + 1
        return np.eye(n_values)[y]
    
    def predict(self, X):
        outputs = self.forward(X)
        # Get the index of the maximum value in each output (this corresponds to the predicted class)
        predictions = np.argmax(outputs, axis=1)
        return predictions
    
    def compute_loss(self, y_true, y_pred):
        m = y_true.shape[0]
        loss = (1 / m) * np.sum(np.square(y_pred - y_true))
        return loss

    def mse(self, y_true, y_pred):
        m = len(y_true)
        return np.sum((y_pred - y_true)**2) / m

    def rmse(self, y_true, y_pred):
        return np.sqrt(self.mse(y_true, y_pred))

    def r2_score(self, y_true, y_pred):
        ss_total = np.sum((y_true - np.mean(y_true))**2)
        ss_residual = np.sum((y_true - y_pred)**2)
        return 1 - (ss_residual / ss_total)

    def forward(self, input_data):
        input_data = np.atleast_2d(input_data) # to make sure that input data has atleast 2 dimensions
        
        self.a_values = [input_data]
        self.z_values = []

        for i in range(len(self.weights)):
            z = np.dot(self.a_values[-1], self.weights[i]) + self.biases[i]
            a = self.activation(z)
            self.z_values.append(z)
            self.a_values.append(a)

        return self.a_values[-1]

    def backpropagation(self, X, y):
        m = X.shape[0]
        self.dz_values = [self.a_values[-1] - y]
        self.dw_values = [np.dot(self.a_values[-2].T, self.dz_values[0]) / m]
        self.db_values = [np.sum(self.dz_values[0], axis=0, keepdims=True) / m]

        for i in range(len(self.weights) - 1, 0, -1):
            dz = np.dot(self.dz_values[0], self.weights[i].T) * self.activation_derivative(self.a_values[i])
            dw = np.dot(self.a_values[i-1].T, dz) / m
            db = np.sum(dz, axis=0, keepdims=True) / m
            self.dz_values.insert(0, dz)
            self.dw_values.insert(0, dw)
            self.db_values.insert(0, db)

    def update_weights(self):
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * self.dw_values[i]
            self.biases[i] -= self.learning_rate * self.db_values[i]

    def train(self, epochs=100, batch_size=None):
        if self.optimizer == "sgd":
            for epoch in range(epochs):
                for x, target in zip(self.X_train, self.y_train):
                    self.forward(x)
                    self.backpropagation(x.reshape(1, -1), target.reshape(1, -1))
                    self.update_weights()
                self.print_epoch_stats(epoch, epochs)
        
        elif self.optimizer == "bgd":
            for epoch in range(epochs):
                self.forward(self.X_train)
                self.backpropagation(self.X_train, self.y_train)
                self.update_weights()
                self.print_epoch_stats(epoch, epochs)
        
        elif self.optimizer == "mbgd":
            n_samples = self.X_train.shape[0]
            n_batches = n_samples // batch_size

            for epoch in range(epochs):
                for i in range(0, n_samples, batch_size):
                    end = i + batch_size if i + batch_size <= n_samples else n_samples
                    batch_X, batch_y = self.X_train[i:end], self.y_train[i:end]
                    self.forward(batch_X)
                    self.backpropagation(batch_X, batch_y)
                    self.update_weights()
                self.print_epoch_stats(epoch, epochs)

    def print_epoch_stats(self, epoch, epochs):
        train_predictions = self.forward(self.X_train)
        train_mse = self.mse(self.y_train, train_predictions)
        train_rmse = self.rmse(self.y_train, train_predictions)
        train_r2 = self.r2_score(self.y_train, train_predictions)
        
        val_predictions = self.forward(self.X_val)
        val_mse = self.mse(self.y_val, val_predictions)
        val_rmse = self.rmse(self.y_val, val_predictions)
        val_r2 = self.r2_score(self.y_val, val_predictions)

        # Log the metrics to wandb
        if self.log_to_wandb:
            metrics = {
                "train_mse": train_mse, 
                "train_rmse": train_rmse, 
                "train_r2": train_r2,
                "val_mse": val_mse, 
                "val_rmse": val_rmse, 
                "val_r2": val_r2,
                "epoch": epoch + 1
            }
            # wandb.log(metrics) # Uncomment if using wandb

        # print the metrics
        else:
            print(f"Epoch {epoch + 1}/{epochs} - Training MSE: {train_mse:.4f}, Training RMSE: {train_rmse:.4f}, Training R2: {train_r2:.4f}")

In [174]:
model = MultiLayerPerceptronRegressionModel(X_train, y_train, X_val, y_val, hidden_layers=[3, 3], activation_function="sigmoid", learning_rate=0.01, optimizer="sgd", log_to_wandb=False)
model.train(epochs=100)

Epoch 1/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 2/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 3/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 4/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 5/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 6/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 7/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 8/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 9/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 10/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 11/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
Epoch 12/100 - Training MSE: 554.3132, Training RMSE: 23.5439, Training R2: -5.2444
E

In [None]:
yo = model.predict(X_test)
yo

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])