# **BOSTON HOUSING REGRESSOR WITH MACHINE LEARNING AND DEEP  LEARNING**

## Imports

In [None]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer, StandardScaler, OrdinalEncoder, OneHotEncoder

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

## Load data and separate training and testing data

In [None]:
# Load the dataset

data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

In [None]:
X, y = data, target

# Get target names

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# View data
pd.concat([pd.DataFrame(data),
           pd.DataFrame(target, columns = ['target'])
], axis = 1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0


In [None]:
# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# **Models**

# SkitLearn

In [None]:
# Import
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Define model
ml_model = LinearRegression()

# Fit Model
ml_model.fit(X_train, y_train)

# Making predictions
predictions = ml_model.predict(X_test)

# Evaluate
result = ml_model.score(X_test, y_test)

# Score do modelo nos dados de teste
print("Accuracy: %.3f%%" % (result * 100.0))

Accuracy: 66.876%


In [None]:
# Make predictions on new data

# X new
X_new = [[0.05,	0.0,	6.0,	0.0,	0.5,	6.5,	79.0,	5.0,	2.0,	245.0,	16.0,	398,	9.0]]

# Scale the features
X_new = scaler.transform(X_new)

# Making predictions
predictions = ml_model.predict(X_new)
print(predictions.round(2))

[26.61]


# PyTorch

## Pytorch with sequential api

In [None]:
import torch
from torch import nn
import torch.optim as optim

In [None]:
# Define the model structure
in_size = 13
hidden_size = 10
out_size = 1

torch_model = nn.Sequential(
    nn.Linear(in_features = in_size, out_features = hidden_size),                 # input layer
    nn.ReLU(),                                                                    # activation function
    nn.Linear(in_features = hidden_size, out_features = hidden_size),                       # hidden layer 1
    nn.ReLU(),                                                                    # activation function
    nn.Linear(in_features = hidden_size, out_features = out_size),                          # output layer 1
)

In [None]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Move the model and data to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch_model.to(device)

# Convert the data into tensors and move to GPU
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).to(device)

In [None]:
# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(torch_model.parameters(), lr=0.01)

In [None]:
# Train the model MSELoss
num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass
    y_pred = torch_model(X_train)
    loss = criterion(y_pred, y_train.unsqueeze(1))

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss at every 10th epoch
    if (epoch+1) % 50 == 0:
      print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [50/1000], Loss: 59.9122
Epoch [100/1000], Loss: 44.9689
Epoch [150/1000], Loss: 27.5195
Epoch [200/1000], Loss: 22.0882
Epoch [250/1000], Loss: 19.5492
Epoch [300/1000], Loss: 17.2953
Epoch [350/1000], Loss: 16.7385
Epoch [400/1000], Loss: 15.6952
Epoch [450/1000], Loss: 15.0785
Epoch [500/1000], Loss: 14.6039
Epoch [550/1000], Loss: 14.0998
Epoch [600/1000], Loss: 13.6107
Epoch [650/1000], Loss: 13.2650
Epoch [700/1000], Loss: 12.9859
Epoch [750/1000], Loss: 12.8180
Epoch [800/1000], Loss: 12.5316
Epoch [850/1000], Loss: 12.3258
Epoch [900/1000], Loss: 11.8432
Epoch [950/1000], Loss: 11.7736
Epoch [1000/1000], Loss: 11.5252


In [None]:
# Evaluate the model on the test set
with torch.no_grad():
    y_pred = torch_model(X_test)
    r2 = r2_score(y_test.cpu().numpy(), y_pred.cpu().numpy())
    print(f'R2 Score: {r2}')

R2 Score: 0.7679966379814345


In [None]:
# Make predictions on test data
with torch.no_grad():
    y_pred = torch_model(X_test[0:1])
    print('Predictions:', y_pred)


Predictions: tensor([[26.1439]])


In [None]:
# Make predictions on new data

# X new
X_new = [[0.05,	0.0,	6.0,	0.0,	0.5,	6.5,	79.0,	5.0,	2.0,	245.0,	16.0,	398,	9.0]]

# Scale the features
X_new = scaler.transform(X_new)

# Convert X_new to tensor
X_new = torch.tensor(X_new, dtype=torch.float32).to(device)

# Make prediction
with torch.no_grad():
    y_pred = torch_model(X_new)
    print('Predict:',y_pred)

Predict: tensor([[4.9666]])


## Pytorch with function api

In [None]:
# Define the class and foward function
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.linear(out)
        return out

In [None]:
# Define the model
class_torch_model = Net(in_size, hidden_size, out_size).to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(class_torch_model.parameters(), lr=0.01)

In [None]:
# Train the model 
num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass
    y_pred = class_torch_model(X_train)
    loss = criterion(y_pred, y_train.unsqueeze(1))

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss at every 10th epoch
    if (epoch+1) % 50 == 0:
      print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [50/1000], Loss: 63.3447
Epoch [100/1000], Loss: 45.8292
Epoch [150/1000], Loss: 35.9124
Epoch [200/1000], Loss: 33.0836
Epoch [250/1000], Loss: 31.0184
Epoch [300/1000], Loss: 30.5891
Epoch [350/1000], Loss: 27.7788
Epoch [400/1000], Loss: 26.5907
Epoch [450/1000], Loss: 25.4121
Epoch [500/1000], Loss: 26.4609
Epoch [550/1000], Loss: 23.7150
Epoch [600/1000], Loss: 33.2594
Epoch [650/1000], Loss: 22.7000
Epoch [700/1000], Loss: 21.8334
Epoch [750/1000], Loss: 21.3002
Epoch [800/1000], Loss: 20.0048
Epoch [850/1000], Loss: 21.5540
Epoch [900/1000], Loss: 18.5886
Epoch [950/1000], Loss: 17.6898
Epoch [1000/1000], Loss: 17.7110


In [None]:
# Evaluate the model on the test set
with torch.no_grad():
    y_pred = class_torch_model(X_test)
    r2 = r2_score(y_test.cpu().numpy(), y_pred.cpu().numpy())
    print(f'R2 Score: {r2}')

R2 Score: 0.6558180011177634


In [None]:
# Make predictions on new data

# X new
X_new = [[0.05,	0.0,	6.0,	0.0,	0.5,	6.5,	79.0,	5.0,	2.0,	245.0,	16.0,	398,	9.0]]

# Scale the features
X_new = scaler.transform(X_new)

# Convert X_new to tensor
X_new = torch.tensor(X_new, dtype=torch.float32).to(device)

# Make prediction
with torch.no_grad():
    y_pred = class_torch_model(X_new)
    print(f"Predicted: {y_pred}")

Predicted: tensor([[3.2784]])


# TensorFlow

## Tensorflow with sequential api

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [None]:
# Define the model structure
in_size = X_train.shape[1]
hidden_size = 10
out_size = 1

# Create TensorFlow model
tf_model = Sequential([
     Dense(hidden_size, input_shape=(in_size,), activation='relu'),   # Input layer
     Dense(hidden_size, activation='relu'),                           # Hidden layer
     Dense(out_size)                            # Output layer
])

In [None]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Compile the model
tf_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
history = tf_model.fit(X_train, y_train, epochs=1000, batch_size=32, validation_split=0.2, verbose=0)

In [None]:
# Evaluate the model on the test set
loss, mae = tf_model.evaluate(X_test, y_test)
print(f'Test loss: {loss:.4f}, Test MAE: {mae:.4f}')

# Calculate the R2 score
y_pred = tf_model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f'R2 score: {r2:.4f}')

Test loss: 19.2497, Test MAE: 2.9647
R2 score: 0.7375


In [None]:
# Make predictions on the test set
y_pred = tf_model.predict(X_test[:1])
print('Prediction: ', y_pred)

Prediction:  [[24.935966]]


In [None]:
# Make predictions on new data

# X new
X_new = [[0.05,	0.0,	6.0,	0.0,	0.5,	6.5,	79.0,	5.0,	2.0,	245.0,	16.0,	398,	9.0]]

# Scale the features
X_new = scaler.transform(X_new)

# Make predictions
predictions_tf = tf_model.predict(X_new)
print('Predict:', predictions_tf)

Predict: [[1.4522153]]


## Tensorflow with function api

In [None]:
# Define the model structure 
class Net(tf.keras.Model):
    def __init__(self, input_size, hidden_size, output_size):
        super(Net, self).__init__()
        self.fc1 = tf.keras.layers.Dense(hidden_size, activation='relu')
        self.fc2 = tf.keras.layers.Dense(hidden_size, activation='relu')
        self.fc3 = tf.keras.layers.Dense(output_size, activation=None)

    def call(self, x):
        out = self.fc1(x)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [None]:
# Define the model
tf_class_model = Net(in_size, hidden_size, out_size)

# Define the loss function and optimizer
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

# Compile the model
tf_class_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train the model
history = tf_class_model.fit(X_train, y_train, epochs=1000, batch_size=32, validation_split=0.2, verbose=0)

In [None]:
# Evaluate the model on the test set
loss, mae = tf_class_model.evaluate(X_test, y_test)
print(f'Test loss: {loss:.4f}, Test MAE: {mae:.4f}')

# Calculate the R2 score
y_pred = tf_class_model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f'R2 score: {r2:.4f}')

Test loss: 23.0569, Test MAE: 3.3071
R2 score: 0.6856


In [None]:
# Make predictions on the test set
y_pred = tf_class_model.predict(X_test[:1])
print('Prediction: ', y_pred)

Prediction:  [[27.910627]]


In [None]:
# Make predictions on new data

# X new
X_new = [[0.05,	0.0,	6.0,	0.0,	0.5,	6.5,	79.0,	5.0,	2.0,	245.0,	16.0,	398,	9.0]]

# Scale the features
X_new = scaler.transform(X_new)

# Make predictions
predictions_tf = tf_class_model.predict(X_new)
print('Predict:', predictions_tf)

Predict: [[1.6262757]]
