In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader, Dataset
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch.nn import functional as F

In [2]:
df = pd.read_csv('housing.csv')
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,5.33,36.2


In [3]:
df.isnull().values.any()

False

In [4]:
feature_columns = list(df.columns.values[:-1])
feature_columns

['CRIM',
 'ZN',
 'INDUS',
 'CHAS',
 'NOX',
 'RM',
 'AGE',
 'DIS',
 'RAD',
 'TAX',
 'PTRATIO',
 'LSTAT']

In [5]:
df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,6.48,22.0


> Creating a `Dataset`

In [6]:
class Boston(Dataset):
    def __init__(self, path, transform=None):
        self.path = path
        self.transform = transform
        self.dataframe = pd.read_csv(self.path)
        if self.dataframe.isnull().values.any():
            self.dataframe = self.dataframe.dropna()
        
        self.features = self.dataframe.drop(["MEDV"], axis=1)
        self.y =  self.dataframe["MEDV"].values
        
        """
        Transform the dataframe features to have the same scale
        """
        self.column_transformer = ColumnTransformer([
            ('min_max_scaler', MinMaxScaler(), feature_columns )
        ]).fit( self.features)
        
        self.data = self.column_transformer.transform(self.features)
        self.len = self.y.shape[0]
        self.x = self.data
        
    def __len__(self):
        return self.len
    
    def __getitem__(self, index):
        samples = self.x[index], self.y[index]
        
        if self.transform:
            samples = self.transform(samples)
        return samples
        

> Creating a `` ToTensor Transformer``

In [7]:
class ToTensor:
    def __call__(self, samples):
        x, y = samples
        return torch.from_numpy(x.astype('float32')), torch.from_numpy(np.array(y, dtype='float32'))

In [8]:
boston = Boston('housing.csv', transform=ToTensor())

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
train, test = train_test_split(boston, test_size=.2, random_state=42)

> Data Loading using the `DataLoader`

In [11]:
train_set = DataLoader(train, batch_size=10, shuffle=True)
test_set = DataLoader(train, batch_size=10, shuffle=False)

> Creating a `FFNN` to predict the Boston housing dataset.

In [25]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=12, out_features = 64 )
        self.fc2 = nn.Linear(in_features=64 , out_features = 128 )
        self.fc3 = nn.Linear(in_features=128 , out_features = 16 )
        self.fc4 = nn.Linear(in_features=16 , out_features = 1 )
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        return x
net = Net()
net

Net(
  (fc1): Linear(in_features=12, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=16, bias=True)
  (fc4): Linear(in_features=16, out_features=1, bias=True)
)

In [26]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
criterion = nn.L1Loss()

In [27]:
EPOCHS = 500
for epoch in range(EPOCHS):
    for X, y in train_set:
        optimizer.zero_grad()
        # forward pass
        outputs = net(X)
        # loss function
        loss = criterion(y.unsqueeze_(1), outputs)
        #backward pass
        loss.backward()
        # update the weights
        optimizer.step()
    if (epoch + 1) % 100 == 0:
        print(f'{epoch+1}/{EPOCHS}: Loss: {loss.item():.3f}')
        

100/500: Loss: 8.803
200/500: Loss: 3.019
300/500: Loss: 2.731
400/500: Loss: 4.609
500/500: Loss: 2.250


In [28]:
for X, y in test_set:
    pass


In [29]:
net(X[:5]), y[:5]

(tensor([[20.3464],
         [25.2822],
         [14.7981],
         [20.9318]], grad_fn=<AddmmBackward>),
 tensor([21.1000, 24.5000, 13.4000, 18.6000]))

> Evaluating the `model-loss` on the `test_set`

In [30]:
with torch.no_grad():
    for X, y in test_set:
        outputs = net(X)
        # Loss
        loss = criterion(outputs, y.unsqueeze_(1))
    print(loss)

tensor(1.3164)


> This is one of my `beautiful` pytorch `Linear Regession` model that is performing the way i want.