In [1]:
import pandas as pd
import torch
import torch.nn.functional as F
import torch.nn as nn
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
import time
from sklearn.preprocessing import PolynomialFeatures


torch.cuda.is_available()

True

In [2]:
Data =pd.read_csv("housing.csv")
X = Data.drop(['median_house_value'] , axis=1)
Y = Data['median_house_value']
Y = Y.values.reshape(-1,1)



In [3]:
Place=['INLAND','<1H OCEAN','NEAR OCEAN','NEAR BAY','ISLAND']

encoder = OrdinalEncoder(
    categories=[Place],
    handle_unknown='use_encoded_value',
    unknown_value=-1
)
cat_cols = ['ocean_proximity']
new_cols = ['Place']
encoder.fit(X[cat_cols])

X[new_cols] = encoder.transform(X[cat_cols])

X = X.select_dtypes(include=['number'])



X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.15,random_state=42)

median_value_train = X_train.median(numeric_only = True)
X_train = X_train.fillna(median_value_train)

median_value_test =  X_test.median(numeric_only = True)
X_test = X_test.fillna(median_value_test)


poly = PolynomialFeatures(degree = 2,include_bias = False)
X_poly = poly.fit_transform(X_train)
poly_features = pd.DataFrame(X_poly, columns = poly.get_feature_names_out(X_train.columns))
X_train = poly_features
poly_test = PolynomialFeatures(degree = 2,include_bias = False)
X_poly_2 = poly_test.fit_transform(X_test)
X_test = X_poly_2

X_test

array([[-119.01      ,   36.06      ,   25.        , ...,    2.82643344,
           0.        ,    0.        ],
       [-119.46      ,   35.14      ,   30.        , ...,    6.40747969,
           0.        ,    0.        ],
       [-122.44      ,   37.8       ,   52.        , ...,   12.11109601,
          10.4403    ,    9.        ],
       ...,
       [-119.18      ,   34.21      ,   46.        , ...,    9.529569  ,
           6.174     ,    4.        ],
       [-117.41      ,   34.23      ,   17.        , ...,   37.73153476,
           0.        ,    0.        ],
       [-117.9       ,   34.13      ,   37.        , ...,    9.98370409,
           3.1597    ,    1.        ]], shape=(3096, 54))

In [4]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
y_scaler = StandardScaler()
y_scaler.fit(Y_train)
Y_train = y_scaler.transform(Y_train)
Y_test = y_scaler.transform(Y_test)



In [5]:
X_train_tensor = torch.tensor(X_train,dtype = torch.float32)
X_test_tensor = torch.tensor(X_test,dtype = torch.float32)
Y_train_tensor = torch.tensor(Y_train,dtype = torch.float32).view(-1,1)
Y_test_tensor = torch.tensor(Y_test,dtype = torch.float32).view(-1,1)
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)
train_loader = DataLoader(train_dataset,batch_size=64,shuffle=True)
test_loader = DataLoader(test_dataset,batch_size=64)

In [6]:
NUM_EPOCHS =200
RANDOM_SEED = 42
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [7]:
def weight_inti(m):
    if isinstance(m,nn.Linear):
        nn.init.kaiming_normal_(m.weight,mode = 'fan_in',nonlinearity = 'relu')
        if m.bias is not None:
            nn.init.constant_(m.bias,0)

    elif isinstance(m,nn.Conv2d):
        nn.init.xavier_uniform_(m.weight)


In [8]:
class Model(torch.nn.Module):
    def __init__(self,num_features):
        super().__init__()
        self.my_net = torch.nn.Sequential(
            torch.nn.Linear(in_features=num_features, out_features=40),
            torch.nn.BatchNorm1d(num_features=40),
            torch.nn.ELU(alpha=1.0),
            torch.nn.Linear(in_features=40, out_features=30),
            torch.nn.BatchNorm1d(num_features=30),
            torch.nn.ELU(alpha=1.0),
            torch.nn.Linear(in_features=30, out_features=20),
            torch.nn.BatchNorm1d(num_features=20),
            torch.nn.ELU(alpha=1.0),
            torch.nn.Linear(in_features=20, out_features=10),
            torch.nn.BatchNorm1d(num_features=10),
            torch.nn.ELU(alpha=1.0),
            torch.nn.Linear(in_features=10, out_features=1),


        )
    def forward(self, x):
        out = self.my_net(x)
        return out


torch.manual_seed(RANDOM_SEED)
model = Model(num_features=54)
model.apply(weight_inti)
model = model.to(DEVICE)


optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)

def compute_loss(net,dataloader):
    curr_loss = 0
    with torch.no_grad():
        for cnt , (fet, output) in enumerate(dataloader):
            fet = fet.view((-1,54)).to(DEVICE)
            output = output.view((-1,1)).to(DEVICE)
            out = net(fet)
            loss_c = F.mse_loss(out,output)
            curr_loss += loss_c
        return float(curr_loss)/len(dataloader)

start_time = time.time()
minibatch_cost = []
epoch_cost = []

for epoch in range(NUM_EPOCHS):
    model.train()
    for batch_idx , (features , targets) in enumerate(train_loader):
        features = features.view(-1,54).to(DEVICE)
        targets = targets.to(DEVICE)
        pred = model.forward(features)
        cost = F.mse_loss(pred,targets.float().view(-1,1))
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        minibatch_cost.append(cost.item())
        if not batch_idx % 50:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                   %(epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost.item()))
    cost = compute_loss(model, train_loader)
    epoch_cost.append(cost)
    print('Epoch: %03d/%03d Train Cost: %.4f' % (
            epoch+1, NUM_EPOCHS, cost))
    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))










Epoch: 001/200 | Batch 000/275 | Cost: 1.9796
Epoch: 001/200 | Batch 050/275 | Cost: 0.4823
Epoch: 001/200 | Batch 100/275 | Cost: 0.3983
Epoch: 001/200 | Batch 150/275 | Cost: 0.2451
Epoch: 001/200 | Batch 200/275 | Cost: 0.2341
Epoch: 001/200 | Batch 250/275 | Cost: 0.3453
Epoch: 001/200 Train Cost: 0.3087
Time elapsed: 0.05 min
Epoch: 002/200 | Batch 000/275 | Cost: 0.3603
Epoch: 002/200 | Batch 050/275 | Cost: 0.2752
Epoch: 002/200 | Batch 100/275 | Cost: 0.3878
Epoch: 002/200 | Batch 150/275 | Cost: 0.2208
Epoch: 002/200 | Batch 200/275 | Cost: 0.2768
Epoch: 002/200 | Batch 250/275 | Cost: 0.2243
Epoch: 002/200 Train Cost: 0.2856
Time elapsed: 0.08 min
Epoch: 003/200 | Batch 000/275 | Cost: 0.2567
Epoch: 003/200 | Batch 050/275 | Cost: 0.2589
Epoch: 003/200 | Batch 100/275 | Cost: 0.2360
Epoch: 003/200 | Batch 150/275 | Cost: 0.3891
Epoch: 003/200 | Batch 200/275 | Cost: 0.2703
Epoch: 003/200 | Batch 250/275 | Cost: 0.3893
Epoch: 003/200 Train Cost: 0.2795
Time elapsed: 0.11 min
E

In [9]:
FILE_NAME = "my_model.pth"


torch.save(model.state_dict(), FILE_NAME)

In [10]:
model.eval()

running_test_loss = 0.0

with torch.no_grad():
    for features, targets in test_loader:
        features = features.view(-1,54).to(DEVICE)
        targets = targets.to(DEVICE)


        pred = model(features)

        loss = F.mse_loss(pred, targets.float().view(-1, 1))


        running_test_loss += loss.item()

avg_test_cost = running_test_loss / len(test_loader)

print(f"Training Cost: ~0.18")
print(f"Testing Cost:  {avg_test_cost:.4f}")




Training Cost: ~0.18
Testing Cost:  0.2036
