In [3]:
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
import time
from sklearn.preprocessing import PolynomialFeatures
import reverse_geocoder as rg

In [11]:
Data =pd.read_csv("housing.csv")
X = Data.drop(['median_house_value'] , axis=1)
Y = Data['median_house_value']
Y = Y.values.reshape(-1,1)

X['city'] = rg.search(list(zip(X['latitude'],X['longitude'])), mode=1)[0]['name']

X =X.drop(['latitude','longitude','city'], axis=1)
X


Unnamed: 0,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity
0,41.0,880.0,129.0,322.0,126.0,8.3252,NEAR BAY
1,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,NEAR BAY
2,52.0,1467.0,190.0,496.0,177.0,7.2574,NEAR BAY
3,52.0,1274.0,235.0,558.0,219.0,5.6431,NEAR BAY
4,52.0,1627.0,280.0,565.0,259.0,3.8462,NEAR BAY
...,...,...,...,...,...,...,...
20635,25.0,1665.0,374.0,845.0,330.0,1.5603,INLAND
20636,18.0,697.0,150.0,356.0,114.0,2.5568,INLAND
20637,17.0,2254.0,485.0,1007.0,433.0,1.7000,INLAND
20638,18.0,1860.0,409.0,741.0,349.0,1.8672,INLAND


In [12]:
Place=['INLAND','ISLAND','NEAR BAY','<1H OCEAN','NEAR OCEAN']

encoder = OrdinalEncoder(
    categories=[Place],
    handle_unknown='use_encoded_value',
    unknown_value=-1
)
cat_cols = ['ocean_proximity']
new_cols = ['Place']
encoder.fit(X[cat_cols])

X[new_cols] = encoder.transform(X[cat_cols])

X = X.select_dtypes(include=['number'])



X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.15,random_state=42)

median_value_train = X_train.median(numeric_only = True)
X_train = X_train.fillna(median_value_train)

median_value_test =  X_test.median(numeric_only = True)
X_test = X_test.fillna(median_value_test)


poly = PolynomialFeatures(degree = 2,include_bias = False)
X_poly = poly.fit_transform(X_train)
poly_features = pd.DataFrame(X_poly, columns = poly.get_feature_names_out(X_train.columns))
X_train = poly_features
poly_test = PolynomialFeatures(degree = 2,include_bias = False)
X_poly_2 = poly_test.fit_transform(X_test)
X_test = X_poly_2

X_test

array([[2.50000000e+01, 1.50500000e+03, 4.27000000e+02, ...,
        2.82643344e+00, 0.00000000e+00, 0.00000000e+00],
       [3.00000000e+01, 2.94300000e+03, 4.27000000e+02, ...,
        6.40747969e+00, 0.00000000e+00, 0.00000000e+00],
       [5.20000000e+01, 3.83000000e+03, 4.27000000e+02, ...,
        1.21110960e+01, 6.96020000e+00, 4.00000000e+00],
       ...,
       [4.60000000e+01, 2.06200000e+03, 4.84000000e+02, ...,
        9.52956900e+00, 1.23480000e+01, 1.60000000e+01],
       [1.70000000e+01, 8.89000000e+02, 1.31000000e+02, ...,
        3.77315348e+01, 0.00000000e+00, 0.00000000e+00],
       [3.70000000e+01, 1.80100000e+03, 4.22000000e+02, ...,
        9.98370409e+00, 9.47910000e+00, 9.00000000e+00]], shape=(3096, 35))

In [15]:


scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
y_scaler = StandardScaler()
y_scaler.fit(Y_train)
Y_train = y_scaler.transform(Y_train)

array([[-1.3152842 , -0.96722769, -0.50206321, ..., -0.78705162,
        -0.62495091,  0.47674295],
       [ 0.27101546, -0.07830509,  0.03333907, ..., -0.18136745,
         0.36439793,  0.47674295],
       [ 1.06416529, -0.99941836, -1.06375758, ..., -0.39181599,
        -1.07745284, -1.213329  ],
       ...,
       [ 0.58827539, -0.24707622,  0.07397228, ..., -0.45832614,
         0.0279598 ,  0.47674295],
       [-1.07733925,  0.43076749,  0.14089757, ...,  0.64686396,
         1.07701689,  0.47674295],
       [ 1.85731512,  0.7301408 ,  1.85705311, ..., -0.54901696,
        -0.4306448 , -0.46218591]], shape=(17544, 35))

In [19]:
X_train_tensor = torch.tensor(X_train,dtype = torch.float32)
X_test_tensor = torch.tensor(X_test,dtype = torch.float32)
Y_train_tensor = torch.tensor(Y_train,dtype = torch.float32).view(-1,1)
Y_test_tensor = torch.tensor(Y_test,dtype = torch.float32).view(-1,1)
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)
train_loader = DataLoader(train_dataset,batch_size=64,shuffle=True)
test_loader = DataLoader(test_dataset,batch_size=64)

In [20]:
NUM_EPOCHS =500
RANDOM_SEED = 42
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [22]:
class Model(torch.nn.Module):
    def __init__(self,num_features):
        super().__init__()
        self.my_net = torch.nn.Sequential(
            torch.nn.Linear(in_features=num_features, out_features=30),
            torch.nn.BatchNorm1d(num_features=30),
            torch.nn.ReLU(),
            torch.nn.Linear(in_features=30, out_features=20),
            torch.nn.BatchNorm1d(num_features=20),
            torch.nn.ReLU(),
            torch.nn.Linear(in_features=20, out_features=10),
            torch.nn.BatchNorm1d(num_features=10),
            torch.nn.ReLU(),
            torch.nn.Linear(in_features=10, out_features=1),


        )
    def forward(self, x):
        out = self.my_net(x)
        return out


torch.manual_seed(RANDOM_SEED)
model = Model(num_features=35)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)

def compute_loss(net,dataloader):
    curr_loss = 0
    with torch.no_grad():
        for cnt , (fet, output) in enumerate(dataloader):
            fet = fet.view((-1, 35)).to(DEVICE)
            output = output.view((-1,1)).to(DEVICE)
            out = net(fet)
            loss = F.mse_loss(out,output)
            curr_loss += loss
        return float(curr_loss)/len(dataloader)

start_time = time.time()
minibatch_cost = []
epoch_cost = []

for epoch in range(NUM_EPOCHS):
    model.train()
    for batch_idx , (features , targets) in enumerate(train_loader):
        features = features.view(-1,35).to(DEVICE)
        targets = targets.to(DEVICE)
        pred = model.forward(features)
        cost = F.mse_loss(pred,targets.float().view(-1,1))
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        minibatch_cost.append(cost.item())
        if not batch_idx % 64:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                   %(epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost.item()))
    cost = compute_loss(model, train_loader)
    epoch_cost.append(cost)
    print('Epoch: %03d/%03d Train Cost: %.4f' % (
            epoch+1, NUM_EPOCHS, cost))
    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))











Epoch: 001/500 | Batch 000/275 | Cost: 1.1495
Epoch: 001/500 | Batch 064/275 | Cost: 0.2907
Epoch: 001/500 | Batch 128/275 | Cost: 0.4975
Epoch: 001/500 | Batch 192/275 | Cost: 0.2954
Epoch: 001/500 | Batch 256/275 | Cost: 0.2202
Epoch: 001/500 Train Cost: 0.3164
Time elapsed: 0.02 min
Epoch: 002/500 | Batch 000/275 | Cost: 0.2702
Epoch: 002/500 | Batch 064/275 | Cost: 0.3011
Epoch: 002/500 | Batch 128/275 | Cost: 0.2470
Epoch: 002/500 | Batch 192/275 | Cost: 0.2660
Epoch: 002/500 | Batch 256/275 | Cost: 0.3385
Epoch: 002/500 Train Cost: 0.2962
Time elapsed: 0.04 min
Epoch: 003/500 | Batch 000/275 | Cost: 0.3153
Epoch: 003/500 | Batch 064/275 | Cost: 0.2786
Epoch: 003/500 | Batch 128/275 | Cost: 0.3510
Epoch: 003/500 | Batch 192/275 | Cost: 0.2429
Epoch: 003/500 | Batch 256/275 | Cost: 0.2589
Epoch: 003/500 Train Cost: 0.2934
Time elapsed: 0.06 min
Epoch: 004/500 | Batch 000/275 | Cost: 0.3727
Epoch: 004/500 | Batch 064/275 | Cost: 0.4883
Epoch: 004/500 | Batch 128/275 | Cost: 0.2371
E

KeyboardInterrupt: 