In [224]:
import pandas as pd
import numpy as np
import torch

In [225]:
from torch.utils.data import TensorDataset, DataLoader

In [239]:
df = pd.read_csv('housing.csv')

In [240]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20640 non-null  float64
 1   latitude            20640 non-null  float64
 2   housing_median_age  20640 non-null  float64
 3   total_rooms         20640 non-null  float64
 4   total_bedrooms      20433 non-null  float64
 5   population          20640 non-null  float64
 6   households          20640 non-null  float64
 7   median_income       20640 non-null  float64
 8   median_house_value  20640 non-null  float64
 9   ocean_proximity     20640 non-null  object 
dtypes: float64(9), object(1)
memory usage: 1.6+ MB


In [241]:
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [242]:
df = pd.get_dummies(df, columns=['ocean_proximity'])

In [243]:
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,0,0,0,1,0
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,0,0,0,1,0
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,0,0,0,1,0
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,0,0,0,1,0
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,0,0,0,1,0


In [244]:
from sklearn.preprocessing import MinMaxScaler

In [245]:
scaler = MinMaxScaler()

In [246]:
names = df.columns
d = scaler.fit_transform(df)

scaled_df = pd.DataFrame(d, columns=names)
scaled_df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN
0,0.211155,0.567481,0.784314,0.022331,0.019863,0.008941,0.020556,0.539668,0.902266,0.0,0.0,0.0,1.0,0.0
1,0.212151,0.565356,0.392157,0.180503,0.171477,0.06721,0.186976,0.538027,0.708247,0.0,0.0,0.0,1.0,0.0
2,0.210159,0.564293,1.0,0.03726,0.02933,0.013818,0.028943,0.466028,0.695051,0.0,0.0,0.0,1.0,0.0
3,0.209163,0.564293,1.0,0.032352,0.036313,0.015555,0.035849,0.354699,0.672783,0.0,0.0,0.0,1.0,0.0
4,0.209163,0.564293,1.0,0.04133,0.043296,0.015752,0.042427,0.230776,0.674638,0.0,0.0,0.0,1.0,0.0


In [248]:
value = scaled_df['median_house_value']

In [249]:
value

0        0.902266
1        0.708247
2        0.695051
3        0.672783
4        0.674638
           ...   
20635    0.130105
20636    0.128043
20637    0.159383
20638    0.143713
20639    0.153403
Name: median_house_value, Length: 20640, dtype: float64

In [263]:
scaled_df.drop('median_house_value', axis=1, inplace=True)

### Linear regression

In [264]:
X_train = torch.tensor(scaled_df.values[::2], dtype=torch.float32)

In [265]:
X_test = torch.tensor(scaled_df.values[1::2], dtype=torch.float32)

In [266]:
y_train = torch.tensor(value.values[::2], dtype=torch.float32)

In [267]:
y_test = torch.tensor(value.values[1::2], dtype=torch.float32)

In [268]:
X_train

tensor([[0.2112, 0.5675, 0.7843,  ..., 0.0000, 1.0000, 0.0000],
        [0.2102, 0.5643, 1.0000,  ..., 0.0000, 1.0000, 0.0000],
        [0.2092, 0.5643, 1.0000,  ..., 0.0000, 1.0000, 0.0000],
        ...,
        [0.2779, 0.7152, 0.5294,  ..., 0.0000, 0.0000, 0.0000],
        [0.3127, 0.7386, 0.3333,  ..., 0.0000, 0.0000, 0.0000],
        [0.3018, 0.7322, 0.3333,  ..., 0.0000, 0.0000, 0.0000]])

In [269]:
batch_size = 10
dataset = TensorDataset(X_train, y_train)
# Randomly reading mini-batches
data_iter = DataLoader(dataset, batch_size, shuffle=True)

# Read a batch to see how it works
for X, y in data_iter:
    print(X, y)
    break

tensor([[0.7151, 0.0244, 0.6667, 0.0355, 0.0424, 0.0194, 0.0460, 0.2253, 0.0000,
         0.0000, 0.0000, 0.0000, 1.0000],
        [0.6145, 0.1658, 0.1373, 0.0283, 0.0413, 0.0121, 0.0385, 0.3050, 1.0000,
         0.0000, 0.0000, 0.0000, 0.0000],
        [0.6016, 0.1286, 0.6863, 0.0690, 0.0734, 0.0364, 0.0750, 0.2831, 0.0000,
         0.0000, 0.0000, 0.0000, 1.0000],
        [0.2281, 0.5792, 0.2941, 0.0621, 0.0726, 0.0201, 0.0778, 0.1589, 0.0000,
         0.0000, 0.0000, 1.0000, 0.0000],
        [0.1843, 0.5569, 0.9608, 0.0488, 0.0661, 0.0293, 0.0727, 0.2126, 0.0000,
         0.0000, 0.0000, 1.0000, 0.0000],
        [0.2321, 0.5048, 0.3333, 0.0648, 0.0483, 0.0279, 0.0525, 0.5720, 1.0000,
         0.0000, 0.0000, 0.0000, 0.0000],
        [0.7948, 0.1222, 0.1176, 0.2239, 0.2391, 0.0240, 0.0735, 0.5035, 0.0000,
         1.0000, 0.0000, 0.0000, 0.0000],
        [0.2012, 0.5792, 0.0588, 0.1017, 0.1673, 0.0445, 0.1457, 0.2352, 0.0000,
         0.0000, 0.0000, 1.0000, 0.0000],
        [0.6026,

In [270]:
inputs = X_train.shape[1]
model = torch.nn.Sequential(torch.nn.Linear(inputs, 1))

In [279]:
model

Sequential(
  (0): Linear(in_features=13, out_features=1, bias=True)
)

In [310]:
true_w = torch.randn(X_train.shape[1], dtype=torch.float32)
true_w

tensor([-1.3225,  1.3228, -0.3514, -0.7344,  0.1236,  0.3398,  0.8139,  0.6304,
        -1.0394,  0.4441,  0.5638, -0.2601,  2.9725])

In [311]:
true_b = torch.zeros((1))
true_b

tensor([0.])

In [312]:
model[0].weight.data = true_w.clone().detach().requires_grad_(True).reshape((1, X_train.shape[1]))
model[0].bias.data = torch.tensor([true_b], requires_grad = True)

In [313]:
loss = torch.nn.MSELoss(reduction='mean')

In [314]:
trainer = torch.optim.SGD(model.parameters(), lr=0.1) #stohastic gradient descent

In [316]:
num_epochs = 100
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        trainer.zero_grad()
        l = loss(model(X).reshape(-1), y)
        l.backward()
        trainer.step()
    l = loss(model(X_train).reshape(-1), y_train)
    if epoch % 5 == 0:
        print('epoch %d, loss: %f' % (epoch, l.item()),'|\tw', model[0].weight.data, '|\tb', model[0].bias.data)

epoch 5, loss: nan |	w tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]) |	b tensor([nan])
epoch 10, loss: nan |	w tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]) |	b tensor([nan])
epoch 15, loss: nan |	w tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]) |	b tensor([nan])
epoch 20, loss: nan |	w tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]) |	b tensor([nan])
epoch 25, loss: nan |	w tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]) |	b tensor([nan])
epoch 30, loss: nan |	w tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]) |	b tensor([nan])
epoch 35, loss: nan |	w tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]) |	b tensor([nan])
epoch 40, loss: nan |	w tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]) |	b tensor([nan])
epoch 45, loss: nan |	w tensor([[nan, nan, nan, nan, nan,