In [14]:
import random
import re
from pathlib import Path
from sklearn.model_selection import train_test_split

## Data Science
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## torch
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader

In [15]:
def get_dataloader(features, targets, bs=128, test_size=0.2, random_state=42):
  """Split `features`/`targets` into train and test sets and wrap each in a DataLoader.

  Args:
    features: indexable collection of input rows, aligned with `targets`.
    targets: indexable collection of target values.
    bs: batch size used for both loaders.
    test_size: share of the data held out for testing; forwarded to `train_test_split`.
    random_state: seed forwarded to `train_test_split` so the split is repeatable.

  Returns:
    `(train_dl, test_dl)`. The training loader reshuffles on every pass; the
    test loader iterates in a fixed order.
  """
  X_train, X_test, y_train, y_test = train_test_split(
      features, targets, test_size=test_size, random_state=random_state)

  # Datasets
  train_ds = TabDataSet(X_train, y_train)
  test_ds = TabDataSet(X_test, y_test)

  # DataLoaders
  train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)
  # Evaluation gains nothing from shuffling; a fixed order keeps the batch
  # composition (including the final short batch) identical across runs.
  test_dl = DataLoader(test_ds, batch_size=bs, shuffle=False)

  return train_dl, test_dl

def first(x):
  """Return the leading item of iterable `x`; an exhausted or empty `x` yields None."""
  try:
    head = next(iter(x))
  except StopIteration:
    # Nothing to pull from the iterator.
    head = None
  return head
  
def plot_function(f, tx=None, ty=None, title=None, min=-2, max=2, figsize=(6,4), steps=100):
  """Plot `f` evaluated on an evenly spaced grid from `min` to `max`.

  Args:
    f: callable applied to a 1-D tensor of x values; its result is plotted on the y axis.
    tx: optional x-axis label.
    ty: optional y-axis label.
    title: optional plot title.
    min: left end of the x range (shadows the builtin inside this function only).
    max: right end of the x range (shadows the builtin inside this function only).
    figsize: figure size passed to `plt.subplots`.
    steps: number of sample points on the grid.

  Returns:
    None. The curve is drawn on a newly created figure.
  """
  # `steps` is passed explicitly: current PyTorch requires it, whereas older
  # releases silently fell back to 100 points.
  x = torch.linspace(min, max, steps)
  _, ax = plt.subplots(figsize=figsize)
  ax.plot(x, f(x))
  if tx is not None: ax.set_xlabel(tx)
  if ty is not None: ax.set_ylabel(ty)
  if title is not None: ax.set_title(title)

def get_model(n_in, n_out):
  """Build a fully connected network: n_in -> 128 -> 64 -> 32 -> n_out.

  Each hidden linear layer is followed by a ReLU; the output layer has no
  activation.
  """
  widths = [n_in, 128, 64, 32]
  stack = []
  # Hidden part: one Linear + ReLU pair per consecutive width pair.
  for fan_in, fan_out in zip(widths[:-1], widths[1:]):
    stack.append(nn.Linear(fan_in, fan_out))
    stack.append(nn.ReLU())
  # Output projection, left without an activation.
  stack.append(nn.Linear(widths[-1], n_out))
  return nn.Sequential(*stack)

In [16]:
class TabDataSet(Dataset):
  """Map-style dataset pairing each feature row with its target."""

  def __init__(self, features, targets):
    # Both containers are stored as given; nothing is copied or converted here.
    self.x, self.y = features, targets

  def __len__(self):
    "Number of samples, taken from the feature container."
    return len(self.x)

  def __getitem__(self, idx):
    "Return the `(features, target)` pair stored at `idx`."
    sample, label = self.x[idx], self.y[idx]
    return sample, label


def train_model(model, epochs, metric, dl=None, loss_fn=None, opt=None):
  """Train `model` for `epochs` passes and print the validation metric after each pass.

  Args:
    model: network to optimise; it is called on float32-cast input batches.
    epochs: number of full passes over the training batches.
    metric: callable `(preds, targets)` handed to `validate_epoch`.
    dl: iterable of `(xb, yb)` batches; defaults to the notebook-level `train_dl`.
    loss_fn: callable `(preds, targets)` returning a scalar loss; defaults to
      the notebook-level `loss_func`.
    opt: optimizer to step; defaults to the notebook-level `optimizer`. Pass
      one explicitly when `model` is not the model that global optimizer was
      built for, otherwise the updates go to the wrong parameters.

  Returns:
    None. One validation score per epoch is printed on a single line.
  """
  # Fall back to the notebook globals at call time (what the function always
  # did) unless the caller supplies explicit objects.
  dl = train_dl if dl is None else dl
  loss_fn = loss_func if loss_fn is None else loss_fn
  opt = optimizer if opt is None else opt

  for _ in range(epochs):
    for xb, yb in dl:
      preds = model(xb.float())
      loss = loss_fn(preds, yb.float())
      opt.zero_grad()
      loss.backward()
      opt.step()

    print(validate_epoch(model, metric), end=" ")

def validate_epoch(model, metric, dl=None):
  """Average `metric` over every batch of the validation data.

  Args:
    model: network to evaluate; it is called on float32-cast input batches.
    metric: callable `(preds, targets)` returning a scalar tensor per batch.
    dl: iterable of `(xb, yb)` batches; defaults to the notebook-level `test_dl`.

  Returns:
    The unweighted mean of the per-batch metric values, rounded to 4 decimals.
  """
  dl = test_dl if dl is None else dl
  # Score in eval mode, but remember the previous mode so an enclosing
  # training loop finds the model exactly as it left it. Plain callables
  # without a `training` attribute are left alone.
  was_training = getattr(model, "training", False)
  if was_training: model.eval()
  try:
    # Scoring needs no gradients; disabling autograd avoids building a graph
    # for every validation batch.
    with torch.no_grad():
      scores = [metric(model(xb.float()), yb) for xb, yb in dl]
  finally:
    if was_training: model.train()
  return round(torch.stack(scores).mean().item(), 4)

In [17]:
from sklearn.datasets import fetch_california_housing

In [18]:
# Load the California housing dataset through scikit-learn's loader.
housing = fetch_california_housing()
# Keep the feature matrix and the regression target under notebook-level names.
features, targets =  housing.data, housing.target
# Cell output: both shapes (recorded below as (20640, 8) and (20640,)).
features.shape, targets.shape

((20640, 8), (20640,))

In [19]:
from sklearn.preprocessing import StandardScaler

In [20]:
# Standardise the features with scikit-learn's StandardScaler; `features` is
# rebound to the scaled array.
# NOTE(review): the scaler is fit on the full feature matrix, before
# `get_dataloader` performs the train/test split, so test rows influence the
# scaling statistics — consider fitting on the training split only.
scalar = StandardScaler()
features = scalar.fit_transform(features)

In [21]:
# Build the train/test loaders with `get_dataloader`'s defaults (batch size 128).
# These two notebook-level names are read by `train_model` and `validate_epoch`.
train_dl, test_dl = get_dataloader(features, targets)

In [22]:
# Pull a single training batch to check the shapes coming out of the loader.
xb, yb = first(train_dl)
xb.shape, yb.shape

(torch.Size([128, 8]), torch.Size([128]))

In [23]:
# 8 input features -> 1 regression output.
# NOTE(review): no random seed is set in the visible cells, so the initial
# weights (and the scores printed further down) will differ between runs.
model = get_model(8,1)
model

Sequential(
  (0): Linear(in_features=8, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): ReLU()
  (6): Linear(in_features=32, out_features=1, bias=True)
)

In [24]:
def loss_func(inputs, targets):
  """Mean squared error between predictions and targets.

  Args:
    inputs: model output; a trailing singleton dimension (e.g. `(batch, 1)`)
      is removed before the comparison.
    targets: ground-truth values compared against the squeezed predictions.

  Returns:
    Scalar tensor holding the mean squared error.
  """
  # Drop only the trailing output dimension. A bare `squeeze()` would also
  # remove the batch dimension when a batch holds a single sample, leaving a
  # 0-d prediction to be compared against a 1-d target.
  return F.mse_loss(inputs.squeeze(-1), targets)

In [25]:
# Adam over all parameters of `model`, learning rate 0.01. `train_model` picks
# up this notebook-level `optimizer`, so re-run this cell whenever `model` is
# rebuilt — otherwise the optimizer keeps pointing at the old parameters.
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [26]:
# Train for 9 epochs; after each one the validation score is printed, here
# using `loss_func` (MSE) as the metric.
train_model(model, 9, metric=loss_func)

0.41 0.3696 0.3382 0.3256 0.3203 0.312 0.3445 0.3009 0.3014 