# Livebook dependency setup: Nx for numerical computing (with the EXLA
# backend as default), NimbleCSV for CSV parsing, and Kino + VegaLite for
# interactive charts.
#
# NOTE(review): "~> 0.2" allows any 0.x release, so Mix.install resolves the
# latest Nx/EXLA 0.x — code below relies on Nx.Random, which only exists in
# Nx >= 0.4. Consider pinning tighter versions for reproducibility.
Mix.install(
  [
    {:nx, "~> 0.2"},
    {:nimble_csv, "~> 1.2"},
    {:kino, "~> 0.8"},
    {:kino_vega_lite, "~> 0.1"},
    {:exla, "~> 0.2"}
  ],
  config: [
    nx: [default_backend: EXLA.Backend]
  ]
)
defmodule Dataset do
  @moduledoc """
  Fetches a remote CSV file over HTTP and parses it with NimbleCSV.
  """

  @doc """
  Downloads the CSV at `url` and returns its rows.

  When `headers` is a list of column names, each data row is converted to a
  map keyed by the header row and restricted to those columns. When
  `headers` is `false` (the default), the raw row lists are returned with
  the header row dropped.

  Crashes on any HTTP or parse failure (let-it-crash).
  """
  def read_csv(url, headers \\ false) do
    # :httpc lives in :inets; :ssl is needed for https URLs.
    {:ok, _} = Application.ensure_all_started(:inets)
    {:ok, _} = Application.ensure_all_started(:ssl)

    {:ok, {_status, _resp_headers, body}} = :httpc.request(url)

    [header_row | rows] =
      body
      |> IO.iodata_to_binary()
      |> NimbleCSV.RFC4180.parse_string(skip_headers: false)

    case headers do
      false ->
        rows

      wanted ->
        Enum.map(rows, fn row ->
          header_row
          |> Enum.zip(row)
          |> Map.new()
          |> Map.take(wanted)
        end)
    end
  end
end
# Ames housing data (OpenIntro). Load only the columns we need and keep
# just the sales from 2010.
url = "https://www.openintro.org/stat/data/ames.csv"

columns = ["Gr.Liv.Area", "SalePrice", "Yr.Sold", "MS.SubClass", "Bedroom.AbvGr"]

dataset =
  for row <- Dataset.read_csv(url, columns), row["Yr.Sold"] == "2010", do: row
# Scatter plot of above-ground living area vs. sale price for the 2010 sales.
alias VegaLite, as: Vl

# NOTE(review): the bracketed field name "['Gr.Liv.Area']" appears to escape
# the dots so Vega-Lite does not treat them as nested-field accessors —
# confirm against the Vega-Lite field docs.
Vl.new(width: 500, height: 400)
|> Vl.mark(:point)
|> Vl.data_from_values(dataset)
|> Vl.encode_field(:y, "SalePrice", type: :quantitative)
|> Vl.encode_field(:x, "['Gr.Liv.Area']", type: :quantitative, scale: [zero: false])
graph TD;
A(Training set)-->B(Learning algorithm)
subgraph hypothesis
direction LR
D(x)-->h
h-->E(predicted y)
end
B-->hypothesis
When the target variable that we're trying to predict is continuous, we call the learning problem a regression problem.
When y can take on only a small number of discrete values, we call it a classification problem.
If you've seen linear regression before, you may recognize this as the familiar least-squares cost function that gives rise to the ordinary least squares regression model.
Here, alpha is called the learning rate.
Batch gradient descent looks at every example in the entire training set on every step.
Each time we encounter a training example, we update the parameters according to the gradient of the error with respect to that single training example only.
For a function
Thus, the gradient
Given a training set, define the design matrix
Also, let
The value of
Given
Choose
A fairly standard choice for the weights is
The parameter
Locally weighted linear regression is a non-parametric algorithm.
defmodule LinReg do
  @moduledoc """
  Minimal single-variable linear regression (`y = m * x + b`) trained with
  plain gradient descent, compiled through `Nx.Defn`.
  """
  import Nx.Defn

  # Tiny learning rate because the inputs (square footage / dollar prices)
  # are used unscaled, so gradients are numerically large.
  @learning_rate 1.0e-9

  @doc "Predicts y for input `x` given parameters `{m, b}`."
  defn predict({m, b}, x) do
    m * x + b
  end

  @doc "Mean squared error between predictions and targets."
  defn loss(params, x, y) do
    y_pred = predict(params, x)
    # Fix: Nx.power/2 was renamed to Nx.pow/2 in Nx 0.5; the "~> 0.2"
    # requirement resolves to a modern Nx where only Nx.pow/2 exists.
    Nx.mean(Nx.pow(y - y_pred, 2))
  end

  @doc "Performs one gradient-descent step over the batch `{inp, tar}`."
  defn update({m, b} = params, inp, tar) do
    {grad_m, grad_b} = grad(params, &loss(&1, inp, tar))

    {
      m - grad_m * @learning_rate,
      b - grad_b * @learning_rate
    }
  end

  @doc "Draws initial slope and intercept from N(0, 0.1); returns `{{m, b}, key}`."
  defn init_random_params(key) do
    {m, key} = Nx.Random.normal(key, 0.0, 0.1)
    {b, key} = Nx.Random.normal(key, 0.0, 0.1)
    {{m, b}, key}
  end

  @doc """
  Lazily trains for `epochs` epochs over `data` (an enumerable of batches,
  each a list of `{x, y}` pairs), emitting the parameter tuple after each
  epoch so callers can observe training progress as a stream.
  """
  def train(epochs, data, key \\ Nx.Random.key(42)) do
    {init_params, _key} = init_random_params(key)

    # Fix: removed a dead `1..epochs` expression that produced an
    # unused-expression warning and did nothing.
    Stream.resource(
      fn -> {1, init_params} end,
      fn {epoch, cur_params} ->
        if epoch > epochs do
          {:halt, {epoch, cur_params}}
        else
          params =
            data
            # Cap the number of batches consumed per epoch.
            |> Enum.take(200)
            |> Enum.reduce(cur_params, fn batch, acc ->
              {inp, tar} = Enum.unzip(batch)
              update(acc, Nx.tensor(inp), Nx.tensor(tar))
            end)

          {[params], {epoch + 1, params}}
        end
      end,
      fn _ -> :ok end
    )
  end
end
# Convert the filtered rows into {living_area, sale_price} integer pairs
# for training.
data =
  for row <- dataset do
    area = String.to_integer(row["Gr.Liv.Area"])
    price = String.to_integer(row["SalePrice"])
    {area, price}
  end

IO.inspect(length(data))
alias VegaLite, as: Vl

# Layered chart rendered as a live Kino widget: the raw examples as points,
# plus an initially-empty "outputs" dataset that the training loop below
# fills with the fitted regression line.
widget =
  Vl.new(width: 500, height: 400)
  |> Vl.datasets_from_values(examples: dataset, outputs: [])
  |> Vl.layers([
    # Layer 1: observed data points.
    Vl.new()
    |> Vl.mark(:point)
    |> Vl.data(name: "examples")
    |> Vl.encode_field(:y, "SalePrice", type: :quantitative)
    |> Vl.encode_field(:x, "['Gr.Liv.Area']", type: :quantitative, scale: [zero: false]),
    # Layer 2: the model's predicted line (pushed incrementally during training).
    Vl.new()
    |> Vl.data(name: "outputs")
    |> Vl.mark(:line)
    |> Vl.encode_field(:y, "SalePrice", type: :quantitative)
    |> Vl.encode_field(:x, "['Gr.Liv.Area']", type: :quantitative, scale: [zero: false])
  ])
  |> Kino.VegaLite.new()
  |> Kino.render()
# Train for 1000 epochs on a single batch, sampling every 10th parameter
# set and throttling to one chart update per 100 ms so the animation is
# visible.
LinReg.train(1000, [data])
|> Stream.take_every(10)
|> Stream.zip(Stream.interval(100))
|> Enum.each(fn {{m, b}, _} ->
  # Remove the previous fitted line before drawing the new one.
  widget
  |> Kino.VegaLite.clear(dataset: "outputs")

  widget
  |> Kino.VegaLite.push_many(
    Enum.map(data, fn {x, _} ->
      # Evaluate y = m*x + b on the tensor parameters, then unwrap to a
      # plain number for VegaLite.
      y = Nx.multiply(m, x) |> Nx.add(b) |> Nx.to_number()
      %{"SalePrice" => y, "Gr.Liv.Area" => x}
    end),
    dataset: "outputs"
  )
end)

:ok