In [None]:
import tensorflow as tf
# Report which TensorFlow release this notebook is running against.
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.15.0


In [None]:
# Fetch the MNIST dataset through the Keras dataset helper and unpack the
# (inputs, labels) pairs for the training and test splits.
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Divide the image arrays by 255.0 so the model is fed floating-point inputs
# (presumably rescaling 0-255 pixel intensities into [0, 1] -- confirm against
# the dataset documentation).
x_train = x_train / 255.0
x_test = x_test / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Small fully connected classifier, assembled one layer at a time:
#   Flatten - unrolls each 28x28 input into a flat vector
#   Dense   - 128 units with ReLU activation
#   Dropout - rate 0.2
#   Dense   - 10 outputs with no activation, i.e. raw logits
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10))

In [None]:
# Feed a batch holding only the first training image through the model
# (not yet trained at this point) and pull the result out as a NumPy array.
first_image_batch = x_train[:1]
predictions = model(first_image_batch).numpy()
predictions

array([[ 0.05335407, -0.92705965,  0.6985407 ,  0.3538999 ,  0.5755565 ,
         0.06876956,  0.20022757,  0.49607116, -0.20650741, -1.1358263 ]],
      dtype=float32)

In [None]:
# Turn the raw model outputs into a probability distribution over the classes
# and display it as a NumPy array.
class_probabilities = tf.nn.softmax(predictions)
class_probabilities.numpy()

array([[0.08989463, 0.0337245 , 0.17136998, 0.12141132, 0.15153864,
        0.09129114, 0.10411663, 0.13995981, 0.069323  , 0.02737028]],
      dtype=float32)

In [None]:
# The model's final Dense layer applies no activation, so the loss is told to
# expect raw logits (from_logits=True) rather than probabilities.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

In [None]:
# Configure training: Adam optimizer, the logits-based loss defined for this
# model, and accuracy reported as the tracked metric.
model.compile(
    loss=loss_fn,
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train on the training images and labels for five epochs.
model.fit(x=x_train, y=y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7c0c25d1d960>

In [None]:
# Score the trained model on the held-out test split; the cell displays the
# returned [loss, accuracy] pair, and verbose=2 prints a single summary line.
model.evaluate(x=x_test, y=y_test, verbose=2)

313/313 - 1s - loss: 0.0741 - accuracy: 0.9765 - 612ms/epoch - 2ms/step


[0.07413280755281448, 0.9764999747276306]

In [None]:
# Wrap the trained logits model with a Softmax layer so that calling the
# wrapper yields class probabilities instead of raw logits.
probability_model = tf.keras.Sequential()
probability_model.add(model)
probability_model.add(tf.keras.layers.Softmax())

In [None]:
# Class probabilities for the first five test images (one row per image).
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[3.29016018e-07, 8.01659272e-09, 1.37528423e-06, 2.54933930e-05,
        2.25231014e-11, 2.42897300e-07, 5.41047459e-12, 9.99969363e-01,
        1.31417403e-06, 1.93169785e-06],
       [1.15346657e-08, 1.07555948e-04, 9.99869704e-01, 5.26885174e-07,
        2.95874665e-15, 1.25470660e-05, 2.05698075e-06, 1.35401496e-13,
        7.68678274e-06, 2.41848286e-14],
       [2.12372925e-07, 9.96412575e-01, 5.09858481e-04, 2.49463301e-05,
        1.58729570e-04, 2.49055847e-05, 7.98723795e-06, 1.56422961e-03,
        1.28351478e-03, 1.30450326e-05],
       [9.99996066e-01, 9.31951877e-11, 1.43603620e-06, 1.30603768e-08,
        1.40747387e-08, 1.30602601e-07, 1.90355854e-06, 3.26920741e-07,
        4.73396646e-08, 3.66034527e-08],
       [1.12512325e-05, 1.18703758e-08, 1.51559711e-04, 4.71773269e-08,
        9.84826922e-01, 1.24964160e-07, 2.39895030e-06, 1.49981090e-04,
        8.04624779e-06, 1.48496376e-02]], dtype=float32)>

In [None]:
import numpy as np
import math

# Fit the cubic polynomial a + b*x + c*x^2 + d*x^3 to sin(x) on [-pi, pi]
# with hand-written gradient descent in NumPy.

# Fixed seed so that Restart & Run All reproduces the same initial
# coefficients, loss trace and final result.
SEED = 0
rng = np.random.default_rng(SEED)

# Training data: 2000 evenly spaced inputs and their sine values.
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# The powers of x never change during training, so compute them once instead
# of twice per iteration inside the loop.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialise the four polynomial coefficients.
a, b, c, d = rng.standard_normal(4)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: evaluate the current polynomial at every input.
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Sum of squared errors; reported every 100th step.
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backward pass: gradient of the loss with respect to y_pred, then the
    # chain rule through each polynomial term.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Gradient-descent update of the coefficients.
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')


import torch
import math


# Same cubic fit of sin(x) on [-pi, pi], this time with PyTorch tensors and
# manually derived gradients (autograd is not used).

dtype = torch.float
device = torch.device("cpu")

# Fixed seed so that Restart & Run All reproduces the same initial
# coefficients, loss trace and final result.
SEED = 0
torch.manual_seed(SEED)

# Training data: 2000 evenly spaced inputs and their sine values.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them once instead
# of twice per iteration inside the loop.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialise the four polynomial coefficients as 0-dim tensors.
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: evaluate the current polynomial at every input.
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Sum of squared errors as a Python float; reported every 100th step.
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backward pass: gradient of the loss with respect to y_pred, then the
    # chain rule through each polynomial term.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Gradient-descent update of the coefficients (in place on the tensors).
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1029.4050681994004
199 721.6458805660006
299 507.04126913720006
399 357.2797858834025
499 252.69195542484982
599 179.60007289032043
699 128.48473269305586
799 92.71507550991704
899 67.6685658338659
999 50.12021533616358
1099 37.81837622190615
1199 29.18984754967873
1299 23.134695733061918
1399 18.883374928811644
1499 15.897149476758129
1599 13.798640320023566
1699 12.323345665411768
1799 11.28577726459536
1899 10.555789471130979
1999 10.042022051239801
Result: y = 0.03584264479167293 + 0.8481436798239659 x + -0.006183452650747332 x^2 + -0.09210751743306841 x^3
99 340.9046630859375
199 233.55047607421875
299 161.0597686767578
399 112.06150817871094
499 78.9090805053711
599 56.454402923583984
699 41.229530334472656
799 30.89541244506836
899 23.873180389404297
999 19.096206665039062
1099 15.842941284179688
1199 13.62484359741211
1299 12.110806465148926
1399 11.076159477233887
1499 10.36828327178955
1599 9.883445739746094
1699 9.550971031188965
1799 9.322728157043457
1899 9.165855407714