In [1]:
import numpy as np
import math 

x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate = 1e-6
for i in range(2000):
  y_pred = a + b * x + c * x ** 2 + d * x ** 3

  loss = np.square(y_pred - y).sum()

  grad_y_pred = 2.0 * (y_pred - y)
  grad_a = grad_y_pred.sum()
  grad_b = (grad_y_pred * x).sum()
  grad_c = (grad_y_pred * x ** 2).sum()
  grad_d = (grad_y_pred * x ** 3).sum()
  
  if i % 100 == 99:
    print(loss)
    
  a -= learning_rate * grad_a
  b -= learning_rate * grad_b
  c -= learning_rate * grad_c
  d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

2080.5616763076455
1393.5525470410078
934.943341517506
628.6271477140692
423.90954487469514
287.0072425256562
195.3959151554812
134.05034892148686
92.942416357208
65.37541550828848
46.87472838917273
34.44866196732138
26.095711525658583
20.4759307888735
16.69163849725867
14.141004581071398
12.42023902692171
11.258210158476759
10.472712711890384
9.94119750412294
Result: y = 0.022374646125949478 + 0.8314440858527752 x + -0.003859998772444895 x^2 + -0.08973214561201026 x^3


In [2]:
import torch
import math

dtype = torch.float
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("mps") if torch.backends.mps.is_available() else torch.device('cpu')
print (f"running {device}")


x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for i in range(2000):
  y_pred = a + b * x + c * x ** 2 + d * x ** 3

  loss = (y_pred - y).pow(2).sum().item()

  grad_y_pred = 2.0 * (y_pred - y)
  grad_a = grad_y_pred.sum()
  grad_b = (grad_y_pred * x).sum()
  grad_c = (grad_y_pred * x ** 2).sum()
  grad_d = (grad_y_pred * x ** 3).sum()
  
  if i % 100 == 99:
    print(loss)
    
  a -= learning_rate * grad_a
  b -= learning_rate * grad_b
  c -= learning_rate * grad_c
  d -= learning_rate * grad_d

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

running MPS
257.56402587890625
173.6973876953125
118.12605285644531
81.29692077636719
56.8856201171875
40.7022705078125
29.971858978271484
22.855714797973633
18.135570526123047
15.003992080688477
12.925908088684082
11.546592712402344
10.630847930908203
10.022743225097656
9.618809700012207
9.350395202636719
9.17198371887207
9.053364753723145
8.974461555480957
8.921964645385742
Result: y = -0.0036701629869639874 + 0.8473776578903198 x + 0.0006331619806587696 x^2 + -0.09199855476617813 x^3


In [77]:
import torch

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("mps") if torch.backends.mps.is_available() else torch.device('cpu')
torch.set_default_device(device)
# TypeError: Cannot convert a MPS Tensor to float64 dtype as the MPS framework doesn't support float64. Please use float32 instead.
dtype = torch.float32 if device.type == 'mps' else torch.float
torch.set_default_dtype(dtype)
print (f"running {device} with {dtype} dtype")

running mps with torch.float32 dtype


In [18]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)

powers = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(powers)

model = torch.nn.Sequential(
  torch.nn.Linear(3, 1),
  torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for i in range(2000):

  y_pred = model(xx)

  loss = loss_fn(y_pred, y)
  
  if i % 100 == 99:
    print(loss)
    
  model.zero_grad()

  loss.backward() # TODO why loss does backprop, and not model

  with torch.no_grad():
    for param in model.parameters():
      param -= learning_rate * param.grad

linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

tensor(207.6785, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(140.8889, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(96.5562, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(67.1224, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(47.5752, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(34.5899, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(25.9614, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(20.2260, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(16.4123, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(13.8756, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(12.1876, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(11.0640, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(10.3157, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(9.8171, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(9.4848, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(9.2632, device='mps:0', grad_fn=<MseLossBackward0>)
tensor(9.1153, device='mps:0', grad_fn=<M

In [13]:
import glob 
import os
import torch
import numpy as np
import PIL.Image as Image
from torchvision import transforms

images = []
labels = []

images_dir_path = f"{os.path.dirname(os.getcwd())}/dataset/training_data/images"
image_paths = glob.glob(os.path.join(images_dir_path, "*"))
image_paths = list(np.random.choice(image_paths, size=10))
print(image_paths)

transform = transforms.Compose([
    transforms.PILToTensor(),
    transforms.ConvertImageDtype(torch.float32),
    transforms.Resize((500, 500))
])

for image_path in image_paths:
    img = Image.open(image_path)
    img = img.convert('L')               # open image as PIL Image
    img_tensor = transform(img)                                         # convert PIL Image to np array
    images.append(img_tensor)
    skew_angle_str = os.path.basename(image_path).split('_')[0]
    skew_angle = torch.tensor(float(skew_angle_str) + 30).to(torch.long)
    labels.append(skew_angle)

['/Users/mp/projects/ml/innora-document-deskew/dataset/training_data/images/-19.931_89368010.png', '/Users/mp/projects/ml/innora-document-deskew/dataset/training_data/images/-19.809_81749056_9057.png', '/Users/mp/projects/ml/innora-document-deskew/dataset/training_data/images/-21.575_80310840a.png', '/Users/mp/projects/ml/innora-document-deskew/dataset/training_data/images/9.013_71601299.png', '/Users/mp/projects/ml/innora-document-deskew/dataset/training_data/images/24.399_0011973451.png', '/Users/mp/projects/ml/innora-document-deskew/dataset/training_data/images/-21.575_80310840a.png', '/Users/mp/projects/ml/innora-document-deskew/dataset/training_data/images/2.213_91914407.png', '/Users/mp/projects/ml/innora-document-deskew/dataset/training_data/images/16.963_0011976929.png', '/Users/mp/projects/ml/innora-document-deskew/dataset/training_data/images/-13.466_00838511_00838525.png', '/Users/mp/projects/ml/innora-document-deskew/dataset/training_data/images/16.963_0011976929.png']




In [14]:
( len(images), len(labels), labels[0], images[0].size(), images[0][0][100])

(10,
 10,
 tensor(10),
 torch.Size([1, 500, 500]),
 tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0

In [15]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, img_list, labels):
        super(MyDataset, self).__init__()
        self.img_list = img_list
        self.labels = labels

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        sample = {"data": self.img_list[idx], "target": self.labels[idx] }
        return sample

In [18]:
import torch

model = torch.nn.Sequential(
    torch.nn.Conv2d(
        1,  # if B&W else 3,
        12,  # number of kernels - we need less as we only detect vertical/horizontal/diagonal lines
        3,  # kernel size
        1,  # 1 pixel at a time
        1  # padding - kernel size / 2 - to apply kernel on borders
    ),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),
    torch.nn.Conv2d(12, 24, kernel_size=3, stride=1, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),
    torch.nn.Flatten(),  # Flatten the tensor
    # Adjust the input size based on your image size
    torch.nn.Linear(24 * 125 * 125, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(
        128,  # number of kernels
        30 + 1 + 30,  # [-30, 30] degrees
        # bias=True # TODO try without
    ),
)
criterion = torch.nn.CrossEntropyLoss(
    reduction='sum')  # TODO why cross entropy?
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)

epochs = 200

train_loader = torch.utils.data.DataLoader(MyDataset(images, labels))

for idx, sample in enumerate(train_loader):
    x, y = sample['data'], sample['target']
    print(idx, x.size(), y.size())
# for x,y in zip(images, labels):
    # x, y = data.to(device), target
    # x = torch.tensor(images) # images             ->    Width x Height x Amount ( B&W or dim for rgb? )
    # y = torch.tensor(labels) # skew angle labels  ->    Amount
    # for i in range(epochs):
    print(next(model.parameters()).device)
    hypothesis = model(x)
    print(y.item())
    print(hypothesis.size(), y.size())
    loss = criterion(hypothesis, y)
    print(f"loss {loss} hypothesis: {hypothesis.argmax(dim=1)} y: {y}")
    optimizer.zero_grad()  # TODO why
    loss.backward()
    optimizer.step()


0 torch.Size([1, 1, 500, 500]) torch.Size([1])
cpu
10
torch.Size([1, 61]) torch.Size([1])
loss 4.184998989105225 hypothesis: tensor([11]) y: tensor([10])
1 torch.Size([1, 1, 500, 500]) torch.Size([1])
cpu
10
torch.Size([1, 61]) torch.Size([1])
loss 4.187053680419922 hypothesis: tensor([11]) y: tensor([10])
2 torch.Size([1, 1, 500, 500]) torch.Size([1])
cpu
8
torch.Size([1, 61]) torch.Size([1])
loss 4.173307418823242 hypothesis: tensor([11]) y: tensor([8])
3 torch.Size([1, 1, 500, 500]) torch.Size([1])
cpu
39
torch.Size([1, 61]) torch.Size([1])
loss 4.284471035003662 hypothesis: tensor([30]) y: tensor([39])
4 torch.Size([1, 1, 500, 500]) torch.Size([1])
cpu
54
torch.Size([1, 61]) torch.Size([1])
loss 4.042973041534424 hypothesis: tensor([30]) y: tensor([54])
5 torch.Size([1, 1, 500, 500]) torch.Size([1])
cpu
8
torch.Size([1, 61]) torch.Size([1])
loss 4.173227787017822 hypothesis: tensor([11]) y: tensor([8])
6 torch.Size([1, 1, 500, 500]) torch.Size([1])
cpu
32
torch.Size([1, 61]) torch.