In [1]:
import math
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [2]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

In [3]:
# convert the input number to a list of 64 bits.
# this is the input to the model

def int_to_64bit_list(number):
    binary_str = format(number, '064b')
    return [int(bit) for bit in binary_str]

ints = [x for x in range(1, 1_000_000)]

input_data = [int_to_64bit_list(x) for x in ints]

In [4]:
# this is just a helper class to load the data into the model
class Data(Dataset):
    def __init__(self, inputs):
        self.inputs = inputs

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return torch.tensor(self.inputs[idx], dtype=torch.float32)

In [5]:
# this is the machine learning model
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.encoder = nn.Sequential(
          nn.Linear(64, 64 * 5), # 64 bits in
          nn.ReLU(),
          nn.Linear(64 * 5, 64),
          nn.ReLU(),
          nn.Linear(64, 32) # 1 value out
        )

        self.decoder = nn.Sequential(
          nn.Linear(32, 64), # 1 value in
          nn.ReLU(),
          nn.Linear(64, 64 * 2),
          nn.ReLU(),
          nn.Linear(64 * 2, 64) # 64 bits out
        )

    # this is the forward pass. it takes the input and returns the output
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [6]:
# create the dataset and the loader
training_dataset = Data(input_data)
training_loader = DataLoader(training_dataset, batch_size=512, shuffle=True)

# create model and send it to the device, the mac gpu.
model = Model().to(device)

# mean squared error loss and the optimizer
loss = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # learning rate and momentum are hyperparameters that can be tuned

# train the model for 50 iterations
for epoch in range(100):

    # iterate over the data
    for data in training_loader:
        inputs = data
        inputs = inputs.to(device)
        # zero the gradients
        optimizer.zero_grad()

        # run the data through the model, we need to send it to the device as well
        out = model(inputs)
        loss_size = loss(out, inputs)
        loss_size.backward() # backpropagation
        optimizer.step() # update the weights

    print(f"Loss: {loss_size}")
    
    # save the model
torch.save(model.state_dict(), f"model")

Loss: 0.07574784010648727
Loss: 0.07287474721670151
Loss: 0.06256505101919174
Loss: 0.05387353152036667
Loss: 0.04096760228276253
Loss: 0.03476806730031967
Loss: 0.026425348594784737
Loss: 0.02417917363345623
Loss: 0.018669014796614647
Loss: 0.011933606117963791
Loss: 0.01207087654620409
Loss: 0.007353839930146933
Loss: 0.004649989772588015
Loss: 0.00028651399770751595
Loss: 0.00010962077794829383
Loss: 0.0001315423724008724
Loss: 0.00011187815835000947
Loss: 0.0001054332751664333
Loss: 7.193471537902951e-05
Loss: 6.71574889565818e-05
Loss: 7.950556755531579e-05
Loss: 6.971681432332844e-05
Loss: 8.06220414233394e-05
Loss: 6.880093860672787e-05
Loss: 5.533561488846317e-05
Loss: 5.074084401712753e-05
Loss: 5.747174145653844e-05
Loss: 5.5280732340179384e-05
Loss: 4.350199014879763e-05
Loss: 5.807401248603128e-05
Loss: 3.904541881638579e-05
Loss: 3.193641168763861e-05
Loss: 4.546070340438746e-05
Loss: 3.37510064127855e-05
Loss: 4.05072933062911e-05
Loss: 3.4653199691092595e-05
Loss: 3.3094

In [7]:
checkpoint = torch.load(f"model", weights_only=False)
model = Model().to(device)
model.load_state_dict(checkpoint)

val = torch.tensor(int_to_64bit_list(100), dtype=torch.float32).to(device)

output = model.encoder(val)

res = model.decoder(output)

# convert values to binary output. 0.5 was a starting point.
binary_tensor = (res >= 0.5).int().tolist()


bit_string = ''.join(map(str, binary_tensor))
number = int(bit_string, 2)

assert number == 100