In [4]:
# Step 1 - import pytorch
import torch
from torch import nn

In [2]:
# Step 2 - get device to run model on - this is the main difference from what Markus showed us
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using mps device


In [10]:
# Step 3 - define a simple neural network model
class OneLayerNN(nn.Module):
    """A single fully connected (affine) layer: ``y = x @ W.T + b``.

    Args:
        in_features: Size of the last dimension of the input. Defaults to 8.
        out_features: Size of the last dimension of the output. Defaults to 4.
    """

    def __init__(self, in_features: int = 8, out_features: int = 4):
        super().__init__()
        # The attribute name `layer` is kept so the parameter names remain
        # `layer.weight` / `layer.bias`.
        self.layer = nn.Linear(in_features, out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to ``x``.

        The last dimension of ``x`` must equal ``in_features``; the result has
        the same leading dimensions and ``out_features`` as its last dimension.
        """
        return self.layer(x)
    
class TwoLayerNN(nn.Module):
    """Two fully connected layers applied one after the other.

    NOTE: there is no activation function between the two layers, so the whole
    network is still a single affine map of its input. Insert a non-linearity
    (e.g. ``torch.relu``) between them if the extra layer should add capacity.

    Args:
        in_features: Size of the last dimension of the input. Defaults to 8.
        hidden_features: Output size of the first layer and input size of the
            second. Defaults to 4.
        out_features: Size of the last dimension of the output. Defaults to 1.
    """

    def __init__(self, in_features: int = 8, hidden_features: int = 4, out_features: int = 1):
        super().__init__()
        # Attribute names `layer` / `layer2` are kept so the parameter names
        # reported by named_parameters() do not change.
        self.layer = nn.Linear(in_features, hidden_features)
        self.layer2 = nn.Linear(hidden_features, out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Pass ``x`` through the first layer, then through the second.

        The last dimension of ``x`` must equal ``in_features``.
        """
        hidden = self.layer(x)
        return self.layer2(hidden)

In [17]:
# Step 4 - instantiate the model and build a matching input tensor
neural_network = TwoLayerNN()
# Eight ones in the default float dtype: one value per input feature of the first layer.
input = torch.ones(8)

In [18]:
# Display the tensor built in Step 4 (a bare last expression shows its repr).
input

tensor([1., 1., 1., 1., 1., 1., 1., 1.])

In [19]:
# Inspect the shape: a 1-D tensor with 8 elements.
input.shape

torch.Size([8])

In [20]:
# You can see the weights and biases of every layer here!
# If you re-instantiate the model (re-run the cell that calls TwoLayerNN()), the
# values will change, because the layers' parameters are initialized randomly.
list(neural_network.named_parameters())

[('layer.weight',
  Parameter containing:
  tensor([[ 0.1537,  0.2885, -0.0564, -0.1863,  0.1707,  0.1340, -0.2404,  0.0764],
          [-0.1364, -0.1030,  0.2440, -0.3508, -0.2170, -0.3131,  0.1061,  0.1789],
          [ 0.0397,  0.2628, -0.2057, -0.2252, -0.2714, -0.0730,  0.0676, -0.1599],
          [-0.0700, -0.2917, -0.2232, -0.0656, -0.0975, -0.0730, -0.0706, -0.2313]],
         requires_grad=True)),
 ('layer.bias',
  Parameter containing:
  tensor([-0.1050,  0.1621,  0.0264,  0.1198], requires_grad=True)),
 ('layer2.weight',
  Parameter containing:
  tensor([[-0.2407,  0.1258,  0.4788,  0.2084]], requires_grad=True)),
 ('layer2.bias',
  Parameter containing:
  tensor([0.2990], requires_grad=True))]

In [21]:
# Step 5 - apply the network to your input tensor
# Note that this is running on your CPU: neither the model nor the input has
# been moved to `device` yet (that happens in Step 6).
neural_network(input)

tensor([-0.2785], grad_fn=<ViewBackward0>)

In [22]:
# Step 6 - run the same forward pass with Apple MPS acceleration
# Module.to() moves the parameters in place and returns the module itself.
neural_network = neural_network.to(device)
# Tensor.to() returns a tensor on the target device, so the name is rebound.
input = input.to(device)
output = neural_network(input)

In [23]:
output.device # Note that the output is on the mps device too

device(type='mps', index=0)

In [11]:
output # We can print the output ... the display string will tell us the values, as well as what device it's on

tensor([-1.0177], device='mps:0', grad_fn=<LinearBackward0>)

In [12]:
# We can convert a tensor to a NumPy array with the .numpy() method, but only
# once it (a) is detached from the autograd graph and (b) lives on the CPU.
#   * .detach() returns a tensor that PyTorch no longer tracks gradients for,
#     which is fine here since it won't be used in backpropagation.
#   * .cpu() copies the tensor to CPU memory; it is equivalent to .to('cpu').
#     (.device is an attribute that reports where a tensor lives, not a method.)
# Detaching first keeps the device-to-CPU copy out of the autograd graph.
output_numpy = output.detach().cpu().numpy()
output_numpy

array([-1.0177195], dtype=float32)

# Part 2 - Loading Data

In [1]:
import pandas as pd
from tensordict import TensorDict
from data_utils.embedding_utils import get_model_embeddings
from data_utils.feature_utils import create_features_from_raw_df

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the raw table, drop the features we are not actually given in the
# contest data, and index the rows by antibody id.
antibody_df = (
    pd.read_csv("./data/GDPa1_v1.2_20250814.csv")
    .drop(
        columns=[
            "est_status_asof_feb2025",
            "hc_protein_sequence",
            "hc_dna_sequence",
            "highest_clinical_trial_asof_feb2025",
            "lc_protein_sequence",
            "lc_dna_sequence",
        ]
    )
    .set_index("antibody_id")  # becomes the index and is removed from the columns
)

In [4]:
# Add an 'embeddings' column computed by the project helper get_model_embeddings.
# This mutates antibody_df in place.
# NOTE(review): judging by the logged output of this cell, the helper fetches
# tokenizer files for ollieturnbull/p-IgGen from huggingface.co, so it presumably
# needs network access unless the model is already cached locally - confirm.
antibody_df['embeddings'] = get_model_embeddings(antibody_df)

'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /ollieturnbull/p-IgGen/resolve/main/tokenizer_config.json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x14970d940>: Failed to resolve \'huggingface.co\' ([Errno 8] nodename nor servname provided, or not known)"))'), '(Request ID: 00bb11c5-9dfc-4064-a7c6-59ba2c8e475b)')' thrown while requesting HEAD https://huggingface.co/ollieturnbull/p-IgGen/resolve/main/tokenizer_config.json
Retrying in 1s [Retry 1/5].
'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /ollieturnbull/p-IgGen/resolve/main/tokenizer_config.json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x149657c50>: Failed to resolve \'huggingface.co\' ([Errno 8] nodename nor servname provided, or not known)"))'), '(Request ID: ccd082c7-ecd6-449d-af1c-f2dec90f9ece)')' thrown while requesting HEAD https://huggingf

In [6]:
# Save the frame (including the 'embeddings' column) to disk so that later
# sessions can reload it instead of recomputing the embeddings.
antibody_df.to_pickle("data/antibody_df.pkl")

In [2]:
# Reload the frame previously saved with to_pickle; this replaces the CSV load
# and embedding cells when starting from a fresh kernel.
# NOTE: unpickling can execute arbitrary code - only load pickle files you
# created yourself or otherwise trust.
antibody_df = pd.read_pickle("data/antibody_df.pkl")

In [3]:
# Build the model inputs X from the antibody frame via the project helper.
# NOTE(review): the return type/shape of create_features_from_raw_df is not
# visible from this notebook - confirm before relying on it downstream.
X = create_features_from_raw_df(antibody_df)