# "THE PRICE IS RIGHT" Capstone Project

This week - build a model that predicts how much something costs from a description, based on a scrape of Amazon data


A model that can estimate how much something costs, from its description.

# Order of play

DAY 1: Data Curation  
DAY 2: Data Pre-processing  
DAY 3: Evaluation, Baselines, Traditional ML  
DAY 4: Deep Learning and LLMs  
DAY 5: Fine-tuning a Frontier Model  

## DAY 4: Neural Networks and LLMs

Today we'll work from Traditional ML to Neural Networks to Large Language Models!!

In [1]:
import os
from dotenv import load_dotenv
from huggingface_hub import login
from pricer.evaluator import evaluate
from litellm import completion
from pricer.items import Item
import numpy as np
from tqdm.notebook import tqdm
import csv
from sklearn.feature_extraction.text import HashingVectorizer
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import CosineAnnealingLR

# GPU / CPU device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cpu


In [2]:
LITE_MODE = False

load_dotenv(override=True)
hf_token = os.getenv("HF_TOKEN")
login(hf_token, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
username = "ed-donner"
dataset = f"{username}/items_lite" if LITE_MODE else f"{username}/items_full"

train, val, test = Item.from_hub(dataset)

print(f"Loaded {len(train):,} training items, {len(val):,} validation items, {len(test):,} test items")

Loaded 800,000 training items, 10,000 validation items, 10,000 test items


# Before we look at the Artificial Neural Networks

## There is a different kind of Neural Network we could consider

In [4]:
# Write the test set to a CSV

with open('human_in.csv', 'w', encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    for t in test[:100]:
        writer.writerow([t.summary, 0])

In [5]:
# Read it back in

human_predictions = []
with open('human_out.csv', 'r', encoding="utf-8") as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        human_predictions.append(float(row[1]))

In [6]:
def human_pricer(item):
    idx = test.index(item)
    return human_predictions[idx]

In [7]:
human = human_pricer(test[0])
actual = test[0].price
print(f"Human predicted {human} for an item that actually costs {actual}")


Human predicted 120.0 for an item that actually costs 219.0


In [8]:
evaluate(human_pricer, test, size=100)

  0%|          | 0/100 [00:00<?, ?it/s]

[91m$99 [91m$184 [92m$12 [92m$15 [92m$18 [92m$10 [91m$119 [91m$135 [92m$6 [91m$270 [91m$643 [91m$329 [92m$15 [92m$26 [92m$24 [92m$18 [92m$29 [92m$25 [92m$25 [93m$53 [92m$35 [91m$126 [92m$25 [91m$127 [91m$273 [91m$398 [92m$55 [92m$6 [91m$101 [93m$51 [92m$30 [92m$5 [92m$35 [92m$9 [92m$10 [91m$419 [92m$25 [92m$11 [91m$186 [92m$33 [91m$161 [93m$51 [92m$23 [91m$155 [91m$150 [92m$4 [92m$31 [92m$18 [91m$115 [91m$82 [92m$25 [91m$111 [91m$410 [93m$75 [93m$67 [92m$34 [92m$8 [92m$10 [91m$122 [92m$28 [91m$116 [92m$17 [92m$19 [93m$60 [91m$599 [93m$60 [91m$160 [91m$355 [93m$75 [92m$34 [92m$17 [92m$2 [93m$70 [93m$76 [93m$41 [92m$9 [91m$226 [92m$5 [92m$5 [92m$4 [92m$0 [92m$7 [92m$5 [93m$74 [92m$7 [92m$10 [93m$68 [93m$74 [92m$5 [92m$3 [92m$17 [93m$45 [92m$5 [92m$16 [92m$0 [91m$153 [92m$2 [91m$122 [91m$150 [91m$355 

# And now - a vanilla Neural Network

During the remainder of this course we will get deeper into how Neural Networks work, and how to train a neural network.

This is just a sneak preview - let's make our own Neural Network, from scratch, using Pytorch.

Use this to get intuition; it's not important to know all about Neural networks at this point..

In [9]:
# Prepare our documents and prices

y = np.array([float(item.price) for item in train])
documents = [item.summary for item in train]

In [10]:
# Use the HashingVectorizer for a Bag of Words model
# Using binary=True with the CountVectorizer makes "one-hot vectors"

np.random.seed(42)
vectorizer = HashingVectorizer(n_features=5000, stop_words='english', binary=True)
X = vectorizer.fit_transform(documents)

In [11]:
# Define the neural network - here is Pytorch code to create a 8 layer neural network

class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.layer1 = nn.Linear(input_size, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 64)
        self.layer4 = nn.Linear(64, 64)
        self.layer5 = nn.Linear(64, 64)
        self.layer6 = nn.Linear(64, 64)
        self.layer7 = nn.Linear(64, 64)
        self.layer8 = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        output1 = self.relu(self.layer1(x))
        output2 = self.relu(self.layer2(output1))
        output3 = self.relu(self.layer3(output2))
        output4 = self.relu(self.layer4(output3))
        output5 = self.relu(self.layer5(output4))
        output6 = self.relu(self.layer6(output5))
        output7 = self.relu(self.layer7(output6))
        output8 = self.layer8(output7)
        return output8

In [12]:
class SparseCSRDataset(Dataset):
    def __init__(self, X_csr, y):
        self.X = X_csr          # scipy.sparse.csr_matrix
        self.y = y              # 1D numpy array

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        # Extract one row -> small dense vector
        row = self.X[idx].toarray().astype("float32")   # (1, n_features)
        x = torch.from_numpy(row).squeeze(0)            # (n_features,)
        target = torch.tensor(self.y[idx], dtype=torch.float32)
        return x, target



In [13]:
# Split indices instead of densifying X
indices = np.arange(X.shape[0])
train_idx, val_idx = train_test_split(indices, test_size=0.01, random_state=42)

X_train_csr = X[train_idx]
y_train = y[train_idx]
X_val_csr = X[val_idx]
y_val = y[val_idx]

train_dataset = SparseCSRDataset(X_train_csr, y_train)
val_dataset   = SparseCSRDataset(X_val_csr,   y_val)

trainloader = DataLoader(train_dataset, batch_size=256, shuffle=True)
valloader   = DataLoader(val_dataset,   batch_size=256, shuffle=False)

# Initialize the model
inputsize = X.shape[1]
model = NeuralNetwork(inputsize).to(device)   # move model to GPU/CPU

In [14]:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Number of trainable parameters: {trainable_params:,}")

Number of trainable parameters: 669,249


In [15]:
lossfunction = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
EPOCHS = 2

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0

    for batchX, batchy in tqdm(trainloader):
        batchX = batchX.to(device)
        batchy = batchy.to(device).unsqueeze(1)  # (batch, 1)

        optimizer.zero_grad()
        outputs = model(batchX)
        loss = lossfunction(outputs, batchy)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * batchX.size(0)

    train_loss = running_loss / len(train_dataset)

    # Validation
    model.eval()
    val_losses = []
    with torch.no_grad():
        for Xb, yb in valloader:
            Xb = Xb.to(device)
            yb = yb.to(device).unsqueeze(1)
            vout = model(Xb)
            valloss = lossfunction(vout, yb)
            val_losses.append(valloss.item())

    print(
        f"Epoch {epoch + 1}/{EPOCHS}, "
        f"Train Loss {train_loss:.3f}, "
        f"Val Loss {np.mean(val_losses):.3f}"
    )


  0%|          | 0/3094 [00:00<?, ?it/s]

Epoch 1/2, Train Loss 15190.022, Val Loss 13296.436


  0%|          | 0/3094 [00:00<?, ?it/s]

Epoch 2/2, Train Loss 11924.909, Val Loss 11556.320


In [16]:
def neuralnetwork(item):
    model.eval()
    with torch.no_grad():
        # vectorizer.transform expects an iterable of strings
        vec = vectorizer.transform([item.summary])   # 1 x 5000 sparse
        row = vec.toarray().astype("float32")        # small dense array
        x = torch.from_numpy(row).to(device)         # move to GPU/CPU
        result = model(x)[0, 0].item()
        return max(0, result)


In [17]:
evaluate(neuralnetwork, test)

  0%|          | 0/200 [00:00<?, ?it/s]

[91m$167 [93m$72 [92m$23 [91m$103 [93m$57 [91m$126 [93m$72 [92m$4 [93m$42 [93m$116 [93m$182 [91m$230 [93m$61 [91m$110 [92m$39 [92m$13 [92m$3 [92m$8 [92m$22 [93m$43 [92m$5 [93m$48 [93m$67 [92m$0 [91m$208 [93m$95 [93m$198 [93m$45 [91m$146 [93m$41 [91m$116 [91m$145 [93m$55 [92m$4 [93m$53 [91m$437 [92m$16 [93m$74 [91m$104 [91m$88 [91m$159 [92m$21 [92m$13 [92m$10 [92m$38 [93m$50 [93m$41 [92m$32 [93m$58 [92m$35 [92m$29 [93m$60 [93m$176 [92m$18 [93m$63 [91m$133 [92m$29 [91m$149 [92m$9 [93m$51 [93m$66 [92m$15 [92m$22 [92m$26 [91m$323 [93m$67 [93m$56 [91m$337 [92m$10 [91m$182 [92m$12 [92m$27 [93m$63 [91m$128 [92m$13 [92m$7 [91m$92 [92m$17 [92m$27 [93m$70 [93m$63 [93m$70 [93m$62 [93m$54 [92m$15 [91m$101 [91m$124 [93m$78 [92m$16 [91m$152 [92m$15 [93m$49 [92m$2 [92m$1 [92m$36 [92m$29 [92m$34 [93m$45 [92m$32 [93m$215 [92m$4 [92m$23 [92m$19 [92m$29 [92m$9 [92m$36 [93m$55 [91m$251 [92m$9 [92m

# And now - to the frontier!

Let's see how Frontier models do out of the box; no training, just inference based on their world knowledge.

Tomorrow we will do some training.

In [None]:
def messages_for(item):
    message = f"Estimate the price of this product. Respond with the price, no explanation\n\n{item.summary}"
    return [{"role": "user", "content": message}]

In [None]:
print(test[0].summary)

In [None]:
messages_for(test[0])

In [None]:
# The function for gpt-4.1-nano

def gpt_4__1_nano(item):
    response = completion(model="openai/gpt-4.1-nano", messages=messages_for(item), api_key=userdata.get('OPENAI_API_KEY'))
    return response.choices[0].message.content

In [None]:
gpt_4__1_nano(test[0])

In [None]:
test[0].price

In [None]:
evaluate(gpt_4__1_nano, test)

In [None]:
def claude_opus_4_5(item):
    response = completion(model="anthropic/claude-opus-4-5", messages=messages_for(item))
    return response.choices[0].message.content

In [None]:
evaluate(claude_opus_4_5, test)

In [None]:
def gemini_3_pro_preview(item):
    response = completion(model="gemini/gemini-3-pro-preview", messages=messages_for(item), reasoning_effort='low')
    return response.choices[0].message.content

In [None]:
evaluate(gemini_3_pro_preview, test, size=50, workers=2)

In [None]:
def gemini_2__5_flash_lite(item):
    response = completion(model="gemini/gemini-2.5-flash-lite", messages=messages_for(item))
    return response.choices[0].message.content

In [None]:
evaluate(gemini_2__5_flash_lite, test)

In [None]:

def grok_4__1_fast(item):
    response = completion(model="xai/grok-4-1-fast-non-reasoning", messages=messages_for(item), seed=42)
    return response.choices[0].message.content

In [None]:
evaluate(grok_4__1_fast, test)

In [None]:
# The function for gpt-5.1

def gpt_5__1(item):
    response = completion(model="gpt-5.1", messages=messages_for(item), reasoning_effort='high', seed=42)
    return response.choices[0].message.content


In [None]:
evaluate(gpt_5__1, test)