## Training Loop

In [1]:
import lightning as L
import torch
from torch import nn, optim
import torch.nn.functional as F
import math
import pandas as pd
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

class LitIndexer(L.LightningModule):
    def __init__(self, mapper, indexer, name):
        super().__init__()
        self.name = name
        self.mapper = mapper
        self.indexer = indexer
        self.min_loss = float("inf")
        self.epoch_losses = []
        self.test_losses = []
        self.results_df = pd.DataFrame(columns=["Name", "Test Loss", "Percent Narrowed"])
        
        self.early_stop_callback = EarlyStopping(
            monitor='train_loss',
            min_delta=0.001,
            patience=3,
            verbose=True,
            mode='min'
        )
        
    def forward(self, strs):
        mapped_strs = self.mapper.forward(strs)
        pred_idxs = self.indexer(mapped_strs)
        return pred_idxs
        
    def loss(self, pred_idxs, real_idxs):
        return F.mse_loss(pred_idxs, real_idxs)
    
    def percent_narrowed(self, loss):
        return math.sqrt(loss) / len(self.mapper.data)

    def training_step(self, batch, batch_idx):
        strs, idxs = batch
        mapped_strs = self.mapper.forward(strs)
        pred_idxs = self.indexer(mapped_strs)
        loss = self.loss(pred_idxs, idxs.to(torch.float32))
        self.min_loss = min(self.min_loss, loss)
        self.epoch_losses.append(loss)
        self.log("train_loss", loss, prog_bar=True)
        
        return loss
    
    def on_train_epoch_end(self):
        avg_epoch_loss = sum(self.epoch_losses) / len(self.epoch_losses)
        # print(f"Epoch {self.current_epoch}, Average Epoch Loss: {avg_epoch_loss:.4f}, Percent Narrowed: {self.percent_narrowed(avg_epoch_loss):.4f}")
        self.epoch_losses = []
    
    def test_step(self, batch, batch_idx):
        strs, idxs = batch
        mapped_strs = self.mapper.forward(strs)
        pred_idxs = self.indexer(mapped_strs)
        loss = self.loss(pred_idxs, idxs.to(torch.float32))
        self.test_losses.append(loss)
        self.log("test_loss", loss, prog_bar=True)
        
    def on_test_epoch_end(self):
        avg_test_loss = sum(self.test_losses) / len(self.test_losses)
        print(f"Average Test Loss: {avg_test_loss:.4f}, Percent Narrowed: {self.percent_narrowed(avg_test_loss):.4f}")
        self.test_losses = []
        self.results_df = pd.concat([self.results_df, pd.DataFrame({"Name": [self.name], 
                                                             "Test Loss": [avg_test_loss.item()], 
                                                             "Percent Narrowed": [self.percent_narrowed(avg_test_loss)]})], ignore_index=True)

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=1e-2)
        return optimizer


## Experiment Runner

In [2]:
from models.BaselineHash import BaselineHash
from models.BaselineEmbed import BaselineEmbed
from models.LinearModel import LinearRegressionModel
from models.TwoLinearModel import TwoLayerLinearRegressionModel
from IndexDataset import IndexDataset
from torch.utils.data import DataLoader
import mmh3

experiments = {
            # IMDB
            "linear-hash_imdb": {"dataset_name": "data/imdb/imdb_top_1000.csv", 
                               "column_name": "Overview",
                               "mapper": "BaselineHash", 
                               "indexer": "2LinearModel"},
            "linear-embed_imdb": {"dataset_name": "data/imdb/imdb_top_1000.csv", 
                               "column_name": "Overview",
                               "mapper": "BaselineEmbed", 
                               "indexer": "2LinearModel"},
            # LEGO
            "linear-hash_lego": {"dataset_name": "data/lego/inventory_parts.csv", 
                               "column_name": "part_num",
                               "mapper": "BaselineHash", 
                               "indexer": "2LinearModel"},
            "linear-embed_lego": {"dataset_name": "data/lego/inventory_parts.csv", 
                               "column_name": "part_num",
                               "mapper": "BaselineEmbed", 
                               "indexer": "2LinearModel"},
            
            # RANDOM 100
            "linear-hash_rand-100": {"dataset_name": "data/random/random_100.csv", 
                               "column_name": "RandomString",
                               "mapper": "BaselineHash", 
                               "indexer": "2LinearModel"},
            "linear-embed_rand-100": {"dataset_name": "data/random/random_100.csv", 
                               "column_name": "RandomString",
                               "mapper": "BaselineEmbed", 
                               "indexer": "2LinearModel"},
            # RANDOM 1000000
            "linear-hash_rand-1000000": {"dataset_name": "data/random/random_1000000.csv", 
                               "column_name": "RandomString",
                               "mapper": "BaselineHash", 
                               "indexer": "2LinearModel"},
            "linear-embed_rand-1000000": {"dataset_name": "data/random/random_1000000.csv", 
                               "column_name": "RandomString",
                               "mapper": "BaselineEmbed", 
                               "indexer": "2LinearModel"},
            }

In [3]:
from pytorch_lightning import loggers as pl_loggers

batch_size = 128
token_len = 1
embed_size = 1
hash = mmh3.hash    # 32-bits
num_minutes = 1
max_epochs = 100

results_df = pd.DataFrame(columns=["Name", "Test Loss", "Percent Narrowed"])

for name, experiment_dict in experiments.items():
    dataset_name, column_name, mapper_type, indexer_type = experiment_dict["dataset_name"], experiment_dict["column_name"], \
                                                            experiment_dict["mapper"], experiment_dict["indexer"]
    
    print("#################")
    print(f"{name}: {dataset_name}, {column_name}")
    
    df = pd.read_csv(dataset_name)
    dataset = IndexDataset(df, column_name)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    mapper, indexer = None, None
    vector_size = None
    if mapper_type == "BaselineHash":
        mapper = BaselineHash(dataset, hash)
        vector_size = 1
    elif mapper_type == "BaselineEmbed":
        mapper = BaselineEmbed(dataset, token_len, embed_size)
        vector_size = mapper.max_len * embed_size
    
    if indexer_type == "LinearModel":
        indexer = LinearRegressionModel(vector_size)
    elif indexer_type == "2LinearModel":
        indexer = TwoLayerLinearRegressionModel(vector_size)
    
    model = LitIndexer(mapper, indexer, name)
    
    tb_logger = pl_loggers.TensorBoardLogger('lightning_logs/')
    trainer = L.Trainer(accelerator="cpu", logger=tb_logger,
                        max_epochs=max_epochs,
                        max_time={"minutes": num_minutes}
                        )
    
    trainer.fit(model, train_dataloaders=dataloader)
    
    torch.save(model.state_dict(), f"models/{name}.pth")
    
    trainer.test(model, dataloaders=dataloader)
    
    results_df = pd.concat([results_df, model.results_df], ignore_index=True)

results_df

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


#################
linear-hash_imdb: data/imdb/imdb_top_1000.csv, Overview


/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name    | Type                          | Params
----------------------------------------------------------
0 | indexer | TwoLayerLinearRegressionModel | 4     
----------------------------------------------------------
4         Trainable params
0         Non-trainable params
4         Total params
0.000     Total estimated model params size (MB)
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/loops/fit_loop.py:298: Th

Training: |          | 0/? [00:00<?, ?it/s]

  return F.mse_loss(pred_idxs, real_idxs)
`Trainer.fit` stopped: `max_epochs=100` reached.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

Average Test Loss: 57749.5820, Percent Narrowed: 0.3755


  self.results_df = pd.concat([self.results_df, pd.DataFrame({"Name": [self.name],


  results_df = pd.concat([results_df, model.results_df], ignore_index=True)
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name    | Type                          | Params
----------------------------------------------------------
0 | mapper  | BaselineEmbed                 | 86    
1 | indexer | TwoLayerLinearRegressionModel | 98.6 K
----------------------------------------------------------
98.7 K    Trainable params
0         Non-trainable params
98.7 K    Total params
0.395     Total estimated model params size (MB)


#################
linear-embed_imdb: data/imdb/imdb_top_1000.csv, Overview


/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (8) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

  return F.mse_loss(pred_idxs, real_idxs)
  return F.mse_loss(pred_idxs, real_idxs)
`Trainer.fit` stopped: `max_epochs=100` reached.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

Average Test Loss: 85016.3281, Percent Narrowed: 0.2916


  self.results_df = pd.concat([self.results_df, pd.DataFrame({"Name": [self.name],


#################
linear-hash_lego: data/lego/inventory_parts.csv, part_num


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name    | Type                          | Params
----------------------------------------------------------
0 | indexer | TwoLayerLinearRegressionModel | 4     
----------------------------------------------------------
4         Trainable params
0         Non-trainable params
4         Total params
0.000     Total estimated model params size (MB)
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `

Training: |          | 0/? [00:00<?, ?it/s]

  return F.mse_loss(pred_idxs, real_idxs)
  return F.mse_loss(pred_idxs, real_idxs)
`Trainer.fit` stopped: `max_epochs=100` reached.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

Average Test Loss: 28253060.0000, Percent Narrowed: 0.3628


  self.results_df = pd.concat([self.results_df, pd.DataFrame({"Name": [self.name],


#################
linear-embed_lego: data/lego/inventory_parts.csv, part_num


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name    | Type                          | Params
----------------------------------------------------------
0 | mapper  | BaselineEmbed                 | 40    
1 | indexer | TwoLayerLinearRegressionModel | 256   
----------------------------------------------------------
296       Trainable params
0         Non-trainable params
296       Total params
0.001     Total estimated model params size (MB)
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of th

Training: |          | 0/? [00:00<?, ?it/s]

  return F.mse_loss(pred_idxs, real_idxs)
  return F.mse_loss(pred_idxs, real_idxs)
Time limit reached. Elapsed time is 0:01:00. Signaling Trainer to stop.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

Average Test Loss: 44629680.0000, Percent Narrowed: 0.2888


  self.results_df = pd.concat([self.results_df, pd.DataFrame({"Name": [self.name],


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name    | Type                          | Params
----------------------------------------------------------
0 | indexer | TwoLayerLinearRegressionModel | 4     
----------------------------------------------------------
4         Trainable params
0         Non-trainable params
4         Total params
0.000     Total estimated model params size (MB)


#################
linear-hash_rand-100: data/random/random_100.csv, RandomString


/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

  return F.mse_loss(pred_idxs, real_idxs)
`Trainer.fit` stopped: `max_epochs=100` reached.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

Average Test Loss: 1220.5646, Percent Narrowed: 0.5545


  self.results_df = pd.concat([self.results_df, pd.DataFrame({"Name": [self.name],


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name    | Type                          | Params
----------------------------------------------------------
0 | mapper  | BaselineEmbed                 | 62    
1 | indexer | TwoLayerLinearRegressionModel | 81    
----------------------------------------------------------
143       Trainable params
0         Non-trainable params
143       Total params
0.001     Total estimated model params size (MB)


#################
linear-embed_rand-100: data/random/random_100.csv, RandomString


/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

  return F.mse_loss(pred_idxs, real_idxs)
`Trainer.fit` stopped: `max_epochs=100` reached.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

Average Test Loss: 1017.6185, Percent Narrowed: 0.3190


  self.results_df = pd.concat([self.results_df, pd.DataFrame({"Name": [self.name],


#################
linear-hash_rand-1000000: data/random/random_1000000.csv, RandomString


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name    | Type                          | Params
----------------------------------------------------------
0 | indexer | TwoLayerLinearRegressionModel | 4     
----------------------------------------------------------
4         Trainable params
0         Non-trainable params
4         Total params
0.000     Total estimated model params size (MB)
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `

Training: |          | 0/? [00:00<?, ?it/s]

  return F.mse_loss(pred_idxs, real_idxs)
  return F.mse_loss(pred_idxs, real_idxs)
Time limit reached. Elapsed time is 0:01:00. Signaling Trainer to stop.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

Average Test Loss: 132999380992.0000, Percent Narrowed: 0.5771


  self.results_df = pd.concat([self.results_df, pd.DataFrame({"Name": [self.name],


#################
linear-embed_rand-1000000: data/random/random_1000000.csv, RandomString


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name    | Type                          | Params
----------------------------------------------------------
0 | mapper  | BaselineEmbed                 | 62    
1 | indexer | TwoLayerLinearRegressionModel | 81    
----------------------------------------------------------
143       Trainable params
0         Non-trainable params
143       Total params
0.001     Total estimated model params size (MB)
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of th

Training: |          | 0/? [00:00<?, ?it/s]

  return F.mse_loss(pred_idxs, real_idxs)
  return F.mse_loss(pred_idxs, real_idxs)
Time limit reached. Elapsed time is 0:01:00. Signaling Trainer to stop.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/Users/alexanderkumar/miniconda3/envs/graphs/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

Average Test Loss: 83333668864.0000, Percent Narrowed: 0.2887


  self.results_df = pd.concat([self.results_df, pd.DataFrame({"Name": [self.name],


Unnamed: 0,Name,Test Loss,Percent Narrowed
0,linear-hash_imdb,57749.58,0.375487
1,linear-embed_imdb,85016.33,0.291576
2,linear-hash_lego,28253060.0,0.362848
3,linear-embed_lego,44629680.0,0.288813
4,linear-hash_rand-100,1220.565,0.554549
5,linear-embed_rand-100,1017.619,0.319001
6,linear-hash_rand-1000000,132999400000.0,0.577104
7,linear-embed_rand-1000000,83333670000.0,0.288676
