# Find top error contributors

Prerequisites:

```bash
python3 -m venv ~/.venv/lance-test
source ~/.venv/lance-test/bin/activate
pip install jupyter

# then in the same directory as this notebook
jupyter notebook
```

## One-time setup

In [None]:
!pip install --quiet pylance \
                     duckdb \
                     duckdb-engine \
                     git+https://github.com/changhiskhan/ipython-sql.git
    
!pip install --quiet --extra-index-url https://download.pytorch.org/whl/cpu \
                     torch==1.12.1 \
                     torchvision==0.13.1

Download the Lance DuckDB extension. **Note:** this step does NOT work on macOS.

In [None]:
import platform

def extension_uri(version='0.0.1'):
    """Return the download URL of the Lance DuckDB extension archive.

    The archive name is built from the current machine as reported by
    ``platform.uname()``: the lower-cased system name and the machine
    architecture string.

    Args:
        version: Release directory to download from. Defaults to ``'0.0.1'``,
            the value that was previously hard-coded.

    Returns:
        A URL string of the form
        ``<root>/<version>/lance.duckdb_extension.<system>.<arch>.zip``.
    """
    host = platform.uname()
    arch = host.machine  # e.g. arm64, x86_64
    system = host.system.lower()
    uri_root = 'https://eto-public.s3.us-west-2.amazonaws.com/repo/lance_duckdb'
    return f'{uri_root}/{version}/lance.duckdb_extension.{system}.{arch}.zip'


!curl {extension_uri()} --output lance.duckdb_extension.zip
!unzip -o lance.duckdb_extension.zip
!rm lance.duckdb_extension.zip

In [None]:
import torch
import duckdb
# The connection is opened with unsigned extensions allowed, then the local
# extension file is installed and loaded under the name 'lance'.
con = duckdb.connect(
    config={
        'allow_unsigned_extensions': True,
    },
)
# 'lance.duckdb_extension' is presumably the file unpacked by the download
# cell; force_install=True presumably replaces any copy installed earlier.
con.install_extension(
    'lance.duckdb_extension',
    force_install=True,
)
con.load_extension('lance')

## Load pre-trained model and convert to torchscript

In [None]:
import torch
from torchvision.models import resnet18, ResNet18_Weights

# Build ResNet-18 with the IMAGENET1K_V1 pretrained weights.
resnet = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
# Switch to inference mode BEFORE scripting: a freshly constructed nn.Module is
# in training mode, and the scripted module keeps that flag, so BatchNorm
# would otherwise normalise with per-batch statistics at prediction time.
resnet.eval()
# Compile to TorchScript and save it; the same path is passed to
# create_pytorch_model() in the SQL cell that registers the model.
m = torch.jit.script(resnet)
torch.jit.save(m, '/tmp/resnet18.pth')

## Load duckdb and register model

In [2]:
import torch

In [3]:
# Load the `sql` IPython extension, which provides the %sql / %%sql magics
# (presumably from the ipython-sql fork installed in the setup cell).
%load_ext sql
# Connect the magics to an in-memory DuckDB database. The JSON after
# --connection_arguments allows unsigned extensions and lists `lance` under
# preload_extensions.
%sql duckdb:///:memory: --connection_arguments {"config":{"allow_unsigned_extensions":"true"},"preload_extensions":["lance"]}

The sql extension is already loaded. To reload it, use:
  %reload_ext sql
{'config': {'allow_unsigned_extensions': 'true'}, 'preload_extensions': ['lance']}


In [4]:
%%sql

-- Helper macros over a list of per-class scores p.
-- Each macro is dropped first if it exists, so this cell can be re-run.

-- get_pred_id(p) is list_argmax(p), presumably the index of the largest score
DROP MACRO IF EXISTS get_pred_id;
CREATE MACRO get_pred_id(p) AS list_argmax(p);

-- get_score(p) is the maximum value in p
DROP MACRO IF EXISTS get_score;
CREATE MACRO get_score(p) AS list_aggregate(p, 'max');

Took 0.0035495758056640625


In [5]:
%%sql

-- Register the TorchScript file saved by the model export cell under the name resnet
CALL create_pytorch_model('resnet', '/tmp/resnet18.pth');
-- List the registered models (name, uri, type) to confirm the registration
SELECT * FROM ml_models();

Took 0.054344892501831055


Unnamed: 0,name,uri,type
0,resnet,/tmp/resnet18.pth,torchscript


## Load dataset

In [6]:
import lance
# Location of the oxford_pet dataset in Lance format.
uri = "s3://eto-public/datasets/oxford_pet/oxford_pet.lance"
# Open the dataset; the SQL cells below refer to it by this variable name.
oxford_pet = lance.dataset(uri)

## Prepare the labels

In [7]:
import pandas as pd

# JSON list of simplified ImageNet class names.
labels_uri = ("https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels"
              "/master/imagenet-simple-labels.json")

# One row per class name: the original row index becomes `label_id` and the
# single data column (named 0 by read_json) becomes `label`.
labels = (pd.read_json(labels_uri)
          .reset_index()
          .rename(columns={0: "label", "index": "label_id"}))

# Normalise the names (lower-case, underscores, a few renames), presumably to
# line up with the class names used by the pet dataset.
# Every replace passes regex=False so the patterns are always matched
# literally; the default of `regex` differs between pandas versions.
labels['label'] = (labels.label
                   .str.lower()
                   .str.removesuffix(' cat')
                   .str.replace('st.', 'saint', regex=False)
                   .str.replace(' ', '_', regex=False)
                   .str.replace('german_shorthaired_pointer', 'german_shorthaired', regex=False)
                   .str.replace('soft-coated_wheaten_terrier', 'wheaten_terrier', regex=False)
                   .str.replace('pyrenean_mountain_dog', 'great_pyrenees', regex=False))

## Find top error contributors

In [20]:
%%sql --lance

-- Fetch class and object for a fixed list of image URLs
SELECT
    external_image,
    class,
    object
FROM oxford_pet
WHERE external_image IN (
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/chihuahua_162.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/Birman_125.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/american_pit_bull_terrier_134.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/Birman_76.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/american_pit_bull_terrier_138.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/american_pit_bull_terrier_127.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/Bombay_177.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/american_pit_bull_terrier_162.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/Sphynx_222.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/american_pit_bull_terrier_183.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/Ragdoll_29.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/Birman_149.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/american_pit_bull_terrier_55.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/Birman_32.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/Birman_15.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/miniature_pinscher_126.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/miniature_pinscher_180.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/Maine_Coon_1.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/english_cocker_spaniel_196.jpg',
    'https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet/images/Bombay_106.jpg'
)

Took 6.396015644073486


In [13]:
# Show the most recent SQL result as a DataFrame. `sql_result_set` is not
# assigned in any visible cell; presumably the sql magic sets it. TODO confirm.
# NOTE(review): the saved output of this cell has `pred_label` and `score`
# columns that the query above does not select, and this cell's execution
# count (13) is lower than that query's (20). The output looks stale;
# re-run the notebook top to bottom from a fresh kernel before sharing.
sql_result_set.df

Unnamed: 0,external_image,class,pred_label,score,object
0,https://eto-public.s3.us-west-2.amazonaws.com/...,chihuahua,dingo,0.935031,"{""bndbox"":[0,0,0,0]}"
1,https://eto-public.s3.us-west-2.amazonaws.com/...,Birman,siamese,0.779235,"{""bndbox"":[0,0,0,0]}"
2,https://eto-public.s3.us-west-2.amazonaws.com/...,american_pit_bull_terrier,american_staffordshire_terrier,0.778417,"{""bndbox"":[0,0,0,0]}"
3,https://eto-public.s3.us-west-2.amazonaws.com/...,Birman,siamese,0.751264,"{""bndbox"":[0,0,0,0]}"
4,https://eto-public.s3.us-west-2.amazonaws.com/...,american_pit_bull_terrier,american_staffordshire_terrier,0.750234,"{""bndbox"":[0,0,0,0]}"
5,https://eto-public.s3.us-west-2.amazonaws.com/...,american_pit_bull_terrier,american_staffordshire_terrier,0.727152,"{""bndbox"":[0,0,0,0]}"
6,https://eto-public.s3.us-west-2.amazonaws.com/...,Bombay,egyptian_mau,0.706516,"{""bndbox"":[0,0,0,0]}"
7,https://eto-public.s3.us-west-2.amazonaws.com/...,american_pit_bull_terrier,american_staffordshire_terrier,0.690617,"{""bndbox"":[0,0,0,0]}"
8,https://eto-public.s3.us-west-2.amazonaws.com/...,Sphynx,egyptian_mau,0.671884,"{""bndbox"":[0,0,0,0]}"
9,https://eto-public.s3.us-west-2.amazonaws.com/...,american_pit_bull_terrier,american_staffordshire_terrier,0.670559,"{""bndbox"":[0,0,0,0]}"
