# Evaluate DuckDB Python Performance (Iris)

In [1]:
import numpy as np
import pandas as pd

import time

# from timeit import default_timer as timer

import torch
import torch.nn as nn

import duckdb

### Config

In [2]:
# Collects this run's metadata and per-stage timings; the cells below fill it in key by key.
times = dict()

In [3]:
# Run metadata stored alongside the timings.
table_name = 'iris'
times['dataset'] = 'iris'
times['pipeline'] = 'traditional'

# True  -> read the original `iris` table as-is.
# False -> read a fixed number of rows (`workload`) through a LIMIT query.
ori_workload = False
# True -> the timing-log cell drops the existing `times` table before writing.
drop_table = False

if not ori_workload:
    workload = 1000000
    # Suffix is the workload expressed in millions with "." replaced by "_",
    # e.g. 1000000 -> "iris_1_0".
    table_name += "_" + str(workload / 1000000).replace(".", "_")
    # Rows are always taken from the iris_20_0 table; only the LIMIT varies.
    select_stmt = f"SELECT * FROM iris_20_0 LIMIT {workload}"
else:
    select_stmt = "SELECT * FROM iris"

# Display the derived table name as the cell output.
table_name

'iris_1_0'

### Load iris data

In [4]:
# Time the "move" stage: open the database, materialise the query result as a
# pandas DataFrame, and close the connection again.
st = time.perf_counter_ns()

# The context manager closes the connection even when the query raises, so a
# failed run does not leave the database file held open by this kernel.
with duckdb.connect("../test.db") as con:
    iris = con.sql(select_stmt).df()

et = time.perf_counter_ns()
times['workload'] = iris.shape[0]  # number of rows actually fetched
times["move"] = et - st            # elapsed nanoseconds
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,label
0,-0.173674,1.709595,-1.169714,-1.183812,0
1,2.249683,1.709595,1.672157,1.317199,2
2,0.553333,-0.362176,1.046945,0.790671,2
3,0.553333,-1.743357,0.364896,0.13251,1
4,1.159173,-0.131979,0.990108,1.185567,2


### Load the compiled model

In [6]:
# Time the "load" stage: deserialising the TorchScript model from disk.
st = time.perf_counter_ns()

model_path = "../models/iris.pt"
model = torch.jit.load(model_path)

et = time.perf_counter_ns()
# Elapsed nanoseconds for the model load.
times.update({"load": et - st})

### Predict

In [7]:
# Time the "predict" stage: DataFrame -> tensor conversion plus one forward pass.
st = time.perf_counter_ns()

# The first four columns are the features; the fifth column is the label.
iris_data = iris.iloc[:, :4]
iris_label = iris.iloc[:, 4]

# Explicit float32 tensor; replaces the legacy torch.FloatTensor(ndarray) constructor.
x = torch.tensor(iris_data.to_numpy(), dtype=torch.float32)

# Pure inference: disable autograd so the measurement is not inflated by
# gradient bookkeeping and the forward pass does not retain graph memory.
with torch.no_grad():
    out = model(x)

et = time.perf_counter_ns()
times["predict"] = et - st  # elapsed nanoseconds

In [8]:
# One-row frame holding exactly the columns the INSERT below expects, in order.
times_df = pd.DataFrame.from_records([times]).loc[:, ['dataset', 'workload', 'move', 'load', 'predict']]

# The context manager closes the connection even if one of the statements fails.
with duckdb.connect("../test.db") as con:
    if drop_table:
        con.sql("DROP TABLE IF EXISTS times")

    # CREATE ... IF NOT EXISTS runs *after* the optional drop, so the table is
    # always present for the INSERT. (Checking for the table before dropping it
    # left no table to insert into when drop_table was True.)
    # Timings are nanoseconds: BIGINT avoids the 32-bit overflow that occurs once
    # a stage takes longer than ~2.147 s. A table that already exists keeps its
    # current column types until it is recreated with drop_table = True.
    con.sql(
        "CREATE TABLE IF NOT EXISTS times ("
        "dataset varchar, workload integer, move bigint, load bigint, predict bigint, "
        "ts timestamp DEFAULT current_timestamp)"
    )

    # `times_df` is resolved by DuckDB from the Python variable of that name.
    con.sql("INSERT INTO times (dataset, workload, move, load, predict) SELECT * FROM times_df")
    con.sql("SELECT * FROM times").show()

┌─────────┬──────────┬───────────┬──────────┬────────────┬─────────────────────────┐
│ dataset │ workload │   move    │   load   │  predict   │           ts            │
│ varchar │  int32   │   int32   │  int32   │   int32    │        timestamp        │
├─────────┼──────────┼───────────┼──────────┼────────────┼─────────────────────────┤
│ iris    │  1000000 │  99968156 │ 18879699 │ 1181731834 │ 2024-04-16 14:55:58.035 │
│ iris    │  1000000 │ 110142343 │ 17123567 │  825986872 │ 2024-04-16 14:56:16.202 │
│ iris    │  1000000 │  91453449 │ 20282890 │  785223391 │ 2024-04-16 14:56:35.162 │
│ iris    │  1000000 │  92200839 │  5309186 │ 1545610563 │ 2024-04-16 14:56:49.705 │
│ iris    │  1000000 │  92105644 │  5623429 │ 1015662888 │ 2024-04-16 14:56:59.238 │
│ iris    │  1000000 │  98426867 │ 16412810 │  762114296 │ 2024-04-16 14:57:17.432 │
└─────────┴──────────┴───────────┴──────────┴────────────┴─────────────────────────┘

