# Evaluate Traditional Pipeline Performance (Iris)

In [1]:
import numpy as np
import pandas as pd

import time

# from timeit import default_timer as timer

import torch
import torch.nn as nn

import duckdb

### Config

In [2]:
# Per-run record: dataset/pipeline labels plus the timings filled in by later cells.
times = dict()

In [3]:
# Labels stored alongside every timing row for this run.
table_name = 'iris'
# NOTE(review): 'traditonal' looks like a misspelling of 'traditional'. It is
# kept as-is because the value is persisted and may already be used as a key.
times.update(dataset='iris', pipeline='traditonal')

# ori_workload: True -> query the original `iris` table without a row limit.
# drop_table:   True -> the results table is dropped before timings are recorded.
ori_workload = False
drop_table = False

if ori_workload:
    select_stmt = "SELECT * FROM iris"
else:
    # Number of rows to pull; the suffix renders millions with '_' for the
    # decimal point (20000000 -> "20_0").
    workload = 20000000
    table_name = f"{table_name}_{str(workload / 1000000).replace('.', '_')}"
    # NOTE(review): the source table is fixed to iris_20_0 and the workload is
    # applied through LIMIT; `table_name` is not used to build the query.
    # Presumably intentional (sub-sampling the largest table) -- confirm.
    select_stmt = f"SELECT * FROM iris_20_0 LIMIT {workload}"

table_name

'iris_20_0'

### Load iris data

In [4]:
# Timed "move" step: query DuckDB into pandas, write the frame to a CSV on
# disk, then read that CSV back. Everything between `st` and `et` is measured.
st = time.perf_counter_ns()

con = duckdb.connect("../test.db")
try:
    iris = con.sql(select_stmt).df()
    # The CSV round trip happens inside the timed region, so it is part of the
    # recorded data-movement cost.
    iris.to_csv('dummy.csv', index=False)
finally:
    # Release the database handle even if the query or the CSV write fails,
    # so a failed run does not leave the connection open in the kernel.
    con.close()
iris = pd.read_csv('dummy.csv')

et = time.perf_counter_ns()
times['workload'] = iris.shape[0]  # number of rows actually loaded
times["move"] = (et - st)/1000  # perf_counter_ns is in ns; /1000 -> microseconds
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,-0.173674,1.709595,-1.169714,-1.183812
1,2.249683,1.709595,1.672157,1.317199
2,0.553333,-0.362176,1.046946,0.790671
3,0.553333,-1.743357,0.364896,0.13251
4,1.159173,-0.131979,0.990108,1.185567


### Load the compiled model

In [5]:
# Timed "load" step: deserialize the TorchScript model from disk.
st = time.perf_counter_ns()

# Path is relative to the notebook's working directory.
model_path = "../models/iris.pt"
model = torch.jit.load(model_path)

et = time.perf_counter_ns()
# perf_counter_ns returns nanoseconds; dividing by 1000 stores microseconds.
times["load"] = (et - st)/1000

### Predict

In [6]:
# Timed "predict" step: the measured region covers column selection, the
# DataFrame -> tensor conversion, and the model's forward pass.
st = time.perf_counter_ns()

# Take the first four columns by position as the model input.
iris_data = iris.iloc[:, :4]

# Build a 32-bit float tensor from the frame's underlying array.
x = torch.FloatTensor(iris_data.values)

# NOTE(review): the forward pass is not wrapped in torch.no_grad() /
# torch.inference_mode(); presumably kept this way so timings stay comparable
# with earlier recorded runs -- confirm before changing.
out = model(x)

et = time.perf_counter_ns()
# perf_counter_ns returns nanoseconds; dividing by 1000 stores microseconds.
times["predict"] = (et - st)/1000

In [7]:
# Persist this run's labels and timings (microseconds) as one row of the
# `times` table in the DuckDB database, then print the whole table.
#
# Selecting the columns explicitly fixes their order to match the INSERT
# column list below; a missing key in `times` raises KeyError here.
times_df = pd.DataFrame.from_records([times]).loc[
    :, ['dataset', 'workload', 'pipeline', 'move', 'load', 'predict']
]

con = duckdb.connect("../test.db")
try:
    # Drop first (when requested) and only then create-if-absent. Checking for
    # the table before the drop would skip the CREATE after a drop and make
    # the INSERT fail on a missing table.
    if drop_table:
        con.sql("DROP TABLE IF EXISTS times")

    con.sql(
        "CREATE TABLE IF NOT EXISTS times ("
        "dataset varchar, workload integer, pipeline varchar, "
        "move integer, load integer, predict integer, "
        "ts timestamp DEFAULT current_timestamp)"
    )

    # `times_df` in the SQL text refers to the pandas DataFrame of that name in
    # the Python scope. `ts` is omitted so the column default is applied.
    con.sql("INSERT INTO times (dataset, workload, pipeline, move, load, predict) SELECT * FROM times_df")
    con.sql("SELECT * FROM times").show()
finally:
    # Always release the database handle, including when a statement fails.
    con.close()

┌─────────┬──────────┬──────────┬───────┬───────────┬─────────────────────────┬───────────────┐
│ dataset │ workload │   move   │ load  │  predict  │           ts            │   pipeline    │
│ varchar │  int32   │  int32   │ int32 │   int32   │        timestamp        │    varchar    │
├─────────┼──────────┼──────────┼───────┼───────────┼─────────────────────────┼───────────────┤
│ iris    │  1000000 │    66407 │ 11404 │    644862 │ 2024-04-16 15:39:06.832 │ duckdb_python │
│ iris    │  1000000 │    69000 │ 14205 │    711004 │ 2024-04-16 15:39:13.889 │ duckdb_python │
│ iris    │  1000000 │    87882 │ 15013 │    714544 │ 2024-04-16 15:39:37.287 │ duckdb_python │
│ iris    │  1000000 │    66269 │ 14332 │    696981 │ 2024-04-16 15:39:42.694 │ duckdb_python │
│ iris    │  1000000 │    75381 │ 14991 │    691012 │ 2024-04-16 15:39:54.801 │ duckdb_python │
│ iris    │  5000000 │   328913 │ 15558 │   5179009 │ 2024-04-16 15:40:13.76  │ duckdb_python │
│ iris    │  5000000 │   349613 │ 15064 