## datasette: Explore and Publish


## ibis: Many many backends


### Polars - calmcode


In [2]:
import ibis

# Switch on ibis's `interactive` option. Per the ibis documentation this makes
# expressions execute and render their result when a cell displays them,
# instead of only showing the unevaluated expression.
ibis.options.interactive = True

ModuleNotFoundError: No module named 'ibis'

In [None]:
# Open an ibis connection on the Polars backend, then read the birthdays CSV
# through that connection. The path is relative to the notebook's working
# directory.
con_polars = ibis.polars.connect()
tbl_polars = con_polars.read_csv("../data/birthdays.csv")

In [None]:
# Preview the first rows of the Polars-backed table.
tbl_polars.head()

In [None]:
def set_types(dataf):
    """Return ``dataf`` mutated so that ``date`` is parsed as a date.

    The ``date`` column is converted with ``to_date`` using the
    ``%Y-%m-%d`` format, named ``"date"``, and handed to ``dataf.mutate``.
    """
    parsed_date = dataf.date.to_date("%Y-%m-%d")
    named_date = parsed_date.name("date")
    return dataf.mutate(named_date)


def counter(dataf, *args):
    """Aggregate ``births`` per group and order the result by the group keys.

    ``args`` (the tuple of extra positional arguments) is passed as-is to
    both ``group_by`` and ``order_by``. The aggregation produces two
    columns: ``sum`` (``births.sum()``) and ``mean`` (``births.mean()``).
    """
    grouped = dataf.group_by(args)
    births_total = dataf.births.sum().name("sum")
    births_average = dataf.births.mean().name("mean")
    summary = grouped.agg(births_total, births_average)
    return summary.order_by(args)


# Sum and mean of `births` for each `date`, ordered by `date`.
counter(tbl_polars, "date")

### Duckdb - ibis framework


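- Create a DuckDB database file from the CSV: `duckdb data/birthdays.ddb "CREATE TABLE birthdays AS SELECT * FROM read_csv('data/birthdays.csv');"`
- Run the command above from a terminal, in the directory that contains the `data/` folder so the relative paths resolve.
- Make sure DuckDB is already installed on the machine; on macOS it can be installed with Homebrew (`brew install duckdb`).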


In [None]:
# Imports for the DuckDB section.
import ibis
import pandas as pd

# Switch on ibis's `interactive` option for this section.
ibis.options.interactive = True

In [None]:
# Small two-row example frame, built column by column, with a custom index.
df = pd.DataFrame(
    {
        "one": ["a", "b"],
        "two": [1, 3],
        "three": [2, 4],
    },
    index=[5, 6],
)

df

Unnamed: 0,one,two,three
5,a,1,2
6,b,3,4


In [None]:
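# Disabled for now: would wrap the pandas frame `df` as an ibis in-memory table.
# NOTE(review): presumably commented out because `ibis` failed to import in this
# environment -- confirm, then re-enable or remove this cell.
# t = ibis.memtable(df)
# t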

## neo4j: Great Graph Database


- The code for this section was written in Neo4j itself, not in this notebook.
- Created a `calmcode` database in Neo4j.

## pandas pipe: Clean Pandas Code


In [32]:
from functools import wraps
import datetime as dt


def log_step(func):
    """Decorate a pipeline step so every call prints a one-line log.

    The log line contains the step's name, the ``shape`` of its result and
    the wall-clock time the call took.

    Args:
        func: The step to wrap. It is called with exactly the positional and
            keyword arguments given to the wrapper.

    Returns:
        A wrapper that calls ``func``, prints the log line and returns
        ``func``'s result unchanged.
    """

    @wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def wrapper(*args, **kwargs):
        tic = dt.datetime.now()
        result = func(*args, **kwargs)
        time_taken = str(dt.datetime.now() - tic)
        # Not every step returns an object with `.shape`. Fall back to None so
        # that logging can never be the reason a successful step fails.
        shape = getattr(result, "shape", None)
        print(f"just ran step {func.__name__} shape={shape} took {time_taken}s")
        return result

    return wrapper

In [35]:
import pandas as pd

# Load the Big Mac CSV; the path is relative to the notebook's working directory.
# NOTE: this rebinds `df`, which an earlier cell used for a small example frame.
df = pd.read_csv("../data/bigmac.csv")


@log_step
def start_pipeline(dataf):
    """Begin a pipeline by returning a copy of ``dataf``.

    Later steps then receive the copy rather than the caller's own object.
    """
    working_copy = dataf.copy()
    return working_copy


# `.pipe(f)` calls `f(df)`: this runs the logged copy step and displays the
# returned copy as the cell's output.
df.pipe(start_pipeline)

just ran step start_pipeline shape=(1330, 6) took 0:00:00.000059s


Unnamed: 0,date,currency_code,name,local_price,dollar_ex,dollar_price
0,2000-04-01,ARS,Argentina,2.50,1.000,2.500000
1,2000-04-01,AUD,Australia,2.59,1.680,1.541667
2,2000-04-01,BRL,Brazil,2.95,1.790,1.648045
3,2000-04-01,CAD,Canada,2.85,1.470,1.938776
4,2000-04-01,CHF,Switzerland,5.90,1.700,3.470588
...,...,...,...,...,...,...
1325,2020-01-14,UAH,Ukraine,57.00,23.990,2.375990
1326,2020-01-14,UYU,Uruguay,179.00,37.435,4.781621
1327,2020-01-14,USD,United States,5.67,1.000,5.670000
1328,2020-01-14,VND,Vietnam,66000.00,23176.000,2.847774


## pandas datetime: Datetimes in Pandas


## polars: Faster DataFrames
