In [30]:
import pandas as pd
import numpy as np

# Parameters
N = 1_000  # number of synthetic rows
SEED = 42  # fixed seed so Restart & Run All regenerates exactly the same data
categories = ['A', 'B', 'C', 'D', 'E']

# A dedicated seeded generator makes the draws reproducible and independent of
# NumPy's global random state.
rng = np.random.default_rng(SEED)

# Generate synthetic data. The dict order fixes the order of the random draws
# (dflt, then ratio, then category).
df = pd.DataFrame({
    "id": np.arange(1, N + 1),  # sequential ids 1..N
    "dflt": rng.binomial(1, 0.1, size=N),  # 0/1 flag, 10% default rate
    "ratio": np.round(rng.beta(2, 5, size=N), 4),  # Skewed ratio between 0 and 1, 4 decimals
    "category": rng.choice(categories, size=N, p=[0.2, 0.2, 0.2, 0.2, 0.2])  # equal weights
})

# Preview the first rows (nothing is written to disk here)
df.head()

Unnamed: 0,id,dflt,ratio,category
0,1,0,0.3559,E
1,2,0,0.4907,B
2,3,0,0.1152,A
3,4,0,0.347,C
4,5,0,0.0764,E


In [2]:
import sqlalchemy as alc
import pandas as pd
import os


# Attach a `run` convenience method to every SQLAlchemy Engine
def run(self: alc.engine.Engine, sql: str) -> pd.DataFrame | None:
    """Execute a single SQL string and return its rows, if it produced any.

    The statement is wrapped in `alc.text` and executed on a connection
    obtained from `self.begin()`.

    Args:
        self: The engine the statement is executed on.
        sql: Raw SQL text to execute.

    Returns:
        A DataFrame holding all result rows, with the result's keys as column
        names, when the statement returns rows; otherwise None.
    """
    with self.begin() as connection:
        result = connection.execute(alc.text(sql))
        # Statements without a result set (e.g. DDL) have nothing to return.
        if not result.returns_rows:
            return None
        return pd.DataFrame(result.all(), columns=result.keys())


# Monkey-patch: makes `engine.run(sql)` available on all Engine instances.
alc.engine.Engine.run = run

# Build the connection URL from its components instead of an f-string:
# URL.create takes the raw password, so characters such as "@", ":" or "/"
# in it cannot corrupt the URL. The password is read from the `password`
# environment variable (KeyError if it is not set).
connection_url = alc.engine.URL.create(
    drivername="teradatasql",
    username="demo_user",
    password=os.environ["password"],
    host="test-l36lujzkc0420a7n.env.clearscape.teradata.com",
)
# Kept as a plain string for any code that still expects one. It embeds the
# (now percent-escaped) password, so never display or log it.
connection_string = connection_url.render_as_string(hide_password=False)
eng = alc.create_engine(connection_url)

In [31]:
# Drop any leftover copy of the table so the notebook can be re-run end to end.
# On a fresh database the table does not exist yet and the DROP is rejected;
# only that case (Teradata error 3807, "object does not exist") is ignored,
# every other database error is re-raised.
# NOTE(review): relies on the driver putting "[Error 3807]" in the message - confirm.
try:
    eng.run("drop table data")
except alc.exc.DBAPIError as err:
    if "[Error 3807]" not in str(err.orig):
        raise

In [32]:
# DDL for the target table: one column per field of the synthetic DataFrame
# (id, dflt, ratio, category).
create_table_sql = """
CREATE TABLE data (
    id INT,
    dflt INT,
    ratio FLOAT,
    category VARCHAR(10)
);
"""
_ = eng.run(create_table_sql)

In [33]:
# Load the DataFrame rows into the existing `data` table: `append` inserts into
# the table as it stands, and the DataFrame index is not written as a column.
df.to_sql(name="data", con=eng, index=False, if_exists="append")

1

In [34]:
# Sanity check: how many rows are in the table now?
count_result = eng.run("select count(*) as n from data")
# The query yields a single cell; show it as a scalar.
count_result.iloc[0, 0]

np.int64(1000)

In [35]:
# Peek at five rows. There is no ORDER BY, so the rows do not come back in id order.
preview_sql = "select top 5 * from data"
eng.run(preview_sql)

Unnamed: 0,id,dflt,ratio,category
0,3,0,0.1152,A
1,5,0,0.0764,E
2,4,0,0.347,C
3,2,0,0.4907,B
4,1,0,0.3559,E


In [36]:
# Query layout:
#   base   - every row of `data` plus a RANDOM(1,1000000) column `u`
#   rolled - per-category average of `ratio` over the current row and the two
#            preceding rows, ordered by id
# The final SELECT joins the two on (id, category) so each base row carries
# its `ratio_roll`.
qry = """
WITH base AS (
  SELECT a.*, RANDOM(1,1000000) AS u
  FROM data a 
),
rolled AS (
  SELECT
    CAST(id AS INTEGER) AS id,
    category,
    AVG(ratio) OVER (PARTITION BY category ORDER BY CAST(id AS INTEGER) ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS ratio_roll
  FROM base
)
SELECT
  b.*,
  r.ratio_roll
FROM base b
JOIN rolled r ON (CAST(b.id AS INTEGER) = r.id AND b.category = r.category);
"""

# NOTE(review): this rebinds `df`, replacing the synthetic frame that was loaded
# into the table; re-running the to_sql cell afterwards would use this result.
df = eng.run(qry)
df

Unnamed: 0,id,dflt,ratio,category,u,ratio_roll
0,1,0,0.3559,E,899437,0.355900
1,2,0,0.4907,B,566455,0.490700
2,3,0,0.1152,A,16700,0.115200
3,4,0,0.3470,C,569752,0.347000
4,5,0,0.0764,E,118400,0.216150
...,...,...,...,...,...,...
995,996,0,0.1110,E,120584,0.183167
996,997,0,0.2905,D,576000,0.255333
997,998,0,0.0658,A,581092,0.238833
998,999,0,0.4602,C,133354,0.191567
