# Récupération d'un jeu de données depuis l'API Binance

In [3]:
import requests

# Query parameters for the klines endpoint: BTC/EUR, one-second candles,
# 1000 rows.
data = {"symbol": "BTCEUR", "interval": "1s", "limit": 1000}
url = 'https://api.binance.com/api/v3/klines'
# A timeout keeps the notebook from hanging forever on a stalled connection.
binance_response = requests.get(url, params=data, timeout=10)
# Fail here on an HTTP error rather than later, when an error payload would
# be fed to the DataFrame constructor.
binance_response.raise_for_status()


# Préparation d'un dataframe de 256 000 lignes en guise de batch 

In [4]:
import pandas as pd

df = pd.DataFrame(binance_response.json())

# Binance returns every kline as a positional array whose first six fields
# are: open time (ms), open, high, low, close, volume. The labels must follow
# that order, otherwise the price columns end up holding each other's values.
api_fields = ['timestamp', 'open', 'high', 'low', 'close', 'quantity']
# Column order the DataFrame is exposed with; kept as before so positional
# access elsewhere in the notebook is unaffected.
columns = ['timestamp', 'open', 'close', 'high', 'low', 'quantity']

# Keep only the first six fields, convert the string payload to numbers,
# label by API position, then restore the exposed column order.
df = df.iloc[:, :len(api_fields)].astype("float")
df.columns = api_fields
df = df[columns]

# Milliseconds -> whole seconds.
df["timestamp"] = df["timestamp"].floordiv(1000).astype("int")

# Double the frame 8 times (x256). Each pass prepends a copy of the current
# frame shifted back in time so that its last timestamp lands exactly one
# second before the current first timestamp.
for _ in range(8):
    ts = df["timestamp"]
    span = ts.iloc[-1] - ts.iloc[0] + 1
    earlier = df.copy()
    earlier["timestamp"] = (ts - span).astype("int")
    df = pd.concat([earlier, df])


# Préparation de la base SQLite via SQLAlchemy et de la table 

In [7]:
import sqlite3, sqlalchemy
from sqlalchemy import Table, Column, Integer, Float, MetaData, create_engine

# File-backed SQLite database in the current working directory.
engine = create_engine('sqlite:///huge-table.db', echo=False)
meta = MetaData()

# Schema of the benchmark table: one row per (symbol, second) candle.
symbol = Table("HUGE_TABLE", meta,
                Column('symbol_id', Integer, nullable=False),
                Column('timestamp', Integer, nullable=False),
                Column('open', Float, nullable=False),
                Column('close', Float, nullable=False),
                Column('high', Float, nullable=False),
                Column('low', Float, nullable=False),
                Column('quantity', Float, nullable=False),
                extend_existing=True
            )

# Start from an empty table on every run. drop_all() only drops tables that
# exist (checkfirst defaults to True), so it is the equivalent of
# DROP TABLE IF EXISTS, and unlike Engine.execute() it is still available in
# SQLAlchemy 2.0.
meta.drop_all(engine)
meta.create_all(engine)


# Insertion de 350 fois les 256 000 lignes

In [8]:
# Work on a copy of the batch tagged with a constant symbol id.
df_sql = df.assign(symbol_id=1)

# Append the batch 350 times. After each insert the whole batch is moved back
# in time so that its last timestamp sits one second before the first
# timestamp of the batch that was just written.
for _ in range(350):
    df_sql.to_sql(name="HUGE_TABLE", con=engine, if_exists="append", index=False)
    batch_ts = df_sql["timestamp"]
    shift = batch_ts.iloc[0] - batch_ts.iloc[-1] - 1
    df_sql["timestamp"] = (batch_ts + shift).astype("int")



# Exemple de requête

In [10]:
from datetime import datetime, timedelta

from sqlalchemy import text

# Query window: 1000 seconds starting 200 days before now.
start = datetime.now() - timedelta(days=200)
end = start + timedelta(seconds=1000)

# Convert to Unix seconds. datetime.timestamp() is portable, whereas
# strftime("%s") is a platform-specific extension that is not available
# everywhere.
start = int(start.timestamp())
end = int(end.timestamp())

# Bound parameters instead of values formatted into the SQL text; text() is
# also required for textual SQL by SQLAlchemy 2.0.
query = text(
    "SELECT * FROM HUGE_TABLE WHERE timestamp >= :start AND timestamp < :end"
)

# The context manager returns the connection to the pool once the rows have
# been materialised.
with engine.connect() as conn:
    cursor = conn.execute(query, {"start": start, "end": end})
    result = list(cursor)

len(result)


1000