# Récupération d'un jeu de données depuis Binance

In [1]:
import requests

# Download the most recent candlesticks (klines) for one trading pair from the
# public Binance REST API.
symbol = "BTCEUR"
interval = "1s"

data = {"symbol": symbol, "interval": interval, "limit": 1000}
url = 'https://api.binance.com/api/v3/klines'
# A timeout keeps the notebook from hanging forever on a stalled connection.
binance_response = requests.get(url, params=data, timeout=10)
# Fail right here on an HTTP error instead of feeding an error payload to the
# DataFrame construction in the next cell.
binance_response.raise_for_status()


# Préparation d'un dataframe de 256 000 lignes en guise de batch 

In [2]:
import pandas as pd

# Build the base batch from the kline payload.
# Binance returns each kline as a positional array whose first six fields are
# [open time (ms), open, high, low, close, volume]; the labels below follow
# that order ("quantity" is the traded volume).
columns = ['timestamp', 'open', 'high', 'low', 'close', 'quantity']
df = (
    pd.DataFrame(binance_response.json())
    # Keep only the first six fields; the rest of each kline is not used.
    .iloc[:, :len(columns)]
    # Explicit int64: millisecond timestamps do not fit in a 32-bit integer.
    .astype({i: "int64" if i == 0 else "float" for i in range(len(columns))})
    .rename(columns=dict(enumerate(columns)))
)
# Milliseconds -> whole seconds.
df["timestamp"] = df["timestamp"] // 1000
df["symbol"] = symbol
df["interval"] = interval

# Double the frame 8 times (x256; 256 000 rows when the 1000 requested rows
# are returned). Each pass prepends a copy shifted back by
# (last - first + 1) seconds, so the copy ends one second before the current
# first row and timestamps stay unique.
for _ in range(8):
    ts = df["timestamp"]
    df2 = df.copy()
    df2["timestamp"] = ts + (ts.iloc[0] - ts.iloc[-1] - 1)
    df = pd.concat([df2, df])


# Préparation de la base MongoDB en mode classique 

In [4]:
import pymongo
from pymongo import MongoClient
from datetime import datetime

client = MongoClient('localhost', 27017)

# Select the database used for this test.
mydb = client["test-crypto-simple"]

# Start from an empty "ticker" collection: drop whatever a previous run left
# behind, then create it again. (The former `try/finally: pass` wrapper caught
# nothing, so it was removed.)
mydb.ticker.drop()
ticker = mydb.create_collection("ticker")


# Insertion, 350 fois de suite, du lot de 256 000 lignes

In [5]:
# Insert the batch 350 times. After each insert the whole batch is moved back
# in time by its own length (last - first + 1 seconds), so the next pass
# produces fresh timestamps and therefore fresh `_id` values.
df_sql = df.copy()
id_prefix = f"{symbol}-{interval}-"
for i in range(1, 351):
    # `_id` is "<symbol>-<interval>-<timestamp>".
    df_sql["_id"] = id_prefix + df_sql["timestamp"].astype("str")
    mydb.ticker.insert_many(df_sql.to_dict('records'))

    ts = df_sql["timestamp"]
    df_sql["timestamp"] = ts + (ts.iloc[0] - ts.iloc[-1] - 1)

    # Progress: every pass up to 9, then every 10th pass.
    if i < 10 or i % 10 == 0:
        print(f"{i} ...")


1 ...
2 ...
3 ...
4 ...
5 ...
6 ...
7 ...
8 ...
9 ...
10 ...
20 ...
30 ...
40 ...
50 ...
60 ...
70 ...
80 ...
90 ...
100 ...
110 ...
120 ...
130 ...
140 ...
150 ...
160 ...
170 ...
180 ...
190 ...
200 ...
210 ...
220 ...
230 ...
240 ...
250 ...
260 ...
270 ...
280 ...
290 ...
300 ...
310 ...
320 ...
330 ...
340 ...
350 ...


# Exemple de requête

In [6]:
from datetime import datetime, timedelta

# Query a 1000-second window starting 200 days ago and count the documents.
window_start = datetime.now() - timedelta(days=200)
window_end = window_start + timedelta(seconds=1000)
# `datetime.timestamp()` is the portable way to get epoch seconds;
# `strftime("%s")` is a platform-specific extension that is not available
# everywhere (e.g. Windows).
start = int(window_start.timestamp())
end = int(window_end.timestamp())

# Half-open interval [start, end) on the integer `timestamp` field.
requete = mydb.ticker.aggregate([
   {"$match": {"timestamp": {'$lt': end, '$gte': start}}}
])
len(list(requete))

1000