# Отработка онлайн-сбора данных, а не через csv

In [1]:
import requests as rq
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from datetime import datetime, date

# Инструменты и свечки

## Инструменты

In [4]:
allow_engine = ['stock']
allow_market = ['shares','bonds']

def get_securites_df_with_fixed_engine_market_step_start(engine,market,start,step=100):
    response = rq.get(
        'https://iss.moex.com/iss/securities.json'
        ,params={
            'engine': engine
            ,'market': market
            ,'limit': step
            ,'start': start
        }
    )
    response_securities = response.json()['securities']
    df = pd.DataFrame(columns=response_securities['columns'],data=response_securities['data'])
    df['engine'] = engine
    df['market'] = market
    return df

# def get_securites_df_with_fixed_engine_market_limit(engine,market,limit,used_step=100):
#     step = used_step
#     dfs = []
#     for start in range(0,limit+1,step):
#         step_result = get_securites_df_with_fixed_engine_market_step_start(engine,market,start,step)
#         if len(step_result) == 0:
#             break
#         dfs.append(step_result)
#     return pd.concat(dfs,ignore_index=True)


df = get_securites_df_with_fixed_engine_market_step_start('stock','shares',100)
# df = get_securites_df_with_fixed_engine_market_limit('stock','shares',100)
df.head()

Unnamed: 0,id,secid,shortname,regnumber,name,isin,is_traded,emitent_id,emitent_title,emitent_inn,emitent_okpo,gosreg,type,group,primary_boardid,marketprice_boardid,engine,market
0,415545871,INFL,INFL ETF,4742,БПИФ Ингосстрах Инфляционный,RU000A1047H6,1,3687,"Акционерное общество Управляющая компания ""Инг...",7705136973,46350001,4742,exchange_ppif,stock_ppif,TQTF,TQTF,stock,shares
1,414180783,INGO,INGO ETF,4483,БПИФ Ингосстрах Корп облигации,RU000A103CA4,1,3687,"Акционерное общество Управляющая компания ""Инг...",7705136973,46350001,4483,exchange_ppif,stock_ppif,TQTF,TQTF,stock,shares
2,317439317,INGR,ИНГРАД ао,1-01-50020-A,ИНГРАД ПАО ао,RU000A0DJ9B4,1,2231,"Публичное акционерное общество ""ИНГРАД""",7702336269,59060606,1-01-50020-A,common_share,stock_shares,TQBR,TQBR,stock,shares
3,6375,IRAO,ИнтерРАОао,1-04-33498-E,"""Интер РАО"" ПАО ао",RU000A0JPNM1,1,3565,"Публичное акционерное общество ""Интер РАО ЕЭС""",2320109650,33741102,1-04-33498-E,common_share,stock_shares,TQBR,TQBR,stock,shares
4,2748,IRKT,Яковлев-3,1-03-00040-A,"""Яковлев"" ПАО ак.об.-3",RU0006752979,1,940,"Публичное акционерное общество ""Яковлев""",3807002509,7504910,1-03-00040-A,common_share,stock_shares,TQBR,TQBR,stock,shares


## Свечи

In [49]:
def get_500_day_candles_df_on_period(engine,market,security,begin:datetime,end:datetime=datetime(2037,1,1),start=0):
    response = rq.get(
        f'https://iss.moex.com/iss/engines/{engine}/markets/{market}/securities/{security}/candles.json'
        ,params={
            'start': start
            ,'from': begin.strftime('%Y-%m-%d 00:00:00')
            ,'till': end.strftime('%Y-%m-%d 23:59:59')
            ,'interval': 24
        }
    )
    candles_json = response.json()['candles']
    df = pd.DataFrame(columns=candles_json['columns'],data=candles_json['data'])
    df['date'] = df['begin'].map(lambda x: datetime.strptime(x[0:10],'%Y-%m-%d'))
    df = df.drop(['begin','end'],axis=1)
    return df

def get_all_day_candles_df_on_period(engine,market,security,begin:datetime,end:datetime=datetime(2037,1,1)):
    MAX_ITER = 9999
    CANDLE_STEP = 500
    dfs = []
    for i in range(0,MAX_ITER,CANDLE_STEP):
        step_result = get_500_day_candles_df_on_period(engine,market,security,begin,end,start=i)
        if len(step_result) == 0:
            break
        dfs.append(step_result)
    return (None if len(dfs)==0 else pd.concat(dfs,ignore_index=True))

get_all_day_candles_df_on_period('stock','shares','SBER',datetime(2023,1,1))

Unnamed: 0,open,close,high,low,value,volume,date
0,141.60,141.78,143.25,141.56,3.000248e+09,21098550,2023-01-03
1,141.85,141.43,142.28,140.75,2.419429e+09,17112740,2023-01-04
2,141.60,141.27,141.84,140.54,2.435171e+09,17245300,2023-01-05
3,141.39,141.40,141.62,140.90,1.504731e+09,10657600,2023-01-06
4,141.83,142.40,142.99,141.65,4.480148e+09,31449160,2023-01-09
...,...,...,...,...,...,...,...
493,239.29,237.29,240.51,236.75,1.252639e+10,52493640,2024-12-09
494,238.00,230.82,238.01,230.65,1.371039e+10,58740230,2024-12-10
495,230.50,234.19,234.39,229.22,1.502699e+10,64914300,2024-12-11
496,234.00,229.02,235.95,228.52,1.374824e+10,59171170,2024-12-12


## Объединение

In [41]:
def get_bunch_securities_and_day_candles_on_period(begin:datetime,end:datetime=datetime(2037,1,1),bunch_number=0):
    MAX_ITER = 9999
    CANDLE_STEP = 500
    SECURITES_STEP = 100
    engine,market = 'stock','shares'
    securities_df = get_securites_df_with_fixed_engine_market_step_start(engine,market,bunch_number*SECURITES_STEP,SECURITES_STEP)
    candles_dfs = []
    for i, row in securities_df.iterrows():
        row_engine,row_market,row_security = row['engine'],row['market'],row['secid']
        row_candles_df = get_all_day_candles_df_on_period(row_engine,row_market,row_security,begin,end)
        if row_candles_df is None:
            break
        row_candles_df['engine'],row_candles_df['market'],row_candles_df['secid'] = row_engine,row_market,row_security
        candles_dfs.append(row_candles_df)
    if len(candles_dfs) == 0:
        return securities_df
    candles_df = pd.concat(candles_dfs)
    union_df = securities_df.merge(candles_df,how='left',on=['engine','market','secid'])
    return union_df

df = get_bunch_securities_and_day_candles_on_period(datetime(2023,1,1),datetime(2023,2,1),bunch_number=1)
df.head()

Unnamed: 0,id,secid,shortname,regnumber,name,isin,is_traded,emitent_id,emitent_title,emitent_inn,...,marketprice_boardid,engine,market,open,close,high,low,value,volume,date
0,415545871,INFL,INFL ETF,4742,БПИФ Ингосстрах Инфляционный,RU000A1047H6,1,3687,"Акционерное общество Управляющая компания ""Инг...",7705136973,...,TQTF,stock,shares,117.56,117.8,117.87,117.34,577431.36,4902.0,2023-01-03
1,415545871,INFL,INFL ETF,4742,БПИФ Ингосстрах Инфляционный,RU000A1047H6,1,3687,"Акционерное общество Управляющая компания ""Инг...",7705136973,...,TQTF,stock,shares,117.8,117.87,117.91,117.49,344833.7,2929.0,2023-01-04
2,415545871,INFL,INFL ETF,4742,БПИФ Ингосстрах Инфляционный,RU000A1047H6,1,3687,"Акционерное общество Управляющая компания ""Инг...",7705136973,...,TQTF,stock,shares,117.78,117.94,117.97,117.78,269134.19,2284.0,2023-01-05
3,415545871,INFL,INFL ETF,4742,БПИФ Ингосстрах Инфляционный,RU000A1047H6,1,3687,"Акционерное общество Управляющая компания ""Инг...",7705136973,...,TQTF,stock,shares,117.98,117.66,117.98,117.6,276978.39,2352.0,2023-01-06
4,415545871,INFL,INFL ETF,4742,БПИФ Ингосстрах Инфляционный,RU000A1047H6,1,3687,"Акционерное общество Управляющая компания ""Инг...",7705136973,...,TQTF,stock,shares,117.7,117.91,118.04,117.69,597075.87,5064.0,2023-01-09


In [47]:
dfs = []
for i in range(999):
    df = get_bunch_securities_and_day_candles_on_period(datetime(2023,7,1),datetime(2023,8,1),bunch_number=i)
    if len(df)==0:
        break
    dfs.append(df)
df = pd.concat(dfs)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2755 entries, 0 to 4
Data columns (total 25 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   id                   2755 non-null   int64         
 1   secid                2755 non-null   object        
 2   shortname            2755 non-null   object        
 3   regnumber            2698 non-null   object        
 4   name                 2755 non-null   object        
 5   isin                 2455 non-null   object        
 6   is_traded            2755 non-null   int64         
 7   emitent_id           2194 non-null   float64       
 8   emitent_title        2194 non-null   object        
 9   emitent_inn          2191 non-null   object        
 10  emitent_okpo         1568 non-null   object        
 11  gosreg               2698 non-null   object        
 12  type                 2755 non-null   object        
 13  group                2755 non-null   obje

In [48]:
df['secid'][~df['volume'].isna()].unique()

array(['ABIO', 'ABRD', 'INFL', 'INGO', 'INGR', 'IRAO', 'IRKT', 'PAZA',
       'PHOR', 'PIKK', 'PLZL', 'PMSB', 'PMSBP', 'RU000A103B62', 'TMON',
       'TMOS', 'TNSE'], dtype=object)