# Module: Fetch Data
*Fetch data from TSETMC and write it directly to the database.*

## Preparing Classification Data

In [1]:
from typing import Union
import pandas as pd
import requests
from datetime import datetime
import pytse_client as tse
import fetch
import config
from utils import debug_log, data_log, ar_to_fa_series, ar_to_fa, fa_to_ar, fa_to_ar_series, flatten_json

In [2]:
# Show up to 30 columns when a DataFrame is rendered in the notebook.
pd.options.display.max_columns = 30

## Definition & Assignment

In [3]:
# Browser-like HTTP headers (desktop Chrome user agent, TSETMC web-service help
# page as referer) — presumably passed along with the TSETMC requests; confirm usage.
request_headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/108.0.0.0 Safari/537.36"
    ),
    "Referer": "http://main.tsetmc.com/StaticContent/WebServiceHelp",
}

# NOTE(review): hard-coded ASP.NET session id; it likely expires — confirm it is still needed.
cookie_jar = {"ASP.NET_SessionId": "wa40en1alwxzjnqehjntrv5j"}

# A handful of instrument codes kept around for quick manual checks.
test_ids = [
    '46348559193224090',
    '20024911381434086',
    '22129017544200',
    '94378630649293',
]

# Timestamp of this run, e.g. "2023-09-05 23-58-07 PM" (24-hour clock plus AM/PM marker).
currentDateTime = f"{datetime.now():%Y-%m-%d %H-%M-%S %p}"
currentDateTime

In [4]:
# Date reported by the project's fetch module; the recorded run printed 20230905
# (presumably a YYYYMMDD date — confirm in fetch.last_possible_deven).
last_date = fetch.last_possible_deven()
# Load the instrument and share tables. The meaning of the two 0 arguments is
# defined in fetch.InstrumentAndShare — TODO confirm.
instruments, shares = fetch.InstrumentAndShare(0, 0)

# Snapshot both tables to CSV, tagging the file names with last_date.
shares.to_csv(f"./files/shares@{last_date}.csv")
instruments.to_csv(f"./files/instruments@{last_date}.csv")

print(last_date)

20230905


## Initialize Identity

In [None]:
data_log.info('|||||||||||||||||||||||||||||| Identity initialization')

# Keep every instrument whose 'tableu_code' is not '6' (indices, going by the
# variable name) and fetch one identity record per remaining instrument code.
tickers_except_indices = instruments[instruments['tableu_code'] != '6'].index
# The dict maps insCode -> identity, so the DataFrame has one column per insCode;
# transposing turns each instrument into a row.
all_identities = pd.DataFrame({index:fetch.get_identity(index) for index in tickers_except_indices}).transpose()
# Field note: this run finished in 1 minute 25 seconds. Before this hour (8:49 PM)
# it kept failing because the responses came back empty.
# Server traffic may drop after a certain hour, which makes fetching easier.
# TODO: check later to confirm.
# Update: without proxy (even with bypassing tsetmc.com) works better.

data_log.info('|||||||||||||||||||||||||||||| Identity loaded succesfuly')

all_identities.index.name = 'insCode'

# Save to CSV, tagging the file name with last_date.
all_identities.to_csv(f"./files/identities {last_date}.csv")

## Getting Prices

In [5]:
# Every instrument code in the index of `instruments`, with no filtering applied.
tickers = list(instruments.index)

In [6]:
len(tickers)

2296

In [7]:
# Fetch the daily price tables for all tickers; the result is keyed by insCode.
# In the recorded run it held 2295 entries for 2296 requested tickers, so a
# requested code can be absent from the result.
daily_prices = fetch.get_daily_prices(tickers)

In [14]:
len(daily_prices.keys())

2295

### Cleaning Data

In [15]:
def clean_data(prices: dict) -> dict:
    """Remove every instrument whose price table has no rows.

    The mapping is modified in place, and that same object is returned.

    Args:
        prices: Mapping of insCode to a table exposing an ``index`` attribute.

    Returns:
        ``prices`` itself, without the entries whose table index is empty.
    """
    # Collect the empty entries first so the dict is not resized while iterating.
    empty_codes = [code for code, table in prices.items() if len(table.index) < 1]
    for code in empty_codes:
        del prices[code]
    return prices

In [16]:
# NOTE: clean_data deletes entries in place and returns the same dict, so after
# this line cleaned_data and daily_prices are one and the same (filtered) object.
cleaned_data = clean_data(daily_prices)

In [18]:
len(cleaned_data.keys())

2187

## Categorization

In [34]:
# Load a previously saved identities snapshot, indexed by insCode (read as str).
# NOTE(review): the file name is hard-coded to one specific export and does not
# follow the "./files/identities {last_date}.csv" pattern used when the
# identities are saved — confirm that this file exists.
identities = pd.read_csv('./files/identities 05-09-2023 23-58-07 PM.csv', index_col='insCode', dtype={'insCode':str})

In [35]:
def filter_tickers_by_industry(cSecVal: int) -> list:
    """List the instrument codes that belong to one sector.

    Reads the notebook-level ``identities`` table.

    Args:
        cSecVal: Value to match against the ``sector_cSecVal`` column.

    Returns:
        Index labels of the ``identities`` rows whose ``sector_cSecVal`` equals
        ``cSecVal``.
    """
    in_sector = identities['sector_cSecVal'] == cSecVal
    matching_rows = identities[in_sector]
    return list(matching_rows.index)

In [36]:
# Instrument codes whose 'sector_cSecVal' equals 34 (presumably the automotive
# sector, going by the variable name — confirm against the sector list).
khodroi = filter_tickers_by_industry(34)

## Volume Filter

In [38]:
daily_prices['22129017544200'].head(3)

Unnamed: 0,<TICKER>,<DTYYYYMMDD>,<FIRST>,<HIGH>,<LOW>,<CLOSE>,<VALUE>,<VOL>,<OPENINT>,<PER>,<OPEN>,<LAST>,<VOL> MA 3,<VOL> MA 5,<VOL> MA 9,<VOL> MA 26,<VOL> MA 52
0,22129017544200,20230905,0.0,0.0,0.0,2668.0,0.0,0,0,D,2668.0,2664.0,0.0,0.0,0.0,0.0,0.0
1,22129017544200,20230904,0.0,0.0,0.0,2668.0,0.0,0,0,D,2668.0,2664.0,0.0,0.0,0.0,0.0,0.0
2,22129017544200,20230903,0.0,0.0,0.0,2668.0,0.0,0,0,D,2668.0,2664.0,0.0,0.0,0.0,0.0,0.0


In [19]:

def volume_filter(insCodes: list) -> list:
    """Select instruments whose first-row volume beats all of its volume moving averages.

    Daily prices for ``insCodes`` are fetched through ``fetch.get_daily_prices``.
    For each code the first row of its table is inspected (in the sample output
    recorded in this notebook, row 0 carries the most recent date). The
    instrument is kept when ``<VOL>`` is strictly greater than every one of the
    ``<VOL> MA 3``, ``5``, ``9``, ``26`` and ``52`` columns.

    Codes for which no table, or an empty table, came back are logged and
    skipped instead of raising ``KeyError`` / ``IndexError``.

    Args:
        insCodes: Instrument codes to screen.

    Returns:
        The price tables of the matching instruments, in the order of
        ``insCodes``. The match count and the matching codes are also printed.
    """
    price_tables = fetch.get_daily_prices(insCodes)

    # The 90-day condition is left out for now: the market is in such bad shape
    # that almost nothing trades.
    ma_columns = (
        '<VOL> MA 3',
        '<VOL> MA 5',
        '<VOL> MA 9',
        '<VOL> MA 26',
        '<VOL> MA 52',
    )

    suspicious = []
    ticker_names = []

    for insCode in insCodes:
        debug_log.info(f'Going for insCode: {insCode}')

        # The fetch can return fewer tables than requested, and a table can be empty.
        if insCode not in price_tables or len(price_tables[insCode].index) < 1:
            debug_log.info(f'{insCode} skipped: no daily prices available.')
            continue

        first_row = price_tables[insCode].iloc[0]
        volume = first_row['<VOL>']
        if all(volume > first_row[column] for column in ma_columns):
            suspicious.append(price_tables[insCode])
            ticker_names.append(insCode)
            debug_log.info(f'{insCode} Checked.')

    print(len(suspicious),'\n', ticker_names, '\n')
    return suspicious


In [20]:
# Run the volume screen on every instrument code left in cleaned_data.
# volume_filter fetches the prices again itself via fetch.get_daily_prices.
volume_filter(list(cleaned_data.keys()))

0 
 [] 



[]