In [1]:

import pandas as pd
import warnings
import requests
import time
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


warnings.simplefilter(action='ignore', category=FutureWarning)

from sqlalchemy import create_engine
import numpy as np
import pickle
from scipy import stats
from tqdm import tqdm
import datetime as dt
from retrying import retry

from utils.env import *
from utils import avgNav, commonMetric, query, thematicManager as tm, tableManager

from oauth2client.service_account import ServiceAccountCredentials

In [2]:
@retry(stop_max_attempt_number=3)
def execute_query(sql_query, engine):
    return pd.read_sql_query(sql_query, con=engine)


def get_adr_stock_ids(ver, cnx):
    adr_stock_ids = []
    stockinfo_qr = f'''SELECT * FROM `{ver}_stock_infos`'''
    stockinfo = pd.read_sql_query(stockinfo_qr, con=cnx)
    adr_stock_ids.extend(stockinfo[stockinfo.adr == 1].jittaStockId.to_list())
    
    return adr_stock_ids



In [3]:
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import time

def create_session_with_retries(retries, backoff_factor, status_forcelist):
    session = requests.Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

def sandbox_v2(fid, jid, bid, uid, scope):
    url = f"http://192.168.152.53:3000/api/v2/formulas/{fid}/results"
    params = {
        "jitta_stock_ids": jid,
        "buildVersionId": bid,
        "sector": "ALL",
        "industry": "ALL",
        "scopeType": scope,
        "limit": "",
        "skip": ""
    }
    headers = {
        "universe-id": uid
    }

    retries = 5
    backoff_factor = 0.3
    status_forcelist = [500, 502, 503, 504]

    session = create_session_with_retries(retries, backoff_factor, status_forcelist)

    try:
        response = session.get(url, params=params, headers=headers, timeout=120)
        if response.status_code == 200:
            time.sleep(0.5)
            data = response.json()
            return data
        else:
            print("Failed to retrieve data. Status code:", response.status_code)
            return None
    except requests.exceptions.Timeout:
        print("The request timed out")
        return None
    except requests.exceptions.RequestException as e:
        # Handle other possible exceptions
        print(f"An error occurred: {e}")
        return None


# def sandbox_v2(fid, jid, bid, uid, scope):
#     url = f"http://192.168.152.53:3000/api/v2/formulas/{fid}/results"
#     params = {
#         "jitta_stock_ids": jid,
#         "buildVersionId": bid,
#         "sector": "ALL",
#         "industry": "ALL",
#         "scopeType": scope,
#         "limit": "",
#         "skip": ""
#     }
#     headers = {
#         "universe-id": uid
#     }

#     response = requests.get(url, params=params, headers=headers, timeout=120)

#     if response.status_code == 200:
#         time.sleep(1)
#         data = response.json()
#         return data
#     else:
#         print("Failed to retrieve data. Status code:", response.status_code)
#         return None
    

def rolling_quantile(df, column, quantile, window_size):
    expanding_window_quantile = df[column].expanding(min_periods=1).quantile(quantile)
    rolling_window_quantile = df[column].rolling(window=window_size, min_periods=window_size).quantile(quantile)
    return expanding_window_quantile.combine_first(rolling_window_quantile)


def get_clean_jitta_stock_ids(jitta_score_date_like, list_adr, list_etf, skip_jid, cnx, ver):
    clean_jitta_stock_ids_dict = {}
    
    for i, date in enumerate(jitta_score_date_like):
        if len(list_adr) == 1:
            top_jitta_score_qr = f'''
                SELECT * FROM `{ver}_jitta_score_price$monthly$1_Bh-KI69fC`
                WHERE seen LIKE '{date}'
                AND `jittaStockId` NOT IN {tuple(list_etf)}
                AND `jittaStockId` NOT IN {tuple(skip_jid)}
                AND `jittaStockId` !=  {list_adr[0]}
                ORDER BY `value` DESC
                LIMIT 200;
                '''
        elif len(list_adr) == 0:
            top_jitta_score_qr = f'''
                SELECT * FROM `{ver}_jitta_score_price$monthly$1_Bh-KI69fC`
                WHERE seen LIKE '{date}'
                AND `jittaStockId` NOT IN {tuple(list_etf)}
                AND `jittaStockId` NOT IN {tuple(skip_jid)}
                ORDER BY `value` DESC
                LIMIT 200;
                '''
        else:
            top_jitta_score_qr = f'''
                SELECT * FROM `{ver}_jitta_score_price$monthly$1_Bh-KI69fC`
                WHERE seen LIKE '{date}'
                AND `jittaStockId` NOT IN {tuple(list_etf)}
                AND `jittaStockId` NOT IN {tuple(skip_jid)}
                AND `jittaStockId` NOT IN {tuple(list_adr)}
                ORDER BY `value` DESC
                LIMIT 200;
                '''

        df = pd.read_sql_query(top_jitta_score_qr, con=cnx)
        df.seen = pd.to_datetime(df.seen)
        latest_seen_df = df.loc[df.groupby('jittaStockId')['seen'].idxmax()]
        latest_seen_df_sorted = latest_seen_df.sort_values(by='value', ascending=False)[:50]

        # result = result[result.value > 7] # filter score > 7
        # display(latest_seen_df_sorted)
        
        jitta_stock_ids = latest_seen_df_sorted.jittaStockId.to_list()
        clean_jitta_stock_ids = [id for id in jitta_stock_ids]
        
        # Appending to the dictionary
        clean_jitta_stock_ids_dict[date] = clean_jitta_stock_ids
    
    return clean_jitta_stock_ids_dict

In [4]:
def computer_each_dataversion(clean_jitta_stock_ids_dict, ver, fid, bid, scope):
    cache = {}
    pe_zone = {}
    
    for date in clean_jitta_stock_ids_dict.keys():
        top50_each_date = clean_jitta_stock_ids_dict[str(date)]
        list_pe_catagory2 = []
        
        if len(top50_each_date) == 0:
            # list_pe_catagory2 = [np.nan] * 50
            continue
        
        for stock_id in tqdm(top50_each_date):
            if stock_id in cache:
                df1 = cache[stock_id]
                SKIP = True
            else:
                SKIP = False
                
            if not SKIP:
                resp = sandbox_v2(fid=fid, jid=stock_id, bid=bid, uid=ver, scope=scope)
                value = resp['data'][0]['value']
                
                pe_df = pd.DataFrame(value)
                pe_df.set_index('seen', inplace=True)
                pe_df = pe_df[~pe_df.index.duplicated(keep='last')]  
                pe_df.index = pd.to_datetime(pe_df.index)
                pe_df = pe_df.resample('M').last()
                
                # PE Calculation
                df1 = pe_df[['v']].copy()
                df1.drop(df1[df1['v'] == "N/A"].index, inplace=True)
                df1.dropna(inplace=True)
                df1 = df1.astype(float)
                df1['jitta_stock_id'] = stock_id
                
                df1['rolling_10th'] = rolling_quantile(df1, 'v', 0.1, 10*12)
                df1['rolling_20th'] = rolling_quantile(df1, 'v', 0.2, 10*12)
                df1['rolling_30th'] = rolling_quantile(df1, 'v', 0.3, 10*12)
                df1['rolling_40th'] = rolling_quantile(df1, 'v', 0.4, 10*12)
                df1['rolling_50th'] = rolling_quantile(df1, 'v', 0.5, 10*12)
                df1['rolling_60th'] = rolling_quantile(df1, 'v', 0.6, 10*12)
                df1['rolling_70th'] = rolling_quantile(df1, 'v', 0.7, 10*12)
                df1['rolling_80th'] = rolling_quantile(df1, 'v', 0.8, 10*12)
                df1['rolling_90th'] = rolling_quantile(df1, 'v', 0.9, 10*12)

                df1['PE_category2'] = np.where(df1['v'] < df1['rolling_10th'], '0_10',
                                                    np.where((df1['v'] >= df1['rolling_10th']) & (df1['v'] < df1['rolling_20th']), '10_20',
                                                    np.where((df1['v'] >= df1['rolling_20th']) & (df1['v'] < df1['rolling_30th']), '20_30',
                                                    np.where((df1['v'] >= df1['rolling_30th']) & (df1['v'] < df1['rolling_40th']), '30_40',
                                                    np.where((df1['v'] >= df1['rolling_40th']) & (df1['v'] < df1['rolling_50th']), '40_50',
                                                    np.where((df1['v'] >= df1['rolling_50th']) & (df1['v'] < df1['rolling_60th']), '50_60',
                                                    np.where((df1['v'] >= df1['rolling_60th']) & (df1['v'] < df1['rolling_70th']), '60_70',
                                                    np.where((df1['v'] >= df1['rolling_70th']) & (df1['v'] < df1['rolling_80th']), '70_80',
                                                    np.where((df1['v'] >= df1['rolling_80th']) & (df1['v'] < df1['rolling_90th']), '80_90',
                                                    np.where((df1['v'] >= df1['rolling_90th']), '90+', 'Unknown'))))))))))

                df1.index = pd.to_datetime(df1.index) 
                
                cache[stock_id] = df1
                # display(df1)

            try:
                # print(date)
                pe_category_value2 = df1.loc[(df1.index.year == int(date[:4])) & 
                                             (df1.index.month == int(date[5:7])), 'PE_category2'].values[-1]
                # pe_v = df1.loc[(df1.index.year == int(date[:4])) & 
                #                              (df1.index.month == int(date[5:7])), 'v'].values[-1]
                list_pe_catagory2.append(pe_category_value2)
            except Exception as e:
                # print(stock_id, e)   
                list_pe_catagory2.append(np.nan)  
        
        
        # Calculate how many NaN values are needed
        num_nans_to_append = 50 - len(list_pe_catagory2)
        # Append NaN values if needed
        if num_nans_to_append > 0:
            list_pe_catagory2.extend([np.nan] * num_nans_to_append)
        pe_zone[date] = list_pe_catagory2      
            
    return pe_zone


In [5]:
def display_pe_zone(pe_zone):
    # Create DataFrame
    df = pd.DataFrame(pe_zone)

    # Count occurrences of each value
    value_counts_df = df.apply(pd.Series.value_counts)

    # Reindex to ensure all categories are present
    index_categories = ['0_10', '10_20', '20_30', '30_40', '40_50', '50_60', '60_70', '70_80', '80_90', '90+']
    value_counts_df = value_counts_df.reindex(index_categories)

    # Display the transposed DataFrame for better readability
    display(value_counts_df.T)


In [6]:
list_dataversion = [
                # 'US_2024-05-31',
                # 'HK_2024-05-31',
                # 'CN_2024-05-31',
                'TH_2024-06-20',
                # 'JP_2024-05-31',
                # # 'VN_2024-05-01',
                
                # 'UK_2024-05-31',
                # 'IN_2024-01-03',
                # 'KR_2024-01-03',
                # 'TW_2024-01-03',
                # 'SG_2024-06-11',
                # 'DE_2024-06-12',/
                # 'AU_2024-06-11',
                # 'CA_2024-01-03',
                ]

jitta_score_date_like = [

                    # '2012-12-%%',
                    # '2013-12-%%',
                    # '2014-12-%%',
                    # '2015-12-%%',
                    # '2016-12-%%',
                    # '2017-12-%%',
                    # '2018-12-%%',
                    # '2019-12-%%',
                    # '2020-12-%%',
                    # '2021-12-%%',
                    # '2022-12-%%',
                    # '2023-12-%%',
                    

                    '2024-01-%%',
                    '2024-02-%%',
                    '2024-03-%%',
                    '2024-04-%%',
                    '2024-05-%%',
                    '2024-06-%%',
                    ]

# skip_jid
skip_jid = [8173, 2604908, 2259841, 2583555, 2259840, 2259842, 2259843, 2578562, 2573968, 2574007, 2604908]
# list_etf
etf_df = pd.read_json("etf.json", orient='records')
list_etf = pd.DataFrame.from_records(etf_df['US']).jitta_stock_id.to_list()



In [7]:
# # pd.set_option('display.max_rows', None)

# fid, bid, scope = '6633b62594330887b8ec65cb', 'PytXziitn', 'monthly'
# # fid, bid, scope = '664a20726101425826b0a517', 'HwSIpqwiT', 'monthly'  # pe30
# # fid, bid, scope = '664e1b8d6101425826b14a55', 'iRBkTTRDJ', 'monthly' 
# ver = 'US_2024-05-01'
# stock_id = 2577393

# resp = sandbox_v2(fid=fid, jid=stock_id, bid=bid, uid=ver, scope=scope)
# value = resp['data'][0]['value']
# pe_df = pd.DataFrame(value)
# pe_df.set_index('seen', inplace=True)
# pe_df = pe_df[~pe_df.index.duplicated(keep='last')]  
# # Resample by month and keep the last entry of each month
# pe_df.index = pd.to_datetime(pe_df.index)
# last_entries = pe_df.resample('M').last()

# pe_df

In [8]:
# PE Calculation
# df1 = pe_df[['v']].copy()
# df1.drop(df1[df1['v'] == "N/A"].index, inplace=True)
# df1.dropna(inplace=True)
# df1 = df1.astype(float)

# df1['rolling_10th'] = rolling_quantile(df1, 'v', 0.1, 10*12)
# df1['rolling_20th'] = rolling_quantile(df1, 'v', 0.2, 10*12)
# df1['rolling_30th'] = rolling_quantile(df1, 'v', 0.3, 10*12)
# df1['rolling_40th'] = rolling_quantile(df1, 'v', 0.4, 10*12)
# df1['rolling_50th'] = rolling_quantile(df1, 'v', 0.5, 10*12)
# df1['rolling_60th'] = rolling_quantile(df1, 'v', 0.6, 10*12)
# df1['rolling_70th'] = rolling_quantile(df1, 'v', 0.7, 10*12)
# df1['rolling_80th'] = rolling_quantile(df1, 'v', 0.8, 10*12)
# df1['rolling_90th'] = rolling_quantile(df1, 'v', 0.9, 10*12)

In [9]:
def main():
    for dataversion in list_dataversion:
        print(dataversion)
        list_adr = get_adr_stock_ids(dataversion, cnx)
        clean_jitta_stock_ids_dict = get_clean_jitta_stock_ids(jitta_score_date_like, list_adr, list_etf, skip_jid, cnx, ver=dataversion)
        pe_decile = computer_each_dataversion(clean_jitta_stock_ids_dict, dataversion, fid, bid, scope)
        display_pe_zone(pe_decile)
        
if __name__ == "__main__":
    fid, bid, scope = '6633b62594330887b8ec65cb', '1NCMPuCYt', 'monthly' # pe6
    # fid, bid, scope = '65dff500faa2180026d60183', 'MRrX47PTp', 'monthly' # pe5

    main()

TH_2024-06-20


100%|██████████| 50/50 [01:07<00:00,  1.36s/it]
100%|██████████| 50/50 [00:23<00:00,  2.15it/s]
100%|██████████| 50/50 [00:00<00:00, 2821.98it/s]
100%|██████████| 50/50 [00:00<00:00, 3025.01it/s]
100%|██████████| 50/50 [00:18<00:00,  2.68it/s]
100%|██████████| 50/50 [00:00<00:00, 3146.00it/s]


Unnamed: 0,0_10,10_20,20_30,30_40,40_50,50_60,60_70,70_80,80_90,90+
2024-01-%%,14.0,13.0,1.0,2.0,4.0,6.0,2.0,4.0,4.0,
2024-02-%%,18.0,10.0,3.0,4.0,3.0,4.0,3.0,1.0,3.0,1.0
2024-03-%%,18.0,13.0,1.0,4.0,3.0,4.0,3.0,3.0,1.0,
2024-04-%%,18.0,9.0,3.0,6.0,1.0,7.0,2.0,,3.0,1.0
2024-05-%%,22.0,7.0,2.0,7.0,3.0,2.0,4.0,2.0,1.0,
2024-06-%%,26.0,3.0,3.0,6.0,5.0,2.0,2.0,2.0,1.0,
