In [None]:
import requests
import json
import pandas as pd
import os
import dotenv
import pdfbox
import re
from IPython.display import clear_output
# Pull variables from a local .env file into the process environment
# before the API keys are read below.
dotenv.load_dotenv()

# Subscription keys; indexing os.environ directly makes a missing variable
# fail immediately with KeyError instead of sending unauthenticated requests.
FDAPI = os.environ["FUND_DAILY_API_KEY"]
FFAPI = os.environ["FUND_FACT_API_KEY"]

# Header dictionaries handed to requests.get(..., headers=...).
FundDailyKey = {'Ocp-Apim-Subscription-Key': FDAPI}
FundFactKey = {'Ocp-Apim-Subscription-Key': FFAPI}

# Field names per endpoint type. FundFactAPIGet uses them to build a
# None-filled placeholder record when an endpoint answers with a non-200 status.
project_type_keys = [
    'proj_retail_type',
    'proj_term_flag',
    'proj_term_dd',
    'proj_term_mm',
    'proj_term_yy',
]
specification_keys = ['spec_code', 'spec_desc']
suitability_keys = ['risk_spectrum', 'risk_spectrum_desc', 'fund_suitable_desc']

In [None]:
def FundFactAPIGet(url, proj_unique_id, type, key, timeout=30):
    """Call one fund-factsheet endpoint and return a result shaped by ``type``.

    Parameters
    ----------
    url : str
        Base URL. Requested as-is for ``'amc'``; for every other type
        ``proj_unique_id + '/' + type`` is appended to it.
    proj_unique_id : str
        Project identifier appended to ``url`` (unused for ``'amc'``).
    type : str
        ``'amc'``, ``'project_type'``, ``'specification'``, ``'suitability'``
        or ``'URLs'``. The name shadows the builtin but is kept so existing
        keyword callers continue to work.
    key : dict
        Request headers (the subscription-key dictionary).
    timeout : float, optional
        Seconds to wait on each HTTP request so a stalled connection cannot
        hang the notebook indefinitely. Defaults to 30.

    Returns
    -------
    object
        * ``'amc'``: the decoded JSON body on HTTP 200, otherwise ``None``.
        * ``'project_type'`` / ``'specification'`` / ``'suitability'``:
          ``{'json': ..., 'status_code': ...}``. On a non-200 response
          ``'json'`` is a placeholder dict whose keys come from the matching
          module-level ``*_keys`` list and whose values are all ``None``.
        * ``'URLs'``: the HTTP status code of the factsheet PDF download; on
          200 the PDF has been written to ``tmp/tmp.pdf``. If the URL lookup
          itself does not answer 200, that status code is returned instead.
          ``None`` is returned when the lookup succeeds but carries no
          ``url_factsheet`` value.
        * any other ``type``: ``None``.
    """
    if type == 'amc':
        response = requests.get(url, headers=key, timeout=timeout)
        if response.status_code == 200:
            return json.loads(response.text)
        return None

    if type in ('project_type', 'specification', 'suitability'):
        response = requests.get(url + proj_unique_id + '/' + type, headers=key, timeout=timeout)
        if response.status_code == 200:
            return {'json': json.loads(response.text), 'status_code': response.status_code}
        # Keep the record shape stable for callers by filling every expected
        # field with None when the endpoint has nothing to return.
        placeholder_fields = {
            'project_type': project_type_keys,
            'specification': specification_keys,
            'suitability': suitability_keys,
        }[type]
        return {'json': dict.fromkeys(placeholder_fields), 'status_code': response.status_code}

    if type == 'URLs':
        response = requests.get(url + proj_unique_id + '/' + type, headers=key, timeout=timeout)
        if response.status_code != 200:
            # An error or empty body cannot be parsed for a factsheet link;
            # report the failure as a status code instead of raising.
            return response.status_code

        payload = json.loads(response.text)
        factsheet_url = payload.get('url_factsheet') if isinstance(payload, dict) else None
        if not factsheet_url:
            # No link to download. None is never equal to 200, so callers
            # that test for a successful download treat this as a miss.
            return None

        res_pdf = requests.get(factsheet_url, timeout=timeout)
        if res_pdf.status_code == 200:
            # The scratch directory may not exist on a fresh checkout.
            os.makedirs('tmp', exist_ok=True)
            with open('tmp/tmp.pdf', 'wb') as pdf_file:
                pdf_file.write(res_pdf.content)
        return res_pdf.status_code

    return None

In [None]:
def readRiskFromPdf(risk_level):
    """Return the token that precedes the Thai "risk" label in ``tmp/tmp.pdf``.

    Parameters
    ----------
    risk_level
        Despite its name, this value is only compared with 200; ``getPdf``
        passes the status returned by the factsheet download. Anything other
        than 200 short-circuits to ``None``.

    Returns
    -------
    str or None
        The non-whitespace token found immediately before ``:`` and the risk
        label, or ``None`` when no label variant is present in the text.

    Raises
    ------
    Exception
        When ``tmp/tmp.pdf`` is smaller than 216 bytes.
    """
    if risk_level != 200:
        return None

    extractor = pdfbox.PDFBox()
    # NOTE(review): the 216-byte threshold is undocumented; presumably it
    # filters out placeholder downloads that are not real PDFs. Confirm.
    if os.path.getsize("tmp/tmp.pdf") < 216:
        raise Exception("File not found ::")
    # The extracted text is read back from tmp/tmp.txt just below, which is
    # presumably where extract_text writes its output.
    extractor.extract_text('tmp/tmp.pdf')

    with open('tmp/tmp.txt', 'r', encoding="utf8") as text_file:
        extracted = text_file.read()

    # Text extraction sometimes drops Thai vowel/tone marks, so the label is
    # tried in three spellings, from the complete form to the most degraded.
    for label in ('เสี่ยง', 'เสียง', 'เสยง'):
        found = re.search(r'(\S+)\s*:\s*' + label, extracted)
        if found:
            return found.group(1)
    return None

In [None]:
def getPdf(row):
    """Fetch the factsheet PDF for one row and return the risk level read from it.

    ``row`` must support ``row['proj_id']``. The project id and the parsed
    result are both printed as progress output. Returns whatever
    ``readRiskFromPdf`` yields for the download status reported by
    ``FundFactAPIGet``.
    """
    fund_id = row['proj_id']
    print(fund_id)

    download_status = FundFactAPIGet(
        'https://api.sec.or.th/FundFactsheet/fund/',
        fund_id,
        'URLs',
        FundFactKey,
    )
    parsed_risk = readRiskFromPdf(download_status)
    print(parsed_risk)
    return parsed_risk

In [None]:
# Cleaned SCB fund list; it must provide the proj_id and spec_code columns
# that the following cells read.
SCB_FUNDS_CSV = '../Result_amc/SCB/SCB_funds_clean.csv'
df = pd.read_csv(SCB_FUNDS_CSV)
df

In [None]:
# One factsheet download + PDF parse per row: slow and network-bound.
# Rows whose factsheet cannot be fetched or parsed end up as None.
df['risk_spectrum'] = df.apply(getPdf, axis=1)

In [None]:
# Number of rows for which a risk level was actually extracted (non-null).
df['risk_spectrum'].count()

# Check the spec-code and risk-spectrum distribution of RMF and SSF funds

In [45]:
# Rows without a spec_code cannot be classified, so leave them out.
dropna_df = df.dropna(subset=['spec_code'])
dropna_df['spec_code'].value_counts()

spec_code
CRF            44
FED            28
FED,SSF        28
FED,RMF        18
FF,SSF         13
SSF            11
FF              8
LTF,SSF         6
RMF             4
FED,SE,SSF      3
FF,RMF          3
FED,SE          2
IND,SSF         2
IND,SE          2
TESG            2
FED,RMF,SE      2
MM              2
IND,RMF         1
FF,SE,SSF       1
IND,LTF,SSF     1
LTF             1
SPF             1
IND             1
IND,TESG        1
Name: count, dtype: int64

In [43]:
# Every row whose spec_code contains "RMF", alone or combined with other codes.
# reset_index() keeps the original row labels in an "index" column.
rmf_df = (
    dropna_df
    .loc[lambda funds: funds['spec_code'].str.contains('RMF')]
    .reset_index()
)
print(
    rmf_df['spec_code'].value_counts(),
    rmf_df['risk_spectrum'].value_counts(),
    sep='\n',
)

spec_code
FED,RMF       18
RMF            4
FF,RMF         3
FED,RMF,SE     2
IND,RMF        1
Name: count, dtype: int64
risk_spectrum
6    19
5     3
4     2
7     2
Name: count, dtype: int64


In [46]:
# Every row whose spec_code contains "SSF", alone or combined with other codes.
# reset_index() keeps the original row labels in an "index" column.
ssf_df = (
    dropna_df
    .loc[lambda funds: funds['spec_code'].str.contains('SSF')]
    .reset_index()
)
print(
    ssf_df['spec_code'].value_counts(),
    ssf_df['risk_spectrum'].value_counts(),
    sep='\n',
)

spec_code
FED,SSF        28
FF,SSF         13
SSF            11
LTF,SSF         6
FED,SE,SSF      3
IND,SSF         2
FF,SE,SSF       1
IND,LTF,SSF     1
Name: count, dtype: int64
risk_spectrum
6    45
5     9
4     5
7     4
Name: count, dtype: int64
