In [1]:
import json
from pathlib import Path

import pandas as pd
import requests

In [2]:
# Identifier columns that open every key-account list below.
_IDENTITY_COLUMNS = ['SECURITY_CODE', 'SECURITY_NAME_ABBR']

# Columns that get_data keeps from the BALANCE report.
BALANCE_KEY_ACCOUNTS = [
    *_IDENTITY_COLUMNS,
    'REPORT_DATE',
    'TOTAL_ASSETS',
    'FIXED_ASSET',
    'MONETARYFUNDS',
    'MONETARYFUNDS_RATIO',
    'ACCOUNTS_RECE',
    'ACCOUNTS_RECE_RATIO',
    'INVENTORY',
    'INVENTORY_RATIO',
    'TOTAL_LIABILITIES',
    'ACCOUNTS_PAYABLE',
    'ACCOUNTS_PAYABLE_RATIO',
    'ADVANCE_RECEIVABLES',
    'ADVANCE_RECEIVABLES_RATIO',
    'TOTAL_EQUITY',
    'TOTAL_EQUITY_RATIO',
    'TOTAL_ASSETS_RATIO',
    'TOTAL_LIAB_RATIO',
    'CURRENT_RATIO',
    'DEBT_ASSET_RATIO',
]

# Columns that get_data keeps from the INCOME report.
INCOME_KEY_ACCOUNTS = [
    *_IDENTITY_COLUMNS,
    'REPORT_DATE',
    'PARENT_NETPROFIT',
    'TOTAL_OPERATE_INCOME',
    'TOTAL_OPERATE_COST',
    'TOE_RATIO',
    'TOI_RATIO',
    'OPERATE_COST',
    'OPERATE_EXPENSE',
    'OPERATE_EXPENSE_RATIO',
    'SALE_EXPENSE',
    'MANAGE_EXPENSE',
    'FINANCE_EXPENSE',
    'OPERATE_PROFIT',
    'TOTAL_PROFIT',
    'INCOME_TAX',
    'OPERATE_INCOME',
    'INTEREST_NI',
    'INTEREST_NI_RATIO',
]

# Columns that get_data keeps from the CASHFLOW report.
CASHFLOW_KEY_ACCOUNTS = [
    *_IDENTITY_COLUMNS,
    'REPORT_DATE',
    'NETCASH_OPERATE',
    'NETCASH_OPERATE_RATIO',
    'SALES_SERVICES',
    'SALES_SERVICES_RATIO',
    'PAY_STAFF_CASH',
    'PSC_RATIO',
    'NETCASH_INVEST',
    'NETCASH_INVEST_RATIO',
    'RECEIVE_INVEST_INCOME',
    'RII_RATIO',
    'CONSTRUCT_LONG_ASSET',
    'CLA_RATIO',
    'NETCASH_FINANCE',
    'NETCASH_FINANCE_RATIO',
]

# Columns that get_data keeps from the SUMMARY report. The date column is
# spelled 'REPORTDATE' (no underscore) here; get_data renames it to
# 'REPORT_DATE' after selecting these columns.
SUMMARY_KEY_ACCOUNTS = [
    *_IDENTITY_COLUMNS,
    'REPORTDATE',
    'BASIC_EPS',
    'DEDUCT_BASIC_EPS',
    'WEIGHTAVG_ROE',
    'YSTZ',
    'SJLTZ',
    'BPS',
    'MGJYXJJE',
    'XSMLL',
    'YSHZ',
    'SJLHZ',
    'QDATE',
]

In [3]:
def get_data(security_code, report, timeout=30):
    """Download one financial report for a stock and keep its key columns.

    The request goes to the Eastmoney data-centre endpoint and asks for the
    first page of up to 50 records (``pageSize=50``, ``pageNumber=1``) with
    all columns, sorted on the report-date column with ``sortTypes=-1``.

    Parameters
    ----------
    security_code : str
        Security code used in the request filter, e.g. ``'000858'``.
    report : str
        One of ``'BALANCE'``, ``'INCOME'``, ``'CASHFLOW'`` or ``'SUMMARY'``.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 30).

    Returns
    -------
    key_df : pandas.DataFrame
        The response restricted to the ``*_KEY_ACCOUNTS`` columns of the
        requested report. For ``'SUMMARY'`` the ``REPORTDATE`` column is
        renamed to ``REPORT_DATE`` so all four reports share the same name.
    cols : list
        Names of every column present in the raw response.

    Raises
    ------
    ValueError
        If ``report`` is not supported, or if the response carries no records.
    requests.HTTPError
        If the server answers with an error status code.
    """
    # Validate before any network traffic; the two report families differ in
    # both the sort column spelling and the report name.
    if report in ('BALANCE', 'INCOME', 'CASHFLOW'):
        sort_column = 'REPORT_DATE'
        report_name = f'RPT_DMSK_FN_{report}'
    elif report == 'SUMMARY':
        sort_column = 'REPORTDATE'
        report_name = 'RPT_LICO_FN_CPD'
    else:
        raise ValueError(f'{report} is not supported.')

    # Adjacent literals are joined without separators. A backslash
    # continuation inside a single literal would drag the next line's
    # indentation into the URL as embedded spaces.
    url = (
        'https://datacenter-web.eastmoney.com/api/data/v1/get?'
        f'sortColumns={sort_column}&sortTypes=-1&pageSize=50&pageNumber=1'
        '&columns=ALL&filter='
        f'(SECURITY_CODE%3D%22{security_code}%22)'
        f'&reportName={report_name}'
    )

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    payload = response.json()

    # Guard against a missing/null "result" or an empty "data" list so the
    # caller gets a clear message instead of a TypeError/KeyError.
    result = payload.get('result') if isinstance(payload, dict) else None
    records = result.get('data') if isinstance(result, dict) else None
    if not records:
        raise ValueError(
            f'No {report} data returned for security code {security_code}.'
        )

    df = pd.DataFrame(records)
    cols = df.columns.tolist()

    key_accounts = {
        'BALANCE': BALANCE_KEY_ACCOUNTS,
        'INCOME': INCOME_KEY_ACCOUNTS,
        'CASHFLOW': CASHFLOW_KEY_ACCOUNTS,
        'SUMMARY': SUMMARY_KEY_ACCOUNTS,
    }[report]

    key_df = df[key_accounts]
    if report == 'SUMMARY':
        key_df = key_df.rename(columns={'REPORTDATE': 'REPORT_DATE'})

    return key_df, cols

In [4]:
# Stocks to download:
# Wuliangye 000858
# Gree Electric Appliances 000651
# Midea Group 000333

# Create the output directory up front so to_csv does not fail when the
# notebook is run on a fresh checkout where ../input does not exist yet.
OUTPUT_DIR = Path('../input')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

for security_code in ['000858', '000651', '000333']:
    for report in ['BALANCE', 'INCOME', 'CASHFLOW', 'SUMMARY']:
        # Only the key-account frame is written; cols (the full column list
        # of the raw response) is not used in this cell.
        df, cols = get_data(security_code, report)
        df.to_csv(OUTPUT_DIR / f'{security_code}_{report}.csv', index=False)