In [1]:
import akshare as ak 

import pandas as pd
import numpy as np

import os 
from datetime import datetime
from tqdm import tqdm

In [2]:
# load the data
hs = pd.read_csv('../data/input/hs300_list_20251213.csv')
hs['code'] = hs['code'].astype(str).str.zfill(6)

STOCK_CODES = hs['code'].tolist()
print(len(STOCK_CODES))

300


In [9]:
today = pd.to_datetime("today").strftime("%Y%m%d")
MISSED_CODE = []

for stock_code in tqdm(STOCK_CODES):
    try: 
        # --- get the financial data ---
        financial_df = ak.stock_financial_abstract_ths(symbol=f"{stock_code}", indicator="按单季度")
        # select the key indicators
        financial_df = financial_df[['报告期', '每股净资产', '基本每股收益', '净资产收益率']]
        # rename the columns
        financial_df.columns = ['report_date', 'bps', 'eps', 'roe']
        # chage the date format
        financial_df['report_date'] = pd.to_datetime(financial_df['report_date'])
        # choose the date later than 2010-01-01
        financial_df = financial_df[financial_df['report_date'] >= '2010-01-01']
        # change the data format
        financial_df['eps'] = financial_df['eps'].astype(float)
        financial_df['roe'] = financial_df['roe'].str.replace('%', '').astype(float)
        financial_df['bps'] = financial_df['bps'].astype(float)
        # calculate ttm eps and ttm roe
        financial_df['bps_ttm'] = financial_df['bps'].rolling(window=4).mean()
        financial_df['eps_ttm'] = financial_df['eps'].rolling(window=4).sum()
        financial_df['roe_ttm'] = financial_df['roe'].rolling(window=4).sum()
        # drop the values with null values
        financial_df.dropna(inplace=True)
        
        # --- merge the financial data with standardized report dates ---
        # standardize the report dates
        date_df = pd.DataFrame(pd.date_range(start='2010-12-31', end='2025-12-31', freq='ME'), columns=['report_date'])
        financial_date = pd.merge(date_df, financial_df, on='report_date', how='left', validate="1:1")
        financial_date.to_csv(f"../data/input/financial-indicators/20251213/financial_indicators_{stock_code}_{today}.csv", index=False)
    except: 
        MISSED_CODE.append(stock_code)
        continue



  0%|          | 0/300 [00:00<?, ?it/s]

100%|██████████| 300/300 [44:45<00:00,  8.95s/it]   


In [9]:
stocks = os.listdir('../data/input/financial-indicators/20251213/')
loaded_stocks = [stock[21:27] for stock in stocks]
len(loaded_stocks) 
MISSED_CODE= list(set(STOCK_CODES) - set(loaded_stocks)) 
len(MISSED_CODE)

0

In [8]:
today = pd.to_datetime("today").strftime("%Y%m%d")

while len(MISSED_CODE) > 0 & len(MISSED_CODE) < 125:
    print(len(MISSED_CODE))
    for stock_code in tqdm(MISSED_CODE):
        try: 
            # --- get the financial data ---
            financial_df = ak.stock_financial_abstract_ths(symbol=f"{stock_code}", indicator="按单季度")
            # select the key indicators
            financial_df = financial_df[['报告期', '每股净资产', '基本每股收益', '净资产收益率']]
            # rename the columns
            financial_df.columns = ['report_date', 'bps', 'eps', 'roe']
            # chage the date format
            financial_df['report_date'] = pd.to_datetime(financial_df['report_date'])
            # choose the date later than 2010-01-01
            financial_df = financial_df[financial_df['report_date'] >= '2010-01-01']
            # change the data format
            financial_df['eps'] = financial_df['eps'].astype(float)
            financial_df['roe'] = financial_df['roe'].str.replace('%', '').astype(float)
            financial_df['bps'] = financial_df['bps'].astype(float)
            # calculate ttm eps and ttm roe
            financial_df['bps_ttm'] = financial_df['bps'].rolling(window=4).mean()
            financial_df['eps_ttm'] = financial_df['eps'].rolling(window=4).sum()
            financial_df['roe_ttm'] = financial_df['roe'].rolling(window=4).sum()
            # drop the values with null values
            financial_df.dropna(inplace=True)
            
            # --- merge the financial data with standardized report dates ---
            # standardize the report dates
            date_df = pd.DataFrame(pd.date_range(start='2010-12-31', end='2025-12-31', freq='ME'), columns=['report_date'])
            financial_date = pd.merge(date_df, financial_df, on='report_date', how='left', validate="1:1")
            financial_date.to_csv(f"../data/input/financial-indicators/20251213/financial_indicators_{stock_code}_{today}.csv", index=False)
            MISSED_CODE.remove(stock_code)
        except: 
            # MISSED_CODE.append(stock_code)
            continue

124


 65%|██████▌   | 81/124 [06:56<03:41,  5.14s/it]


81


 65%|██████▌   | 53/81 [04:19<02:16,  4.89s/it]


52


 67%|██████▋   | 35/52 [15:57<07:44, 27.35s/it]   


34


 65%|██████▍   | 22/34 [01:36<00:52,  4.38s/it]


21


 76%|███████▌  | 16/21 [01:42<00:31,  6.38s/it]


15


 73%|███████▎  | 11/15 [01:17<00:28,  7.03s/it]


11


 55%|█████▍    | 6/11 [00:47<00:39,  7.94s/it]


6


 83%|████████▎ | 5/6 [00:39<00:07,  7.89s/it]


4


100%|██████████| 4/4 [00:28<00:00,  7.11s/it]


3


 67%|██████▋   | 2/3 [00:08<00:04,  4.32s/it]


2


 50%|█████     | 1/2 [00:03<00:03,  3.87s/it]


1


100%|██████████| 1/1 [00:05<00:00,  5.87s/it]


In [6]:
today = pd.to_datetime("today").strftime("%Y%m%d")

# --- get the financial data ---
financial_df = ak.stock_financial_abstract_ths(symbol=f"002230", indicator="按单季度")
# select the key indicators
financial_df = financial_df[['报告期', '每股净资产', '基本每股收益', '净资产收益率']]
# rename the columns
financial_df.columns = ['report_date', 'bps', 'eps', 'roe']
# chage the date format
financial_df['report_date'] = pd.to_datetime(financial_df['report_date'])
# choose the date later than 2010-01-01
financial_df = financial_df[financial_df['report_date'] >= '2010-01-01']
# change the data format
financial_df['eps'] = financial_df['eps'].astype(float)
financial_df['roe'] = financial_df['roe'].str.replace('%', '').astype(float)
financial_df['bps'] = financial_df['bps'].astype(float)
# calculate ttm eps and ttm roe
financial_df['bps_ttm'] = financial_df['bps'].rolling(window=4).mean()
financial_df['eps_ttm'] = financial_df['eps'].rolling(window=4).sum()
financial_df['roe_ttm'] = financial_df['roe'].rolling(window=4).sum()
# drop the values with null values
financial_df.dropna(inplace=True)

# --- merge the financial data with standardized report dates ---
# standardize the report dates
date_df = pd.DataFrame(pd.date_range(start='2010-12-31', end='2025-12-31', freq='ME'), columns=['report_date'])
financial_date = pd.merge(date_df, financial_df, on='report_date', how='left', validate="1:1")
financial_date.to_csv(f"../data/input/financial-indicators/20251213/financial_indicators_002230_{today}.csv", index=False)

300