In [None]:
from vnstock3 import Vnstock
import pandas as pd

# Create the Vnstock client object
vnstock = Vnstock()

# Fetch the list of ticker symbols in the VN30 group.
# NOTE(review): 'VN30F1M' appears to be used only to obtain a stock object whose
# `.listing` accessor is queried; presumably any valid symbol would work — confirm.
vn30_list = list(vnstock.stock(symbol='VN30F1M', source='TCBS').listing.symbols_by_group('VN30'))

# Metric columns to pull from each financial statement (update if needed).
income_statement_cols = [
    'revenue', 'operation_profit', 'pre_tax_profit',
    'post_tax_profit', 'share_holder_income', 'operation_income',
]

ratio_cols = [
    'price_to_earning', 'price_to_book',
    'roe', 'roa',
    'earning_per_share', 'book_value_per_share',
]

# Update these column names based on the actual data returned.
balance_sheet_cols = [
    'asset', 'debt', 'equity',
    'short_asset', 'short_debt',
    'cash', 'inventory', 'fixed_asset',
    'capital', 'un_distributed_income',
]

# Update these column names based on the actual data returned.
cash_flow_cols = [
    'free_cash_flow',
    'from_invest', 'from_financial', 'from_sale',
    'invest_cost',
]

# One storage dictionary per statement type; the fetch loop keys them by symbol.
income_statement_data, ratio_data, balance_sheet_data, cash_flow_data = {}, {}, {}, {}

def _index_by_period(statement_df):
    """Return a copy of ``statement_df`` indexed by a 'YYYYQn' period label.

    The 'year' and 'quarter' columns are cast to strings and kept in the frame;
    their concatenation around a literal 'Q' becomes the index, named 'period'.

    Raises:
        KeyError: if ``statement_df`` has no 'year' or 'quarter' column.
    """
    year = statement_df['year'].astype(str)
    quarter = statement_df['quarter'].astype(str)
    return (
        statement_df
        .assign(year=year, quarter=quarter, period=year + 'Q' + quarter)
        .set_index('period')
    )


def _select_for_symbol(period_df, wanted_cols, symbol):
    """Keep the ``wanted_cols`` present in ``period_df``, renamed to ``<col>_<symbol>``.

    Columns missing from ``period_df`` are skipped instead of raising, so one
    absent metric does not abort the remaining statements for that symbol.
    """
    available = [col for col in wanted_cols if col in period_df.columns]
    return period_df[available].rename(columns=lambda col: f"{col}_{symbol}")


# Walk through every ticker in VN30 and collect its four quarterly statements.
# The per-statement frames (income_df, ratio_df, balance_sheet_df, cash_flow_df)
# are deliberately left bound at module level: a later cell displays cash_flow_df.
for symbol in vn30_list:
    try:
        company = vnstock.stock(symbol=symbol, source='TCBS')

        # Income statement
        income_df = _index_by_period(company.finance.income_statement(period='quarter'))
        income_statement_data[symbol] = _select_for_symbol(income_df, income_statement_cols, symbol)

        # Financial ratios
        ratio_df = _index_by_period(company.finance.ratio(period='quarter'))
        ratio_data[symbol] = _select_for_symbol(ratio_df, ratio_cols, symbol)

        # Balance sheet
        balance_sheet_df = _index_by_period(company.finance.balance_sheet(period='quarter'))
        balance_sheet_data[symbol] = _select_for_symbol(balance_sheet_df, balance_sheet_cols, symbol)

        # Cash flow statement
        cash_flow_df = _index_by_period(company.finance.cash_flow(period='quarter'))
        cash_flow_data[symbol] = _select_for_symbol(cash_flow_df, cash_flow_cols, symbol)

    except Exception as e:
        # Best-effort: report the failure and continue with the next symbol.
        # Statements stored before the failure are kept for this symbol.
        print(f"Lỗi khi lấy dữ liệu cho mã {symbol}: {e}")

# Detect and repair non-unique indexes
def ensure_unique_index(data_dict):
    """Drop repeated index labels from every frame stored in ``data_dict``.

    For each entry whose index is not unique, the entry is replaced (in the
    dictionary itself) by a frame keeping only the first row per index label.
    Entries with a unique index are left as the same object. Returns None.
    """
    for symbol in list(data_dict):
        frame = data_dict[symbol]
        if frame.index.is_unique:
            continue
        first_occurrence = ~frame.index.duplicated(keep='first')
        data_dict[symbol] = frame[first_occurrence]

# De-duplicate the period index of every stored frame, one statement type at a time.
for statement_frames in (income_statement_data, ratio_data, balance_sheet_data, cash_flow_data):
    ensure_unique_index(statement_frames)

# Combine the per-symbol data
def concatenate_data(data_dict):
    """Join all frames in ``data_dict`` side by side into one DataFrame.

    The frames are concatenated along the column axis (``axis=1``). An empty
    (or otherwise falsy) ``data_dict`` yields an empty DataFrame.
    """
    if not data_dict:
        return pd.DataFrame()
    frames = list(data_dict.values())
    return pd.concat(frames, axis=1)

# Build one wide frame per statement type from the per-symbol frames
# (each call joins that dictionary's frames along the column axis).
income_statement_combined = concatenate_data(income_statement_data)
ratio_combined = concatenate_data(ratio_data)
balance_sheet_combined = concatenate_data(balance_sheet_data)
cash_flow_combined = concatenate_data(cash_flow_data)

# Sanity check: print the shape of each combined DataFrame
print(f"income_statement_combined.shape: {income_statement_combined.shape}")
print(f"ratio_combined.shape: {ratio_combined.shape}")
print(f"balance_sheet_combined.shape: {balance_sheet_combined.shape}")
print(f"cash_flow_combined.shape: {cash_flow_combined.shape}")

# Compute averages
def calculate_average(df_combined, cols):
    """Average each metric across all of its ``<metric>_<suffix>`` columns.

    Args:
        df_combined: wide DataFrame whose column names start with a metric name
            followed by an underscore (e.g. ``'roe_ABC'``). It is NOT modified.
        cols: iterable of metric names to average.

    Returns:
        DataFrame sharing ``df_combined``'s index, with one column per metric
        that had at least one matching column. Each value is the row-wise mean
        of the matching columns after numeric coercion; non-numeric cells
        become NaN and are ignored by the mean.

    Metrics with no matching column are reported on stdout and omitted.
    """
    average_df = pd.DataFrame(index=df_combined.index)
    for col in cols:
        # All columns whose name starts with the metric name plus '_'
        prefix = col + '_'
        col_list = [c for c in df_combined.columns if c.startswith(prefix)]
        if not col_list:
            print(f"Không có dữ liệu cho chỉ số {col}")
            continue
        # Coerce on a local selection so the caller's frame keeps its original
        # values and dtypes (the coercion used to be written back in place).
        numeric_values = df_combined[col_list].apply(pd.to_numeric, errors='coerce')
        average_df[col] = numeric_values.mean(axis=1)
    return average_df

# Per-period cross-symbol averages, one frame per statement type.
average_income_statement = calculate_average(income_statement_combined, income_statement_cols)
average_ratio = calculate_average(ratio_combined, ratio_cols)
average_balance_sheet = calculate_average(balance_sheet_combined, balance_sheet_cols)
average_cash_flow = calculate_average(cash_flow_combined, cash_flow_cols)

# Join all of the average frames side by side.
average_frames = [
    average_income_statement,
    average_ratio,
    average_balance_sheet,
    average_cash_flow,
]
average_df = pd.concat(average_frames, axis=1)

# Show which columns ended up in the resulting DataFrame.
print(average_df.columns)


In [27]:
# Display the combined per-period averages built in the cell above.
average_df

Unnamed: 0_level_0,revenue,operation_profit,pre_tax_profit,post_tax_profit,share_holder_income,operation_income,price_to_earning,price_to_book,roe,roa,...,cash,inventory,fixed_asset,capital,un_distributed_income,free_cash_flow,from_invest,from_financial,from_sale,invest_cost
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024Q2,8281.666667,10435.666667,5125.416667,4100.416667,4058.666667,7087.750000,22.280,2.172,0.15448,0.03684,...,7105.76,13926.307692,12356.16,31535.12,14213.32,3257.68,-167.00,299.88,8464.44,-653.52
2024Q1,7901.916667,9928.500000,4945.250000,3959.083333,3925.500000,6899.083333,31.044,2.076,0.14772,0.03560,...,6199.28,14110.461538,12385.36,29501.36,13946.12,2341.04,-1258.88,739.04,-3580.04,-1130.96
2023Q4,7858.166667,10006.583333,4727.166667,3755.833333,3726.500000,6484.166667,36.608,1.892,0.15148,0.03540,...,6448.40,13264.846154,12499.64,29501.36,12713.00,3259.28,-1455.96,1931.16,8300.72,-857.96
2023Q3,7155.833333,9264.416667,4252.750000,3412.750000,3385.916667,6181.250000,30.168,2.032,0.15036,0.03508,...,5907.96,13822.692308,12466.92,28590.40,11827.04,2736.08,-730.00,-648.96,4310.48,-484.68
2023Q2,7150.000000,9240.000000,4381.166667,3504.500000,3475.500000,6158.833333,19.740,1.996,0.15724,0.03600,...,6340.64,13364.846154,12111.96,27369.52,11349.88,3072.80,-833.44,-272.00,94.56,-562.52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2006Q2,73.500000,90.500000,90.500000,65.500000,65.500000,90.500000,,,,,...,2290.00,,505.00,1900.00,312.00,,,,,
2006Q1,60.500000,52.500000,52.500000,38.000000,38.000000,52.500000,,,,,...,1607.00,,477.00,1251.00,131.00,,,,,
2005Q2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,,,,,...,,,,,,,,,,
2005Q1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,,,,,...,,,,,,,,,,


In [21]:
# List the columns of the combined averages frame.
# NOTE(review): the saved output includes derived columns such as
# 'debt_to_equity_ratio', which are only added by the ratio cell further down,
# so this cell was apparently last run out of order; re-run top to bottom.
average_df.columns

Index(['revenue', 'operation_profit', 'pre_tax_profit', 'post_tax_profit',
       'share_holder_income', 'operation_income', 'price_to_earning',
       'price_to_book', 'roe', 'roa', 'earning_per_share',
       'book_value_per_share', 'asset', 'debt', 'equity', 'cash',
       'fixed_asset', 'capital', 'un_distributed_income', 'free_cash_flow',
       'from_invest', 'from_financial', 'from_sale', 'invest_cost',
       'debt_to_equity_ratio', 'net_profit_margin', 'operating_margin',
       'asset_turnover_ratio', 'cash_ratio', 'earnings_yield', 'roce'],
      dtype='object')

In [22]:
# NOTE(review): cash_flow_df is a variable left over from the fetch loop, so it
# holds the cash-flow frame of only the last symbol that was processed
# successfully, not combined data.
cash_flow_df

Unnamed: 0_level_0,quarter,year,invest_cost,from_invest,from_financial,from_sale,free_cash_flow
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024Q2,2,2024,-44,-43,-1649,7393,0.0
2024Q1,1,2024,-83,-83,-1522,-799,0.0
2023Q4,4,2023,-75,-74,0,871,0.0
2023Q3,3,2023,-74,-71,0,1088,0.0
2023Q2,2,2023,-60,-59,-1054,21437,0.0
2023Q1,1,2023,-14,-14,-2108,6423,0.0
2022Q4,4,2022,-74,-74,0,6987,0.0
2022Q3,3,2022,-86,-84,0,-17869,0.0
2022Q2,2,2022,-56,-54,0,9687,0.0
2022Q1,1,2022,-25,-25,0,3113,0.0


In [None]:
# Derived indicators: each one is added to average_df as a new column computed
# from the averaged base columns.

# Debt-to-Equity Ratio
average_df['debt_to_equity_ratio'] = average_df['debt'].div(average_df['equity'])

# Net Profit Margin
average_df['net_profit_margin'] = average_df['post_tax_profit'].div(average_df['revenue'])

# Operating Margin
average_df['operating_margin'] = average_df['operation_profit'].div(average_df['revenue'])

# Asset Turnover Ratio
average_df['asset_turnover_ratio'] = average_df['revenue'].div(average_df['asset'])

# Cash Ratio
average_df['cash_ratio'] = average_df['cash'].div(average_df['debt'])

# Earnings Yield (reciprocal of P/E)
average_df['earnings_yield'] = average_df['price_to_earning'].rdiv(1)

# Return on Capital Employed (ROCE); total 'debt' is subtracted as the
# fallback for when 'short_debt' is not available.
capital_employed = average_df['asset'].sub(average_df['debt'])
average_df['roce'] = average_df['operation_profit'].div(capital_employed)

In [None]:
# Write the averaged indicators to a CSV file; the path is relative to the
# current working directory.
average_df.to_csv("financial_indicators.csv")