In [1]:
# Historical price data is requested from the CafeF price-history endpoint, e.g.:
# https://m.cafef.vn/du-lieu/Ajax/PageNew/DataHistory/PriceHistory.ashx?Symbol=ACB&StartDate=&EndDate=&PageIndex=1&PageSize=20
import requests
import pandas as pd


In [2]:
def get_stock_data(symbol, start_date='', end_date='', page_index=1, page_size=999999, timeout=30):
    """Download the price history of one stock from CafeF as a DataFrame.

    Parameters
    ----------
    symbol : str
        Ticker sent as the ``Symbol`` query parameter; also written to the
        ``Stock`` column of the result.
    start_date, end_date : str, optional
        Sent verbatim as ``StartDate`` / ``EndDate``. Both default to the
        empty string.
        NOTE(review): the expected date format and the meaning of an empty
        value are decided by the remote API -- confirm before relying on them.
    page_index, page_size : int, optional
        Sent verbatim as ``PageIndex`` / ``PageSize``.
    timeout : float or tuple, optional
        Seconds handed to ``requests.get`` so a stalled connection cannot
        block the notebook forever.

    Returns
    -------
    pandas.DataFrame
        One row per record returned by the API, with a parsed ``Date`` column,
        the ``ThayDoi`` text split into numeric ``GiaThayDoi`` and
        ``ThayDoiPhanTram`` columns, and a ``Stock`` column. When the API
        returns no records, an empty frame with the same columns is returned.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (``raise_for_status``).
    KeyError
        If the returned records lack one of the expected fields.
    """
    url = 'https://m.cafef.vn/du-lieu/Ajax/PageNew/DataHistory/PriceHistory.ashx'
    params = {
        'Symbol': symbol,
        'StartDate': start_date,
        'EndDate': end_date,
        'PageIndex': page_index,
        'PageSize': page_size
    }
    column_order = [
        'Date', 'Ngay', 'Stock', 'GiaDieuChinh', 'GiaDongCua', 'GiaMoCua', 'GiaCaoNhat', 'GiaThapNhat', 'GiaThayDoi', 'ThayDoiPhanTram',
        'ThayDoi', 'KhoiLuongKhopLenh', 'GiaTriKhopLenh', 'KLThoaThuan',
        'GtThoaThuan',
    ]

    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()  # Raise an error for bad responses

    payload = response.json()
    # The records live under Data -> Data; treat a null/missing level or an
    # empty list as "no rows" instead of failing later on a missing column.
    records = (payload.get('Data') or {}).get('Data') or []
    if not records:
        return pd.DataFrame(columns=column_order)

    df = pd.DataFrame(records)
    df['Date'] = pd.to_datetime(df['Ngay'], format="%d/%m/%Y")

    # 'ThayDoi' is text shaped like "<change>(<percent> %)"; capture both numbers.
    change_parts = df['ThayDoi'].str.extract(r'([-+]?\d*\.?\d+)\(([-+]?\d*\.?\d+)\s?%\)')
    df['GiaThayDoi'] = pd.to_numeric(change_parts[0])
    df['ThayDoiPhanTram'] = pd.to_numeric(change_parts[1])

    df['Stock'] = symbol
    return df[column_order]

In [3]:
# Tickers to download; each successful download is written to
# '<ticker>_data.csv' and kept in df_list for the cells that follow.
stock_list = ['ACB', 'HPG', 'KSB', 'NLG', 'VNM']
df_list = []

for stock in stock_list:
    try:
        df = get_stock_data(stock)
        df.to_csv(f'{stock}_data.csv', index=False)
    except Exception as e:
        # Best effort: report the failure and move on to the next ticker.
        print(f"Error fetching data for {stock}: {e}")
        continue
    df_list.append(df)

In [4]:
# Earliest 'Date' of every downloaded frame (frames without rows are skipped),
# followed by the overall earliest date when at least one frame had data.
min_dates = [frame['Date'].min() for frame in df_list if not frame.empty]
if min_dates:
    min_date = min(min_dates)
    print(f"Minimum date across all stocks: {min_date}")

Minimum date across all stocks: 2006-01-19 00:00:00


In [5]:
# Show the earliest date of each non-empty frame, in df_list order.
min_dates

[Timestamp('2006-11-21 00:00:00'),
 Timestamp('2007-11-15 00:00:00'),
 Timestamp('2010-01-20 00:00:00'),
 Timestamp('2013-04-08 00:00:00'),
 Timestamp('2006-01-19 00:00:00')]