# Data Collection for Pair Trading (VN Stocks)

This notebook collects stock data for Vietnamese stocks using `vnstock` (version 3.x).

In [1]:
from vnstock import Vnstock
import pandas as pd
from datetime import datetime
import os

def get_vn_stock_data(tickers, start_date, end_date, interval='15m'):
    """
    Fetches historical stock data for given tickers using vnstock 3.x.
    
    Args:
        tickers (list): List of ticker symbols (e.g., ['VCB', 'BID'])
        start_date (str): Start date in 'YYYY-MM-DD' format
        end_date (str): End date in 'YYYY-MM-DD' format
        interval (str): Data interval (e.g., '15m', '1D')
        
    Returns:
        dict: Dictionary of DataFrames for each ticker
    """
    data = {}
    print(f"Fetching data for {tickers} from {start_date} to {end_date} with interval {interval}...")
    
    # Initialize Vnstock
    stock_client = Vnstock()
    
    for ticker in tickers:
        try:
            # Use VCI source as it supports history with interval
            stock = stock_client.stock(symbol=ticker, source='VCI')
            df = stock.quote.history(start=start_date, end=end_date, interval=interval)
            
            if df is None or df.empty:
                print(f"Warning: No data found for {ticker}.")
            else:
                print(f"Successfully fetched {len(df)} rows for {ticker}.")
                # Convert time column if needed
                if 'time' in df.columns:
                    df['time'] = pd.to_datetime(df['time'])
                    print(f"Date range: {df['time'].min()} to {df['time'].max()}")
                
                data[ticker] = df
                # Save to CSV
                filename = f"{ticker}_{interval}_{start_date}_{end_date}.csv"
                df.to_csv(filename, index=False)
                print(f"Saved to {filename}")
                
        except Exception as e:
            print(f"Error fetching data for {ticker}: {e}")
            
    return data

# Example usage
tickers = ['VCB', 'BID', 'CTG', 'TCB', 'MBB', 'ACB'] # Example pair: Vietcombank and BIDV
start_date = '2022-02-01'
end_date = datetime.now().strftime('%Y-%m-%d')
interval = '15m'

stock_data = get_vn_stock_data(tickers, start_date, end_date, interval)

# Display first few rows
if tickers[0] in stock_data:
    display(stock_data[tickers[0]].head())
    display(stock_data[tickers[0]].tail())

Fetching data for ['VCB', 'BID', 'CTG', 'TCB', 'MBB', 'ACB'] from 2022-02-01 to 2025-12-05 with interval 15m...
Successfully fetched 78359 rows for VCB.
Date range: 2023-09-11 09:15:00 to 2025-12-05 14:45:00
Saved to VCB_15m_2022-02-01_2025-12-05.csv
Successfully fetched 78359 rows for BID.
Date range: 2023-09-11 09:15:00 to 2025-12-05 14:45:00
Saved to BID_15m_2022-02-01_2025-12-05.csv
Successfully fetched 78359 rows for CTG.
Date range: 2023-09-11 09:15:00 to 2025-12-05 14:45:00
Saved to CTG_15m_2022-02-01_2025-12-05.csv
Successfully fetched 78359 rows for TCB.
Date range: 2023-09-11 09:15:00 to 2025-12-05 14:45:00
Saved to TCB_15m_2022-02-01_2025-12-05.csv
Successfully fetched 78359 rows for MBB.
Date range: 2023-09-11 09:15:00 to 2025-12-05 14:45:00
Saved to MBB_15m_2022-02-01_2025-12-05.csv
Successfully fetched 78359 rows for ACB.
Date range: 2023-09-11 09:15:00 to 2025-12-05 14:45:00
Saved to ACB_15m_2022-02-01_2025-12-05.csv


Unnamed: 0,time,open,high,low,close,volume
0,2023-09-11 09:15:00,59.5,59.7,59.5,59.7,50200
1,2023-09-11 09:30:00,59.7,59.77,59.63,59.77,51300
2,2023-09-11 09:45:00,59.77,59.77,59.7,59.7,138700
3,2023-09-11 10:00:00,59.7,59.77,59.63,59.7,54600
4,2023-09-11 10:15:00,59.7,59.7,59.63,59.63,46700


Unnamed: 0,time,open,high,low,close,volume
78354,2025-12-05 13:45:00,58.5,58.6,58.4,58.4,192000
78355,2025-12-05 14:00:00,58.5,58.5,58.3,58.3,310200
78356,2025-12-05 14:15:00,58.3,58.4,58.2,58.3,401200
78357,2025-12-05 14:30:00,,,,,0
78358,2025-12-05 14:45:00,58.3,58.3,58.3,58.3,315000


In [None]:
# T√°ch m·ªói lo·∫°i c·ªï phi·∫øu th√†nh 1 dataframe
df_VCB = stock_data['VCB']
df_BID = stock_data['BID']
df_CTG = stock_data['CTG']
df_TCB = stock_data['TCB']
df_MBB = stock_data['MBB']
df_ACB = stock_data['ACB']

print(f"VCB: {len(df_VCB)} rows")
print(f"BID: {len(df_BID)} rows")
print(f"CTG: {len(df_CTG)} rows")
print(f"TCB: {len(df_TCB)} rows")
print(f"MBB: {len(df_MBB)} rows")
print(f"ACB: {len(df_ACB)} rows")

VCB: 78359 rows
BID: 78359 rows
CTG: 78359 rows
TCB: 78359 rows
MBB: 78359 rows
ACB: 78359 rows


In [4]:
# X√≥a b·ªè d√≤ng c√≥ c·ªôt 'close' r·ªóng
df_VCB = df_VCB.dropna(subset=['close'])
df_BID = df_BID.dropna(subset=['close'])
df_CTG = df_CTG.dropna(subset=['close'])
df_TCB = df_TCB.dropna(subset=['close'])
df_MBB = df_MBB.dropna(subset=['close'])
df_ACB = df_ACB.dropna(subset=['close'])

# Reset index cho dataframe
df_VCB = df_VCB.reset_index(drop=True)
df_BID = df_BID.reset_index(drop=True)
df_CTG = df_CTG.reset_index(drop=True)
df_TCB = df_TCB.reset_index(drop=True)
df_MBB = df_MBB.reset_index(drop=True)
df_ACB = df_ACB.reset_index(drop=True)

In [6]:
# L∆∞u t·ª´ng dataframe th√†nh file CSV
df_VCB.to_csv(f"VCB_cleaned.csv", index=False)
df_BID.to_csv(f"BID_cleaned.csv", index=False)
df_CTG.to_csv(f"CTG_cleaned.csv", index=False)
df_TCB.to_csv(f"TCB_cleaned.csv", index=False)
df_MBB.to_csv(f"MBB_cleaned.csv", index=False)
df_ACB.to_csv(f"ACB_cleaned.csv", index=False)

print("Saved all dataframes to CSV files")

Saved all dataframes to CSV files
