In [None]:
import os
import yaml
import calendar
import pandas as pd

# Root data folder; get_all_data() lists its entries and expects names like "2023-10".
BASE_DIR = 'C:/Users/dell/Desktop/project/stock_market_project/data'
# Folder (relative to the current working directory) that receives one CSV per ticker.
OUTPUT_DIR = 'ticker_csvs' 
# Create the output folder up front; exist_ok avoids an error when it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def read_yaml_if_exists(folder_path, date_str):
    """Load the first YAML file in ``folder_path`` whose name starts with ``date_str``.

    Args:
        folder_path: Directory to search (not recursive).
        date_str: File-name prefix to match, e.g. ``"2023-10-05"``.

    Returns:
        The object produced by ``yaml.safe_load`` for the first matching
        ``*.yaml`` file (in sorted name order), or ``None`` when
        ``folder_path`` is not a directory or contains no matching file.
    """
    # A missing or non-directory path simply means "no data for that date".
    if not os.path.isdir(folder_path):
        return None

    # Sort so the choice is deterministic if several files share the prefix;
    # os.listdir() returns entries in arbitrary order.
    for file_name in sorted(os.listdir(folder_path)):
        if file_name.startswith(date_str) and file_name.endswith('.yaml'):
            file_path = os.path.join(folder_path, file_name)
            # Explicit UTF-8: without it open() uses the platform's locale
            # encoding, which can mis-decode non-ASCII text in the file.
            with open(file_path, 'r', encoding='utf-8') as f:
                return yaml.safe_load(f)
    return None

def get_all_data():
    """Load every daily YAML file under ``BASE_DIR`` and write one CSV per ticker.

    Each entry of ``BASE_DIR`` named ``YYYY-MM`` is scanned day by day: for
    every calendar day of that month, ``read_yaml_if_exists`` is asked for a
    file whose name starts with ``YYYY-MM-DD``. Every non-empty list payload
    is turned into DataFrame rows. The combined rows are grouped by the
    ``Ticker`` column and each group is written to
    ``OUTPUT_DIR/<ticker>.csv`` (index column omitted).

    Entries whose name is not a valid year-month, or that are not
    directories, are reported and skipped. The total number of loaded rows is
    printed. Nothing is written when no rows were loaded.

    Returns:
        None.

    Raises:
        KeyError: If rows were loaded but there is no ``Ticker`` column.
    """
    daily_frames = []
    for folder in sorted(os.listdir(BASE_DIR)):
        folder_path = os.path.join(BASE_DIR, folder)

        try:
            # Extract year and month from folder name like "2023-10"
            year, month = map(int, folder.split('-'))
            # monthrange() returns (weekday of the 1st, number of days). It
            # raises a ValueError subclass for a month outside 1..12, so it
            # lives inside the try to skip names such as "2023-13" as well.
            _, last_day = calendar.monthrange(year, month)
        except ValueError:
            print(f"Skipping folder {folder} (invalid name format)")
            continue

        # A plain file with a valid-looking name cannot be listed; skip it.
        if not os.path.isdir(folder_path):
            print(f"Skipping {folder} (not a directory)")
            continue

        # Loop through all days in that month
        for day in range(1, last_day + 1):
            date_str = f"{year}-{month:02d}-{day:02d}"
            data = read_yaml_if_exists(folder_path, date_str)

            # Only non-empty lists are treated as row data.
            if data and isinstance(data, list):
                daily_frames.append(pd.DataFrame(data))

    # Concatenate once at the end: concatenating inside the loop re-copies
    # all previously loaded rows for every file.
    if daily_frames:
        df = pd.concat(daily_frames, ignore_index=True)
    else:
        df = pd.DataFrame()

    # Final DataFrame
    print(f"\nTotal rows loaded: {len(df)}")

    # With nothing loaded there is no 'Ticker' column to group by.
    if df.empty:
        return

    for ticker, group in df.groupby('Ticker'):
        ticker_file = os.path.join(OUTPUT_DIR, f"{ticker}.csv")
        group.to_csv(ticker_file, index=False)
        

# Run the export only when executed directly (script or notebook cell),
# not as a side effect of importing this module.
if __name__ == "__main__":
    get_all_data()



Total rows loaded: 14200
