In [3]:
import os
import yaml
import pandas as pd
from collections import defaultdict

def load_yaml_file(file_path):
    """Parse a single YAML file and return the resulting Python object.

    The file is opened in binary mode so PyYAML performs its own encoding
    detection (byte-order mark, UTF-8 otherwise) rather than having the text
    decoded with the platform's locale encoding first, which mis-reads
    non-ASCII content on systems whose default encoding is not UTF-8.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content cannot be parsed as YAML.
    """
    with open(file_path, 'rb') as file:
        return yaml.safe_load(file)

def extract_stock_data_without_sector(root_dir):
    """Collect per-symbol price records from the YAML files under ``root_dir``.

    Every sub-directory of ``root_dir`` is scanned, in sorted order, for files
    whose name ends in ``.yaml``. Each file is expected to contain a list of
    mappings; a mapping contributes one record when it has a ``'Ticker'``
    value.

    Files that fail to load, or whose top level is not a list, are reported
    on stdout and skipped. List items that are not mappings, or that have no
    ``'Ticker'``, are skipped silently so a single malformed entry cannot
    abort the whole extraction.

    Args:
        root_dir: Directory whose sub-directories hold the YAML files.

    Returns:
        A ``defaultdict(list)`` mapping each ticker symbol to its records in
        the order they were read. Each record is a dict with the keys
        ``date``, ``symbol``, ``open``, ``close``, ``high``, ``low``,
        ``volume`` and ``month``; fields missing from the YAML are ``None``.

    Raises:
        OSError: If ``root_dir`` (or a sub-directory) cannot be listed.
    """
    symbol_data = defaultdict(list)

    for month_folder in sorted(os.listdir(root_dir)):
        month_path = os.path.join(root_dir, month_folder)

        # Plain files sitting directly in root_dir are not month folders.
        if not os.path.isdir(month_path):
            continue

        for filename in sorted(os.listdir(month_path)):
            if not filename.endswith('.yaml'):
                continue
            file_path = os.path.join(month_path, filename)

            # Best effort: one unreadable file must not stop the run.
            try:
                data_entries = load_yaml_file(file_path)
            except Exception as e:
                print(f"Error reading {file_path}: {e}")
                continue

            if not isinstance(data_entries, list):
                print(f"‚ö†Ô∏è Skipped non-list YAML: {file_path}")
                continue

            for entry in data_entries:
                # A scalar or None list item has no .get(); skip it instead
                # of raising AttributeError.
                if not isinstance(entry, dict):
                    continue

                symbol = entry.get('Ticker')  # the symbol is stored under 'Ticker'
                if symbol is None:
                    continue

                # Key order here determines the CSV column order downstream.
                symbol_data[symbol].append({
                    'date': entry.get('date'),
                    'symbol': symbol,
                    'open': entry.get('open'),
                    'close': entry.get('close'),
                    'high': entry.get('high'),
                    'low': entry.get('low'),
                    'volume': entry.get('volume'),
                    'month': entry.get('month'),
                })

    return symbol_data

def save_symbol_csvs(symbol_data, output_dir='output_csvs'):
    """Write one CSV per symbol, with rows ordered by their ``date`` value.

    Args:
        symbol_data: Mapping of symbol -> list of record dicts. Every record
            list must yield a frame with a ``date`` column.
        output_dir: Destination folder; created if it does not exist.

    Returns:
        None. Each symbol is written to ``<output_dir>/<symbol>.csv`` without
        the index column, and a confirmation line is printed per file.
    """
    os.makedirs(output_dir, exist_ok=True)

    for ticker, rows in symbol_data.items():
        ordered = pd.DataFrame(rows).sort_values(by='date')
        target = os.path.join(output_dir, f'{ticker}.csv')
        ordered.to_csv(target, index=False)
        print(f"‚úÖ Saved: {ticker}.csv with {len(ordered)} records")

def main():
    """Run step 1: extract records from the YAML tree and write per-symbol CSVs.

    Reads from the ``data`` folder and writes to the default output folder of
    ``save_symbol_csvs``; progress messages are printed before and after.
    """
    # Folder that contains the month sub-folders; change it to match your layout.
    source_dir = 'data'

    print("üîÑ Extracting stock data (without sector)...")
    records_by_symbol = extract_stock_data_without_sector(source_dir)
    save_symbol_csvs(records_by_symbol)
    print("üéâ Step 1 done: CSVs created.")

# Run step 1 when this cell (or the file as a script) is executed directly.
if __name__ == '__main__':
    main()


üîÑ Extracting stock data (without sector)...
‚úÖ Saved: SBIN.csv with 284 records
‚úÖ Saved: BAJFINANCE.csv with 284 records
‚úÖ Saved: TITAN.csv with 284 records
‚úÖ Saved: ITC.csv with 284 records
‚úÖ Saved: TCS.csv with 284 records
‚úÖ Saved: LT.csv with 284 records
‚úÖ Saved: TATACONSUM.csv with 284 records
‚úÖ Saved: RELIANCE.csv with 284 records
‚úÖ Saved: HCLTECH.csv with 284 records
‚úÖ Saved: JSWSTEEL.csv with 284 records
‚úÖ Saved: ULTRACEMCO.csv with 284 records
‚úÖ Saved: POWERGRID.csv with 284 records
‚úÖ Saved: INFY.csv with 284 records
‚úÖ Saved: TRENT.csv with 284 records
‚úÖ Saved: BHARTIARTL.csv with 284 records
‚úÖ Saved: TATAMOTORS.csv with 284 records
‚úÖ Saved: WIPRO.csv with 284 records
‚úÖ Saved: TECHM.csv with 284 records
‚úÖ Saved: NTPC.csv with 284 records
‚úÖ Saved: HINDUNILVR.csv with 284 records
‚úÖ Saved: APOLLOHOSP.csv with 284 records
‚úÖ Saved: M&M.csv with 284 records
‚úÖ Saved: GRASIM.csv with 284 records
‚úÖ Saved: ICICIBANK.csv with 284 records
‚

In [4]:
def add_sector_to_csvs(csv_dir='output_csvs', sector_mapping=None):
    """Add (or overwrite) a ``sector`` column in every CSV file in ``csv_dir``.

    The symbol for each file is its name without the trailing ``.csv``; the
    sector is looked up in ``sector_mapping`` and falls back to ``'Unknown'``.
    Each file is read, updated and written back in place. A failure on one
    file is reported on stdout and does not stop the remaining files.

    Args:
        csv_dir: Folder containing the per-symbol CSV files.
        sector_mapping: Mapping of symbol -> sector name. When ``None`` a
            warning is printed and nothing is modified.

    Returns:
        None.

    Raises:
        OSError: If ``csv_dir`` cannot be listed.
    """
    if sector_mapping is None:
        print("‚ö†Ô∏è No sector mapping provided.")
        return

    suffix = '.csv'

    # Sorted so processing and log order do not depend on the file system.
    for filename in sorted(os.listdir(csv_dir)):
        if not filename.endswith(suffix):
            continue

        # Strip only the trailing extension; str.replace would also remove
        # any '.csv' that happens to occur earlier in the name.
        symbol = filename[:-len(suffix)]
        filepath = os.path.join(csv_dir, filename)

        # Best effort per file: report the problem and carry on.
        try:
            df = pd.read_csv(filepath)
            df['sector'] = sector_mapping.get(symbol, 'Unknown')
            df.to_csv(filepath, index=False)
            print(f"‚úÖ Updated: {filename} with sector")
        except Exception as e:
            print(f"‚ùå Failed to update {filename}: {e}")

def main():
    """Run step 2: stamp every CSV in ``output_csvs`` with its sector.

    Note: this definition rebinds the name ``main`` that the step-1 cell also
    defines, so whichever cell ran last decides what ``main()`` does.
    """
    # Symbol -> sector lookup; extend or correct it to match your universe.
    sector_by_symbol = {
        "RELIANCE": "Energy",
        "TCS": "IT",
        "INFY": "IT",
        "HDFCBANK": "Financials",
        "ICICIBANK": "Financials",
        "SBIN": "Financials",
        "ITC": "FMCG",
        "HINDUNILVR": "FMCG",
        "LT": "Capital Goods",
        "AXISBANK": "Financials",
        "KOTAKBANK": "Financials",
        "WIPRO": "IT",
        "BHARTIARTL": "Telecom",
        "ASIANPAINT": "Consumer Durables",
        "ONGC": "Energy",
        "COALINDIA": "Energy",
        "NTPC": "Utilities",
        "POWERGRID": "Utilities",
        "ADANIENT": "Conglomerate",
        "ADANIPORTS": "Transportation",
        "CIPLA": "Pharma",
        "DRREDDY": "Pharma",
        "TATAMOTORS": "Automobile",
        "TATASTEEL": "Metals",
        "JSWSTEEL": "Metals",
        "ULTRACEMCO": "Cement",
        "BAJFINANCE": "Financials",
        "BAJAJFINSV": "Financials",
        "HCLTECH": "IT",
        "MARUTI": "Automobile",
        "HDFCLIFE": "Insurance",
        "DIVISLAB": "Pharma",
        "EICHERMOT": "Automobile",
        "BAJAJ-AUTO": "Automobile",
        "HEROMOTOCO": "Automobile",
        "HINDALCO": "Metals",
        "GRASIM": "Cement",
        "SBILIFE": "Insurance",
        "TECHM": "IT",
        "BRITANNIA": "FMCG",
        "INDUSINDBK": "Financials",
        "NESTLEIND": "FMCG",
        "TITAN": "Consumer Durables",
        "UPL": "Chemicals",
        "APOLLOHOSP": "Healthcare",
        "TRENT": "Retail",
        "TATACONSUM": "FMCG",
        "SUNPHARMA": "Pharma",
        "SHRIRAMFIN": "Financials",
        "M&M": "Automobile",
        "BPCL": "Energy",
        "BEL": "Defense",
    }

    print("üõ†Ô∏è Adding sector info to CSVs...")
    add_sector_to_csvs(csv_dir='output_csvs', sector_mapping=sector_by_symbol)
    print("üéâ Step 2 done: Sector column added.")

# Run step 2 when this cell (or the file as a script) is executed directly.
if __name__ == '__main__':
    main()


üõ†Ô∏è Adding sector info to CSVs...
‚úÖ Updated: ADANIENT.csv with sector
‚úÖ Updated: ADANIPORTS.csv with sector
‚úÖ Updated: APOLLOHOSP.csv with sector
‚úÖ Updated: ASIANPAINT.csv with sector
‚úÖ Updated: AXISBANK.csv with sector
‚úÖ Updated: BAJAJ-AUTO.csv with sector
‚úÖ Updated: BAJAJFINSV.csv with sector
‚úÖ Updated: BAJFINANCE.csv with sector
‚úÖ Updated: BEL.csv with sector
‚úÖ Updated: BHARTIARTL.csv with sector
‚úÖ Updated: BPCL.csv with sector
‚úÖ Updated: BRITANNIA.csv with sector
‚úÖ Updated: CIPLA.csv with sector
‚úÖ Updated: COALINDIA.csv with sector
‚úÖ Updated: DRREDDY.csv with sector
‚úÖ Updated: EICHERMOT.csv with sector
‚úÖ Updated: GRASIM.csv with sector
‚úÖ Updated: HCLTECH.csv with sector
‚úÖ Updated: HDFCBANK.csv with sector
‚úÖ Updated: HDFCLIFE.csv with sector
‚úÖ Updated: HEROMOTOCO.csv with sector
‚úÖ Updated: HINDALCO.csv with sector
‚úÖ Updated: HINDUNILVR.csv with sector
‚úÖ Updated: ICICIBANK.csv with sector
‚úÖ Updated: INDUSINDBK.csv with sector
‚úÖ 