In [1]:
import os
import yaml
import pandas as pd
from collections import defaultdict # Avoids KeyErrors when accessing non-existent keys and automatically initializes values for missing keys
from collections.abc import Mapping  # Ensure dictionary flattening works
import glob



In [None]:
# Root directory that is walked recursively (os.walk) to find the YAML files.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
parent_directory = r"C:\Users\v-dhramaraj\Desktop\Python\Projects\Assignment2_StockAnalysis\Data-20250430T011355Z-1-001\Data\data"

# Maps each Ticker value to the list of flattened records collected for it.
ticker_data = defaultdict(list) # defaultdict(list) creates an empty list on first access to a missing key, so records can be appended without an existence check

def singleLevel_dict(d, parent_key="", sep="_"):
    """Flatten a nested mapping into a single-level dictionary.

    Each key of the result is the path to a leaf value: the keys met on the
    way down are joined with ``sep`` (for example ``{"a": {"b": 1}}`` becomes
    ``{"a_b": 1}``). Only ``Mapping`` values are descended into; lists and
    other values are stored unchanged.

    d          -- the mapping to flatten
    parent_key -- prefix placed in front of every key (empty at the top level)
    sep        -- separator inserted between the prefix and each key
    """
    flat = {}
    for key, value in d.items():
        if parent_key:
            full_key = f"{parent_key}{sep}{key}"
        else:
            full_key = key

        if not isinstance(value, Mapping):
            flat[full_key] = value
            continue

        # Nested mapping: merge its flattened leaves under the extended prefix
        flat.update(singleLevel_dict(value, full_key, sep))
    return flat

# Walk every folder below parent_directory and group the records found in the
# YAML files by their "Ticker" value.
for root, _, files in os.walk(parent_directory):
    for filename in files:
        # str.endswith accepts a tuple, so both YAML extensions are tested in one call
        if not filename.endswith((".yml", ".yaml")):
            continue
        file_path = os.path.join(root, filename)

        # Decode explicitly as UTF-8: without an encoding, open() falls back to
        # the platform locale (e.g. cp1252 on Windows) and can mis-read the file.
        # "with" closes the file even if parsing raises.
        with open(file_path, "r", encoding="utf-8") as file:
            yaml_data = yaml.safe_load(file)

        # A document is either a list of records or one single record; anything
        # else (empty file -> None, bare scalar) carries no records and is skipped.
        if isinstance(yaml_data, list):
            entries = yaml_data
        elif isinstance(yaml_data, dict):
            entries = [yaml_data]
        else:
            entries = []

        for item in entries:
            # List elements that are not mappings (strings, numbers, None) have
            # no .get(); skip them instead of failing with AttributeError.
            if not isinstance(item, dict):
                continue
            ticker = item.get("Ticker")
            if ticker:  # Skip records without a valid ticker
                other_data = singleLevel_dict(item)  # Flatten nested data if needed
                ticker_data[ticker].append({"Source Folder": root, **other_data})

# Generate one CSV file per Ticker.
# output_folder has to be defined in this cell: with the definition commented
# out, a fresh run stops with NameError at os.path.join(output_folder, ...).
output_folder = r"C:\Users\v-dhramaraj\Desktop\Python\Projects\Assignment2_StockAnalysis\Assignment2_StockAnalysis\Nifty50_data"
os.makedirs(output_folder, exist_ok=True)  # Ensure output directory exists

for ticker, records in ticker_data.items():
    # Each record is a flat dict, so it becomes one row; the union of the keys
    # across records forms the columns.
    df = pd.DataFrame(records)
    output_file = os.path.join(output_folder, f"{ticker}.csv")
    df.to_csv(output_file, index=False)  # index=False: do not write the row index column
    print(f"Saved {output_file}")

print("Extraction complete! CSV files are generated.")


Saved C:\Users\v-dhramaraj\Desktop\Python\Projects\Assignment2_StockAnalysis\Assignment2_StockAnalysis\Nifty50_data\SBIN.csv
Saved C:\Users\v-dhramaraj\Desktop\Python\Projects\Assignment2_StockAnalysis\Assignment2_StockAnalysis\Nifty50_data\BAJFINANCE.csv
Saved C:\Users\v-dhramaraj\Desktop\Python\Projects\Assignment2_StockAnalysis\Assignment2_StockAnalysis\Nifty50_data\TITAN.csv
Saved C:\Users\v-dhramaraj\Desktop\Python\Projects\Assignment2_StockAnalysis\Assignment2_StockAnalysis\Nifty50_data\ITC.csv
Saved C:\Users\v-dhramaraj\Desktop\Python\Projects\Assignment2_StockAnalysis\Assignment2_StockAnalysis\Nifty50_data\TCS.csv
Saved C:\Users\v-dhramaraj\Desktop\Python\Projects\Assignment2_StockAnalysis\Assignment2_StockAnalysis\Nifty50_data\LT.csv
Saved C:\Users\v-dhramaraj\Desktop\Python\Projects\Assignment2_StockAnalysis\Assignment2_StockAnalysis\Nifty50_data\TATACONSUM.csv
Saved C:\Users\v-dhramaraj\Desktop\Python\Projects\Assignment2_StockAnalysis\Assignment2_StockAnalysis\Nifty50_data\