## Total Data Merge

### Bhavcopy Merge

In [16]:
import os
import pandas as pd
from datetime import datetime

def get_file_path(exchange_type, date_str_ddmmyyyy):
    """Build the expected path of one source bhavcopy file for one trading day.

    Args:
        exchange_type: One of "BSE_FO", "NSE_FO", "NSE_CM", "BSE_CM",
            "GT_FO", "CN_FO" or "MS_FO".
        date_str_ddmmyyyy: Trading date as a ``DDMMYYYY`` string, e.g.
            "25062025". A ``datetime``/``date`` object is accepted too: this
            notebook defines ``get_file_path`` in more than one cell, and the
            date-range cell passes date objects, so whichever definition ran
            last has to serve both kinds of caller.

    Returns:
        The full path (folder plus file name) as a string. The path is only
        assembled; nothing here checks that it exists.

    Raises:
        ValueError: If the date string is not valid ``DDMMYYYY`` or the
            exchange type is not recognised.
        TypeError: If the date is neither a string nor a date-like object.
    """
    # The date is resolved before the exchange type is looked at, so a bad
    # date is reported first.
    if isinstance(date_str_ddmmyyyy, str):
        date_obj = datetime.strptime(date_str_ddmmyyyy, "%d%m%Y")
        ddmmyyyy = date_str_ddmmyyyy  # CN file names reuse the caller's exact text
    elif hasattr(date_str_ddmmyyyy, "strftime"):
        date_obj = date_str_ddmmyyyy
        ddmmyyyy = date_obj.strftime("%d%m%Y")
    else:
        raise TypeError(
            "date must be a DDMMYYYY string or a datetime/date object, "
            f"got {type(date_str_ddmmyyyy).__name__}"
        )

    yyyymmdd = date_obj.strftime("%Y%m%d")               # e.g. 20250625
    yymmdd = date_obj.strftime("%y%m%d")                 # e.g. 250625 (GT files)
    # Month names come from strftime and therefore follow the process locale.
    month_abbr = date_obj.strftime("%b_%y").upper()      # e.g. JUN_25
    month_full = date_obj.strftime("%B_%y").upper()      # e.g. JUNE_25
    month_title = date_obj.strftime("%B")                # e.g. June

    base_path = r"E:\Back-Up Files\Data Team\BSE - NSE"

    # exchange type -> (folder components below base_path, file name)
    layout = {
        "BSE_FO": (("Bhavcopy 2025", month_full, "BSE"),
                   f"BhavCopy_BSE_FO_0_0_0_{yyyymmdd}_F_0000.csv"),
        "NSE_FO": (("Bhavcopy 2025", month_full, "NSE"),
                   f"BhavCopy_NSE_FO_0_0_0_{yyyymmdd}_F_0000.csv"),
        "NSE_CM": (("Bhavcopy 2025", "CM_BHAVCOPY_25", month_abbr, "NSE_CM"),
                   f"BhavCopy_NSE_CM_0_0_0_{yyyymmdd}_F_0000.csv"),
        "BSE_CM": (("Bhavcopy 2025", "CM_BHAVCOPY_25", month_abbr, "BSE_CM"),
                   f"BhavCopy_BSE_CM_0_0_0_{yyyymmdd}_F_0000.csv"),
        "GT_FO": (("GT-Bhavcopy 2025", month_title),
                  f"G_T_Bhavcopy_FO_{yymmdd}.csv"),
        "CN_FO": (("CN-Bhavcopy 2025", month_title),
                  f"F_CN01_NSE_{ddmmyyyy}.csv"),
        "MS_FO": (("MS-Bhavcopy 2025", month_title),
                  f"MS_{yyyymmdd}_01_Symbol.csv"),
    }

    if not isinstance(exchange_type, str) or exchange_type not in layout:
        raise ValueError(f"❌ Unknown exchange type: {exchange_type}")

    folder_parts, filename = layout[exchange_type]
    return os.path.join(base_path, *folder_parts, filename)


def merge_files(date_str_ddmmyyyy, file_types=None):
    """Merge every available source file of one trading day into a single CSV.

    Each file is located with ``get_file_path``, read with ``pd.read_csv`` and
    tagged with a ``Source`` column holding its exchange type. Files whose
    folder or path does not exist, or that fail to load, are reported and
    skipped so the remaining sources are still merged.

    Args:
        date_str_ddmmyyyy: Trading date as a ``DDMMYYYY`` string.
        file_types: Exchange types to merge, in order. ``None`` selects all
            seven known types.

    Returns:
        None. When at least one non-empty file was read, the result is written
        to ``merged_bhavcopy_<date>.csv`` in the "All Files" output folder;
        otherwise nothing is written and the folder is not created.
    """
    if file_types is None:
        file_types = ["BSE_FO", "NSE_FO", "NSE_CM", "BSE_CM", "GT_FO", "CN_FO", "MS_FO"]

    # Frames are collected and concatenated once at the end; re-indexing and
    # re-concatenating the growing result for every file copies all earlier
    # rows again on each iteration.
    frames = []
    all_columns = None

    for ftype in file_types:
        print(f"\n🔍 Looking for: {ftype}")

        try:
            file_path = get_file_path(ftype, date_str_ddmmyyyy)

            folder = os.path.dirname(file_path)
            if not os.path.exists(folder):
                print(f"❌ Folder not found: {folder}")
                continue

            if not os.path.exists(file_path):
                print(f"❌ File not found for {ftype} at {file_path}")
                continue

            df = pd.read_csv(file_path)
            df["Source"] = ftype

            # Running union of column labels; this fixes the column order of
            # the final frame exactly as a per-file re-index would.
            all_columns = df.columns if all_columns is None else all_columns.union(df.columns)
            frames.append(df)
            print(f"✅ Added: {file_path}")

        except Exception as e:
            # Best effort: one unreadable source must not abort the others.
            print(f"❌ Error processing {ftype}: {e}")

    if frames:
        merged_df = pd.concat(frames, ignore_index=True).reindex(columns=all_columns)
    else:
        merged_df = pd.DataFrame()

    if merged_df.empty:
        print("\n⚠️ No valid files found. Nothing to merge.")
        return

    # The output folder is created only when there is something to save.
    output_folder = r"E:\Back-Up Files\Data Team\BSE - NSE\Merged_Bhavcopy_2025\All Files"
    os.makedirs(output_folder, exist_ok=True)
    output_file = os.path.join(output_folder, f"merged_bhavcopy_{date_str_ddmmyyyy}.csv")

    merged_df.to_csv(output_file, index=False)
    print(f"\n✅ Merged file saved to: {output_file}")


if __name__ == "__main__":
    # Trading day to merge, written as DDMMYYYY; edit before running the cell.
    date_input = "25062025"
    merge_files(date_str_ddmmyyyy=date_input)



🔍 Looking for: BSE_FO
✅ Added: E:\Back-Up Files\Data Team\BSE - NSE\Bhavcopy 2025\JUNE_25\BSE\BhavCopy_BSE_FO_0_0_0_20250625_F_0000.csv

🔍 Looking for: NSE_FO
✅ Added: E:\Back-Up Files\Data Team\BSE - NSE\Bhavcopy 2025\JUNE_25\NSE\BhavCopy_NSE_FO_0_0_0_20250625_F_0000.csv

🔍 Looking for: NSE_CM
✅ Added: E:\Back-Up Files\Data Team\BSE - NSE\Bhavcopy 2025\CM_BHAVCOPY_25\JUN_25\NSE_CM\BhavCopy_NSE_CM_0_0_0_20250625_F_0000.csv

🔍 Looking for: BSE_CM
✅ Added: E:\Back-Up Files\Data Team\BSE - NSE\Bhavcopy 2025\CM_BHAVCOPY_25\JUN_25\BSE_CM\BhavCopy_BSE_CM_0_0_0_20250625_F_0000.csv

🔍 Looking for: GT_FO
✅ Added: E:\Back-Up Files\Data Team\BSE - NSE\GT-Bhavcopy 2025\June\G_T_Bhavcopy_FO_250625.csv

🔍 Looking for: CN_FO
✅ Added: E:\Back-Up Files\Data Team\BSE - NSE\CN-Bhavcopy 2025\June\F_CN01_NSE_25062025.csv

🔍 Looking for: MS_FO
✅ Added: E:\Back-Up Files\Data Team\BSE - NSE\MS-Bhavcopy 2025\June\MS_20250625_01_Symbol.csv

✅ Merged file saved to: E:\Back-Up Files\Data Team\BSE - NSE\Merged_Bh

### For Date Range

In [None]:
import os
import pandas as pd
from datetime import datetime, timedelta

def get_file_path(exchange_type, date_obj):
    """Build the expected path of one source bhavcopy file for one trading day.

    This notebook defines ``get_file_path`` in more than one cell, and the
    definition that ran last is the one every caller gets. This version
    therefore accepts both calling styles (date object or ``DDMMYYYY`` string)
    and knows every exchange type used in the notebook.

    The folder layout follows the paths confirmed by the single-day merge
    cell's saved output for 25 June 2025: FO files under
    ``Bhavcopy 2025\\<FULL MONTH>_<YY>\\<BSE|NSE>`` and CM files under
    ``Bhavcopy 2025\\CM_BHAVCOPY_25\\<MON>_<YY>\\<NSE_CM|BSE_CM>``.
    NOTE(review): confirm that earlier months (e.g. January) use the same
    folder naming on disk.

    Args:
        exchange_type: One of "BSE_FO", "NSE_FO", "NSE_CM", "BSE_CM",
            "GT_FO", "CN_FO" or "MS_FO".
        date_obj: Trading date as a ``datetime``/``date`` object, or as a
            ``DDMMYYYY`` string.

    Returns:
        The full path (folder plus file name) as a string. The path is only
        assembled; nothing here checks that it exists.

    Raises:
        ValueError: If a date string is not valid ``DDMMYYYY`` or the exchange
            type is not recognised.
    """
    if isinstance(date_obj, str):
        ddmmyyyy = date_obj  # CN file names reuse the caller's exact text
        date_obj = datetime.strptime(date_obj, "%d%m%Y")
    else:
        ddmmyyyy = date_obj.strftime("%d%m%Y")

    yyyymmdd = date_obj.strftime("%Y%m%d")               # e.g. 20250116
    yymmdd = date_obj.strftime("%y%m%d")                 # e.g. 250116 (GT files)
    # Month names come from strftime and therefore follow the process locale.
    month_abbr = date_obj.strftime("%b_%y").upper()      # e.g. JAN_25
    month_full = date_obj.strftime("%B_%y").upper()      # e.g. JANUARY_25
    month_title = date_obj.strftime("%B")                # e.g. January

    base_path = r"E:\Back-Up Files\Data Team\BSE - NSE"

    # exchange type -> (folder components below base_path, file name)
    layout = {
        "BSE_FO": (("Bhavcopy 2025", month_full, "BSE"),
                   f"BhavCopy_BSE_FO_0_0_0_{yyyymmdd}_F_0000.csv"),
        "NSE_FO": (("Bhavcopy 2025", month_full, "NSE"),
                   f"BhavCopy_NSE_FO_0_0_0_{yyyymmdd}_F_0000.csv"),
        "NSE_CM": (("Bhavcopy 2025", "CM_BHAVCOPY_25", month_abbr, "NSE_CM"),
                   f"BhavCopy_NSE_CM_0_0_0_{yyyymmdd}_F_0000.csv"),
        "BSE_CM": (("Bhavcopy 2025", "CM_BHAVCOPY_25", month_abbr, "BSE_CM"),
                   f"BhavCopy_BSE_CM_0_0_0_{yyyymmdd}_F_0000.csv"),
        "GT_FO": (("GT-Bhavcopy 2025", month_title),
                  f"G_T_Bhavcopy_FO_{yymmdd}.csv"),
        "CN_FO": (("CN-Bhavcopy 2025", month_title),
                  f"F_CN01_NSE_{ddmmyyyy}.csv"),
        "MS_FO": (("MS-Bhavcopy 2025", month_title),
                  f"MS_{yyyymmdd}_01_Symbol.csv"),
    }

    if not isinstance(exchange_type, str) or exchange_type not in layout:
        raise ValueError("Unknown exchange type.")

    folder_parts, filename = layout[exchange_type]
    return os.path.join(base_path, *folder_parts, filename)

def merge_files_range(start_date_str, end_date_str):
    """Merge the BSE_FO, NSE_FO and NSE_CM files of every day in a date range.

    Every calendar day from the start date to the end date (both inclusive)
    is visited. Each file that exists is read, tagged with ``Source`` (the
    exchange type) and ``TradeDate`` (``DD-MM-YYYY``), and appended to the
    result. Missing files are reported and skipped.

    Args:
        start_date_str: First day of the range as a ``DDMMYYYY`` string.
        end_date_str: Last day of the range as a ``DDMMYYYY`` string. If it
            lies before the start date, no day is visited.

    Returns:
        None. When any rows were collected they are written to
        ``merged_bhavcopy_<start>_to_<end>.csv`` in the output folder;
        otherwise nothing is written and the folder is not created.

    Raises:
        ValueError: If either date string is not valid ``DDMMYYYY``.
    """
    start_date = datetime.strptime(start_date_str, "%d%m%Y")
    end_date = datetime.strptime(end_date_str, "%d%m%Y")

    file_types = ["BSE_FO", "NSE_FO", "NSE_CM"]

    # Frames are collected and concatenated once at the end; concatenating
    # onto the growing result inside the loop copies all earlier rows again
    # for every file.
    frames = []

    current_date = start_date
    while current_date <= end_date:
        trade_date = current_date.strftime("%d-%m-%Y")
        for ftype in file_types:
            file_path = get_file_path(ftype, current_date)
            print(f"Looking for file: {file_path}")
            if not os.path.exists(file_path):
                print(f"❌ File not found: {file_path}")
                continue

            print(f"✅ Found: {file_path}")
            try:
                df = pd.read_csv(file_path)
            except pd.errors.EmptyDataError:
                # A zero-byte file must not abort the rest of the range.
                print(f"⚠️ Skipped file with empty data: {file_path}")
                continue
            df["Source"] = ftype
            df["TradeDate"] = trade_date
            frames.append(df)
        current_date += timedelta(days=1)

    merged_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    if merged_df.empty:
        print("\n⚠️ No files found in the given range. No output generated.")
        return

    # The output folder is created only when there is something to save.
    output_folder = r"E:\Back-Up Files\Data Team\BSE - NSE\Merged_Bhavcopy_2025"
    os.makedirs(output_folder, exist_ok=True)
    output_file = os.path.join(output_folder, f"merged_bhavcopy_{start_date_str}_to_{end_date_str}.csv")

    merged_df.to_csv(output_file, index=False)
    print(f"\n✅ Merged file saved to: {output_file}")

# --- Entry point: merge an inclusive range of trading days ---
if __name__ == "__main__":
    # Both bounds are DDMMYYYY strings; edit them before running the cell.
    start_date_input, end_date_input = "16012025", "18012025"
    merge_files_range(start_date_str=start_date_input, end_date_str=end_date_input)


### Monthwise Bhavcopy Merge

In [1]:
import os
import pandas as pd 

# Folder that is searched (recursively) for CSV files to merge.
main_folder_path = r'E:\Back-Up Files\Data Team\BSE - NSE\BSE + NSE\BHAVCOPY\MERGED'

# Destination of the merged CSV. It sits inside the searched folder, so it is
# passed to merge_csv_files() as an exclusion; otherwise re-running this cell
# would read the previous output back in and duplicate every row.
output_file_path = os.path.join(main_folder_path, 'BHAVCOPY2025_06_06.csv')

# Accumulated result; merge_csv_files() appends to this module-level frame.
merged_df = pd.DataFrame()


def merge_csv_files(directory, exclude=()):
    """Append every CSV found under ``directory`` to the global ``merged_df``.

    The directory tree is walked with ``os.walk``. Files whose name ends in
    ``.csv`` (any letter case) are read with ``pd.read_csv``; files that are
    empty or unreadable are reported and skipped.

    Args:
        directory: Root folder to search.
        exclude: Paths that must not be merged even though they match, e.g.
            the output file of a previous run. Defaults to no exclusions.

    Returns:
        The updated ``merged_df`` (also stored in the module-level name).
    """
    global merged_df

    excluded = {os.path.normcase(os.path.abspath(path)) for path in exclude}

    # Collect first, concatenate once: concatenating onto the growing result
    # for every file copies all earlier rows again each time.
    frames = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith('.csv'):
                continue

            file_path = os.path.join(root, file)
            if os.path.normcase(os.path.abspath(file_path)) in excluded:
                print(f'Skipped excluded file: {file_path}')
                continue

            try:
                df = pd.read_csv(file_path)
            except pd.errors.EmptyDataError:
                print(f'Skipped file with empty data: {file_path}')
                continue
            except Exception as e:
                # Best effort: one unreadable file must not abort the merge.
                print(f'Error processing file {file_path}: {e}')
                continue

            if df.empty:
                print(f'Skipped empty file: {file_path}')
                continue

            frames.append(df)
            print(f'Merged: {file_path}')

    if frames:
        already_merged = [] if merged_df.empty else [merged_df]
        merged_df = pd.concat(already_merged + frames, ignore_index=True)
    return merged_df


# Merge everything under the main folder except the output of an earlier run.
merge_csv_files(main_folder_path, exclude=[output_file_path])

# Save the result; an empty result is reported instead of written as a blank file.
if merged_df.empty:
    print(f"No CSV data found under '{main_folder_path}'; nothing was written.")
else:
    merged_df.to_csv(output_file_path, index=False)
    print(f"All CSV files have been merged into '{output_file_path}'")

Merged: E:\Back-Up Files\Data Team\BSE - NSE\BSE + NSE\BHAVCOPY\MERGED\BhavCopy_BSE_FO_0_0_0_20250606_F_0000.CSV
Merged: E:\Back-Up Files\Data Team\BSE - NSE\BSE + NSE\BHAVCOPY\MERGED\BhavCopy_NSE_FO_0_0_0_20250606_F_0000.csv
All CSV files have been merged into 'E:\Back-Up Files\Data Team\BSE - NSE\BSE + NSE\BHAVCOPY\MERGED\BHAVCOPY2025_06_06.csv'


## MCX Data Merge

In [28]:
import os
import pandas as pd 

# Folder that is searched (recursively) for CSV files to merge.
main_folder_path = r'F:\DATA TEAM\MCX\Cumulative\2025\January\Jan'

# Destination of the merged CSV. It sits inside the searched folder, so it is
# passed to merge_csv_files() as an exclusion; otherwise re-running this cell
# would read the previous output back in and duplicate every row.
output_file_path = os.path.join(main_folder_path, '25_01.csv')

# Accumulated result; merge_csv_files() appends to this module-level frame.
merged_df = pd.DataFrame()


def merge_csv_files(directory, exclude=()):
    """Append every CSV found under ``directory`` to the global ``merged_df``.

    The directory tree is walked with ``os.walk``. Files whose name ends in
    ``.csv`` (any letter case) are read with ``pd.read_csv``; files that are
    empty or unreadable are reported and skipped.

    Args:
        directory: Root folder to search.
        exclude: Paths that must not be merged even though they match, e.g.
            the output file of a previous run. Defaults to no exclusions.

    Returns:
        The updated ``merged_df`` (also stored in the module-level name).
    """
    global merged_df

    excluded = {os.path.normcase(os.path.abspath(path)) for path in exclude}

    # Collect first, concatenate once: concatenating onto the growing result
    # for every file copies all earlier rows again each time.
    frames = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith('.csv'):
                continue

            file_path = os.path.join(root, file)
            if os.path.normcase(os.path.abspath(file_path)) in excluded:
                print(f'Skipped excluded file: {file_path}')
                continue

            try:
                df = pd.read_csv(file_path)
            except pd.errors.EmptyDataError:
                print(f'Skipped file with empty data: {file_path}')
                continue
            except Exception as e:
                # Best effort: one unreadable file must not abort the merge.
                print(f'Error processing file {file_path}: {e}')
                continue

            if df.empty:
                print(f'Skipped empty file: {file_path}')
                continue

            frames.append(df)
            print(f'Merged: {file_path}')

    if frames:
        already_merged = [] if merged_df.empty else [merged_df]
        merged_df = pd.concat(already_merged + frames, ignore_index=True)
    return merged_df


# Merge everything under the main folder except the output of an earlier run.
merge_csv_files(main_folder_path, exclude=[output_file_path])

# Save the result; an empty result is reported instead of written as a blank file.
if merged_df.empty:
    print(f"No CSV data found under '{main_folder_path}'; nothing was written.")
else:
    merged_df.to_csv(output_file_path, index=False)
    print(f"All CSV files have been merged into '{output_file_path}'")

Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\01012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\02012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\03012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\06012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\07012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\08012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\09012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\10012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\13012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\14012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\15012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\16012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\17012025 MCX.csv
Merged: F:\DATA TEAM\MCX\Cumulative\2025\January\Jan\20012025 MCX.csv
Merged: F:\DATA TEAM

## MCX Total Data Merge

In [30]:
import os
import pandas as pd 

# Folder that is searched (recursively) for CSV files to merge.
main_folder_path = r'F:\DATA TEAM\MCX\Cumulative\Total Data MCX'

# Destination of the merged CSV. It sits inside the searched folder, so it is
# passed to merge_csv_files() as an exclusion; otherwise re-running this cell
# would read the previous output back in and duplicate every row.
# NOTE(review): the saved output of this cell names 'Data_24-25_MCX.csv'
# (hyphen), which suggests an earlier run wrote that file here. If it still
# exists in this folder, move it out or add it to the exclude list below.
output_file_path = os.path.join(main_folder_path, 'Data_24_25_MCX.csv')

# Accumulated result; merge_csv_files() appends to this module-level frame.
merged_df = pd.DataFrame()


def merge_csv_files(directory, exclude=()):
    """Append every CSV found under ``directory`` to the global ``merged_df``.

    The directory tree is walked with ``os.walk``. Files whose name ends in
    ``.csv`` (any letter case) are read with ``pd.read_csv``; files that are
    empty or unreadable are reported and skipped.

    Args:
        directory: Root folder to search.
        exclude: Paths that must not be merged even though they match, e.g.
            the output file of a previous run. Defaults to no exclusions.

    Returns:
        The updated ``merged_df`` (also stored in the module-level name).
    """
    global merged_df

    excluded = {os.path.normcase(os.path.abspath(path)) for path in exclude}

    # Collect first, concatenate once: concatenating onto the growing result
    # for every file copies all earlier rows again each time.
    frames = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith('.csv'):
                continue

            file_path = os.path.join(root, file)
            if os.path.normcase(os.path.abspath(file_path)) in excluded:
                print(f'Skipped excluded file: {file_path}')
                continue

            try:
                df = pd.read_csv(file_path)
            except pd.errors.EmptyDataError:
                print(f'Skipped file with empty data: {file_path}')
                continue
            except Exception as e:
                # Best effort: one unreadable file must not abort the merge.
                print(f'Error processing file {file_path}: {e}')
                continue

            if df.empty:
                print(f'Skipped empty file: {file_path}')
                continue

            frames.append(df)
            print(f'Merged: {file_path}')

    if frames:
        already_merged = [] if merged_df.empty else [merged_df]
        merged_df = pd.concat(already_merged + frames, ignore_index=True)
    return merged_df


# Merge everything under the main folder except the output of an earlier run.
merge_csv_files(main_folder_path, exclude=[output_file_path])

# Save the result; an empty result is reported instead of written as a blank file.
if merged_df.empty:
    print(f"No CSV data found under '{main_folder_path}'; nothing was written.")
else:
    merged_df.to_csv(output_file_path, index=False)
    print(f"All CSV files have been merged into '{output_file_path}'")

Merged: F:\DATA TEAM\MCX\Cumulative\Total Data MCX\10.csv
Merged: F:\DATA TEAM\MCX\Cumulative\Total Data MCX\11.csv
Merged: F:\DATA TEAM\MCX\Cumulative\Total Data MCX\12.csv
Merged: F:\DATA TEAM\MCX\Cumulative\Total Data MCX\25_01.csv
Merged: F:\DATA TEAM\MCX\Cumulative\Total Data MCX\4.csv
Merged: F:\DATA TEAM\MCX\Cumulative\Total Data MCX\5.csv
Merged: F:\DATA TEAM\MCX\Cumulative\Total Data MCX\6.csv
Merged: F:\DATA TEAM\MCX\Cumulative\Total Data MCX\7.csv
Merged: F:\DATA TEAM\MCX\Cumulative\Total Data MCX\8.csv
Merged: F:\DATA TEAM\MCX\Cumulative\Total Data MCX\9.csv
All CSV files have been merged into 'F:\DATA TEAM\MCX\Cumulative\Total Data MCX\Data_24-25_MCX.csv'
