## GREEK MERGE STRUCTURE

In [1]:
import os
import pandas as pd
from datetime import datetime

# === Config ===
source_folder = r"E:\DATA\2025-2026\GREEK TRADEBOOK"
output_folder = r"E:\DATA\2025-2026\MERGE_TRADEBOOK\MERGE_GREEK"

segments = ["FO", "EQ", "CU"]
exchanges = ["BSE", "NSE"]

# === Get all CSV files from the source folder ===
all_files = [f for f in os.listdir(source_folder) if f.lower().endswith(".csv")]

# === Extract dates from filenames ===
dates_found = set()
for file in all_files:
    parts = file.replace(".csv", "").split("_")
    if len(parts) == 3:
        segment, exchange, date_str = parts
        if segment in segments and exchange in exchanges:
            try:
                datetime.strptime(date_str, "%d%m%Y")
                dates_found.add(date_str)
            except ValueError:
                pass

# === Process files for each valid date ===
for date in sorted(dates_found):
    dfs = []
    print(f"\n📆 Processing date: {date}")
    for segment in segments:
        for exchange in exchanges:
            filename = f"{segment}_{exchange}_{date}.csv"
            file_path = os.path.join(source_folder, filename)
            if os.path.exists(file_path):
                try:
                    df = pd.read_csv(file_path, encoding="utf-8")
                    df.insert(0, "SourceFile", filename)  # Add source file column (optional)
                    dfs.append(df)
                    print(f"✅ Included: {filename}")
                except Exception as e:
                    print(f"❌ Error reading {filename}: {e}")
            else:
                print(f"⚠️ File not found: {filename}")

    if len(dfs) > 0:
        try:
            combined_df = pd.concat(dfs, ignore_index=True)
            output_filename = f"MergeGreek{date}.csv"
            output_path = os.path.join(output_folder, output_filename)
            os.makedirs(output_folder, exist_ok=True)
            combined_df.to_csv(output_path, index=False, encoding="utf-8")
            print(f"📁 Merged file saved ({len(dfs)} file(s) merged): {output_filename}")
        except Exception as e:
            print(f"❌ Error while merging or saving for {date}: {e}")
    else:
        print(f"⛔ No valid files to merge for {date}")



📆 Processing date: 01042025
⚠️ File not found: FO_BSE_01042025.csv
✅ Included: FO_NSE_01042025.csv
⚠️ File not found: EQ_BSE_01042025.csv
✅ Included: EQ_NSE_01042025.csv
⚠️ File not found: CU_BSE_01042025.csv
⚠️ File not found: CU_NSE_01042025.csv
📁 Merged file saved (2 file(s) merged): MergeGreek01042025.csv

📆 Processing date: 02042025
⚠️ File not found: FO_BSE_02042025.csv
✅ Included: FO_NSE_02042025.csv
⚠️ File not found: EQ_BSE_02042025.csv
✅ Included: EQ_NSE_02042025.csv
⚠️ File not found: CU_BSE_02042025.csv
⚠️ File not found: CU_NSE_02042025.csv
📁 Merged file saved (2 file(s) merged): MergeGreek02042025.csv

📆 Processing date: 02052025
⚠️ File not found: FO_BSE_02052025.csv
✅ Included: FO_NSE_02052025.csv
⚠️ File not found: EQ_BSE_02052025.csv
✅ Included: EQ_NSE_02052025.csv
⚠️ File not found: CU_BSE_02052025.csv
⚠️ File not found: CU_NSE_02052025.csv
📁 Merged file saved (2 file(s) merged): MergeGreek02052025.csv

📆 Processing date: 02062025
⚠️ File not found: FO_BSE_02062025.

In [10]:
import os
import pandas as pd
from datetime import datetime, timedelta

# === CONFIG ===
source_folder = r"E:\DATA\2025-2026\GREEK TRADEBOOK"
output_folder = r"E:\DATA\2025-2026\MERGE_TRADEBOOK\MERGE_GREEK"
segments = ["FO", "EQ", "CU"]
exchanges = ["BSE", "NSE"]

# === SET DATE RANGE ===
start_date_str = "10062025"  # <-- Change this to your desired start date (format: ddmmyyyy)
end_date_str = "10062025"    # <-- Change or set to None to use today's date

# === VALIDATE DATES ===
try:
    start_date = datetime.strptime(start_date_str, "%d%m%Y")
except ValueError:
    raise ValueError("❌ Invalid start date format! Use ddmmyyyy.")

if end_date_str:
    try:
        end_date = datetime.strptime(end_date_str, "%d%m%Y")
    except ValueError:
        raise ValueError("❌ Invalid end date format! Use ddmmyyyy.")
else:
    end_date = datetime.today()

if end_date < start_date:
    raise ValueError("❌ End date must be after or equal to start date.")

# === LOOP OVER EACH DATE ===
current_date = start_date
while current_date <= end_date:
    date_str = current_date.strftime("%d%m%Y")
    print(f"\n📆 Processing date: {date_str}")

    dfs = []
    for segment in segments:
        for exchange in exchanges:
            filename = f"{segment}_{exchange}_{date_str}.csv"
            file_path = os.path.join(source_folder, filename)
            if os.path.exists(file_path):
                try:
                    df = pd.read_csv(file_path, encoding="utf-8")
                    df.insert(0, "SourceFile", filename)  # Optional column for tracking source
                    dfs.append(df)
                    print(f"✅ Included: {filename}")
                except Exception as e:
                    print(f"❌ Error reading {filename}: {e}")
            else:
                print(f"⚠️ File not found: {filename}")

    # === MERGE FILES ===
    if dfs:
        try:
            combined_df = pd.concat(dfs, ignore_index=True)
            os.makedirs(output_folder, exist_ok=True)
            output_filename = f"MergeGreek{date_str}.csv"
            output_path = os.path.join(output_folder, output_filename)
            combined_df.to_csv(output_path, index=False, encoding="utf-8")
            print(f"📁 Merged file saved ({len(dfs)} file(s) merged): {output_filename}")
        except Exception as e:
            print(f"❌ Error while merging or saving for {date_str}: {e}")
    else:
        print(f"⛔ No valid files to merge for {date_str}")

    current_date += timedelta(days=1)
(f"\n⛔ No valid files to merge for {target_date}")



📆 Processing date: 10062025
✅ Included: FO_BSE_10062025.csv
✅ Included: FO_NSE_10062025.csv
⚠️ File not found: EQ_BSE_10062025.csv
✅ Included: EQ_NSE_10062025.csv
⚠️ File not found: CU_BSE_10062025.csv
⚠️ File not found: CU_NSE_10062025.csv
📁 Merged file saved (3 file(s) merged): MergeGreek10062025.csv


NameError: name 'target_date' is not defined

## Merge File Cleaning

### Manual CLeaning 

In [11]:
import os
import pandas as pd
import re
from datetime import datetime

# === Configuration ===
file_name = "MergeGreek10062025"  # ✅ Just change the filename here (no .csv)
merge_folder = r"E:\DATA\2025-2026\MERGE_TRADEBOOK\MERGE_GREEK"
input_file = os.path.join(merge_folder, f"{file_name}.csv")
output_file = os.path.join(merge_folder, f"{file_name}_.csv")
log_file = os.path.join(merge_folder, f"{file_name}_CLEANING_LOG.csv")

# === Utility Functions ===

def split_symbol(value):
    if pd.isna(value) or not isinstance(value, str):
        return pd.Series([None, None, None, None])
    match = re.match(r"([A-Z]+)(\d{5})(\d{5})([A-Z]{2})", value)
    if match:
        symbol, code, strike, opt_type = match.groups()
        return pd.Series([symbol, code, strike, opt_type])
    elif re.match(r"^[A-Z]+$", value.strip()):
        return pd.Series([value.strip(), None, None, None])
    else:
        return pd.Series([value.strip(), None, None, None])

def clean_security_type(sec_type, source_file):
    exchange = None
    security = None
    if isinstance(source_file, str):
        parts = source_file.split('_')
        if len(parts) >= 2:
            exchange = parts[1].strip()
    if pd.isna(sec_type):
        return pd.Series([exchange, None])
    if isinstance(sec_type, str):
        sec_type_clean = sec_type.replace("L,", "").strip().upper()
        if sec_type_clean in ["OPT", "BSE OPT", "NSE OPT"]:
            security = "OPTSTK"
        elif sec_type_clean in ["FUT", "BSE FUT", "NSE FUT"]:
            security = "FUTSTK"
        elif sec_type_clean in ["OPTSTK", "FUTSTK"]:
            security = sec_type_clean
    return pd.Series([exchange, security])

def unify_trade_datetime_columns(df):
    trade_datetime = pd.Series([pd.NaT] * len(df))

    # Prefer ExchangeTradeTime if present
    if 'ExchangeTradeTime' in df.columns:
        dt_exchange = pd.to_datetime(df['ExchangeTradeTime'], errors='coerce')
        trade_datetime = trade_datetime.fillna(dt_exchange)

    # Fallback: TradeDate1 + TradeTime1
    if 'TradeDate1' in df.columns and 'TradeTime1' in df.columns:
        dt1 = pd.to_datetime(
            df['TradeDate1'].astype(str) + ' ' + df['TradeTime1'].astype(str),
            errors='coerce', dayfirst=True
        )
        trade_datetime = trade_datetime.fillna(dt1)

    # Fallback: TradeDate2 + TradeTime2
    if 'TradeDate2' in df.columns and 'TradeTime2' in df.columns:
        dt2 = pd.to_datetime(
            df['TradeDate2'].astype(str) + ' ' + df['TradeTime2'].astype(str),
            errors='coerce', dayfirst=True
        )
        trade_datetime = trade_datetime.fillna(dt2)

    # Final fallback: Extract date from SourceFile (e.g., 19062025)
    if 'SourceFile' in df.columns:
        mask = trade_datetime.isna()
        fallback = df.loc[mask, 'SourceFile'].str.extract(r'(\d{2})(\d{2})(\d{4})')
        fallback_dates = pd.to_datetime(
            fallback[0] + '/' + fallback[1] + '/' + fallback[2],
            format='%d/%m/%Y', errors='coerce'
        )
        trade_datetime.loc[mask] = fallback_dates

    # Format TradeDateTime
    df['TradeDateTime'] = trade_datetime.dt.strftime('%Y-%m-%d %H:%M:%S')

    # Drop datetime source columns
    drop_cols = [
        'TradeDate1', 'TradeTime1', 'TradeDate2', 'TradeTime2',
        'ExchangeTradeTime', 'ExchangeOrderTime', 'FinalExchangeOrderTime'
    ]
    df.drop(columns=[col for col in drop_cols if col in df.columns], inplace=True)

    return df

# === Load Data ===
df = pd.read_csv(input_file, encoding='utf-8')
change_log = {'File': f"{file_name}.csv"}

# Step 1: Drop unnecessary columns
drop_cols = [col for col in df.columns if col.startswith(('Empty', 'Sample', 'Status', 'ClientCode', 'Instruction'))]
if drop_cols:
    df.drop(columns=drop_cols, inplace=True)
    change_log['Removed Columns'] = drop_cols

# Step 2: Clean Side column
if 'Side' in df.columns:
    before = df['Side'].dropna().unique().tolist()
    df['Side'] = df['Side'].astype(str).str.upper().replace({'1': 'BUY', 'B': 'BUY', '2': 'SELL', 'S': 'SELL'})
    after = df['Side'].dropna().unique().tolist()
    change_log['Side Changed'] = before != after

# Step 3: Symbol parsing
if 'Symbol' in df.columns:
    df[['CleanedSymbol', 'Code', 'ParsedStrikePrice', 'ParsedOptionType']] = df['Symbol'].apply(split_symbol)
    df['Symbol'] = df['CleanedSymbol'].combine_first(df['Symbol'])

    df['ParsedStrikePrice'] = pd.to_numeric(df['ParsedStrikePrice'], errors='coerce')
    if 'StrikePrice' not in df.columns:
        df['StrikePrice'] = df['ParsedStrikePrice']
    else:
        df['StrikePrice'] = df['StrikePrice'].where(df['StrikePrice'].notna(), df['ParsedStrikePrice'])

    if 'OptionType' not in df.columns:
        df['OptionType'] = df['ParsedOptionType']
    else:
        df['OptionType'] = df['OptionType'].where(df['OptionType'].notna(), df['ParsedOptionType'])

    df.drop(['CleanedSymbol', 'ParsedStrikePrice', 'ParsedOptionType'], axis=1, inplace=True)
    change_log['Symbol Split'] = True

# Step 4: Clean SecurityType + Exchange
if 'SourceFile' in df.columns:
    df[['Exchange', 'SecurityTypeCleaned']] = df.apply(
        lambda row: clean_security_type(row.get('SecurityType', None), row['SourceFile']), axis=1
    )
    df['SecurityType'] = df['SecurityTypeCleaned']
    df.drop(['SecurityTypeCleaned'], axis=1, inplace=True)
    change_log['SecurityType Cleaned'] = True

# Step 5: Create unified TradeDateTime
df = unify_trade_datetime_columns(df)
change_log['TradeDateTime Unified'] = True

# === Save Output ===
df.to_csv(output_file, index=False, encoding='utf-8')
log_df = pd.DataFrame([change_log])
log_df.to_csv(log_file, index=False)

print(f"✅ Cleaned data saved to: {output_file}")
print(f"📘 Cleaning log saved to: {log_file}")


✅ Cleaned data saved to: E:\DATA\2025-2026\MERGE_TRADEBOOK\MERGE_GREEK\MergeGreek10062025_.csv
📘 Cleaning log saved to: E:\DATA\2025-2026\MERGE_TRADEBOOK\MERGE_GREEK\MergeGreek10062025_CLEANING_LOG.csv


### Auto Date Structure Cleaning

In [2]:
import os
import pandas as pd
import re
from datetime import datetime, timedelta

# === Configuration ===
merge_folder = r"E:\DATA\2025-2026\MERGE_TRADEBOOK\MERGE_GREEK"
start_date = datetime(2025, 6, 30)
end_date = datetime(2025, 7, 4)
log_data = []

# === Utility Functions ===

def split_symbol(value):
    if pd.isna(value) or not isinstance(value, str):
        return pd.Series([None, None, None, None])
    match = re.match(r"([A-Z]+)(\d{5})(\d{5})([A-Z]{2})", value)
    if match:
        symbol, code, strike, opt_type = match.groups()
        return pd.Series([symbol, code, strike, opt_type])
    elif re.match(r"^[A-Z]+$", value):
        return pd.Series([value, None, None, None])
    else:
        return pd.Series([value, None, None, None])

def clean_security_type(sec_type, source_file):
    exchange = None
    security = None
    if isinstance(source_file, str):
        parts = source_file.split('_')
        if len(parts) >= 2:
            exchange = parts[1].strip()
    if pd.isna(sec_type):
        return pd.Series([exchange, None])
    if isinstance(sec_type, str):
        sec_type_clean = sec_type.replace("L,", "").strip().upper()
        if sec_type_clean in ["OPT", "BSE OPT", "NSE OPT"]:
            security = "OPTSTK"
        elif sec_type_clean in ["FUT", "BSE FUT", "NSE FUT"]:
            security = "FUTSTK"
        elif sec_type_clean in ["OPTSTK", "FUTSTK"]:
            security = sec_type_clean
        else:
            security = None
    return pd.Series([exchange, security])

def unify_trade_datetime_columns(df):
    trade_datetime = pd.Series([pd.NaT] * len(df))

    # Primary source: TradeDate1 + TradeTime1
    if 'TradeDate1' in df.columns and 'TradeTime1' in df.columns:
        dt1 = pd.to_datetime(
            df['TradeDate1'].astype(str) + ' ' + df['TradeTime1'].astype(str),
            errors='coerce', dayfirst=True
        )
        trade_datetime = trade_datetime.fillna(dt1)

    # Fallback: TradeDate2 + TradeTime2
    if 'TradeDate2' in df.columns and 'TradeTime2' in df.columns:
        dt2 = pd.to_datetime(
            df['TradeDate2'].astype(str) + ' ' + df['TradeTime2'].astype(str),
            errors='coerce', dayfirst=True
        )
        trade_datetime = trade_datetime.fillna(dt2)

    # Final fallback: ExchangeTradeTime
    if 'ExchangeTradeTime' in df.columns:
        dt3 = pd.to_datetime(df['ExchangeTradeTime'], errors='coerce')
        trade_datetime = trade_datetime.fillna(dt3)

    # Final assignment
    df['TradeDateTime'] = trade_datetime.dt.strftime('%Y-%m-%d %H:%M:%S')

    # Drop source datetime columns if present
    drop_cols = ['TradeDate1', 'TradeTime1', 'TradeDate2', 'TradeTime2', 'ExchangeTradeTime', 'ExchangeOrderTime', 'FinalExchangeOrderTime']
    df.drop(columns=[col for col in drop_cols if col in df.columns], inplace=True)

    return df

# === Main Processing Loop ===

date = start_date
while date <= end_date:
    file_name = f"MergeGreek{date.strftime('%d%m%Y')}"
    input_file = os.path.join(merge_folder, f"{file_name}.csv")
    output_file = os.path.join(merge_folder, f"{file_name}_.csv")

    if not os.path.exists(input_file):
        date += timedelta(days=1)
        continue

    df = pd.read_csv(input_file, encoding='utf-8')
    change_log = {'File': f"{file_name}.csv"}

    # Step 1: Drop empty/sample columns
    drop_cols = [col for col in df.columns if col.startswith(('Empty', 'Sample', 'Status', 'ClientCode', 'Instruction'))]
    if drop_cols:
        df.drop(columns=drop_cols, inplace=True)
        change_log['Removed Columns'] = drop_cols

    # Step 2: Clean Side column
    if 'Side' in df.columns:
        before = df['Side'].dropna().unique().tolist()
        df['Side'] = df['Side'].astype(str).str.upper().replace({'1': 'BUY', 'B': 'BUY', '2': 'SELL', 'S': 'SELL'})
        after = df['Side'].dropna().unique().tolist()
        change_log['Side Changed'] = before != after

    # Step 3: Symbol parsing
    if 'Symbol' in df.columns:
        df[['CleanedSymbol', 'Code', 'ParsedStrikePrice', 'ParsedOptionType']] = df['Symbol'].apply(split_symbol)
        df['Symbol'] = df['CleanedSymbol'].combine_first(df['Symbol'])

        df['ParsedStrikePrice'] = pd.to_numeric(df['ParsedStrikePrice'], errors='coerce')
        if 'StrikePrice' not in df.columns:
            df['StrikePrice'] = df['ParsedStrikePrice']
        else:
            df['StrikePrice'] = df['StrikePrice'].where(df['StrikePrice'].notna(), df['ParsedStrikePrice'])

        if 'OptionType' not in df.columns:
            df['OptionType'] = df['ParsedOptionType']
        else:
            df['OptionType'] = df['OptionType'].where(df['OptionType'].notna(), df['ParsedOptionType'])

        df.drop(['CleanedSymbol', 'ParsedStrikePrice', 'ParsedOptionType'], axis=1, inplace=True)
        change_log['Symbol Split'] = True

    # Step 4: Clean SecurityType + Extract Exchange
    if 'SourceFile' in df.columns:
        df[['Exchange', 'SecurityTypeCleaned']] = df.apply(
            lambda row: clean_security_type(row.get('SecurityType', None), row['SourceFile']), axis=1)
        df['SecurityType'] = df['SecurityTypeCleaned']
        df.drop(['SecurityTypeCleaned'], axis=1, inplace=True)
        change_log['SecurityType Cleaned'] = True

    # Step 5: Create unified TradeDateTime column
    df = unify_trade_datetime_columns(df)
    change_log['TradeDateTime Unified'] = True

    # Save cleaned file
    df.to_csv(output_file, index=False, encoding='utf-8')
    log_data.append(change_log)
    print(f"📄 Done: {file_name}.csv — log saved.")

    date += timedelta(days=1)

# === Save Cleaning Log ===
log_df = pd.DataFrame(log_data)
log_file = os.path.join(merge_folder, "MergeGreek_Cleaning_Log.csv")
log_df.to_csv(log_file, index=False)
print(f"📘 Combined log saved: {log_file}")


  dt3 = pd.to_datetime(df['ExchangeTradeTime'], errors='coerce')
  dt3 = pd.to_datetime(df['ExchangeTradeTime'], errors='coerce')
  dt3 = pd.to_datetime(df['ExchangeTradeTime'], errors='coerce')


📄 Done: MergeGreek30062025.csv — log saved.
📄 Done: MergeGreek01072025.csv — log saved.
📄 Done: MergeGreek02072025.csv — log saved.


  dt3 = pd.to_datetime(df['ExchangeTradeTime'], errors='coerce')
  dt3 = pd.to_datetime(df['ExchangeTradeTime'], errors='coerce')


📄 Done: MergeGreek03072025.csv — log saved.
📄 Done: MergeGreek04072025.csv — log saved.
📘 Combined log saved: E:\DATA\2025-2026\MERGE_TRADEBOOK\MERGE_GREEK\MergeGreek_Cleaning_Log.csv
