In [None]:
import pandas as pd
from datetime import datetime
import os

def merge_quarterly_data(tickers, base_path, boycotted_tickers=None):
    """
    Merge quarterly CSV files for a list of tickers.

    For every ticker the file
    ``<base_path>/<group>/<ticker>/<ticker>_quarterly.csv`` is read, where
    ``<ticker>`` is the lower-cased symbol and ``<group>`` is
    ``boycott_target`` for boycotted tickers and ``control_group`` otherwise.
    Tickers whose file does not exist are reported and skipped.

    Each loaded frame gets a ``ticker`` column (upper-cased symbol) and a
    ``boycotted`` column (1 or 0). The combined frame is written to
    ``<base_path>/merged_dataset_<YYYY-MM-DD_HH-MM>.csv`` and returned.

    Parameters:
        tickers (list): List of ticker symbols (e.g. ["COKE", "SBUX", "BROS"]).
                        A single string is treated as a one-element list.
        base_path (str): Path to your data folder (up to /data)
        boycotted_tickers (list): Optional list of tickers that are boycotted.
                                  If None, all are treated as control.
                                  Matching is case-insensitive.

    Returns:
        pandas.DataFrame: The merged data with a fresh 0..n-1 index. When an
        ``fy`` column is present the rows are sorted by ``ticker`` then ``fy``.

    Raises:
        ValueError: If none of the tickers had a CSV file on disk.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]
    if isinstance(boycotted_tickers, str):
        boycotted_tickers = [boycotted_tickers]

    # Build the lookup once; set membership keeps the per-ticker test O(1).
    boycotted_set = {t.upper() for t in (boycotted_tickers or [])}
    all_dfs = []

    for ticker in tickers:
        ticker_upper = ticker.upper()
        ticker_lower = ticker.lower()
        is_boycotted = ticker_upper in boycotted_set
        folder_type = "boycott_target" if is_boycotted else "control_group"

        file_path = os.path.join(base_path, folder_type, ticker_lower, f"{ticker_lower}_quarterly.csv")

        if not os.path.exists(file_path):
            print(f"⚠️ File not found for {ticker}: {file_path}")
            continue

        df = pd.read_csv(file_path)

        # Always stamp the metadata: a column that already exists in the file
        # is overwritten in place, a missing one is inserted at the front.
        if "ticker" not in df.columns:
            df.insert(0, "ticker", ticker_upper)
        else:
            df["ticker"] = ticker_upper

        boycott_flag = 1 if is_boycotted else 0
        if "boycotted" not in df.columns:
            df.insert(0, "boycotted", boycott_flag)
        else:
            df["boycotted"] = boycott_flag

        all_dfs.append(df)
        print(f"✅ Loaded {ticker}")

    if not all_dfs:
        raise ValueError("No valid CSV files found for the given tickers.")

    merged_df = pd.concat(all_dfs, ignore_index=True)

    # Sort if fiscal year exists
    if "fy" in merged_df.columns:
        # Sorting permutes the index labels; reset them so the returned frame
        # keeps the clean 0..n-1 index that the concat above established.
        merged_df = merged_df.sort_values(by=["ticker", "fy"]).reset_index(drop=True)

    # Save with timestamp (minute resolution: a second run within the same
    # minute writes to the same file name).
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
    output_filename = f"merged_dataset_{timestamp}.csv"
    output_path = os.path.join(base_path, output_filename)
    merged_df.to_csv(output_path, index=False)

    print(f"\n💾 Merged dataset saved to: {output_path}")
    return merged_df


# Example usage: build the merged dataset when this file is run as a script.
if __name__ == "__main__":
    # Root data folder handed to merge_quarterly_data.
    base_path = "/Users/giuliamariapetrilli/Documents/GitHub/masters_thesis/data"

    # Every ticker to load; the ones also listed in `boycotted` are flagged.
    tickers = [
        "COKE",
        "SBUX",
        "BROS",
        "CMG",
        "GIS",
        "NKE",
        "WEN",
        "SN",
    ]
    boycotted = ["COKE", "SBUX"]

    merged_df = merge_quarterly_data(
        tickers=tickers,
        base_path=base_path,
        boycotted_tickers=boycotted,
    )

    # Show the first rows as a quick check of the result.
    print(merged_df.head())
