In [3]:
import pandas as pd
from sqlalchemy import create_engine
import os

# SQLAlchemy URL for the SQL Server database, spelled out piece by piece:
# dialect+driver, host\instance, database name, then the ODBC query options.
connection_string = "".join(
    [
        "mssql+pyodbc://",
        "JULIANS_LAPTOP\\SQLEXPRESS/",
        "CWA_Fund_Database",
        "?driver=ODBC+Driver+18+for+SQL+Server",
        "&trusted_connection=yes",
        "&TrustServerCertificate=yes",
    ]
)
engine = create_engine(connection_string)

# Where the generated workbook is written
output_dir = r"C:\Users\JulianHeron\Software Projects\BM_Cat_maps"
output_file = os.path.join(output_dir, "funds_categories_output.xlsx")

# All three queries share the same FROM/JOIN/WHERE skeleton: Funds_to_Screen is
# LEFT JOINed to both category lookup tables, and the WHERE clause then requires
# a non-NULL name from each lookup. The net effect is that a fund is only kept
# when both of its category IDs resolve to a named category.

# Query 1: distinct (Global_Category_Name, CWA_Broad_Category_Name) pairs
query1 = """
SELECT DISTINCT 
    yc.Global_Category_Name,
    cwa.CWA_Broad_Category_Name
FROM Funds_to_Screen f
LEFT JOIN YC_Global_Category_List yc 
    ON f.YC_Global_Category_ID = yc.ID
LEFT JOIN CWA_Broad_Category_List cwa 
    ON f.CWA_Broad_Category_ID = cwa.ID
WHERE yc.Global_Category_Name IS NOT NULL 
    AND cwa.CWA_Broad_Category_Name IS NOT NULL
"""

# Query 2: same pairs as query 1, further split by the fund's return_driver
# (return_driver itself is not filtered, so NULL is a possible value here)
query2 = """
SELECT DISTINCT 
    yc.Global_Category_Name,
    cwa.CWA_Broad_Category_Name,
    f.return_driver
FROM Funds_to_Screen f
LEFT JOIN YC_Global_Category_List yc 
    ON f.YC_Global_Category_ID = yc.ID
LEFT JOIN CWA_Broad_Category_List cwa 
    ON f.CWA_Broad_Category_ID = cwa.ID
WHERE yc.Global_Category_Name IS NOT NULL 
    AND cwa.CWA_Broad_Category_Name IS NOT NULL
"""

# Query 3: one row per matching fund (no DISTINCT), carrying its identifier and
# name. The ORDER BY keeps rows that share a category/return_driver combination
# adjacent, which the grouped "Funds_By_Category" sheet writer depends on.
query3 = """
SELECT 
    yc.Global_Category_Name,
    cwa.CWA_Broad_Category_Name,
    f.return_driver,
    f.SymbolCUSIP,
    f.ProductName
FROM Funds_to_Screen f
LEFT JOIN YC_Global_Category_List yc 
    ON f.YC_Global_Category_ID = yc.ID
LEFT JOIN CWA_Broad_Category_List cwa 
    ON f.CWA_Broad_Category_ID = cwa.ID
WHERE yc.Global_Category_Name IS NOT NULL 
    AND cwa.CWA_Broad_Category_Name IS NOT NULL
ORDER BY yc.Global_Category_Name, cwa.CWA_Broad_Category_Name, f.return_driver
"""

# Run the three queries in order and load each result set into a DataFrame
df_categories, df_categories_with_return, df_all = [
    pd.read_sql(statement, engine)
    for statement in (query1, query2, query3)
]

def _column_width(values, header, padding=2):
    """Return a worksheet column width that fits *values* and *header*.

    The width is the longer of the header text and the longest string form
    found in *values*, plus *padding* characters. For an empty column
    ``max()`` yields NaN, which would otherwise propagate into the width;
    it is treated as length 0 so the header alone decides the width.
    """
    longest = values.astype(str).map(len).max()
    if pd.isna(longest):
        longest = 0
    return max(int(longest), len(str(header))) + padding


def _autofit_columns(worksheet, frame):
    """Set each column of *worksheet* wide enough for the same column of *frame*."""
    for position, name in enumerate(frame.columns):
        worksheet.set_column(position, position, _column_width(frame[name], name))


def _write_frame_sheet(writer, frame, sheet_name):
    """Dump *frame* to its own sheet (no index column) and size the columns."""
    frame.to_excel(writer, sheet_name=sheet_name, index=False)
    _autofit_columns(writer.sheets[sheet_name], frame)


# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
    # Sheet 1: Unique category combinations
    _write_frame_sheet(writer, df_categories, 'Category_Combinations')

    # Sheet 2: Unique category combinations with return_driver
    _write_frame_sheet(writer, df_categories_with_return, 'Categories_With_Return')

    # Sheet 3: funds listed beneath a header row for each
    # (global category, broad category, return driver) combination.
    # Written cell by cell because the layout is not a plain table.
    worksheet3 = writer.book.add_worksheet('Funds_By_Category')
    writer.sheets['Funds_By_Category'] = worksheet3

    headers = ['Global_Category_Name', 'CWA_Broad_Category_Name', 'return_driver', 'SymbolCUSIP', 'ProductName']
    worksheet3.write_row(0, 0, headers)

    # Normalise every missing value to None: None is written as an empty cell
    # and compares equal to itself, whereas NaN would never match the previous
    # grouping key and is not accepted as a number by the worksheet writer.
    fund_rows = df_all[headers].astype(object)
    fund_rows = fund_rows.where(fund_rows.notna(), None)

    current_row = 1
    prev_combination = None

    # Relies on df_all arriving sorted by the three grouping columns so that
    # funds of the same combination are adjacent.
    for global_cat, broad_cat, driver, symbol, product in fund_rows.itertuples(index=False, name=None):
        current_combination = (global_cat, broad_cat, driver)

        # New combination: emit its header row (preceded by a blank spacer
        # row for every combination except the first)
        if current_combination != prev_combination:
            if prev_combination is not None:
                current_row += 1
            worksheet3.write_row(current_row, 0, current_combination)
            current_row += 1
            prev_combination = current_combination

        # Fund details go in the two right-hand columns
        worksheet3.write(current_row, 3, symbol)
        worksheet3.write(current_row, 4, product)
        current_row += 1

    # Adjust column widths
    _autofit_columns(worksheet3, df_all)

print(f"Excel file has been created at: {output_file}")

# Show the first rows of each result set under its own heading
for heading, frame in (
    ("\nSample from Category Combinations:", df_categories),
    ("\nSample from Categories with Return Driver:", df_categories_with_return),
    ("\nSample from All Data (first 5 rows):", df_all),
):
    print(heading)
    print(frame.head())

Excel file has been created at: C:\Users\JulianHeron\Software Projects\BM_Cat_maps\funds_categories_output.xlsx

Sample from Category Combinations:
        Global_Category_Name CWA_Broad_Category_Name
0      Aggressive Allocation               Strategic
1      Aggressive Allocation              Allocation
2   Allocation Miscellaneous               Strategic
3   Allocation Miscellaneous              Allocation
4  Alternative Miscellaneous          Nontraditional

Sample from Categories with Return Driver:
       Global_Category_Name CWA_Broad_Category_Name         return_driver
0     Aggressive Allocation              Allocation  active_discretionary
1     Aggressive Allocation              Allocation        multi_strategy
2     Aggressive Allocation              Allocation                  None
3     Aggressive Allocation               Strategic                  None
4  Allocation Miscellaneous              Allocation                  None

Sample from All Data (first 5 rows):
    Glob