In [None]:
#Fixing funds with missing data not originally imported into database due to grok script error

import pandas as pd
from sqlalchemy import create_engine, text
from concurrent.futures import ThreadPoolExecutor, as_completed
import logging
from datetime import datetime
import json
import requests
import urllib3

# Disable InsecureRequestWarning
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='C:\\Users\\JulianHeron\\Software Projects\\Database Project Files\\funds_update_log.txt',
    filemode='w'
)
console = logging.StreamHandler()
console.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)

# Database connection
connection_string = (
    "mssql+pyodbc://JULIANS_LAPTOP\\SQLEXPRESS/"
    "CWA_Fund_Database?driver=ODBC+Driver+18+for+SQL+Server"
    "&trusted_connection=yes&TrustServerCertificate=yes"
)
engine = create_engine(
    connection_string,
    connect_args={
        'pool_size': 20,
        'max_overflow': 50,
        'pool_timeout': 300
    }
)

# Constants for YCharts API (you'll need to replace with your actual API key)
API_KEY = "yIIphqbsQysnTvWWxfW33w"  # Placeholder, use your actual API key securely
BASE_URL = "https://api.ycharts.com/v3"
HEADERS = {
    'X-YCHARTSAUTHORIZATION': API_KEY,
    'X-YCHARTSEXCELSESSION': 'b645cd897b2446bfa3796acfa3a879db',
    'X-YCHARTSEXCELVERSION': '4.4',
    'X-YCHARTSOPERATINGSYSTEM': 'Microsoft Windows NT 10.0.26100.0',
    'X-YCHARTSIP': '',
    'Host': 'api.ycharts.com',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Connection': 'Keep-Alive'
}

def construct_url(product_type, call_type, symbol, function, **kwargs):
    base_path = {
        'ETF': '/companies',
        'mutual_fund': '/mutual_funds',
        'index': '/indices'
    }.get(product_type, '/companies')
    
    if call_type == 'YCI':
        url = f"{BASE_URL}{base_path}/{symbol}/info/{function}"
    elif call_type == 'YCP':
        url = f"{BASE_URL}/excel/points"
    elif call_type == 'YCS':
        url = f"{BASE_URL}{base_path}/{symbol}/series/{function}"
    
    if kwargs:
        url += '?' + '&'.join([f"{k}={v}" for k, v in kwargs.items()])
    
    return url

def handle_response(response):
    try:
        data = response.json()
        if data['meta']['status'] != 'ok':
            error_msg = data['meta'].get('error_message', 'Unknown error')
            logging.error(f"API Error: {error_msg}")
            raise ValueError(error_msg)
        return data['response']
    except json.JSONDecodeError:
        logging.error("Invalid JSON response")
        raise ValueError("Invalid JSON response")

def get_info(product_type, symbol, function):
    url = construct_url(product_type, 'YCI', symbol, function)
    try:
        response = requests.get(url, headers=HEADERS, verify=False)
        response.raise_for_status()
        return handle_response(response)
    except requests.RequestException as e:
        logging.error(f"Request failed for {url}: {str(e)}")
        return None

def get_data_point(product_type, symbol, function):
    url = construct_url(product_type, 'YCP', symbol, function)
    data = {'points': f"{symbol},{function}"}
    try:
        response = requests.post(url, headers=HEADERS, data=data, verify=False)
        response.raise_for_status()
        result = handle_response(response)
        if result and symbol in result and function in result[symbol]['results']:
            # Extract the value safely
            try:
                return result[symbol]['results'][function]['']['results'][1]  # Assuming [date, value]
            except (KeyError, IndexError):
                # Handle case where expected structure is not present
                logging.info(f"No data for {function} for symbol {symbol}")
                return None
        else:
            logging.info(f"No data for {function} for symbol {symbol}")
            return None
    except requests.RequestException as e:
        logging.error(f"Request failed for {url}: {str(e)}")
        return None

def get_fund_type(symbol):
    with engine.connect() as conn:
        result = conn.execute(text("SELECT Fund_Type_ID FROM Funds_to_Screen WHERE SymbolCUSIP = :symbol"), {'symbol': symbol}).fetchone()
        if result:
            fund_type_id = result[0]
            return 'ETF' if fund_type_id == 2 else 'mutual_fund' if fund_type_id == 3 else 'unknown'
        else:
            # Fallback based on symbol prefix if not in the database
            return 'mutual_fund' if symbol.startswith('M:') else 'ETF'

def update_fund_data(symbol):
    fund_type = get_fund_type(symbol)
    if fund_type == 'unknown':
        logging.warning(f"Unknown fund type for {symbol}, skipping.")
        return

    with engine.connect() as conn:
        # Check if the symbol already exists with data
        check = conn.execute(text("SELECT COUNT(*) FROM Funds_to_Screen WHERE SymbolCUSIP = :symbol AND InceptionDate IS NOT NULL"), {'symbol': symbol}).fetchone()
        if check[0] > 0:
            logging.info(f"Symbol {symbol} already has data, skipping update.")
            return

        try:
            # Fetch all relevant data from YCharts first
            raw_data = {
                'InceptionDate': get_info(fund_type, symbol, 'inception_date'),
                'EarliestPerformanceDate': get_info(fund_type, symbol, 'earliest_performance_date'),
                'LCG': get_data_point(fund_type, symbol, 'equity_stylebox_large_cap_growth_exposure'),
                'LCB': get_data_point(fund_type, symbol, 'equity_stylebox_large_cap_blend_exposure'),
                'LCV': get_data_point(fund_type, symbol, 'equity_stylebox_large_cap_value_exposure'),
                'MCG': get_data_point(fund_type, symbol, 'equity_stylebox_mid_cap_growth_exposure'),
                'MCB': get_data_point(fund_type, symbol, 'equity_stylebox_mid_cap_blend_exposure'),
                'MCV': get_data_point(fund_type, symbol, 'equity_stylebox_mid_cap_value_exposure'),
                'SCG': get_data_point(fund_type, symbol, 'equity_stylebox_small_cap_growth_exposure'),
                'SCB': get_data_point(fund_type, symbol, 'equity_stylebox_small_cap_blend_exposure'),
                'SCV': get_data_point(fund_type, symbol, 'equity_stylebox_small_cap_value_exposure'),
                'Giant_Cap_Exposure': get_data_point(fund_type, symbol, 'giant_cap_exposure'),
                'Large_Cap_Exposure': get_data_point(fund_type, symbol, 'large_cap_exposure'),
                'Medium_Cap_Exposure': get_data_point(fund_type, symbol, 'medium_cap_exposure'),
                'Micro_Cap_Exposure': get_data_point(fund_type, symbol, 'micro_cap_exposure'),
                'Sensitive_Exposure': get_data_point(fund_type, symbol, 'sensitive_exposure'),
                'Small_Cap_Exposure': get_data_point(fund_type, symbol, 'small_cap_exposure'),
                'Cyclical_Exposure': get_data_point(fund_type, symbol, 'cyclical_exposure'),
                'Defensive_Exposure': get_data_point(fund_type, symbol, 'defensive_exposure'),
                'AAA_Rated': get_data_point(fund_type, symbol, 'aaa_bond_exposure'),
                'AA_Rated': get_data_point(fund_type, symbol, 'aa_bond_exposure'),
                'A_Rated': get_data_point(fund_type, symbol, 'a_bond_exposure'),
                'BBB_Rated': get_data_point(fund_type, symbol, 'bbb_bond_exposure'),
                'B_Rated': get_data_point(fund_type, symbol, 'b_bond_exposure'),
                'BB_Rated': get_data_point(fund_type, symbol, 'bb_bond_exposure'),
                'Below_B_Rated': get_data_point(fund_type, symbol, 'below_b_bond_exposure'),
                'Not_Rated': get_data_point(fund_type, symbol, 'not_rated_bond_exposure'),
                'Term_10_15_Y': get_data_point(fund_type, symbol, '10_to_15_years_maturity_bond_exposure'),
                'Term_1_3_Y': get_data_point(fund_type, symbol, '1_to_3_years_maturity_bond_exposure'),
                'Term_15_20_Y': get_data_point(fund_type, symbol, '15_to_20_years_maturity_bond_exposure'),
                'Term_1_7_D': get_data_point(fund_type, symbol, '1_to_7_days_maturity_bond_exposure'),
                'Term_183_364_D': get_data_point(fund_type, symbol, '183_to_364_days_maturity_bond_exposure'),
                'Term_20_30_Y': get_data_point(fund_type, symbol, '20_to_30_years_maturity_bond_exposure'),
                'Term_30_Plus_Y': get_data_point(fund_type, symbol, 'over_30_years_maturity_bond_exposure'),
                'Term_31_90_D': get_data_point(fund_type, symbol, '31_to_90_days_maturity_bond_exposure'),
                'Term_3_5_Y': get_data_point(fund_type, symbol, '3_to_5_years_maturity_bond_exposure'),
                'Term_5_7_Y': get_data_point(fund_type, symbol, '5_to_7_years_maturity_bond_exposure'),
                'Term_7_10_Y': get_data_point(fund_type, symbol, '7_to_10_years_maturity_bond_exposure'),
                'Term_8_30_D': get_data_point(fund_type, symbol, '8_to_30_days_maturity_bond_exposure'),
                'Term_91_182_D': get_data_point(fund_type, symbol, '91_to_182_days_maturity_bond_exposure'),
                'LT_Exposure': get_data_point(fund_type, symbol, 'long_term_exposure'),
                'Interm_Exposure': get_data_point(fund_type, symbol, 'intermediate_term_exposure'),
                'ST_Exposure': get_data_point(fund_type, symbol, 'short_term_exposure'),
                'aum_usd': get_info(fund_type, symbol, 'aum_usd'),
                'average_manager_tenure': get_data_point(fund_type, symbol, 'average_manager_tenure'),
                'max_manager_tenure': get_data_point(fund_type, symbol, 'max_manager_tenure'),
                'median_manager_tenure': get_data_point(fund_type, symbol, 'median_manager_tenure'),
                'min_manager_tenure': get_data_point(fund_type, symbol, 'min_manager_tenure'),
                'index_fund': get_info(fund_type, symbol, 'index_fund'),
                'inverse_fund': get_info(fund_type, symbol, 'inverse_fund'),
                'leveraged_fund': get_info(fund_type, symbol, 'leveraged_fund'),
                'socially_responsible_fund': get_info(fund_type, symbol, 'socially_responsible_fund'),
                'synthetic_replication_fund': get_info(fund_type, symbol, 'synthetic_replication_fund'),
                'open_to_existing_investors': get_info(fund_type, symbol, 'open_to_existing_investors'),
                'open_to_new_investors': get_info(fund_type, symbol, 'open_to_new_investors'),
                'ycharts_url': get_info(fund_type, symbol, 'ycharts_url'),
                'years_since_inception': get_data_point(fund_type, symbol, 'years_since_inception'),
                'investment_strategy': get_info(fund_type, symbol, 'investment_strategy'),
                'related_securities': get_info(fund_type, symbol, 'related_securities'),
                'fund_family': get_info(fund_type, symbol, 'fund_family'),
                'fund_of_funds': get_info(fund_type, symbol, 'fund_of_funds'),
            }
            
            # Now calculate fields based on the fetched data
            data = raw_data.copy()
            data.update({
                'Inv_Grade': sum(filter(None, [raw_data.get('AAA_Rated'), raw_data.get('AA_Rated'), raw_data.get('A_Rated'), raw_data.get('BBB_Rated')])),
                'NINV_Grade': sum(filter(None, [raw_data.get('B_Rated'), raw_data.get('BB_Rated'), raw_data.get('Below_B_Rated'), raw_data.get('Not_Rated')])),
                'Growth_Exposure': sum(filter(None, [raw_data.get('LCG'), raw_data.get('MCG'), raw_data.get('SCG'), raw_data.get('LCB', 0)/2, raw_data.get('MCB', 0)/2, raw_data.get('SCB', 0)/2])),
                'Value_Exposure': sum(filter(None, [raw_data.get('LCV'), raw_data.get('MCV'), raw_data.get('SCV'), raw_data.get('LCB', 0)/2, raw_data.get('MCB', 0)/2, raw_data.get('SCB', 0)/2])),
            })

            # Handle null values for database insertion
            for key in data:
                if data[key] is None:
                    data[key] = None  # SQLAlchemy or pyodbc will handle this as NULL in SQL
                elif isinstance(data[key], dict):
                    data[key] = data[key].get('data')
                elif isinstance(data[key], list):
                    data[key] = json.dumps(data[key])  # Convert list to JSON string for storage
                # Convert boolean to 'true'/'false' for database storage
                if key in ['index_fund', 'inverse_fund', 'leveraged_fund', 'socially_responsible_fund', 'synthetic_replication_fund', 'open_to_existing_investors', 'open_to_new_investors']:
                    data[key] = 'true' if data[key] else 'false'

            # Convert dates to SQL compatible format
            for key in ['InceptionDate', 'EarliestPerformanceDate']:
                if data[key] is not None:
                    if isinstance(data[key], str):
                        try:
                            data[key] = datetime.strptime(data[key], '%Y-%m-%d').date()
                        except ValueError:
                            data[key] = None
                    else:
                        data[key] = None  # If it's not a string, assume it's not a valid date

            # SQL Update query (for new or incomplete records)
            update_fields = ', '.join([f"{col} = :{col}" for col in data.keys()])
            query = text(f"""
            UPDATE Funds_to_Screen
            SET 
                {update_fields}
            WHERE SymbolCUSIP = :symbol
            """)
            
            conn.execute(query, **data, symbol=symbol)
            conn.commit()
            logging.info(f"Updated data for {symbol}.")

        except Exception as e:
            logging.error(f"Error updating data for {symbol}: {str(e)}")
            conn.rollback()

def main():
    # Read symbols from Excel file
    df = pd.read_excel('C:\\Users\\JulianHeron\\blank_YC_BM_Symbols.xlsx', usecols=[0], skiprows=1, names=['Symbol'])
    symbols = df['Symbol'].tolist()
    
    with ThreadPoolExecutor(max_workers=20) as executor:
        futures = {executor.submit(update_fund_data, symbol): symbol for symbol in symbols}
        for future in as_completed(futures):
            symbol = futures[future]
            try:
                future.result()
            except Exception as e:
                logging.error(f"Thread for {symbol} raised an exception: {str(e)}")

if __name__ == "__main__":
    main()

2025-02-13 14:05:11,905 - ERROR - Error updating data for TWIO: 'results'


In [5]:
import pandas as pd
import sqlalchemy
from sqlalchemy import text
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import json
from tenacity import retry, stop_after_attempt, wait_exponential
import ssl
import certifi
from requests.adapters import HTTPAdapter
import requests

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[
    logging.StreamHandler()  # Log to console
])
logger = logging.getLogger(__name__)

# Database Connection Configuration
connection_string = (
    "mssql+pyodbc://JULIANS_LAPTOP\\SQLEXPRESS/"
    "CWA_Fund_Database?driver=ODBC+Driver+18+for+SQL+Server"
    "&trusted_connection=yes&TrustServerCertificate=yes"
)
engine = sqlalchemy.create_engine(
    connection_string,
    pool_size=50,  # Increased pool size to avoid timeouts
    max_overflow=100,  # Adjust based on your system capabilities
    pool_timeout=300  # Increased timeout to handle long-running queries
)

# YCharts API Configuration
YCHARTS_API_URL = "https://api.ycharts.com"
API_HEADERS = {
    "X-YCHARTSAUTHORIZATION": "yIIphqbsQysnTvWWxfW33w",  # Replace with your actual API key
    "X-YCHARTSEXCELSESSION": "b645cd897b2446bfa3796acfa3a879db",
    "X-YCHARTSEXCELVERSION": "4.4",
    "X-YCHARTSOPERATINGSYSTEM": "Microsoft Windows NT 10.0.26100.0",
    "Content-Type": "application/x-www-form-urlencoded"
}

# SSL Context for HTTPS connections
context = ssl.create_default_context(cafile=certifi.where())
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE  # Use CERT_REQUIRED in production

class CustomHTTPAdapter(HTTPAdapter):
    def init_poolmanager(self, *args, **kwargs):
        kwargs['ssl_context'] = context
        return super().init_poolmanager(*args, **kwargs)

session = requests.Session()
session.mount('https://', CustomHTTPAdapter())

# Metrics mapping 
column_to_ycharts_mapping = [
    ("FundID", None, None),  # No direct YCharts function for FundID
    ("ProductName", None, None),  # No direct YCharts function for ProductName
    ("SymbolCUSIP", None, None),  # No direct YCharts function for SymbolCUSIP
    ("ETFComplianceCategory", None, None),  # No direct YCharts function for ETFComplianceCategory
    ("ExpenseRatio", None, None), #Data mapped from custodian, do not use Ycharts data here
    ("Fund_Type_ID", None, None),  # Assuming this is an internal ID, not from YCharts
    ("YC_Broad_Category_ID", None, None),  # Likely internal, not from YCharts
    ("YC_Broad_Asset_Class_ID", None, None),  # Likely internal, not from YCharts
    ("YC_Global_Category_ID", None, None),  # Likely internal, not from YCharts
    ("YC_Category_ID", None, None),  # Likely internal, not from YCharts
    ("CWA_Broad_Category_ID", None, None),  # Likely internal, not from YCharts
    ("ScreeningStatus", None, None),  # No direct YCharts function for ScreeningStatus
    ("LastScreenedDate", None, None),  # No direct YCharts function for LastScreenedDate
    ("InceptionDate", "inception_date", "YCI"),
    ("EarliestPerformanceDate", "earliest_performance_date", "YCI"),
    ("LCG", "equity_stylebox_large_cap_growth_exposure", "YCP"),
    ("LCB", "equity_stylebox_large_cap_blend_exposure", "YCP"),
    ("LCV", "equity_stylebox_large_cap_value_exposure", "YCP"),
    ("MCG", "equity_stylebox_mid_cap_growth_exposure", "YCP"),
    ("MCB", "equity_stylebox_mid_cap_blend_exposure", "YCP"),
    ("MCV", "equity_stylebox_mid_cap_value_exposure", "YCP"),
    ("SCG", "equity_stylebox_small_cap_growth_exposure", "YCP"),
    ("SCB", "equity_stylebox_small_cap_blend_exposure", "YCP"),
    ("SCV", "equity_stylebox_small_cap_value_exposure", "YCP"),
    ("Giant_Cap_Exposure", "giant_cap_exposure", "YCP"),
    ("Large_Cap_Exposure", "large_cap_exposure", "YCP"),
    ("Medium_Cap_Exposure", "medium_cap_exposure", "YCP"),
    ("Micro_Cap_Exposure", "micro_cap_exposure", "YCP"),
    ("Sensitive_Exposure", "sensitive_exposure", "YCP"),
    ("Small_Cap_Exposure", "small_cap_exposure", "YCP"),
    ("Cyclical_Exposure", "cyclical_exposure", "YCP"),
    ("Defensive_Exposure", "defensive_exposure", "YCP"),
    ("AAA_Rated", "aaa_bond_exposure", "YCP"),
    ("AA_Rated", "aa_bond_exposure", "YCP"),
    ("A_Rated", "a_bond_exposure", "YCP"),
    ("BBB_Rated", "bbb_bond_exposure", "YCP"),
    ("B_Rated", "b_bond_exposure", "YCP"),
    ("BB_Rated", "bb_bond_exposure", "YCP"),
    ("Below_B_Rated", "below_b_bond_exposure", "YCP"),
    ("Not_Rated", "not_rated_bond_exposure", "YCP"),
    ("Term_10_15_Y", "10_to_15_years_maturity_bond_exposure", "YCP"),
    ("Term_1_3_Y", "1_to_3_years_maturity_bond_exposure", "YCP"),
    ("Term_15_20_Y", "15_to_20_years_maturity_bond_exposure", "YCP"),
    ("Term_1_7_D", "1_to_7_days_maturity_bond_exposure", "YCP"),
    ("Term_183_364_D", "183_to_364_days_maturity_bond_exposure", "YCP"),
    ("Term_20_30_Y", "20_to_30_years_maturity_bond_exposure", "YCP"),
    ("Term_30_Plus_Y", "over_30_years_maturity_bond_exposure", "YCP"),
    ("Term_31_90_D", "31_to_90_days_maturity_bond_exposure", "YCP"),
    ("Term_3_5_Y", "3_to_5_years_maturity_bond_exposure", "YCP"),
    ("Term_5_7_Y", "5_to_7_years_maturity_bond_exposure", "YCP"),
    ("Term_7_10_Y", "7_to_10_years_maturity_bond_exposure", "YCP"),
    ("Term_8_30_D", "8_to_30_days_maturity_bond_exposure", "YCP"),
    ("Term_91_182_D", "91_to_182_days_maturity_bond_exposure", "YCP"),
    ("LT_Exposure", "long_term_exposure", "YCP"),
    ("Interm_Exposure", "intermediate_term_exposure", "YCP"),
    ("ST_Exposure", "short_term_exposure", "YCP"),
    ("Inv_Grade", None, None),  # Calculated field, no direct YCharts function
    ("NINV_Grade", None, None),  # Calculated field, no direct YCharts function
    ("Growth_Exposure", None, None),  # Calculated field, no direct YCharts function
    ("Value_Exposure", None, None),  # Calculated field, no direct YCharts function
    ("pct_top_10", "percent_of_assets_in_top_10_holdings", "YCP"),
    ("pct_top_25", "percent_of_assets_in_top_25_holdings", "YCP"),
    ("materials", "basic_materials_exposure", "YCP"),
    ("comsvc", "communication_services_exposure", "YCP"),
    ("concyc", "consumer_cyclical_exposure", "YCP"),
    ("condef", "consumer_defensive_exposure", "YCP"),
    ("energy", "energy_exposure", "YCP"),
    ("finsvc", "financial_services_exposure", "YCP"),
    ("healthcare", "healthcare_exposure", "YCP"),
    ("industrials", "industrials_exposure", "YCP"),
    ("realestate", "real_estate_exposure", "YCP"),
    ("technology", "technology_exposure", "YCP"),
    ("utilities", "utilities_exposure", "YCP"),
    ("relative_composition", "relative_composition", "YCP"),
    ("num_bonds", "number_of_bond_holdings", "YCP"),
    ("num_stocks", "number_of_stock_holdings", "YCP"),
    ("dev_mkt", "developed_market_exposure", "YCP"),
    ("em_mkt", "emerging_market_exposure", "YCP"),
    ("europe", "greater_europe_total_exposure", "YCP"),
    ("lat_america", "latin_america_total_exposure", "YCP"),
    ("namerica", "north_america_total_exposure", "YCP"),
    ("africa_mideast", "africa_middle_east_total_exposure", "YCP"),
    ("asia_em", "asia_emerging_total_exposure", "YCP"),
    ("asia_dev", "asia_developed_total_exposure", "YCP"),
    ("australia", "australasia_total_exposure", "YCP"),
    ("india", "india_total_exposure", "YCP"),
    ("w_avg_dc", "weighted_average_debt_to_capital", "YCP"),
    ("avg_mkt_cap", "average_market_cap", "YCP"),
    ("avg_pe_ratio", "weighted_average_pe_ratio", "YCP"),
    ("avg_pb_ratio", "weighted_average_price_to_book_ratio", "YCP"),
    ("avg_pcf_ratio", "weighted_average_price_to_cash_flow", "YCP"),
    ("avg_ps_ratio", "weighted_average_price_to_sales_ratio", "YCP"),
    ("avg_net_margin", "weighted_median_net_margin", "YCP"),
    ("avg_roa", "weighted_median_return_on_assets", "YCP"),
    ("avg_roe", "weighted_median_return_on_equity", "YCP"),
    ("preferreds", "preferred_stock_exposure", "YCP"),
    ("securitized", "securitized_fixed_income_exposure", "YCP"),
    ("muni_exempt", "municipal_tax_exempt_bond_exposure", "YCP"),
    ("muni_tax", "municipal_taxable_bond_exposure", "YCP"),
    ("muni_exposure", "municipal_fixed_income_exposure", "YCP"),
    ("corp_exposure", "corporate_bond_exposure", "YCP"),
    ("corp_fi", "corporate_fixed_income_exposure", "YCP"),
    ("govi_exp", "government_bond_exposure", "YCP"),
    ("govi_fi", "government_fixed_income_exposure", "YCP"),
    ("covered_bond", "covered_bond_exposure", "YCP"),
    ("gov_bond", "government_related_bond_exposure", "YCP"),
    ("converts", "convertible_bond_exposure", "YCP"),
    ("asset_backed_sec", "asset_backed_security_exposure", "YCP"),
    ("index_fund", "index_fund", "YCI"),
    ("inverse_fund", "inverse_fund", "YCI"),
    ("leveraged_fund", "leveraged_fund", "YCI"),
    ("socially_responsible_fund", "socially_responsible_fund", "YCI"),
    ("synthetic_replication_fund", "synthetic_replication_fund", "YCI"),
    ("aum_usd", "aum_usd", "YCI"),
    ("average_manager_tenure", "average_manager_tenure", "YCP"),
    ("max_manager_tenure", "max_manager_tenure", "YCP"),
    ("median_manager_tenure", "median_manager_tenure", "YCP"),
    ("min_manager_tenure", "min_manager_tenure", "YCP"),
    ("open_to_existing_investors", "open_to_existing_investors", "YCI"),
    ("open_to_new_investors", "open_to_new_investors", "YCI"),
    ("ycharts_url", "ycharts_url", "YCI"),
    ("years_since_inception", "years_since_inception", "YCP"),
    ("investment_strategy", "investment_strategy", "YCI"),
    ("related_securities", "related_securities", "YCI"), #returns both symbols and names, only need symbols
    ("fund_family", "fund_family", "YCI")
]

# Extract metrics that should be fetched
METRICS = [(col, code, api) for col, code, api in column_to_ycharts_mapping if api in ["YCP", "YCI"]]

# YCP Functions
def build_points_lines(symbol, yc_code, is_mutual):
    symbol_for_api = f"M:{symbol}" if is_mutual else symbol
    return f"{symbol_for_api},{yc_code}"

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def fetch_ycp_data_point(symbol, metric, fund_type_id):
    is_mutual = (fund_type_id == 3)
    for col, yc_code, api_type in METRICS:
        if col == metric and api_type == "YCP":
            points_line = build_points_lines(symbol, yc_code, is_mutual)
            payload = f"points={points_line}"
            api_url = f"{YCHARTS_API_URL}/v3/excel/points"
            try:
                response = session.post(api_url, headers=API_HEADERS, data=payload, timeout=60)
                response.raise_for_status()
                data = response.json()
                symbol_for_api = f"M:{symbol}" if is_mutual else symbol
                if 'response' in data and symbol_for_api in data['response']:
                    results = data['response'][symbol_for_api].get("results", {})
                    if yc_code in results:
                        datapoints = results[yc_code].get("data")
                        if datapoints and isinstance(datapoints, list):
                            last_point = datapoints[-1]
                            if isinstance(last_point, list) and len(last_point) > 1:
                                return last_point[1]
                logger.warning(f"No valid data returned for symbol '{symbol}' on metric '{metric}'")
                return None
            except requests.exceptions.RequestException as e:
                logger.error(f"API error for symbol '{symbol}' on metric '{metric}': {e}")
                return None
    return None

# YCI Functions
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def fetch_yci_metric(symbol, fund_type_id, metric):
    for col, yc_code, api_type in METRICS:
        if col == metric and api_type == "YCI":
            if fund_type_id == 3:
                symbol_for_api = f"M:{symbol}"
                endpoint = "mutual_funds"
            else:
                symbol_for_api = symbol
                endpoint = "companies"
            api_url = f"{YCHARTS_API_URL}/v3/{endpoint}/{symbol_for_api}/info/{yc_code}?retrieve_ttl=true"
            logger.info(f"Fetching {metric} for {symbol} using URL: {api_url}")
            try:
                response = session.get(api_url, headers=API_HEADERS, timeout=30)
                response.raise_for_status()
                data = response.json()
                response_key = f"M:{symbol}" if fund_type_id == 3 else symbol
                metric_value = data.get("response", {}).get(response_key, {}).get("results", {}).get(yc_code, {}).get("data")
                
                if metric == "related_securities" and isinstance(metric_value, list):
                    metric_value = [sec.get('security_id') for sec in metric_value if isinstance(sec, dict) and 'security_id' in sec]
                return metric_value
            except Exception as e:
                logger.error(f"Error fetching {metric} for {symbol}: {e}")
                return None
    return None

def process_fund(row):
    symbol, fund_type_id = row['Symbol'], row['Fund_Type_ID']
    data = {"symbol": symbol}
    
    # Check if symbol exists in DB
    with engine.connect() as conn:
        result = conn.execute(text("SELECT 1 FROM Funds_to_Screen WHERE SymbolCUSIP = :symbol"), {'symbol': symbol}).fetchone()
    
    if result:  # If exists, we might want to skip or update based on your requirements
        logger.info(f"Symbol {symbol} already exists in DB, skipping insert.")
        return {'symbol': symbol, 'status': 'exists'}  # or update code here
    
    for metric, _, api_type in METRICS:
        if api_type == "YCP":
            data[metric] = fetch_ycp_data_point(symbol, metric, fund_type_id)
        elif api_type == "YCI":
            data[metric] = fetch_yci_metric(symbol, fund_type_id, metric)
    
    # Prepare data for DB insertion, convert lists to JSON for 'related_securities'
    data = {k: json.dumps(v) if k == 'related_securities' and isinstance(v, list) else v for k, v in data.items()}
    
    # Insert or update in DB
    try:
        with engine.begin() as conn:
            columns = ', '.join(data.keys())
            placeholders = ', '.join(':' + k for k in data.keys())
            insert_sql = f"INSERT INTO Funds_to_Screen ({columns}) VALUES ({placeholders})"
            conn.execute(text(insert_sql), data)
        return {'symbol': symbol, 'status': 'inserted'}
    except Exception as e:
        logger.error(f"Database error for {symbol}: {e}")
        return {'symbol': symbol, 'status': 'error'}

def main():
    start_time = time.time()
    logger.info("Starting data fetch and insertion process.")

    # Load symbols from Excel - now using the actual headers from the Excel file
    df = pd.read_excel('C:\\Users\\JulianHeron\\fund_type_and_symbol.xlsx', usecols=["SymbolCUSIP", "Fund_Type_ID"])
    
    processed_count = 0
    error_count = 0
    with ThreadPoolExecutor(max_workers=20) as executor:
        futures = {executor.submit(process_fund, row): row for _, row in df.iterrows()}
        
        for future in as_completed(futures):
            try:
                result = future.result()
                if result['status'] == 'inserted':
                    processed_count += 1
                elif result['status'] == 'error':
                    error_count += 1
                logger.info(f"Processed {result['symbol']}: {result['status']}")
            except Exception as e:
                error_count += 1
                logger.error(f"Exception in processing {futures[future]['SymbolCUSIP']}: {e}")
    
    # Call the stored procedure here after all processing is done
    try:
        with engine.begin() as conn:
            conn.execute(text("EXEC UpdateDerivedMetrics"))
        logger.info("Successfully executed UpdateDerivedMetrics stored procedure.")
    except Exception as e:
        logger.error(f"Failed to execute UpdateDerivedMetrics stored procedure: {e}")

    total_time = time.time() - start_time
    logger.info(f"Completed processing. Processed: {processed_count}, Errors: {error_count}, Total Time: {total_time:.2f} seconds")

if __name__ == "__main__":
    main()

2025-02-13 15:59:20,180 - INFO - Starting data fetch and insertion process.
2025-02-13 15:59:20,572 - ERROR - Exception in processing ATHIX: 'Symbol'
2025-02-13 15:59:20,578 - ERROR - Exception in processing MRGR: 'Symbol'
2025-02-13 15:59:20,579 - ERROR - Exception in processing DEEF: 'Symbol'
2025-02-13 15:59:20,579 - ERROR - Exception in processing RAYJ: 'Symbol'
2025-02-13 15:59:20,581 - ERROR - Exception in processing CXSE: 'Symbol'
2025-02-13 15:59:20,582 - ERROR - Exception in processing PLDR: 'Symbol'
2025-02-13 15:59:20,582 - ERROR - Exception in processing CRFIX: 'Symbol'
2025-02-13 15:59:20,583 - ERROR - Exception in processing SMCF: 'Symbol'
2025-02-13 15:59:20,583 - ERROR - Exception in processing OILT: 'Symbol'
2025-02-13 15:59:20,583 - ERROR - Exception in processing TWGIX: 'Symbol'
2025-02-13 15:59:20,584 - ERROR - Exception in processing ARB: 'Symbol'
2025-02-13 15:59:20,584 - ERROR - Exception in processing GSIE: 'Symbol'
2025-02-13 15:59:20,586 - ERROR - Exception in

In [None]:
import pandas as pd
import sqlalchemy
from sqlalchemy import text
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import json
from tenacity import retry, stop_after_attempt, wait_exponential
import ssl
import certifi
from requests.adapters import HTTPAdapter
import requests

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[
    logging.StreamHandler()  # Log to console
])
logger = logging.getLogger(__name__)

# Database Connection Configuration
connection_string = (
    "mssql+pyodbc://JULIANS_LAPTOP\\SQLEXPRESS/"
    "CWA_Fund_Database?driver=ODBC+Driver+18+for+SQL+Server"
    "&trusted_connection=yes&TrustServerCertificate=yes"
)
engine = sqlalchemy.create_engine(
    connection_string,
    pool_size=50,  # Increased pool size to avoid timeouts
    max_overflow=100,  # Adjust based on your system capabilities
    pool_timeout=300  # Increased timeout to handle long-running queries
)

# YCharts API Configuration
YCHARTS_API_URL = "https://api.ycharts.com"
API_HEADERS = {
    "X-YCHARTSAUTHORIZATION": "yIIphqbsQysnTvWWxfW33w",  # Replace with your actual API key
    "X-YCHARTSEXCELSESSION": "b645cd897b2446bfa3796acfa3a879db",
    "X-YCHARTSEXCELVERSION": "4.4",
    "X-YCHARTSOPERATINGSYSTEM": "Microsoft Windows NT 10.0.26100.0",
    "Content-Type": "application/x-www-form-urlencoded"
}

# SSL Context for HTTPS connections
context = ssl.create_default_context(cafile=certifi.where())
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE  # Use CERT_REQUIRED in production

class CustomHTTPAdapter(HTTPAdapter):
    def init_poolmanager(self, *args, **kwargs):
        kwargs['ssl_context'] = context
        return super().init_poolmanager(*args, **kwargs)

session = requests.Session()
session.mount('https://', CustomHTTPAdapter())

# Metrics mapping 
column_to_ycharts_mapping = [
    ("InceptionDate", "inception_date", "YCI"),
    ("EarliestPerformanceDate", "earliest_performance_date", "YCI"),
    ("LCG", "equity_stylebox_large_cap_growth_exposure", "YCP"),
    ("LCB", "equity_stylebox_large_cap_blend_exposure", "YCP"),
    ("LCV", "equity_stylebox_large_cap_value_exposure", "YCP"),
    ("MCG", "equity_stylebox_mid_cap_growth_exposure", "YCP"),
    ("MCB", "equity_stylebox_mid_cap_blend_exposure", "YCP"),
    ("MCV", "equity_stylebox_mid_cap_value_exposure", "YCP"),
    ("SCG", "equity_stylebox_small_cap_growth_exposure", "YCP"),
    ("SCB", "equity_stylebox_small_cap_blend_exposure", "YCP"),
    ("SCV", "equity_stylebox_small_cap_value_exposure", "YCP"),
    ("Giant_Cap_Exposure", "giant_cap_exposure", "YCP"),
    ("Large_Cap_Exposure", "large_cap_exposure", "YCP"),
    ("Medium_Cap_Exposure", "medium_cap_exposure", "YCP"),
    ("Micro_Cap_Exposure", "micro_cap_exposure", "YCP"),
    ("Sensitive_Exposure", "sensitive_exposure", "YCP"),
    ("Small_Cap_Exposure", "small_cap_exposure", "YCP"),
    ("Cyclical_Exposure", "cyclical_exposure", "YCP"),
    ("Defensive_Exposure", "defensive_exposure", "YCP"),
    ("AAA_Rated", "aaa_bond_exposure", "YCP"),
    ("AA_Rated", "aa_bond_exposure", "YCP"),
    ("A_Rated", "a_bond_exposure", "YCP"),
    ("BBB_Rated", "bbb_bond_exposure", "YCP"),
    ("B_Rated", "b_bond_exposure", "YCP"),
    ("BB_Rated", "bb_bond_exposure", "YCP"),
    ("Below_B_Rated", "below_b_bond_exposure", "YCP"),
    ("Not_Rated", "not_rated_bond_exposure", "YCP"),
    ("Term_10_15_Y", "10_to_15_years_maturity_bond_exposure", "YCP"),
    ("Term_1_3_Y", "1_to_3_years_maturity_bond_exposure", "YCP"),
    ("Term_15_20_Y", "15_to_20_years_maturity_bond_exposure", "YCP"),
    ("Term_1_7_D", "1_to_7_days_maturity_bond_exposure", "YCP"),
    ("Term_183_364_D", "183_to_364_days_maturity_bond_exposure", "YCP"),
    ("Term_20_30_Y", "20_to_30_years_maturity_bond_exposure", "YCP"),
    ("Term_30_Plus_Y", "over_30_years_maturity_bond_exposure", "YCP"),
    ("Term_31_90_D", "31_to_90_days_maturity_bond_exposure", "YCP"),
    ("Term_3_5_Y", "3_to_5_years_maturity_bond_exposure", "YCP"),
    ("Term_5_7_Y", "5_to_7_years_maturity_bond_exposure", "YCP"),
    ("Term_7_10_Y", "7_to_10_years_maturity_bond_exposure", "YCP"),
    ("Term_8_30_D", "8_to_30_days_maturity_bond_exposure", "YCP"),
    ("Term_91_182_D", "91_to_182_days_maturity_bond_exposure", "YCP"),
    ("LT_Exposure", "long_term_exposure", "YCP"),
    ("Interm_Exposure", "intermediate_term_exposure", "YCP"),
    ("ST_Exposure", "short_term_exposure", "YCP"),
    ("pct_top_10", "percent_of_assets_in_top_10_holdings", "YCP"),
    ("pct_top_25", "percent_of_assets_in_top_25_holdings", "YCP"),
    ("materials", "basic_materials_exposure", "YCP"),
    ("comsvc", "communication_services_exposure", "YCP"),
    ("concyc", "consumer_cyclical_exposure", "YCP"),
    ("condef", "consumer_defensive_exposure", "YCP"),
    ("energy", "energy_exposure", "YCP"),
    ("finsvc", "financial_services_exposure", "YCP"),
    ("healthcare", "healthcare_exposure", "YCP"),
    ("industrials", "industrials_exposure", "YCP"),
    ("realestate", "real_estate_exposure", "YCP"),
    ("technology", "technology_exposure", "YCP"),
    ("utilities", "utilities_exposure", "YCP"),
    ("relative_composition", "relative_composition", "YCP"),
    ("num_bonds", "number_of_bond_holdings", "YCP"),
    ("num_stocks", "number_of_stock_holdings", "YCP"),
    ("dev_mkt", "developed_market_exposure", "YCP"),
    ("em_mkt", "emerging_market_exposure", "YCP"),
    ("europe", "greater_europe_total_exposure", "YCP"),
    ("lat_america", "latin_america_total_exposure", "YCP"),
    ("namerica", "north_america_total_exposure", "YCP"),
    ("africa_mideast", "africa_middle_east_total_exposure", "YCP"),
    ("asia_em", "asia_emerging_total_exposure", "YCP"),
    ("asia_dev", "asia_developed_total_exposure", "YCP"),
    ("australia", "australasia_total_exposure", "YCP"),
    ("india", "india_total_exposure", "YCP"),
    ("w_avg_dc", "weighted_average_debt_to_capital", "YCP"),
    ("avg_mkt_cap", "average_market_cap", "YCP"),
    ("avg_pe_ratio", "weighted_average_pe_ratio", "YCP"),
    ("avg_pb_ratio", "weighted_average_price_to_book_ratio", "YCP"),
    ("avg_pcf_ratio", "weighted_average_price_to_cash_flow", "YCP"),
    ("avg_ps_ratio", "weighted_average_price_to_sales_ratio", "YCP"),
    ("avg_net_margin", "weighted_median_net_margin", "YCP"),
    ("avg_roa", "weighted_median_return_on_assets", "YCP"),
    ("avg_roe", "weighted_median_return_on_equity", "YCP"),
    ("preferreds", "preferred_stock_exposure", "YCP"),
    ("securitized", "securitized_fixed_income_exposure", "YCP"),
    ("muni_exempt", "municipal_tax_exempt_bond_exposure", "YCP"),
    ("muni_tax", "municipal_taxable_bond_exposure", "YCP"),
    ("muni_exposure", "municipal_fixed_income_exposure", "YCP"),
    ("corp_exposure", "corporate_bond_exposure", "YCP"),
    ("corp_fi", "corporate_fixed_income_exposure", "YCP"),
    ("govi_exp", "government_bond_exposure", "YCP"),
    ("govi_fi", "government_fixed_income_exposure", "YCP"),
    ("covered_bond", "covered_bond_exposure", "YCP"),
    ("gov_bond", "government_related_bond_exposure", "YCP"),
    ("converts", "convertible_bond_exposure", "YCP"),
    ("asset_backed_sec", "asset_backed_security_exposure", "YCP"),
    ("index_fund", "index_fund", "YCI"),
    ("inverse_fund", "inverse_fund", "YCI"),
    ("leveraged_fund", "leveraged_fund", "YCI"),
    ("socially_responsible_fund", "socially_responsible_fund", "YCI"),
    ("synthetic_replication_fund", "synthetic_replication_fund", "YCI"),
    ("aum_usd", "aum_usd", "YCI"),
    ("average_manager_tenure", "average_manager_tenure", "YCP"),
    ("max_manager_tenure", "max_manager_tenure", "YCP"),
    ("median_manager_tenure", "median_manager_tenure", "YCP"),
    ("min_manager_tenure", "min_manager_tenure", "YCP"),
    ("open_to_existing_investors", "open_to_existing_investors", "YCI"),
    ("open_to_new_investors", "open_to_new_investors", "YCI"),
    ("ycharts_url", "ycharts_url", "YCI"),
    ("years_since_inception", "years_since_inception", "YCP"),
    ("investment_strategy", "investment_strategy", "YCI"),
    ("related_securities", "related_securities", "YCI"), #returns both symbols and names, only need symbols
    ("fund_family", "fund_family", "YCI")
]

# Extract metrics that should be fetched
METRICS = [(col, code, api) for col, code, api in column_to_ycharts_mapping if api in ["YCP", "YCI"]]

# YCP Functions
def build_points_lines(symbol, yc_code, is_mutual):
    symbol_for_api = f"M:{symbol}" if is_mutual else symbol
    return f"{symbol_for_api},{yc_code}"

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def fetch_ycp_data_point(symbol, metric, fund_type_id):
    is_mutual = (fund_type_id == 3)
    for col, yc_code, api_type in METRICS:
        if col == metric and api_type == "YCP":
            points_line = build_points_lines(symbol, yc_code, is_mutual)
            payload = f"points={points_line}"
            api_url = f"{YCHARTS_API_URL}/v3/excel/points"
            try:
                response = session.post(api_url, headers=API_HEADERS, data=payload, timeout=60)
                response.raise_for_status()
                data = response.json()
                symbol_for_api = f"M:{symbol}" if is_mutual else symbol
                if 'response' in data and symbol_for_api in data['response']:
                    results = data['response'][symbol_for_api].get("results", {})
                    if yc_code in results:
                        datapoints = results[yc_code].get("data")
                        if datapoints and isinstance(datapoints, list):
                            last_point = datapoints[-1]
                            if isinstance(last_point, list) and len(last_point) > 1:
                                return last_point[1]
                logger.warning(f"No valid data returned for symbol '{symbol}' on metric '{metric}'")
                return None
            except requests.exceptions.RequestException as e:
                logger.error(f"API error for symbol '{symbol}' on metric '{metric}': {e}")
                return None
    return None

# YCI Functions
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def fetch_yci_metric(symbol, fund_type_id, metric):
    for col, yc_code, api_type in METRICS:
        if col == metric and api_type == "YCI":
            if fund_type_id == 3:
                symbol_for_api = f"M:{symbol}"
                endpoint = "mutual_funds"
            else:
                symbol_for_api = symbol
                endpoint = "companies"
            api_url = f"{YCHARTS_API_URL}/v3/{endpoint}/{symbol_for_api}/info/{yc_code}?retrieve_ttl=true"
            logger.info(f"Fetching {metric} for {symbol} using URL: {api_url}")
            try:
                response = session.get(api_url, headers=API_HEADERS, timeout=30)
                response.raise_for_status()
                data = response.json()
                response_key = f"M:{symbol}" if fund_type_id == 3 else symbol
                metric_value = data.get("response", {}).get(response_key, {}).get("results", {}).get(yc_code, {}).get("data")
                
                if metric == "related_securities" and isinstance(metric_value, list):
                    metric_value = [sec.get('security_id') for sec in metric_value if isinstance(sec, dict) and 'security_id' in sec]
                return metric_value
            except Exception as e:
                logger.error(f"Error fetching {metric} for {symbol}: {e}")
                return None
    return None

def process_fund(row):
    symbol, fund_type_id = row['SymbolCUSIP'], row['Fund_Type_ID']
    data = {"symbol": symbol}
    
    for metric, _, api_type in METRICS:
        if api_type == "YCP":
            data[metric] = fetch_ycp_data_point(symbol, metric, fund_type_id)
        elif api_type == "YCI":
            data[metric] = fetch_yci_metric(symbol, fund_type_id, metric)
    
    # Prepare data for DB update, convert lists to JSON for 'related_securities'
    data = {k: json.dumps(v) if k == 'related_securities' and isinstance(v, list) else v for k, v in data.items()}
    
    try:
        with engine.begin() as conn:
            # Update existing record
            update_sql = f"UPDATE Funds_to_Screen SET {', '.join([f'{col} = :{col}' for col in data.keys() if col != 'symbol'])} WHERE SymbolCUSIP = :symbol"
            result = conn.execute(text(update_sql), data)
            
            # Check if any rows were affected by the UPDATE operation
            if result.rowcount > 0:
                logger.info(f"Updated metrics for symbol {symbol}")
                return {'symbol': symbol, 'status': 'updated'}
            else:
                logger.warning(f"No rows updated for symbol {symbol}, symbol might not exist in the database")
                return {'symbol': symbol, 'status': 'not_updated'}

    except Exception as e:
        logger.error(f"Database error for {symbol}: {e}")
        return {'symbol': symbol, 'status': 'error'}

def main():
    start_time = time.time()
    logger.info("Starting data fetch and update process.")

    # Load symbols from Excel
    df = pd.read_excel('C:\\Users\\JulianHeron\\fund_type_and_symbol.xlsx', usecols=["SymbolCUSIP", "Fund_Type_ID"])
    
    processed_count = 0
    error_count = 0
    not_updated_count = 0
    with ThreadPoolExecutor(max_workers=20) as executor:
        futures = {executor.submit(process_fund, row): row for _, row in df.iterrows()}
        
        for future in as_completed(futures):
            try:
                result = future.result()
                if result['status'] == 'updated':
                    processed_count += 1
                elif result['status'] == 'not_updated':
                    not_updated_count += 1
                elif result['status'] == 'error':
                    error_count += 1
                logger.info(f"Processed {result['symbol']}: {result['status']}")
            except Exception as e:
                error_count += 1
                logger.error(f"Exception in processing {futures[future]['SymbolCUSIP']}: {e}")
    
    # Call the stored procedure here after all processing is done
    try:
        with engine.begin() as conn:
            conn.execute(text("EXEC UpdateDerivedMetrics"))
        logger.info("Successfully executed UpdateDerivedMetrics stored procedure.")
    except Exception as e:
        logger.error(f"Failed to execute UpdateDerivedMetrics stored procedure: {e}")

    total_time = time.time() - start_time
    logger.info(f"Completed processing. Updated: {processed_count}, Not Updated: {not_updated_count}, Errors: {error_count}, Total Time: {total_time:.2f} seconds")

if __name__ == "__main__":
    main()

2025-02-13 16:11:39,552 - INFO - Starting data fetch and update process.
2025-02-13 16:11:39,636 - INFO - Fetching InceptionDate for MDIV using URL: https://api.ycharts.com/v3/companies/MDIV/info/inception_date?retrieve_ttl=true
2025-02-13 16:11:39,638 - INFO - Fetching InceptionDate for ROMO using URL: https://api.ycharts.com/v3/companies/ROMO/info/inception_date?retrieve_ttl=true
2025-02-13 16:11:39,654 - INFO - Fetching InceptionDate for RISN using URL: https://api.ycharts.com/v3/companies/RISN/info/inception_date?retrieve_ttl=true
2025-02-13 16:11:39,655 - INFO - Fetching InceptionDate for MAPP using URL: https://api.ycharts.com/v3/companies/MAPP/info/inception_date?retrieve_ttl=true
2025-02-13 16:11:39,657 - INFO - Fetching InceptionDate for GAL using URL: https://api.ycharts.com/v3/companies/GAL/info/inception_date?retrieve_ttl=true
2025-02-13 16:11:39,659 - INFO - Fetching InceptionDate for AOA using URL: https://api.ycharts.com/v3/companies/AOA/info/inception_date?retrieve_ttl=

In [1]:
import pandas as pd
import sqlalchemy
from sqlalchemy import text
import logging
import time
import json
from tenacity import retry, stop_after_attempt, wait_exponential
import ssl
import certifi
from requests.adapters import HTTPAdapter
import requests

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[
    logging.StreamHandler()  # Log to console
])
logger = logging.getLogger(__name__)

# Database Connection Configuration
connection_string = (
    "mssql+pyodbc://JULIANS_LAPTOP\\SQLEXPRESS/"
    "CWA_Fund_Database?driver=ODBC+Driver+18+for+SQL+Server"
    "&trusted_connection=yes&TrustServerCertificate=yes"
)
engine = sqlalchemy.create_engine(
    connection_string,
    pool_size=50,  # Increased pool size to avoid timeouts
    max_overflow=100,  # Adjust based on your system capabilities
    pool_timeout=300  # Increased timeout to handle long-running queries
)

# YCharts API Configuration
YCHARTS_API_URL = "https://api.ycharts.com"
API_HEADERS = {
    "X-YCHARTSAUTHORIZATION": "yIIphqbsQysnTvWWxfW33w",  # Replace with your actual API key
    "X-YCHARTSEXCELSESSION": "b645cd897b2446bfa3796acfa3a879db",
    "X-YCHARTSEXCELVERSION": "4.4",
    "X-YCHARTSOPERATINGSYSTEM": "Microsoft Windows NT 10.0.26100.0",
    "Content-Type": "application/x-www-form-urlencoded"
}

# SSL Context for HTTPS connections
context = ssl.create_default_context(cafile=certifi.where())
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE  # Use CERT_REQUIRED in production

class CustomHTTPAdapter(HTTPAdapter):
    def init_poolmanager(self, *args, **kwargs):
        kwargs['ssl_context'] = context
        return super().init_poolmanager(*args, **kwargs)

session = requests.Session()
session.mount('https://', CustomHTTPAdapter())

# Metrics mapping 
column_to_ycharts_mapping = [
    # ... (all your mappings here as in the previous code blocks)
]

# Extract metrics that should be fetched
METRICS = [(col, code, api) for col, code, api in column_to_ycharts_mapping if api in ["YCP", "YCI"]]

# YCP Functions
def build_points_lines(symbol, yc_code, is_mutual):
    symbol_for_api = f"M:{symbol}" if is_mutual else symbol
    return f"{symbol_for_api},{yc_code}"

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def fetch_ycp_data_point(symbol, metric, fund_type_id):
    is_mutual = (fund_type_id == 3)
    for col, yc_code, api_type in METRICS:
        if col == metric and api_type == "YCP":
            points_line = build_points_lines(symbol, yc_code, is_mutual)
            payload = f"points={points_line}"
            api_url = f"{YCHARTS_API_URL}/v3/excel/points"
            try:
                response = session.post(api_url, headers=API_HEADERS, data=payload, timeout=60)
                response.raise_for_status()
                data = response.json()
                symbol_for_api = f"M:{symbol}" if is_mutual else symbol
                if 'response' in data and symbol_for_api in data['response']:
                    results = data['response'][symbol_for_api].get("results", {})
                    if yc_code in results:
                        datapoints = results[yc_code].get("data")
                        if datapoints and isinstance(datapoints, list) and len(datapoints) > 0:
                            last_point = datapoints[-1]
                            if isinstance(last_point, list) and len(last_point) > 1:
                                return last_point[1]
                logger.warning(f"No valid data returned for symbol '{symbol}' on metric '{metric}'")
                return None
            except requests.exceptions.RequestException as e:
                logger.error(f"API error for symbol '{symbol}' on metric '{metric}': {e}")
                return None
    return None

# YCI Functions
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def fetch_yci_metric(symbol, fund_type_id, metric):
    for col, yc_code, api_type in METRICS:
        if col == metric and api_type == "YCI":
            if fund_type_id == 3:
                symbol_for_api = f"M:{symbol}"
                endpoint = "mutual_funds"
            else:
                symbol_for_api = symbol
                endpoint = "companies"
            api_url = f"{YCHARTS_API_URL}/v3/{endpoint}/{symbol_for_api}/info/{yc_code}?retrieve_ttl=true"
            logger.info(f"Fetching {metric} for {symbol} using URL: {api_url}")
            try:
                response = session.get(api_url, headers=API_HEADERS, timeout=30)
                response.raise_for_status()
                data = response.json()
                response_key = f"M:{symbol}" if fund_type_id == 3 else symbol
                metric_value = data.get("response", {}).get(response_key, {}).get("results", {}).get(yc_code, {}).get("data")
                
                if metric == "related_securities" and isinstance(metric_value, list):
                    metric_value = [sec.get('security_id') for sec in metric_value if isinstance(sec, dict) and 'security_id' in sec]
                
                if metric_value is None:
                    logger.warning(f"No valid data returned for symbol '{symbol}' on metric '{metric}'")
                    return None
                return metric_value
            except Exception as e:
                logger.error(f"Error fetching {metric} for {symbol}: {e}")
                return None
    return None

def process_symbol(symbol, fund_type_id):
    data = {"symbol": symbol}
    for metric, _, api_type in METRICS:
        if api_type == "YCP":
            data[metric] = fetch_ycp_data_point(symbol, metric, fund_type_id)
        elif api_type == "YCI":
            data[metric] = fetch_yci_metric(symbol, fund_type_id, metric)
        
        # Log the result of each metric fetch
        if data[metric] is None:
            logger.warning(f"No data for {symbol} on {metric}")
        else:
            logger.info(f"{symbol} - {metric}: {data[metric]}")

    # Update or insert data into the database
    try:
        with engine.begin() as conn:
            # Check if symbol exists, then update or insert
            result = conn.execute(text("SELECT 1 FROM Funds_to_Screen WHERE SymbolCUSIP = :symbol"), {'symbol': symbol}).fetchone()
            if result:
                update_sql = f"UPDATE Funds_to_Screen SET {', '.join([f'{col} = :{col}' for col in data.keys() if col != 'symbol'])} WHERE SymbolCUSIP = :symbol"
                conn.execute(text(update_sql), data)
                logger.info(f"Updated data for {symbol}")
            else:
                insert_sql = f"INSERT INTO Funds_to_Screen ({', '.join(data.keys())}) VALUES ({', '.join([':' + k for k in data.keys()])})"
                conn.execute(text(insert_sql), data)
                logger.info(f"Inserted data for {symbol}")
    except Exception as e:
        logger.error(f"Database error for {symbol}: {e}")

def main():
    start_time = time.time()
    logger.info("Starting data fetch and processing one symbol at a time.")

    # Load symbols from Excel
    df = pd.read_excel('C:\\Users\\JulianHeron\\fund_type_and_symbol.xlsx', usecols=["SymbolCUSIP", "Fund_Type_ID"])
    
    processed_count = 0
    error_count = 0
    for _, row in df.iterrows():
        symbol, fund_type_id = row['SymbolCUSIP'], row['Fund_Type_ID']
        try:
            process_symbol(symbol, fund_type_id)
            processed_count += 1
        except Exception as e:
            logger.error(f"Unhandled error processing {symbol}: {e}")
            error_count += 1

    # Call the stored procedure here after all processing is done
    try:
        with engine.begin() as conn:
            conn.execute(text("EXEC UpdateDerivedMetrics"))
        logger.info("Successfully executed UpdateDerivedMetrics stored procedure.")
    except Exception as e:
        logger.error(f"Failed to execute UpdateDerivedMetrics stored procedure: {e}")

    total_time = time.time() - start_time
    logger.info(f"Completed processing. Processed: {processed_count}, Errors: {error_count}, Total Time: {total_time:.2f} seconds")

if __name__ == "__main__":
    main()

SyntaxError: unterminated string literal (detected at line 72) (122854328.py, line 72)

In [None]:
import pandas as pd
import sqlalchemy
from sqlalchemy import text
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import json
from tenacity import retry, stop_after_attempt, wait_exponential
import ssl
import certifi
from requests.adapters import HTTPAdapter
import requests

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[
    logging.StreamHandler()  # Log to console
])
logger = logging.getLogger(__name__)

# Database Connection Configuration
connection_string = (
    "mssql+pyodbc://JULIANS_LAPTOP\\SQLEXPRESS/"
    "CWA_Fund_Database?driver=ODBC+Driver+18+for+SQL+Server"
    "&trusted_connection=yes&TrustServerCertificate=yes"
)
engine = sqlalchemy.create_engine(
    connection_string,
    pool_size=50,  # Increased pool size to avoid timeouts
    max_overflow=100,  # Adjust based on your system capabilities
    pool_timeout=300  # Increased timeout to handle long-running queries
)

# YCharts API Configuration
YCHARTS_API_URL = "https://api.ycharts.com"
API_HEADERS = {
    "X-YCHARTSAUTHORIZATION": "yIIphqbsQysnTvWWxfW33w",  # Replace with your actual API key
    "X-YCHARTSEXCELSESSION": "b645cd897b2446bfa3796acfa3a879db",
    "X-YCHARTSEXCELVERSION": "4.4",
    "X-YCHARTSOPERATINGSYSTEM": "Microsoft Windows NT 10.0.26100.0",
    "Content-Type": "application/x-www-form-urlencoded"
}

# SSL Context for HTTPS connections
context = ssl.create_default_context(cafile=certifi.where())
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE  # Use CERT_REQUIRED in production

class CustomHTTPAdapter(HTTPAdapter):
    def init_poolmanager(self, *args, **kwargs):
        kwargs['ssl_context'] = context
        return super().init_poolmanager(*args, **kwargs)

session = requests.Session()
session.mount('https://', CustomHTTPAdapter())

# Metrics mapping 
column_to_ycharts_mapping = [
    ("InceptionDate", "inception_date", "YCI"),
    ("EarliestPerformanceDate", "earliest_performance_date", "YCI"),
    ("LCG", "equity_stylebox_large_cap_growth_exposure", "YCP"),
    ("LCB", "equity_stylebox_large_cap_blend_exposure", "YCP"),
    ("LCV", "equity_stylebox_large_cap_value_exposure", "YCP"),
    ("MCG", "equity_stylebox_mid_cap_growth_exposure", "YCP"),
    ("MCB", "equity_stylebox_mid_cap_blend_exposure", "YCP"),
    ("MCV", "equity_stylebox_mid_cap_value_exposure", "YCP"),
    ("SCG", "equity_stylebox_small_cap_growth_exposure", "YCP"),
    ("SCB", "equity_stylebox_small_cap_blend_exposure", "YCP"),
    ("SCV", "equity_stylebox_small_cap_value_exposure", "YCP"),
    ("Giant_Cap_Exposure", "giant_cap_exposure", "YCP"),
    ("Large_Cap_Exposure", "large_cap_exposure", "YCP"),
    ("Medium_Cap_Exposure", "medium_cap_exposure", "YCP"),
    ("Micro_Cap_Exposure", "micro_cap_exposure", "YCP"),
    ("Sensitive_Exposure", "sensitive_exposure", "YCP"),
    ("Small_Cap_Exposure", "small_cap_exposure", "YCP"),
    ("Cyclical_Exposure", "cyclical_exposure", "YCP"),
    ("Defensive_Exposure", "defensive_exposure", "YCP"),
    ("AAA_Rated", "aaa_bond_exposure", "YCP"),
    ("AA_Rated", "aa_bond_exposure", "YCP"),
    ("A_Rated", "a_bond_exposure", "YCP"),
    ("BBB_Rated", "bbb_bond_exposure", "YCP"),
    ("B_Rated", "b_bond_exposure", "YCP"),
    ("BB_Rated", "bb_bond_exposure", "YCP"),
    ("Below_B_Rated", "below_b_bond_exposure", "YCP"),
    ("Not_Rated", "not_rated_bond_exposure", "YCP"),
    ("Term_10_15_Y", "10_to_15_years_maturity_bond_exposure", "YCP"),
    ("Term_1_3_Y", "1_to_3_years_maturity_bond_exposure", "YCP"),
    ("Term_15_20_Y", "15_to_20_years_maturity_bond_exposure", "YCP"),
    ("Term_1_7_D", "1_to_7_days_maturity_bond_exposure", "YCP"),
    ("Term_183_364_D", "183_to_364_days_maturity_bond_exposure", "YCP"),
    ("Term_20_30_Y", "20_to_30_years_maturity_bond_exposure", "YCP"),
    ("Term_30_Plus_Y", "over_30_years_maturity_bond_exposure", "YCP"),
    ("Term_31_90_D", "31_to_90_days_maturity_bond_exposure", "YCP"),
    ("Term_3_5_Y", "3_to_5_years_maturity_bond_exposure", "YCP"),
    ("Term_5_7_Y", "5_to_7_years_maturity_bond_exposure", "YCP"),
    ("Term_7_10_Y", "7_to_10_years_maturity_bond_exposure", "YCP"),
    ("Term_8_30_D", "8_to_30_days_maturity_bond_exposure", "YCP"),
    ("Term_91_182_D", "91_to_182_days_maturity_bond_exposure", "YCP"),
    ("LT_Exposure", "long_term_exposure", "YCP"),
    ("Interm_Exposure", "intermediate_term_exposure", "YCP"),
    ("ST_Exposure", "short_term_exposure", "YCP"),
    ("pct_top_10", "percent_of_assets_in_top_10_holdings", "YCP"),
    ("pct_top_25", "percent_of_assets_in_top_25_holdings", "YCP"),
    ("materials", "basic_materials_exposure", "YCP"),
    ("comsvc", "communication_services_exposure", "YCP"),
    ("concyc", "consumer_cyclical_exposure", "YCP"),
    ("condef", "consumer_defensive_exposure", "YCP"),
    ("energy", "energy_exposure", "YCP"),
    ("finsvc", "financial_services_exposure", "YCP"),
    ("healthcare", "healthcare_exposure", "YCP"),
    ("industrials", "industrials_exposure", "YCP"),
    ("realestate", "real_estate_exposure", "YCP"),
    ("technology", "technology_exposure", "YCP"),
    ("utilities", "utilities_exposure", "YCP"),
    ("relative_composition", "relative_composition", "YCP"),
    ("num_bonds", "number_of_bond_holdings", "YCP"),
    ("num_stocks", "number_of_stock_holdings", "YCP"),
    ("dev_mkt", "developed_market_exposure", "YCP"),
    ("em_mkt", "emerging_market_exposure", "YCP"),
    ("europe", "greater_europe_total_exposure", "YCP"),
    ("lat_america", "latin_america_total_exposure", "YCP"),
    ("namerica", "north_america_total_exposure", "YCP"),
    ("africa_mideast", "africa_middle_east_total_exposure", "YCP"),
    ("asia_em", "asia_emerging_total_exposure", "YCP"),
    ("asia_dev", "asia_developed_total_exposure", "YCP"),
    ("australia", "australasia_total_exposure", "YCP"),
    ("india", "india_total_exposure", "YCP"),
    ("w_avg_dc", "weighted_average_debt_to_capital", "YCP"),
    ("avg_mkt_cap", "average_market_cap", "YCP"),
    ("avg_pe_ratio", "weighted_average_pe_ratio", "YCP"),
    ("avg_pb_ratio", "weighted_average_price_to_book_ratio", "YCP"),
    ("avg_pcf_ratio", "weighted_average_price_to_cash_flow", "YCP"),
    ("avg_ps_ratio", "weighted_average_price_to_sales_ratio", "YCP"),
    ("avg_net_margin", "weighted_median_net_margin", "YCP"),
    ("avg_roa", "weighted_median_return_on_assets", "YCP"),
    ("avg_roe", "weighted_median_return_on_equity", "YCP"),
    ("preferreds", "preferred_stock_exposure", "YCP"),
    ("securitized", "securitized_fixed_income_exposure", "YCP"),
    ("muni_exempt", "municipal_tax_exempt_bond_exposure", "YCP"),
    ("muni_tax", "municipal_taxable_bond_exposure", "YCP"),
    ("muni_exposure", "municipal_fixed_income_exposure", "YCP"),
    ("corp_exposure", "corporate_bond_exposure", "YCP"),
    ("corp_fi", "corporate_fixed_income_exposure", "YCP"),
    ("govi_exp", "government_bond_exposure", "YCP"),
    ("govi_fi", "government_fixed_income_exposure", "YCP"),
    ("covered_bond", "covered_bond_exposure", "YCP"),
    ("gov_bond", "government_related_bond_exposure", "YCP"),
    ("converts", "convertible_bond_exposure", "YCP"),
    ("asset_backed_sec", "asset_backed_security_exposure", "YCP"),
    ("index_fund", "index_fund", "YCI"),
    ("inverse_fund", "inverse_fund", "YCI"),
    ("leveraged_fund", "leveraged_fund", "YCI"),
    ("socially_responsible_fund", "socially_responsible_fund", "YCI"),
    ("synthetic_replication_fund", "synthetic_replication_fund", "YCI"),
    ("aum_usd", "aum_usd", "YCI"),
    ("average_manager_tenure", "average_manager_tenure", "YCP"),
    ("max_manager_tenure", "max_manager_tenure", "YCP"),
    ("median_manager_tenure", "median_manager_tenure", "YCP"),
    ("min_manager_tenure", "min_manager_tenure", "YCP"),
    ("open_to_existing_investors", "open_to_existing_investors", "YCI"),
    ("open_to_new_investors", "open_to_new_investors", "YCI"),
    ("ycharts_url", "ycharts_url", "YCI"),
    ("years_since_inception", "years_since_inception", "YCP"),
    ("investment_strategy", "investment_strategy", "YCI"),
    ("related_securities", "related_securities", "YCI"), #returns both symbols and names, only need symbols
    ("fund_family", "fund_family", "YCI")
]

# Extract metrics that should be fetched
METRICS = [(col, code, api) for col, code, api in column_to_ycharts_mapping if api in ["YCP", "YCI"]]

# YCP Functions
def build_points_lines(symbol, yc_code, is_mutual):
    symbol_for_api = f"M:{symbol}" if is_mutual else symbol
    return f"{symbol_for_api},{yc_code}"

def fetch_ycp_data_point(symbol, metric, is_mutual):
    symbol_for_api = f"M:{symbol}" if is_mutual else symbol
    points_line = f"{symbol_for_api},{metric}"
    payload = f"points={points_line}"
    api_url = f"{YCHARTS_API_URL}/v3/excel/points"
    
    try:
        response = session.post(api_url, headers=API_HEADERS, data=payload, timeout=60)
        response.raise_for_status()
        data = response.json()
        logger.info(f"API response for {symbol} on {metric}: {data}")

        if 'response' in data and symbol_for_api in data['response']:
            results = data['response'][symbol_for_api].get("results", {})
            if metric in results and "" in results[metric]:
                datapoints = results[metric][""].get("results")
                if datapoints and isinstance(datapoints, list) and len(datapoints) > 0:
                    # Assuming the last element in the list contains the latest data
                    last_point = datapoints[-1]
                    if isinstance(last_point, list) and len(last_point) > 1:
                        value = last_point[1]
                        if value is not None:
                            return value
                        else:
                            logger.warning(f"Data returned for {symbol} on {metric} is None")
                    else:
                        logger.warning(f"Unexpected data format for {symbol} on {metric}")
                else:
                    logger.warning(f"No valid data points for {symbol} on {metric}")
            else:
                logger.warning(f"Metric '{metric}' not found or empty for {symbol}")
        else:
            logger.error(f"API response does not contain expected data for '{symbol}' on metric '{metric}': {data}")
        return None
    except requests.RequestException as e:
        logger.error(f"API error for symbol '{symbol}' on metric '{metric}': {e}")
        return None

# YCI Functions
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def fetch_yci_metric(symbol, fund_type_id, metric):
    for col, yc_code, api_type in METRICS:
        if col == metric and api_type == "YCI":
            if fund_type_id == 3:
                symbol_for_api = f"M:{symbol}"
                endpoint = "mutual_funds"
            else:
                symbol_for_api = symbol
                endpoint = "companies"
            api_url = f"{YCHARTS_API_URL}/v3/{endpoint}/{symbol_for_api}/info/{yc_code}?retrieve_ttl=true"
            logger.info(f"Fetching {metric} for {symbol} using URL: {api_url}")
            try:
                response = session.get(api_url, headers=API_HEADERS, timeout=30)
                response.raise_for_status()
                data = response.json()
                logger.debug(f"API response for {symbol} on {yc_code}: {data}")
                
                response_key = f"M:{symbol}" if fund_type_id == 3 else symbol
                metric_value = data.get("response", {}).get(response_key, {}).get("results", {}).get(yc_code, {}).get("data")
                
                if metric == "related_securities" and isinstance(metric_value, list):
                    metric_value = [sec.get('security_id') for sec in metric_value if isinstance(sec, dict) and 'security_id' in sec]
                return metric_value
            except Exception as e:
                logger.error(f"Error fetching {metric} for {symbol}: {e}")
                return None
    return None

def process_fund(row):
    symbol, fund_type_id = row['SymbolCUSIP'], row['Fund_Type_ID']
    data = {"symbol": symbol}
    
    for metric, _, api_type in METRICS:
        if api_type == "YCP":
            data[metric] = fetch_ycp_data_point(symbol, metric, fund_type_id)
        elif api_type == "YCI":
            data[metric] = fetch_yci_metric(symbol, fund_type_id, metric)
    
    # Prepare data for DB update, convert lists to JSON for 'related_securities'
    data = {k: json.dumps(v) if k == 'related_securities' and isinstance(v, list) else v for k, v in data.items()}
    
    try:
        with engine.begin() as conn:
            # Update existing record
            update_sql = f"UPDATE Funds_to_Screen SET {', '.join([f'{col} = :{col}' for col in data.keys() if col != 'symbol'])} WHERE SymbolCUSIP = :symbol"
            result = conn.execute(text(update_sql), data)
            
            # Check if any rows were affected by the UPDATE operation
            if result.rowcount > 0:
                logger.info(f"Updated metrics for symbol {symbol}")
                return {'symbol': symbol, 'status': 'updated'}
            else:
                logger.warning(f"No rows updated for symbol {symbol}, symbol might not exist in the database")
                return {'symbol': symbol, 'status': 'not_updated'}

    except Exception as e:
        logger.error(f"Database error for {symbol}: {e}")
        return {'symbol': symbol, 'status': 'error'}

def main():
    start_time = time.time()
    logger.info("Starting data fetch and update process.")

    # Load symbols from Excel
    df = pd.read_excel('C:\\Users\\JulianHeron\\fund_type_and_symbol.xlsx', usecols=["SymbolCUSIP", "Fund_Type_ID"])
    
    processed_count = 0
    error_count = 0
    not_updated_count = 0
    with ThreadPoolExecutor(max_workers=20) as executor:
        futures = {executor.submit(process_fund, row): row for _, row in df.iterrows()}
        
        for future in as_completed(futures):
            try:
                result = future.result()
                if result['status'] == 'updated':
                    processed_count += 1
                elif result['status'] == 'not_updated':
                    not_updated_count += 1
                elif result['status'] == 'error':
                    error_count += 1
                logger.info(f"Processed {result['symbol']}: {result['status']}")
            except Exception as e:
                error_count += 1
                logger.error(f"Exception in processing {futures[future]['SymbolCUSIP']}: {e}")
    
    # Call the stored procedure here after all processing is done
    try:
        with engine.begin() as conn:
            conn.execute(text("EXEC UpdateDerivedMetrics"))
        logger.info("Successfully executed UpdateDerivedMetrics stored procedure.")
    except Exception as e:
        logger.error(f"Failed to execute UpdateDerivedMetrics stored procedure: {e}")

    total_time = time.time() - start_time
    logger.info(f"Completed processing. Updated: {processed_count}, Not Updated: {not_updated_count}, Errors: {error_count}, Total Time: {total_time:.2f} seconds")

if __name__ == "__main__":
    main()

2025-02-13 16:43:37,756 - INFO - Starting data fetch and update process.
2025-02-13 16:43:38,072 - INFO - Fetching InceptionDate for MDIV using URL: https://api.ycharts.com/v3/companies/MDIV/info/inception_date?retrieve_ttl=true
2025-02-13 16:43:38,074 - INFO - Fetching InceptionDate for ROMO using URL: https://api.ycharts.com/v3/companies/ROMO/info/inception_date?retrieve_ttl=true
2025-02-13 16:43:38,074 - INFO - Fetching InceptionDate for RISN using URL: https://api.ycharts.com/v3/companies/RISN/info/inception_date?retrieve_ttl=true
2025-02-13 16:43:38,074 - INFO - Fetching InceptionDate for MAPP using URL: https://api.ycharts.com/v3/companies/MAPP/info/inception_date?retrieve_ttl=true
2025-02-13 16:43:38,077 - INFO - Fetching InceptionDate for GAL using URL: https://api.ycharts.com/v3/companies/GAL/info/inception_date?retrieve_ttl=true
2025-02-13 16:43:38,079 - INFO - Fetching InceptionDate for AOA using URL: https://api.ycharts.com/v3/companies/AOA/info/inception_date?retrieve_ttl=

In [4]:
import requests
import logging
import json

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[
    logging.StreamHandler()  # Log to console
])
logger = logging.getLogger(__name__)

# Disable SSL certificate verification (for debugging only)
requests.packages.urllib3.disable_warnings()
session = requests.Session()
session.verify = False  # This disables SSL certificate verification

# YCharts API Configuration
YCHARTS_API_URL = "https://api.ycharts.com"
API_HEADERS = {
    "X-YCHARTSAUTHORIZATION": "yIIphqbsQysnTvWWxfW33w",  # Replace with your actual API key
    "X-YCHARTSEXCELSESSION": "b645cd897b2446bfa3796acfa3a879db",
    "X-YCHARTSEXCELVERSION": "4.4",
    "X-YCHARTSOPERATINGSYSTEM": "Microsoft Windows NT 10.0.26100.0",
    "Content-Type": "application/x-www-form-urlencoded"
}

# Metrics mapping 
column_to_ycharts_mapping = [
    ("InceptionDate", "inception_date", "YCI"),
    ("EarliestPerformanceDate", "earliest_performance_date", "YCI"),
    ("LCG", "equity_stylebox_large_cap_growth_exposure", "YCP"),
    ("LCB", "equity_stylebox_large_cap_blend_exposure", "YCP"),
    ("LCV", "equity_stylebox_large_cap_value_exposure", "YCP"),
    ("MCG", "equity_stylebox_mid_cap_growth_exposure", "YCP"),
    ("MCB", "equity_stylebox_mid_cap_blend_exposure", "YCP"),
    ("MCV", "equity_stylebox_mid_cap_value_exposure", "YCP"),
    ("SCG", "equity_stylebox_small_cap_growth_exposure", "YCP"),
    ("SCB", "equity_stylebox_small_cap_blend_exposure", "YCP"),
    ("SCV", "equity_stylebox_small_cap_value_exposure", "YCP"),
    ("Giant_Cap_Exposure", "giant_cap_exposure", "YCP"),
    ("Large_Cap_Exposure", "large_cap_exposure", "YCP"),
    ("Medium_Cap_Exposure", "medium_cap_exposure", "YCP"),
    ("Micro_Cap_Exposure", "micro_cap_exposure", "YCP"),
    ("Sensitive_Exposure", "sensitive_exposure", "YCP"),
    ("Small_Cap_Exposure", "small_cap_exposure", "YCP"),
    ("Cyclical_Exposure", "cyclical_exposure", "YCP"),
    ("Defensive_Exposure", "defensive_exposure", "YCP"),
    ("AAA_Rated", "aaa_bond_exposure", "YCP"),
    ("AA_Rated", "aa_bond_exposure", "YCP"),
    ("A_Rated", "a_bond_exposure", "YCP"),
    ("BBB_Rated", "bbb_bond_exposure", "YCP"),
    ("B_Rated", "b_bond_exposure", "YCP"),
    ("BB_Rated", "bb_bond_exposure", "YCP"),
    ("Below_B_Rated", "below_b_bond_exposure", "YCP"),
    ("Not_Rated", "not_rated_bond_exposure", "YCP"),
    ("Term_10_15_Y", "10_to_15_years_maturity_bond_exposure", "YCP"),
    ("Term_1_3_Y", "1_to_3_years_maturity_bond_exposure", "YCP"),
    ("Term_15_20_Y", "15_to_20_years_maturity_bond_exposure", "YCP"),
    ("Term_1_7_D", "1_to_7_days_maturity_bond_exposure", "YCP"),
    ("Term_183_364_D", "183_to_364_days_maturity_bond_exposure", "YCP"),
    ("Term_20_30_Y", "20_to_30_years_maturity_bond_exposure", "YCP"),
    ("Term_30_Plus_Y", "over_30_years_maturity_bond_exposure", "YCP"),
    ("Term_31_90_D", "31_to_90_days_maturity_bond_exposure", "YCP"),
    ("Term_3_5_Y", "3_to_5_years_maturity_bond_exposure", "YCP"),
    ("Term_5_7_Y", "5_to_7_years_maturity_bond_exposure", "YCP"),
    ("Term_7_10_Y", "7_to_10_years_maturity_bond_exposure", "YCP"),
    ("Term_8_30_D", "8_to_30_days_maturity_bond_exposure", "YCP"),
    ("Term_91_182_D", "91_to_182_days_maturity_bond_exposure", "YCP"),
    ("LT_Exposure", "long_term_exposure", "YCP"),
    ("Interm_Exposure", "intermediate_term_exposure", "YCP"),
    ("ST_Exposure", "short_term_exposure", "YCP"),
    ("pct_top_10", "percent_of_assets_in_top_10_holdings", "YCP"),
    ("pct_top_25", "percent_of_assets_in_top_25_holdings", "YCP"),
    ("materials", "basic_materials_exposure", "YCP"),
    ("comsvc", "communication_services_exposure", "YCP"),
    ("concyc", "consumer_cyclical_exposure", "YCP"),
    ("condef", "consumer_defensive_exposure", "YCP"),
    ("energy", "energy_exposure", "YCP"),
    ("finsvc", "financial_services_exposure", "YCP"),
    ("healthcare", "healthcare_exposure", "YCP"),
    ("industrials", "industrials_exposure", "YCP"),
    ("realestate", "real_estate_exposure", "YCP"),
    ("technology", "technology_exposure", "YCP"),
    ("utilities", "utilities_exposure", "YCP"),
    ("relative_composition", "relative_composition", "YCP"),
    ("num_bonds", "number_of_bond_holdings", "YCP"),
    ("num_stocks", "number_of_stock_holdings", "YCP"),
    ("dev_mkt", "developed_market_exposure", "YCP"),
    ("em_mkt", "emerging_market_exposure", "YCP"),
    ("europe", "greater_europe_total_exposure", "YCP"),
    ("lat_america", "latin_america_total_exposure", "YCP"),
    ("namerica", "north_america_total_exposure", "YCP"),
    ("africa_mideast", "africa_middle_east_total_exposure", "YCP"),
    ("asia_em", "asia_emerging_total_exposure", "YCP"),
    ("asia_dev", "asia_developed_total_exposure", "YCP"),
    ("australia", "australasia_total_exposure", "YCP"),
    ("india", "india_total_exposure", "YCP"),
    ("w_avg_dc", "weighted_average_debt_to_capital", "YCP"),
    ("avg_mkt_cap", "average_market_cap", "YCP"),
    ("avg_pe_ratio", "weighted_average_pe_ratio", "YCP"),
    ("avg_pb_ratio", "weighted_average_price_to_book_ratio", "YCP"),
    ("avg_pcf_ratio", "weighted_average_price_to_cash_flow", "YCP"),
    ("avg_ps_ratio", "weighted_average_price_to_sales_ratio", "YCP"),
    ("avg_net_margin", "weighted_median_net_margin", "YCP"),
    ("avg_roa", "weighted_median_return_on_assets", "YCP"),
    ("avg_roe", "weighted_median_return_on_equity", "YCP"),
    ("preferreds", "preferred_stock_exposure", "YCP"),
    ("securitized", "securitized_fixed_income_exposure", "YCP"),
    ("muni_exempt", "municipal_tax_exempt_bond_exposure", "YCP"),
    ("muni_tax", "municipal_taxable_bond_exposure", "YCP"),
    ("muni_exposure", "municipal_fixed_income_exposure", "YCP"),
    ("corp_exposure", "corporate_bond_exposure", "YCP"),
    ("corp_fi", "corporate_fixed_income_exposure", "YCP"),
    ("govi_exp", "government_bond_exposure", "YCP"),
    ("govi_fi", "government_fixed_income_exposure", "YCP"),
    ("covered_bond", "covered_bond_exposure", "YCP"),
    ("gov_bond", "government_related_bond_exposure", "YCP"),
    ("converts", "convertible_bond_exposure", "YCP"),
    ("asset_backed_sec", "asset_backed_security_exposure", "YCP"),
    ("index_fund", "index_fund", "YCI"),
    ("inverse_fund", "inverse_fund", "YCI"),
    ("leveraged_fund", "leveraged_fund", "YCI"),
    ("socially_responsible_fund", "socially_responsible_fund", "YCI"),
    ("synthetic_replication_fund", "synthetic_replication_fund", "YCI"),
    ("aum_usd", "aum_usd", "YCI"),
    ("average_manager_tenure", "average_manager_tenure", "YCP"),
    ("max_manager_tenure", "max_manager_tenure", "YCP"),
    ("median_manager_tenure", "median_manager_tenure", "YCP"),
    ("min_manager_tenure", "min_manager_tenure", "YCP"),
    ("open_to_existing_investors", "open_to_existing_investors", "YCI"),
    ("open_to_new_investors", "open_to_new_investors", "YCI"),
    ("ycharts_url", "ycharts_url", "YCI"),
    ("years_since_inception", "years_since_inception", "YCP"),
    ("investment_strategy", "investment_strategy", "YCI"),
    ("related_securities", "related_securities", "YCI"), #returns both symbols and names, only need symbols
    ("fund_family", "fund_family", "YCI")
]

def fetch_ycp_data_point(symbol, metric, is_mutual):
    symbol_for_api = f"M:{symbol}" if is_mutual else symbol
    points_line = f"{symbol_for_api},{metric}"
    payload = f"points={points_line}"
    api_url = f"{YCHARTS_API_URL}/v3/excel/points"
    
    try:
        response = session.post(api_url, headers=API_HEADERS, data=payload, timeout=60)
        response.raise_for_status()
        data = response.json()
        logger.info(f"API response for {symbol} on {metric}: {data}")

        if 'response' in data and symbol_for_api in data['response']:
            results = data['response'][symbol_for_api].get("results", {})
            if metric in results and "" in results[metric]:
                datapoints = results[metric][""].get("results")
                if datapoints and isinstance(datapoints, list) and len(datapoints) > 0:
                    # Assuming the last element in the list contains the latest data
                    last_point = datapoints[-1]
                    if isinstance(last_point, list) and len(last_point) > 1:
                        value = last_point[1]
                        if value is not None:
                            return value
                        else:
                            logger.warning(f"Data returned for {symbol} on {metric} is None")
                    else:
                        logger.warning(f"Unexpected data format for {symbol} on {metric}")
                else:
                    logger.warning(f"No valid data points for {symbol} on {metric}")
            else:
                logger.warning(f"Metric '{metric}' not found or empty for {symbol}")
        else:
            logger.error(f"API response does not contain expected data for '{symbol}' on metric '{metric}': {data}")
        return None
    except requests.RequestException as e:
        logger.error(f"API error for symbol '{symbol}' on metric '{metric}': {e}")
        return None

def fetch_yci_metric(symbol, metric, is_mutual):
    endpoint = "mutual_funds" if is_mutual else "companies"
    symbol_for_api = f"M:{symbol}" if is_mutual else symbol
    api_url = f"{YCHARTS_API_URL}/v3/{endpoint}/{symbol_for_api}/info/{metric}?retrieve_ttl=true"
    
    try:
        response = session.get(api_url, headers=API_HEADERS, timeout=30)
        response.raise_for_status()
        data = response.json()
        logger.info(f"API response for {symbol} on {metric}: {data}")
        
        response_key = symbol_for_api
        metric_value = data.get("response", {}).get(response_key, {}).get("results", {}).get(metric, {}).get("data")
        
        if metric == "related_securities" and isinstance(metric_value, list):
            metric_value = [sec.get('security_id') for sec in metric_value if isinstance(sec, dict) and 'security_id' in sec]
        return metric_value
    except requests.RequestException as e:
        logger.error(f"Error fetching {metric} for {symbol}: {e}")
        return None

def main():
    symbols = [
        {"symbol": "RDVY", "is_mutual": False},  # ETF
        {"symbol": "FTBFX", "is_mutual": True}  # Mutual Fund
    ]
    results = {}

    for item in symbols:
        symbol, is_mutual = item['symbol'], item['is_mutual']
        results[symbol] = {}
        for db_col, yc_code, api_type in column_to_ycharts_mapping:
            if api_type == "YCP":
                results[symbol][db_col] = fetch_ycp_data_point(symbol, yc_code, is_mutual)
            elif api_type == "YCI":
                results[symbol][db_col] = fetch_yci_metric(symbol, yc_code, is_mutual)
            
            if results[symbol][db_col] is None:
                logger.warning(f"No data for {symbol} on metric {db_col} ({yc_code})")

    for symbol, metric_data in results.items():
        logger.info(f"Results for {symbol}:")
        for db_col, value in metric_data.items():
            if db_col == "related_securities" and value:
                logger.info(f"  {db_col}: {json.dumps(value)}")
            else:
                logger.info(f"  {db_col}: {value}")

if __name__ == "__main__":
    main()

2025-02-13 17:53:58,151 - INFO - API response for RDVY on inception_date: {'response': {'RDVY': {'meta': {'status': 'ok'}, 'results': {'inception_date': {'meta': {'status': 'ok', 'ttl': 43200}, 'data': '2014-01-06'}}}}, 'meta': {'url': 'https://api.ycharts.com/v3/companies/RDVY/info/inception_date?retrieve_ttl=true', 'status': 'ok'}}
2025-02-13 17:53:58,270 - INFO - API response for RDVY on earliest_performance_date: {'response': {'RDVY': {'meta': {'status': 'ok'}, 'results': {'earliest_performance_date': {'meta': {'status': 'ok', 'ttl': 43200}, 'data': '2014-01-06'}}}}, 'meta': {'url': 'https://api.ycharts.com/v3/companies/RDVY/info/earliest_performance_date?retrieve_ttl=true', 'status': 'ok'}}
2025-02-13 17:53:58,392 - INFO - API response for RDVY on equity_stylebox_large_cap_growth_exposure: {'response': {'RDVY': {'meta': {'status': 'ok'}, 'results': {'equity_stylebox_large_cap_growth_exposure': {'meta': {'status': 'ok'}, '': {'results': ['2025-02-11', 0.01935], 'meta': {'status': '