In [8]:
import os 
# SECURITY: never commit API keys in a notebook. The key must be supplied by
# the environment (shell export, .env loader, secrets manager) before the
# kernel starts; any key that was ever committed here should be rotated.
# The check runs ahead of the project imports below, presumably because one of
# them reads the variable -- TODO confirm which module consumes it.
if not os.environ.get("PERPLEXITY_API_KEY"):
    raise RuntimeError(
        "PERPLEXITY_API_KEY is not set; export it in the environment "
        "before starting the kernel."
    )
import pandas as pd
from typing import Dict, Any, List
from Polygon_Client import PolygonClient
from alpha_vantage_client import AlphaVantageClient
from prompt_eng import endpointMatcher
from sqlalchemy import create_engine
from matcher_config import api_endpoint_map

In [None]:
class Ingestor:
    """Fetch data from the APIs a prompt matcher recommends and keep it as DataFrames.

    All the work happens at construction time:

    1. ``endpointMatcher.match_prompt`` is called with the prompt; the second
       element of its 4-tuple result is used as the list of recommended APIs.
    2. One request dict is built per recommended API from static parameters
       (the parameters are not yet derived from the prompt).
    3. Each request is fetched and parsed through the matching client and the
       resulting object is stored in ``self.dfs`` keyed by API name.

    Attributes:
        clients: API name -> client object exposing ``fetch_data`` and
            ``parse_response``.
        matcher: the ``endpointMatcher`` used to pick APIs.
        requests: one dict per recommended API: ``{'api': <name>, **params}``.
        dfs: API name -> parsed result (normally a ``pandas.DataFrame``).
    """

    # Static request parameters per API. Placeholder values: ticker and date
    # range are fixed here regardless of what the prompt asks for.
    _STATIC_PARAMS: Dict[str, Dict[str, Any]] = {
        'polygon_api': {
            'ticker': 'AAPL',
            'multiplier': 1,
            'timespan': 'day',
            'from': '2025-01-01',
            'to': '2025-02-01',
            'endpoint_type': 0,
        },
        'alpha_vantage_api': {
            'ticker': 'AAPL',
            'function': 'TIME_SERIES_DAILY_ADJUSTED',
        },
    }

    def __init__(
        self,
        config: Dict[str, Any],
        api_feature_map: Dict[str, Any],
        prompt: str,
        embedding_model: str = "ProsusAI/finbert"
    ):
        """Match the prompt to APIs, then fetch and parse data from each of them.

        Args:
            config: credentials; the keys ``'polygon_api_key'`` and
                ``'alpha_vantage_api_key'`` are read with ``.get``.
            api_feature_map: passed through to ``endpointMatcher``.
            prompt: natural-language request handed to the matcher.
            embedding_model: passed through to ``endpointMatcher``.

        Raises:
            RuntimeError: the matcher recommended no APIs for ``prompt``.
            ValueError: a recommended API has no client in ``self.clients``.
        """
        # set up HTTP clients
        self.clients = {
            'polygon_api': PolygonClient(api_key=config.get('polygon_api_key')),
            'alpha_vantage_api': AlphaVantageClient(api_key=config.get('alpha_vantage_api_key')),
        }
        # set up LLM matcher
        self.matcher = endpointMatcher(api_feature_map, embedding_model)
        # call LLM for API recommendations
        _, recommended_apis, _, _ = self.matcher.match_prompt(prompt)
        if not recommended_apis:
            raise RuntimeError(f"No APIs recommended for prompt: {prompt!r}")

        # One request per recommended API; unknown APIs get no parameters and
        # are rejected below when no client is found for them.
        self.requests: List[Dict[str, Any]] = [
            {'api': api, **self._STATIC_PARAMS.get(api, {})}
            for api in recommended_apis
        ]

        # fetch, parse, and store DataFrames
        self.dfs: Dict[str, pd.DataFrame] = {}
        for req in self.requests:
            api_name = req['api']
            client = self.clients.get(api_name)
            if not client:
                raise ValueError(f"No client found for API: {api_name}")
            # Send a copy without the routing key so self.requests keeps its
            # 'api' label instead of being mutated while it is iterated.
            params = {key: value for key, value in req.items() if key != 'api'}
            raw = client.fetch_data(params)
            parsed = client.parse_response(raw)
            # Always store the result: previously a parser that returned a
            # bare DataFrame (not a tuple) was silently dropped.
            self.dfs[api_name] = self._unwrap_frame(parsed)

    @staticmethod
    def _unwrap_frame(parsed: Any) -> Any:
        """Return the DataFrame carried by a parse result.

        A non-tuple value is returned unchanged. For a tuple, the first
        element that is a ``pandas.DataFrame`` is returned, so both
        ``(df, meta)`` and ``(meta, df)`` layouts work. A tuple holding no
        DataFrame yields its first element (or the empty tuple itself), which
        ``to_mysql`` will then skip.
        """
        if not isinstance(parsed, tuple):
            return parsed
        for item in parsed:
            if isinstance(item, pd.DataFrame):
                return item
        return parsed[0] if parsed else parsed

    def print_dfs(self) -> None:
        """Print each DataFrame for each API"""
        for api, df in self.dfs.items():
            print(f"\n=== DataFrame for {api} ===")
            print(df)

    def to_mysql(self, db_url: str, if_exists: str = 'replace') -> None:
        """Write every stored DataFrame to the database as table ``<api>_data``.

        Args:
            db_url: SQLAlchemy connection URL passed to ``create_engine``.
            if_exists: forwarded to ``DataFrame.to_sql``.

        Entries that are not DataFrames (after tuple unwrapping) are skipped
        with a printed warning rather than raising.
        """
        engine = create_engine(db_url)
        try:
            for api, df in self.dfs.items():
                # Extract the DataFrame if the entry was stored as a tuple.
                df = self._unwrap_frame(df)

                # Confirm it's a DataFrame before saving
                if not isinstance(df, pd.DataFrame):
                    print(f"⚠️ Skipped {api}: not a valid DataFrame")
                    continue

                table_name = f"{api}_data"
                # A non-default index carries data (e.g. the row dates), so it
                # must be written; a plain RangeIndex is just row numbers and
                # is left out as before.
                write_index = not isinstance(df.index, pd.RangeIndex)
                df.to_sql(table_name, con=engine, if_exists=if_exists, index=write_index)
                print(f"✅ Dumped to table: {table_name}")
        finally:
            # Release pooled connections even when a write fails.
            engine.dispose()




# SECURITY: API keys are read from the environment instead of being committed
# in the notebook; any key that was ever committed here should be rotated.
config = {
    'polygon_api_key': os.environ.get('POLYGON_API_KEY'),
    'alpha_vantage_api_key': os.environ.get('ALPHA_VANTAGE_API_KEY'),
}
missing_keys = [name for name, value in config.items() if not value]
if missing_keys:
    raise RuntimeError(
        f"Missing API credentials for: {missing_keys}. "
        "Set POLYGON_API_KEY and ALPHA_VANTAGE_API_KEY in the environment."
    )

prompt = "Get daily open & close prices plus volume for AAPL over Jan 2025"

# Construction performs the API matching, fetching and parsing.
ing = Ingestor(config, api_endpoint_map, prompt)
ing.print_dfs()

# The target database can be overridden via INGESTOR_DB_URL; the default is
# the local development database.
db_url = os.environ.get(
    "INGESTOR_DB_URL",
    "mysql+mysqlconnector://root@localhost/Ingestor_dump",
)
ing.to_mysql(db_url)


 Prompt: Get daily open & close prices plus volume for AAPL over Jan 2025
 GPT Keywords: ['AAPL', 'Apple', 'stock', 'price', 'open', 'close', 'volume', 'January', '2025', 'daily', 'finance', 'data']

 endpoint Matches: {'finance': {'endpoint': 'FX_DAILY', 'score': np.float32(0.77), 'apis': ['alpha_vantage_api']}, 'daily': {'endpoint': 'TIME_SERIES_WEEKLY', 'score': np.float32(0.81), 'apis': ['alpha_vantage_api']}}

 Extracted Parameters: {'TIME_SERIES_WEEKLY': {'alpha_vantage_api': {'symbol': 'AAPL', 'datatype': 'json'}}}

=== DataFrame for alpha_vantage_api ===
              open      high       low   close    volume
1999-11-01   80.00   80.6900   77.3700   77.62   2487300
1999-11-02   78.00   81.6900   77.3100   80.25   3564600
1999-11-03   81.62   83.2500   81.0000   81.50   2932700
1999-11-04   82.06   85.3700   80.6200   83.62   3384700
1999-11-05   84.62   88.3700   84.0000   88.31   3721500
...            ...       ...       ...     ...       ...
2025-05-12  210.97  211.2679  2